# Source blob: acb35479abe924e2d7406d7364a1a3f740990a46 (repository viewer header)
"""Base classes for server/gateway implementations"""

import os
import sys
import time
import warnings

from .headers import Headers
from .util import FileWrapper, guess_scheme, is_hop_by_hop

# Public API of this module (names exported by 'from ... import *').
__all__ = [
    'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
    'IISCGIHandler', 'read_environ',
]

# Weekday and month names for HTTP date/time formatting; always English!
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_monthname = [None,  # Dummy so we can use 1-based month numbers
              "Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]


def format_date_time(timestamp):
    """Return *timestamp* formatted as an HTTP date string in GMT.

    *timestamp* is passed straight to 'time.gmtime()'.  The weekday and
    month names come from the fixed English tables above rather than from
    the current locale, e.g. ``"Thu, 01 Jan 1970 00:00:00 GMT"``.
    """
    moment = time.gmtime(timestamp)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        _weekdayname[moment.tm_wday], moment.tm_mday,
        _monthname[moment.tm_mon], moment.tm_year,
        moment.tm_hour, moment.tm_min, moment.tm_sec,
    )

Phillip J. Ebyb6d4a8e2010-11-03 22:39:01 +000025_is_request = {
26 'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
27 'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
28}.__contains__
29
30def _needs_transcode(k):
31 return _is_request(k) or k.startswith('HTTP_') or k.startswith('SSL_') \
32 or (k.startswith('REDIRECT_') and _needs_transcode(k[9:]))
33
def read_environ():
    """Read environment, fixing HTTP variables.

    Returns a new dict built from 'os.environ'.  Values whose names
    satisfy '_needs_transcode()' are re-encoded so that each character
    stands for one original byte (i.e. decoded as ISO-8859-1); all other
    values are copied unchanged.
    """
    enc = sys.getfilesystemencoding()
    esc = 'surrogateescape'
    try:
        ''.encode('utf-8', esc)
    except LookupError:
        # Error handler not available: fall back to lossy replacement.
        esc = 'replace'

    # Both values are the same for every variable, so compute them once
    # instead of on each loop iteration.
    on_win32 = sys.platform == 'win32'
    software = os.environ.get('SERVER_SOFTWARE', '').lower() if on_win32 else ''

    environ = {}

    # Take the basic environment from native-unicode os.environ. Attempt to
    # fix up the variables that come from the HTTP request to compensate for
    # the bytes->unicode decoding step that will already have taken place.
    for k, v in os.environ.items():
        if _needs_transcode(k):

            # On win32, the os.environ is natively Unicode. Different servers
            # decode the request bytes using different encodings.
            if on_win32:

                # On IIS, the HTTP request will be decoded as UTF-8 as long
                # as the input is a valid UTF-8 sequence. Otherwise it is
                # decoded using the system code page (mbcs), with no way to
                # detect this has happened. Because UTF-8 is the more likely
                # encoding, and mbcs is inherently unreliable (an mbcs string
                # that happens to be valid UTF-8 will not be decoded as mbcs)
                # always recreate the original bytes as UTF-8.
                if software.startswith('microsoft-iis/'):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1)
                # direct to the Unicode environ. No modification needed.
                elif software.startswith('apache/'):
                    pass

                # Python 3's http.server.CGIHTTPRequestHandler decodes
                # using the urllib.unquote default of UTF-8, amongst other
                # issues.
                elif (
                    software.startswith('simplehttp/')
                    and 'python/3' in software
                ):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # For other servers, guess that they have written bytes to
                # the environ using stdio byte-oriented interfaces, ending up
                # with the system code page.
                else:
                    v = v.encode(enc, 'replace').decode('iso-8859-1')

            # Recover bytes from unicode environ, using surrogate escapes
            # where available (Python 3.1+).
            else:
                v = v.encode(enc, esc).decode('iso-8859-1')

        environ[k] = v
    return environ


class BaseHandler:
    """Manage the invocation of a WSGI application

    'run()' drives a single request: it builds the environ, calls the
    application, streams the result through 'write()' and falls back to
    'handle_error()' if anything raises.  The transport itself is left to
    subclasses, which must override '_write()', '_flush()', 'get_stdin()',
    'get_stderr()' and 'add_cgi_vars()'.
    """

    # Configuration parameters; can override per-subclass or per-instance
    wsgi_version = (1, 0)
    wsgi_multithread = True
    wsgi_multiprocess = True
    wsgi_run_once = False

    origin_server = True    # We are transmitting direct to client
    http_version = "1.0"    # Version that should be used for response
    server_software = None  # String name of server software, if any

    # os_environ is used to supply configuration from the OS environment:
    # by default it's a copy of 'os.environ' as of import time, but you can
    # override this in e.g. your __init__ method.
    os_environ = read_environ()

    # Collaborator classes
    wsgi_file_wrapper = FileWrapper     # set to None to disable
    headers_class = Headers             # must be a Headers-like class

    # Error handling (also per-subclass or per-instance)
    traceback_limit = None  # Print entire traceback to self.get_stderr()
    error_status = "500 Internal Server Error"
    error_headers = [('Content-Type', 'text/plain')]
    error_body = b"A server error occurred. Please contact the administrator."

    # State variables (don't mess with these)
    status = result = None
    headers_sent = False
    headers = None
    bytes_sent = 0

    def run(self, application):
        """Invoke the application"""
        # Note to self: don't move the close()!  Asynchronous servers shouldn't
        # call close() from finish_response(), so if you close() anywhere but
        # the double-error branch here, you'll break asynchronous servers by
        # prematurely closing.  Async servers must return from 'run()' without
        # closing if there might still be output to iterate over.
        try:
            self.setup_environ()
            self.result = application(self.environ, self.start_response)
            self.finish_response()
        except:
            # Deliberately broad: whatever went wrong is handed to
            # handle_error(), which logs it and tries to send an error page.
            try:
                self.handle_error()
            except:
                # If we get an error handling an error, just give up already!
                self.close()
                raise   # ...and let the actual server figure it out.

    def setup_environ(self):
        """Set up the environment for one request"""

        env = self.environ = self.os_environ.copy()
        self.add_cgi_vars()

        env['wsgi.input'] = self.get_stdin()
        env['wsgi.errors'] = self.get_stderr()
        env['wsgi.version'] = self.wsgi_version
        env['wsgi.run_once'] = self.wsgi_run_once
        env['wsgi.url_scheme'] = self.get_scheme()
        env['wsgi.multithread'] = self.wsgi_multithread
        env['wsgi.multiprocess'] = self.wsgi_multiprocess

        if self.wsgi_file_wrapper is not None:
            env['wsgi.file_wrapper'] = self.wsgi_file_wrapper

        if self.origin_server and self.server_software:
            env.setdefault('SERVER_SOFTWARE', self.server_software)

    def finish_response(self):
        """Send any iterable data, then close self and the iterable

        Subclasses intended for use in asynchronous servers will
        want to redefine this method, such that it sets up callbacks
        in the event loop to iterate over the data, and to call
        'self.close()' once the response is finished.
        """
        try:
            if not self.result_is_file() or not self.sendfile():
                for data in self.result:
                    self.write(data)
                self.finish_content()
        except:
            # Only close the application's iterable here.  A full close()
            # would reset 'environ', 'headers_sent' and friends, which
            # handle_error() still needs in order to emit the error page.
            if hasattr(self.result, 'close'):
                self.result.close()
            raise
        else:
            # Success: safe to close the iterable and reset all state.
            self.close()

    def get_scheme(self):
        """Return the URL scheme being used"""
        return guess_scheme(self.environ)

    def set_content_length(self):
        """Compute Content-Length or switch to chunked encoding if possible"""
        try:
            blocks = len(self.result)
        except (TypeError, AttributeError, NotImplementedError):
            # Result has no usable length; leave the header unset.
            pass
        else:
            if blocks == 1:
                # Single-block result: the bytes recorded by the first
                # write() are the whole body.
                self.headers['Content-Length'] = str(self.bytes_sent)
                return
        # XXX Try for chunked encoding if origin server and client is 1.1

    def cleanup_headers(self):
        """Make any necessary header changes or defaults

        Subclasses can extend this to add other defaults.
        """
        if 'Content-Length' not in self.headers:
            self.set_content_length()

    def start_response(self, status, headers, exc_info=None):
        """'start_response()' callable as specified by PEP 3333

        Stores *status* and wraps *headers* in 'self.headers_class', then
        returns 'self.write'.  Raises AssertionError if headers were already
        set and no *exc_info* is given, or if *status* / *headers* fail the
        checks below.  If *exc_info* is given after headers have been sent,
        the original exception is re-raised.
        """

        if exc_info:
            try:
                if self.headers_sent:
                    # Re-raise original exception if headers sent.  Reuse
                    # the instance itself: rebuilding it from its type can
                    # fail for exceptions with custom constructors.
                    exc = exc_info[1]
                    if not isinstance(exc, BaseException):
                        # Hand-built tuple carrying only a value/message.
                        exc = exc_info[0](exc)
                    raise exc.with_traceback(exc_info[2])
            finally:
                exc_info = None        # avoid dangling circular ref
        elif self.headers is not None:
            raise AssertionError("Headers already set!")

        self.status = status
        self.headers = self.headers_class(headers)
        status = self._convert_string_type(status, "Status")
        assert len(status) >= 4, "Status must be at least 4 characters"
        assert status[:3].isdigit(), "Status message must begin w/3-digit code"
        assert status[3] == " ", "Status message must have a space after code"

        if __debug__:
            for name, val in headers:
                name = self._convert_string_type(name, "Header name")
                val = self._convert_string_type(val, "Header value")
                assert not is_hop_by_hop(name), "Hop-by-hop headers not allowed"

        return self.write

    def _convert_string_type(self, value, title):
        """Convert/check value type.

        Returns *value* unchanged if it is exactly a 'str'; otherwise raises
        AssertionError naming *title*.
        """
        if type(value) is str:
            return value
        raise AssertionError(
            "{0} must be of type str (got {1})".format(title, repr(value))
        )

    def send_preamble(self):
        """Transmit version/status/date/server, via self._write()"""
        if self.origin_server:
            if self.client_is_modern():
                self._write(
                    ('HTTP/%s %s\r\n' % (self.http_version, self.status))
                    .encode('iso-8859-1')
                )
                if 'Date' not in self.headers:
                    self._write(
                        ('Date: %s\r\n' % format_date_time(time.time()))
                        .encode('iso-8859-1')
                    )
                if self.server_software and 'Server' not in self.headers:
                    self._write(
                        ('Server: %s\r\n' % self.server_software)
                        .encode('iso-8859-1')
                    )
        else:
            # Not an origin server: emit a CGI-style 'Status:' line instead.
            self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))

    def write(self, data):
        """'write()' callable as specified by PEP 3333"""

        assert type(data) is bytes, \
            "write() argument must be a bytes instance"

        if not self.status:
            raise AssertionError("write() before start_response()")

        elif not self.headers_sent:
            # Before the first output, send the stored headers
            self.bytes_sent = len(data)    # make sure we know content-length
            self.send_headers()
        else:
            self.bytes_sent += len(data)

        # XXX check Content-Length and truncate if too many bytes written?
        self._write(data)
        self._flush()

    def sendfile(self):
        """Platform-specific file transmission

        Override this method in subclasses to support platform-specific
        file transmission.  It is only called if the application's
        return iterable ('self.result') is an instance of
        'self.wsgi_file_wrapper'.

        This method should return a true value if it was able to actually
        transmit the wrapped file-like object using a platform-specific
        approach.  It should return a false value if normal iteration
        should be used instead.  An exception can be raised to indicate
        that transmission was attempted, but failed.

        NOTE: this method should call 'self.send_headers()' if
        'self.headers_sent' is false and it is going to attempt direct
        transmission of the file.
        """
        return False   # No platform-specific transmission by default

    def finish_content(self):
        """Ensure headers and content have both been sent"""
        if not self.headers_sent:
            # Only zero Content-Length if not set by the application (so
            # that HEAD requests can be satisfied properly, see #3839)
            self.headers.setdefault('Content-Length', "0")
            self.send_headers()
        else:
            pass    # XXX check if content-length was too short?

    def close(self):
        """Close the iterable (if needed) and reset all instance vars

        Subclasses may want to also drop the client connection.
        """
        try:
            if hasattr(self.result, 'close'):
                self.result.close()
        finally:
            # Reset state even if the iterable's close() raised.
            self.result = self.headers = self.status = self.environ = None
            self.bytes_sent = 0
            self.headers_sent = False

    def send_headers(self):
        """Transmit headers to the client, via self._write()"""
        self.cleanup_headers()
        self.headers_sent = True
        if not self.origin_server or self.client_is_modern():
            self.send_preamble()
            self._write(bytes(self.headers))

    def result_is_file(self):
        """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
        wrapper = self.wsgi_file_wrapper
        return wrapper is not None and isinstance(self.result, wrapper)

    def client_is_modern(self):
        """True if client can accept status and headers"""
        return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'

    def log_exception(self, exc_info):
        """Log the 'exc_info' tuple in the server log

        Subclasses may override to retarget the output or change its format.
        """
        try:
            from traceback import print_exception
            stderr = self.get_stderr()
            print_exception(
                exc_info[0], exc_info[1], exc_info[2],
                self.traceback_limit, stderr
            )
            stderr.flush()
        finally:
            exc_info = None     # drop the local reference to the traceback

    def handle_error(self):
        """Log current error, and send error output to client if possible"""
        self.log_exception(sys.exc_info())
        if not self.headers_sent:
            self.result = self.error_output(self.environ, self.start_response)
            self.finish_response()
        # XXX else: attempt advanced recovery techniques for HTML or text?

    def error_output(self, environ, start_response):
        """WSGI mini-app to create error output

        By default, this just uses the 'error_status', 'error_headers',
        and 'error_body' attributes to generate an output page.  It can
        be overridden in a subclass to dynamically generate diagnostics,
        choose an appropriate message for the user's preferred language, etc.

        Note, however, that it's not recommended from a security perspective to
        spit out diagnostics to any old user; ideally, you should have to do
        something special to enable diagnostic output, which is why we don't
        include any here!
        """
        # Pass a copy of the header list so the class-level default is
        # never shared with the Headers object.
        start_response(self.error_status, self.error_headers[:], sys.exc_info())
        return [self.error_body]

    # Pure abstract methods; *must* be overridden in subclasses

    def _write(self, data):
        """Override in subclass to buffer data for send to client

        It's okay if this method actually transmits the data; BaseHandler
        just separates write and flush operations for greater efficiency
        when the underlying system actually has such a distinction.
        """
        raise NotImplementedError

    def _flush(self):
        """Override in subclass to force sending of recent '_write()' calls

        It's okay if this method is a no-op (i.e., if '_write()' actually
        sends the data).
        """
        raise NotImplementedError

    def get_stdin(self):
        """Override in subclass to return suitable 'wsgi.input'"""
        raise NotImplementedError

    def get_stderr(self):
        """Override in subclass to return suitable 'wsgi.errors'"""
        raise NotImplementedError

    def add_cgi_vars(self):
        """Override in subclass to insert CGI variables in 'self.environ'"""
        raise NotImplementedError


class SimpleHandler(BaseHandler):
    """Handler that's just initialized with streams, environment, etc.

    This handler subclass is intended for synchronous HTTP/1.0 origin servers,
    and handles sending the entire response output, given the correct inputs.

    Usage::

        handler = SimpleHandler(
            inp,out,err,env, multithread=False, multiprocess=True
        )
        handler.run(app)"""

    def __init__(self, stdin, stdout, stderr, environ,
                 multithread=True, multiprocess=False):
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.base_env = environ
        self.wsgi_multithread = multithread
        self.wsgi_multiprocess = multiprocess

    def get_stdin(self):
        """Return the input stream given to the constructor."""
        return self.stdin

    def get_stderr(self):
        """Return the error stream given to the constructor."""
        return self.stderr

    def add_cgi_vars(self):
        """Merge the environ mapping given to the constructor into 'self.environ'."""
        self.environ.update(self.base_env)

    def _write(self, data):
        """Write all of *data* to 'self.stdout'.

        If 'stdout.write()' reports that it accepted fewer bytes than it
        was given, a DeprecationWarning is issued and the remainder is
        written in further calls, so the response is not truncated.  A
        return value of None is taken to mean everything was accepted.
        """
        written = self.stdout.write(data)
        if written is None or written == len(data):
            return
        warnings.warn(
            "SimpleHandler.stdout.write() should not do partial writes",
            DeprecationWarning,
        )
        remaining = data[written:]
        while remaining:
            written = self.stdout.write(remaining)
            if written is None:
                # Stream does not report a count; treat as fully accepted.
                break
            remaining = remaining[written:]

    def _flush(self):
        """Flush 'self.stdout'."""
        self.stdout.flush()
        # Rebind on the instance so later calls go straight to the stream's
        # own flush() without passing through this method again.
        self._flush = self.stdout.flush


class BaseCGIHandler(SimpleHandler):

    """CGI-like systems using input/output/error streams and environ mapping

    Usage::

        handler = BaseCGIHandler(inp,out,err,env)
        handler.run(app)

    This handler class is useful for gateway protocols like ReadyExec and
    FastCGI, that have usable input/output/error streams and an environment
    mapping.  It's also the base class for CGIHandler, which just uses
    sys.stdin, os.environ, and so on.

    The constructor also takes keyword arguments 'multithread' and
    'multiprocess' (defaulting to 'True' and 'False' respectively) to control
    the configuration sent to the application.  It sets 'origin_server' to
    False (to enable CGI-like output), and assumes that 'wsgi.run_once' is
    False.
    """

    # Not an origin server: the preamble is a 'Status:' line, not 'HTTP/x.y'.
    origin_server = False


class CGIHandler(BaseCGIHandler):

    """CGI-based invocation via sys.stdin/stdout/stderr and os.environ

    Usage::

        CGIHandler().run(app)

    The difference between this class and BaseCGIHandler is that it always
    uses 'wsgi.run_once' of 'True', 'wsgi.multithread' of 'False', and
    'wsgi.multiprocess' of 'True'.  It does not take any initialization
    parameters, but always uses 'sys.stdin', 'os.environ', and friends.

    If you need to override any of these parameters, use BaseCGIHandler
    instead.
    """

    wsgi_run_once = True
    # Do not allow os.environ to leak between requests in Google App Engine
    # and other multi-run CGI use cases.  This is not easily testable.
    # See http://bugs.python.org/issue7250
    os_environ = {}

    def __init__(self):
        # Use the binary layers of stdin/stdout; the environ is read fresh
        # at construction time rather than taken from the class attribute.
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            read_environ(), multithread=False, multiprocess=True
        )


class IISCGIHandler(BaseCGIHandler):
    """CGI-based invocation with workaround for IIS path bug

    This handler should be used in preference to CGIHandler when deploying on
    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
    or metabase allowPathInfoForScriptMappings (IIS<7).
    """
    wsgi_run_once = True
    os_environ = {}

    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    # the front, causing problems for WSGI applications that wish to implement
    # routing. This handler strips any such duplicated path.

    # IIS can be configured to pass the correct PATH_INFO, but this causes
    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    # setting can only be made on a vhost level, affecting all other script
    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    # rarely uses it because there is still no UI for it.)

    # There is no way for CGI code to tell whether the option was set, so a
    # separate handler class is provided.
    def __init__(self):
        environ = read_environ()
        path = environ.get('PATH_INFO', '')
        script = environ.get('SCRIPT_NAME', '')
        # Compare with a trailing '/' on both sides so that only a whole
        # leading path segment match counts ('/app' must not match
        # '/application').
        if (path + '/').startswith(script + '/'):
            environ['PATH_INFO'] = path[len(script):]
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            environ, multithread=False, multiprocess=True
        )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000547 )