blob: 28ed9b7a6d0353db063ab8802083d78e329a18ef [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""Base classes for server/gateway implementations"""
2
Guido van Rossum06a2dc72006-08-17 08:56:08 +00003from .util import FileWrapper, guess_scheme, is_hop_by_hop
4from .headers import Headers
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005
6import sys, os, time
7
# Names exported by 'from ... import *': the handler classes defined in
# this module, plus the 'read_environ' helper.
__all__ = [
    'BaseHandler',
    'SimpleHandler',
    'BaseCGIHandler',
    'CGIHandler',
    'IISCGIHandler',
    'read_environ',
]
Thomas Wouters0e3f5912006-08-11 14:57:12 +000012
# English weekday and month abbreviations for HTTP date/time formatting.
# They are deliberately hard-coded so the output never depends on locale.
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_monthname = [
    None,  # placeholder so the list can be indexed by 1-based month number
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]


def format_date_time(timestamp):
    """Return 'timestamp' rendered as an HTTP date string in GMT.

    'timestamp' is passed straight to 'time.gmtime()'.  The result looks
    like 'Thu, 01 Jan 1970 00:00:00 GMT'.
    """
    utc = time.gmtime(timestamp)
    weekday = _weekdayname[utc.tm_wday]
    month = _monthname[utc.tm_mon]
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        weekday, utc.tm_mday, month, utc.tm_year,
        utc.tm_hour, utc.tm_min, utc.tm_sec,
    )
24
# Membership test for the fixed set of environment variable names that
# 'read_environ()' treats as coming from the HTTP request.
_is_request = {
    'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
    'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
}.__contains__


def _needs_transcode(k):
    """Return True if environment key 'k' should be re-decoded.

    A key qualifies when it is one of the names accepted by '_is_request',
    when it starts with 'HTTP_' or 'SSL_', or when stripping any number of
    leading 'REDIRECT_' prefixes yields such a key.
    """
    while True:
        if _is_request(k) or k.startswith(('HTTP_', 'SSL_')):
            return True
        if not k.startswith('REDIRECT_'):
            return False
        # Drop one 'REDIRECT_' prefix (9 characters) and test the remainder.
        k = k[9:]
33
def read_environ():
    """Read environment, fixing HTTP variables

    Returns a new dict built from 'os.environ'.  Variables for which
    '_needs_transcode()' is true are re-encoded so that each character of
    the result stands for one byte (i.e. the text is decoded as
    ISO-8859-1); every other variable is copied unchanged.
    """
    fs_encoding = sys.getfilesystemencoding()

    # Use surrogate escapes where the error handler exists (Python 3.1+),
    # otherwise fall back to 'replace'.
    try:
        ''.encode('utf-8', 'surrogateescape')
    except LookupError:
        error_handler = 'replace'
    else:
        error_handler = 'surrogateescape'

    result = {}

    # Take the basic environment from native-unicode os.environ.  Attempt to
    # fix up the variables that come from the HTTP request to compensate for
    # the bytes->unicode decoding step that will already have taken place.
    for name, value in os.environ.items():
        if not _needs_transcode(name):
            result[name] = value
            continue

        if sys.platform != 'win32':
            # Recover the bytes from the unicode environ.
            raw = value.encode(fs_encoding, error_handler)
            result[name] = raw.decode('iso-8859-1')
            continue

        # On win32, the os.environ is natively Unicode.  Different servers
        # decode the request bytes using different encodings.
        server = os.environ.get('SERVER_SOFTWARE', '').lower()

        if server.startswith('apache/'):
            # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1)
            # direct to the Unicode environ.  No modification needed.
            pass
        elif server.startswith('microsoft-iis/') or (
            server.startswith('simplehttp/') and 'python/3' in server
        ):
            # IIS decodes the request as UTF-8 whenever the input is valid
            # UTF-8, and otherwise as the system code page (mbcs) with no
            # way to detect which happened; UTF-8 is the more likely
            # encoding, so always recreate the original bytes as UTF-8.
            # Python 3's http.server.CGIHTTPRequestHandler likewise decodes
            # using the urllib.unquote default of UTF-8.
            value = value.encode('utf-8').decode('iso-8859-1')
        else:
            # For other servers, guess that they have written bytes to
            # the environ using stdio byte-oriented interfaces, ending up
            # with the system code page.
            value = value.encode(fs_encoding, 'replace').decode('iso-8859-1')

        result[name] = value

    return result
92
Thomas Wouters0e3f5912006-08-11 14:57:12 +000093
class BaseHandler:
    """Manage the invocation of a WSGI application

    This class is abstract: '_write()', '_flush()', 'get_stdin()',
    'get_stderr()' and 'add_cgi_vars()' raise NotImplementedError here and
    must be provided by a subclass.  Call 'run(application)' to handle one
    request.
    """

    # Configuration parameters; can override per-subclass or per-instance
    wsgi_version = (1,0)
    wsgi_multithread = True
    wsgi_multiprocess = True
    wsgi_run_once = False

    origin_server = True    # We are transmitting direct to client
    http_version  = "1.0"   # Version that should be used for response
    server_software = None  # String name of server software, if any

    # os_environ is used to supply configuration from the OS environment:
    # by default it's a copy of 'os.environ' as of import time, but you can
    # override this in e.g. your __init__ method.
    os_environ = read_environ()

    # Collaborator classes
    wsgi_file_wrapper = FileWrapper     # set to None to disable
    headers_class = Headers             # must be a Headers-like class

    # Error handling (also per-subclass or per-instance)
    traceback_limit = None  # Print entire traceback to self.get_stderr()
    error_status = "500 Internal Server Error"
    error_headers = [('Content-Type','text/plain')]
    error_body = b"A server error occurred. Please contact the administrator."

    # State variables (don't mess with these)
    status = result = None
    headers_sent = False
    headers = None
    bytes_sent = 0

    def run(self, application):
        """Invoke the application

        Sets up the environment, calls 'application' and sends its result.
        Any exception is passed to 'handle_error()'; if that fails as well,
        the handler is closed and the exception propagates to the caller.
        """
        # Note to self: don't move the close()!  Asynchronous servers shouldn't
        # call close() from finish_response(), so if you close() anywhere but
        # the double-error branch here, you'll break asynchronous servers by
        # prematurely closing.  Async servers must return from 'run()' without
        # closing if there might still be output to iterate over.
        try:
            self.setup_environ()
            self.result = application(self.environ, self.start_response)
            self.finish_response()
        except:
            try:
                self.handle_error()
            except:
                # If we get an error handling an error, just give up already!
                self.close()
                raise   # ...and let the actual server figure it out.

    def setup_environ(self):
        """Set up the environment for one request"""

        # Start from a copy so per-request keys never leak into os_environ.
        env = self.environ = self.os_environ.copy()
        self.add_cgi_vars()

        env['wsgi.input']        = self.get_stdin()
        env['wsgi.errors']       = self.get_stderr()
        env['wsgi.version']      = self.wsgi_version
        env['wsgi.run_once']     = self.wsgi_run_once
        env['wsgi.url_scheme']   = self.get_scheme()
        env['wsgi.multithread']  = self.wsgi_multithread
        env['wsgi.multiprocess'] = self.wsgi_multiprocess

        if self.wsgi_file_wrapper is not None:
            env['wsgi.file_wrapper'] = self.wsgi_file_wrapper

        if self.origin_server and self.server_software:
            env.setdefault('SERVER_SOFTWARE',self.server_software)

    def finish_response(self):
        """Send any iterable data, then close self and the iterable

        Subclasses intended for use in asynchronous servers will
        want to redefine this method, such that it sets up callbacks
        in the event loop to iterate over the data, and to call
        'self.close()' once the response is finished.
        """
        try:
            if not self.result_is_file() or not self.sendfile():
                for data in self.result:
                    self.write(data)
                self.finish_content()
        except:
            # Only close the application's iterable here.  A full
            # 'self.close()' would reset 'environ', 'headers' and
            # 'headers_sent', which 'handle_error()' still needs in order
            # to decide whether (and how) to send an error response.
            if hasattr(self.result, 'close'):
                self.result.close()
            raise
        else:
            # The response completed normally, so it is safe to reset
            # all per-request state.
            self.close()

    def get_scheme(self):
        """Return the URL scheme being used"""
        return guess_scheme(self.environ)

    def set_content_length(self):
        """Compute Content-Length or switch to chunked encoding if possible"""
        try:
            blocks = len(self.result)
        except (TypeError,AttributeError,NotImplementedError):
            # The result has no usable length (e.g. it is a generator).
            pass
        else:
            if blocks==1:
                # A single block: the bytes of the first write are the
                # whole body.
                self.headers['Content-Length'] = str(self.bytes_sent)
                return
        # XXX Try for chunked encoding if origin server and client is 1.1

    def cleanup_headers(self):
        """Make any necessary header changes or defaults

        Subclasses can extend this to add other defaults.
        """
        if 'Content-Length' not in self.headers:
            self.set_content_length()

    def start_response(self, status, headers,exc_info=None):
        """'start_response()' callable as specified by PEP 3333

        Stores 'status' and 'headers' (wrapped in 'headers_class') on the
        handler and returns the 'write()' callable.  If 'exc_info' is given
        and headers have already been sent, the original exception is
        re-raised.  Calling this twice without 'exc_info' raises
        AssertionError.
        """

        if exc_info:
            try:
                if self.headers_sent:
                    # Re-raise the original exception instance.  Building a
                    # new one from exc_info[0] would lose the instance and
                    # fail for exception classes whose constructor needs
                    # other arguments.
                    raise exc_info[1].with_traceback(exc_info[2])
            finally:
                exc_info = None        # avoid dangling circular ref
        elif self.headers is not None:
            raise AssertionError("Headers already set!")

        self.status = status
        self.headers = self.headers_class(headers)
        status = self._convert_string_type(status, "Status")
        assert len(status)>=4,"Status must be at least 4 characters"
        assert status[:3].isdigit(), "Status message must begin w/3-digit code"
        assert status[3]==" ", "Status message must have a space after code"

        if __debug__:
            for name, val in headers:
                name = self._convert_string_type(name, "Header name")
                val = self._convert_string_type(val, "Header value")
                assert not is_hop_by_hop(name),\
                       f"Hop-by-hop header, '{name}: {val}', not allowed"

        return self.write

    def _convert_string_type(self, value, title):
        """Convert/check value type.

        Returns 'value' unchanged if it is exactly of type 'str', otherwise
        raises AssertionError using 'title' to describe the value.
        """
        if type(value) is str:
            return value
        raise AssertionError(
            "{0} must be of type str (got {1})".format(title, repr(value))
        )

    def send_preamble(self):
        """Transmit version/status/date/server, via self._write()"""
        if self.origin_server:
            if self.client_is_modern():
                self._write(('HTTP/%s %s\r\n' % (self.http_version,self.status)).encode('iso-8859-1'))
                if 'Date' not in self.headers:
                    self._write(
                        ('Date: %s\r\n' % format_date_time(time.time())).encode('iso-8859-1')
                    )
                if self.server_software and 'Server' not in self.headers:
                    self._write(('Server: %s\r\n' % self.server_software).encode('iso-8859-1'))
        else:
            # Not an origin server: emit a CGI-style 'Status:' line instead.
            self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))

    def write(self, data):
        """'write()' callable as specified by PEP 3333

        Sends the stored headers before the first block of output, then
        writes and flushes 'data'.  Raises AssertionError if called before
        'start_response()'.
        """

        assert type(data) is bytes, \
            "write() argument must be a bytes instance"

        if not self.status:
            raise AssertionError("write() before start_response()")

        elif not self.headers_sent:
            # Before the first output, send the stored headers
            self.bytes_sent = len(data)    # make sure we know content-length
            self.send_headers()
        else:
            self.bytes_sent += len(data)

        # XXX check Content-Length and truncate if too many bytes written?
        self._write(data)
        self._flush()

    def sendfile(self):
        """Platform-specific file transmission

        Override this method in subclasses to support platform-specific
        file transmission.  It is only called if the application's
        return iterable ('self.result') is an instance of
        'self.wsgi_file_wrapper'.

        This method should return a true value if it was able to actually
        transmit the wrapped file-like object using a platform-specific
        approach.  It should return a false value if normal iteration
        should be used instead.  An exception can be raised to indicate
        that transmission was attempted, but failed.

        NOTE: this method should call 'self.send_headers()' if
        'self.headers_sent' is false and it is going to attempt direct
        transmission of the file.
        """
        return False   # No platform-specific transmission by default

    def finish_content(self):
        """Ensure headers and content have both been sent"""
        if not self.headers_sent:
            # Only zero Content-Length if not set by the application (so
            # that HEAD requests can be satisfied properly, see #3839)
            self.headers.setdefault('Content-Length', "0")
            self.send_headers()
        else:
            pass # XXX check if content-length was too short?

    def close(self):
        """Close the iterable (if needed) and reset all instance vars

        Subclasses may want to also drop the client connection.
        """
        try:
            if hasattr(self.result,'close'):
                self.result.close()
        finally:
            # Reset state even if the iterable's close() raised.
            self.result = self.headers = self.status = self.environ = None
            self.bytes_sent = 0; self.headers_sent = False

    def send_headers(self):
        """Transmit headers to the client, via self._write()"""
        self.cleanup_headers()
        self.headers_sent = True
        if not self.origin_server or self.client_is_modern():
            self.send_preamble()
            self._write(bytes(self.headers))

    def result_is_file(self):
        """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
        wrapper = self.wsgi_file_wrapper
        return wrapper is not None and isinstance(self.result,wrapper)

    def client_is_modern(self):
        """True if client can accept status and headers"""
        return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'

    def log_exception(self,exc_info):
        """Log the 'exc_info' tuple in the server log

        Subclasses may override to retarget the output or change its format.
        """
        try:
            from traceback import print_exception
            stderr = self.get_stderr()
            print_exception(
                exc_info[0], exc_info[1], exc_info[2],
                self.traceback_limit, stderr
            )
            stderr.flush()
        finally:
            exc_info = None

    def handle_error(self):
        """Log current error, and send error output to client if possible"""
        self.log_exception(sys.exc_info())
        if not self.headers_sent:
            self.result = self.error_output(self.environ, self.start_response)
            self.finish_response()
        # XXX else: attempt advanced recovery techniques for HTML or text?

    def error_output(self, environ, start_response):
        """WSGI mini-app to create error output

        By default, this just uses the 'error_status', 'error_headers',
        and 'error_body' attributes to generate an output page.  It can
        be overridden in a subclass to dynamically generate diagnostics,
        choose an appropriate message for the user's preferred language, etc.

        Note, however, that it's not recommended from a security perspective to
        spit out diagnostics to any old user; ideally, you should have to do
        something special to enable diagnostic output, which is why we don't
        include any here!
        """
        # Pass a copy of the headers so the class-level list is not shared.
        start_response(self.error_status,self.error_headers[:],sys.exc_info())
        return [self.error_body]

    # Pure abstract methods; *must* be overridden in subclasses

    def _write(self,data):
        """Override in subclass to buffer data for send to client

        It's okay if this method actually transmits the data; BaseHandler
        just separates write and flush operations for greater efficiency
        when the underlying system actually has such a distinction.
        """
        raise NotImplementedError

    def _flush(self):
        """Override in subclass to force sending of recent '_write()' calls

        It's okay if this method is a no-op (i.e., if '_write()' actually
        sends the data).
        """
        raise NotImplementedError

    def get_stdin(self):
        """Override in subclass to return suitable 'wsgi.input'"""
        raise NotImplementedError

    def get_stderr(self):
        """Override in subclass to return suitable 'wsgi.errors'"""
        raise NotImplementedError

    def add_cgi_vars(self):
        """Override in subclass to insert CGI variables in 'self.environ'"""
        raise NotImplementedError
419
420
class SimpleHandler(BaseHandler):
    """Handler that's just initialized with streams, environment, etc.

    This handler subclass is intended for synchronous HTTP/1.0 origin servers,
    and handles sending the entire response output, given the correct inputs.

    Usage::

        handler = SimpleHandler(
            inp,out,err,env, multithread=False, multiprocess=True
        )
        handler.run(app)"""

    def __init__(self, stdin, stdout, stderr, environ,
                 multithread=True, multiprocess=False):
        # Streams and the base environment are stored as given; the two
        # flags override the class-level 'wsgi_*' defaults per instance.
        self.stdin, self.stdout, self.stderr = stdin, stdout, stderr
        self.base_env = environ
        self.wsgi_multithread = multithread
        self.wsgi_multiprocess = multiprocess

    def get_stdin(self):
        """Return the input stream given to the constructor"""
        return self.stdin

    def get_stderr(self):
        """Return the error stream given to the constructor"""
        return self.stderr

    def add_cgi_vars(self):
        """Merge the environment given to the constructor into 'self.environ'"""
        self.environ.update(self.base_env)

    def _write(self, data):
        """Write 'data' to 'self.stdout', retrying after partial writes

        A 'write()' that reports fewer bytes than requested triggers a
        DeprecationWarning, after which the remainder is written in a loop.
        """
        written = self.stdout.write(data)
        if written is not None and written != len(data):
            import warnings
            warnings.warn(
                "SimpleHandler.stdout.write() should not do partial writes",
                DeprecationWarning)
            remaining = data[written:]
            while remaining:
                written = self.stdout.write(remaining)
                remaining = remaining[written:]

    def _flush(self):
        """Flush 'self.stdout'

        After the first successful call, the stream's own 'flush' is bound
        on the instance so later calls bypass this method.
        """
        flush = self.stdout.flush
        flush()
        self._flush = flush
469
470
class BaseCGIHandler(SimpleHandler):
    """CGI-like systems using input/output/error streams and environ mapping

    Usage::

        handler = BaseCGIHandler(inp,out,err,env)
        handler.run(app)

    Use this handler for gateway protocols such as ReadyExec and FastCGI
    that provide usable input/output/error streams plus an environment
    mapping.  CGIHandler builds on it, simply plugging in sys.stdin,
    os.environ, and so on.

    Besides the streams and environment, the constructor accepts the
    keyword arguments 'multithread' (default 'True') and 'multiprocess'
    (default 'False'), which control the configuration sent to the
    application.  The class sets 'origin_server' to False to enable
    CGI-like output, and assumes that 'wsgi.run_once' is False.
    """

    # Not an origin server: the response starts with a 'Status:' header
    # rather than an HTTP status line.
    origin_server = False
493
494
class CGIHandler(BaseCGIHandler):
    """CGI-based invocation via sys.stdin/stdout/stderr and os.environ

    Usage::

        CGIHandler().run(app)

    Unlike BaseCGIHandler, this class always uses 'wsgi.run_once' of
    'True', 'wsgi.multithread' of 'False', and 'wsgi.multiprocess' of
    'True'.  Its constructor takes no parameters; it always uses
    'sys.stdin', 'os.environ', and friends.

    If you need to override any of these parameters, use BaseCGIHandler
    instead.
    """

    wsgi_run_once = True
    # Do not allow os.environ to leak between requests in Google App Engine
    # and other multi-run CGI use cases.  This is not easily testable.
    # See http://bugs.python.org/issue7250
    os_environ = {}

    def __init__(self):
        # Binary I/O goes through the underlying buffers of the text-mode
        # standard streams; the environment is read afresh per instance.
        streams = (sys.stdin.buffer, sys.stdout.buffer, sys.stderr)
        BaseCGIHandler.__init__(
            self, *streams, read_environ(),
            multithread=False, multiprocess=True
        )
523
524
class IISCGIHandler(BaseCGIHandler):
    """CGI-based invocation with workaround for IIS path bug

    Prefer this handler over CGIHandler when deploying on Microsoft IIS
    where the config allowPathInfo option (IIS>=7) or the metabase
    allowPathInfoForScriptMappings option (IIS<7) has not been set.
    """
    wsgi_run_once = True
    os_environ = {}

    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    # the front, causing problems for WSGI applications that wish to implement
    # routing. This handler strips any such duplicated path.

    # IIS can be configured to pass the correct PATH_INFO, but this causes
    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    # setting can only be made on a vhost level, affecting all other script
    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    # rarely uses it because there is still no UI for it.)

    # There is no way for CGI code to tell whether the option was set, so a
    # separate handler class is provided.
    def __init__(self):
        env = read_environ()
        script_name = env.get('SCRIPT_NAME', '')
        path_info = env.get('PATH_INFO', '')
        # Compare with a trailing '/' on both sides so that only a whole
        # leading path segment match counts as a duplicated SCRIPT_NAME.
        duplicated = (path_info + '/').startswith(script_name + '/')
        if duplicated:
            env['PATH_INFO'] = path_info[len(script_name):]
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            env, multithread=False, multiprocess=True
        )