blob: 834073d50091e033ab232e95bfdd2394f9502fbb [file] [log] [blame]
"""Base classes for server/gateway implementations"""
2
import codecs
import os
import sys
import time

from .headers import Headers
from .util import FileWrapper, guess_scheme, is_hop_by_hop
7
# Public API of this module: the names exported by a star import.
__all__ = [
    'BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler',
    'IISCGIHandler', 'read_environ',
]
Thomas Wouters0e3f5912006-08-11 14:57:12 +000012
# Weekday and month names for HTTP date/time formatting; always English!
# (HTTP dates must not depend on the process locale, so strftime() is not
# used.)  _weekdayname is indexed by tm_wday, where Monday is 0.
_weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_monthname = [
    None,  # Dummy so we can use 1-based month numbers
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]
18
def format_date_time(timestamp):
    """Return *timestamp* formatted as an HTTP date string in GMT.

    *timestamp* is a number of seconds since the epoch, as accepted by
    'time.gmtime()'.  The result looks like
    'Thu, 01 Jan 1970 00:00:00 GMT' and always uses the English day and
    month names from '_weekdayname' and '_monthname'.
    """
    utc = time.gmtime(timestamp)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        _weekdayname[utc.tm_wday], utc.tm_mday, _monthname[utc.tm_mon],
        utc.tm_year, utc.tm_hour, utc.tm_min, utc.tm_sec,
    )
24
# Predicate: True for the fixed CGI variable names whose values come from
# the HTTP request.  Bound to the set's membership test so it can be called
# like a function: _is_request('PATH_INFO').
_is_request = frozenset({
    'SCRIPT_NAME', 'PATH_INFO', 'QUERY_STRING', 'REQUEST_METHOD', 'AUTH_TYPE',
    'CONTENT_TYPE', 'CONTENT_LENGTH', 'HTTPS', 'REMOTE_USER', 'REMOTE_IDENT',
}).__contains__
29
def _needs_transcode(k):
    """Return True if environment variable name *k* carries request data.

    That is the case for the fixed names accepted by '_is_request', for any
    name starting with 'HTTP_' or 'SSL_', and for any such name preceded by
    one or more 'REDIRECT_' prefixes.
    """
    # No request name itself starts with 'REDIRECT_', so stripping every
    # leading 'REDIRECT_' first is equivalent to recursing on the remainder.
    prefix = 'REDIRECT_'
    while k.startswith(prefix):
        k = k[len(prefix):]
    return _is_request(k) or k.startswith(('HTTP_', 'SSL_'))
33
def read_environ():
    """Read environment, fixing HTTP variables

    Returns a new dict copied from 'os.environ'.  Values of variables that
    carry HTTP request data (see '_needs_transcode') are re-encoded so that
    each character of the result stands for one original request byte
    (i.e. bytes decoded as ISO-8859-1).  Other variables are copied as-is.
    """
    enc = sys.getfilesystemencoding()
    esc = 'surrogateescape'
    try:
        # Ask the codec registry directly whether the handler exists;
        # encoding an empty string does not reliably consult the handler.
        codecs.lookup_error(esc)
    except LookupError:
        esc = 'replace'

    # Neither the platform nor the server software changes while we loop,
    # so work them out once.
    on_win32 = sys.platform == 'win32'
    software = (
        os.environ.get('SERVER_SOFTWARE', '').lower() if on_win32 else ''
    )

    environ = {}

    # Take the basic environment from native-unicode os.environ. Attempt to
    # fix up the variables that come from the HTTP request to compensate for
    # the bytes->unicode decoding step that will already have taken place.
    for k, v in os.environ.items():
        if _needs_transcode(k):

            # On win32, the os.environ is natively Unicode. Different servers
            # decode the request bytes using different encodings.
            if on_win32:

                # On IIS, the HTTP request will be decoded as UTF-8 as long
                # as the input is a valid UTF-8 sequence. Otherwise it is
                # decoded using the system code page (mbcs), with no way to
                # detect this has happened. Because UTF-8 is the more likely
                # encoding, and mbcs is inherently unreliable (an mbcs string
                # that happens to be valid UTF-8 will not be decoded as mbcs)
                # always recreate the original bytes as UTF-8.
                if software.startswith('microsoft-iis/'):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1)
                # direct to the Unicode environ. No modification needed.
                elif software.startswith('apache/'):
                    pass

                # Python 3's http.server.CGIHTTPRequestHandler decodes
                # using the urllib.unquote default of UTF-8, amongst other
                # issues.
                elif (
                    software.startswith('simplehttp/')
                    and 'python/3' in software
                ):
                    v = v.encode('utf-8').decode('iso-8859-1')

                # For other servers, guess that they have written bytes to
                # the environ using stdio byte-oriented interfaces, ending up
                # with the system code page.
                else:
                    v = v.encode(enc, 'replace').decode('iso-8859-1')

            # Recover bytes from unicode environ, using surrogate escapes
            # where available (Python 3.1+).
            else:
                v = v.encode(enc, esc).decode('iso-8859-1')

        environ[k] = v
    return environ
92
Thomas Wouters0e3f5912006-08-11 14:57:12 +000093
class BaseHandler:
    """Manage the invocation of a WSGI application

    This is an abstract base: subclasses must provide '_write()',
    '_flush()', 'get_stdin()', 'get_stderr()' and 'add_cgi_vars()'.
    Call 'run(application)' to serve one request.
    """

    # Configuration parameters; can override per-subclass or per-instance
    wsgi_version = (1, 0)
    wsgi_multithread = True
    wsgi_multiprocess = True
    wsgi_run_once = False

    origin_server = True    # We are transmitting direct to client
    http_version = "1.0"    # Version that should be used for response
    server_software = None  # String name of server software, if any

    # os_environ is used to supply configuration from the OS environment:
    # by default it's a copy of 'os.environ' as of import time, but you can
    # override this in e.g. your __init__ method.
    os_environ = read_environ()

    # Collaborator classes
    wsgi_file_wrapper = FileWrapper     # set to None to disable
    headers_class = Headers             # must be a Headers-like class

    # Error handling (also per-subclass or per-instance)
    traceback_limit = None  # Print entire traceback to self.get_stderr()
    error_status = "500 Internal Server Error"
    error_headers = [('Content-Type', 'text/plain')]
    error_body = b"A server error occurred. Please contact the administrator."

    # State variables (don't mess with these)
    status = result = None
    headers_sent = False
    headers = None
    bytes_sent = 0

    def run(self, application):
        """Invoke the application

        Sets up the environ, calls *application* and sends its response.
        Connection errors caused by the client going away are swallowed;
        any other error is passed to 'handle_error()'.  If that fails too,
        the handler is closed and the exception propagates to the caller.
        """
        # Note to self: don't move the close()!  Asynchronous servers shouldn't
        # call close() from finish_response(), so if you close() anywhere but
        # the double-error branch here, you'll break asynchronous servers by
        # prematurely closing.  Async servers must return from 'run()' without
        # closing if there might still be output to iterate over.
        try:
            self.setup_environ()
            self.result = application(self.environ, self.start_response)
            self.finish_response()
        except (ConnectionAbortedError, BrokenPipeError, ConnectionResetError):
            # We expect the client to close the connection abruptly from time
            # to time.
            return
        except:
            # Deliberately catch everything: whatever went wrong, try to
            # log it and send an error response.
            try:
                self.handle_error()
            except:
                # If we get an error handling an error, just give up already!
                self.close()
                raise   # ...and let the actual server figure it out.

    def setup_environ(self):
        """Set up the environment for one request

        Builds 'self.environ' from a copy of 'os_environ', the subclass's
        CGI variables and the 'wsgi.*' keys.
        """
        env = self.environ = self.os_environ.copy()
        self.add_cgi_vars()

        env['wsgi.input'] = self.get_stdin()
        env['wsgi.errors'] = self.get_stderr()
        env['wsgi.version'] = self.wsgi_version
        env['wsgi.run_once'] = self.wsgi_run_once
        env['wsgi.url_scheme'] = self.get_scheme()
        env['wsgi.multithread'] = self.wsgi_multithread
        env['wsgi.multiprocess'] = self.wsgi_multiprocess

        if self.wsgi_file_wrapper is not None:
            env['wsgi.file_wrapper'] = self.wsgi_file_wrapper

        if self.origin_server and self.server_software:
            env.setdefault('SERVER_SOFTWARE', self.server_software)

    def finish_response(self):
        """Send any iterable data, then close self and the iterable

        Subclasses intended for use in asynchronous servers will
        want to redefine this method, such that it sets up callbacks
        in the event loop to iterate over the data, and to call
        'self.close()' once the response is finished.
        """
        try:
            if not self.result_is_file() or not self.sendfile():
                for data in self.result:
                    self.write(data)
                self.finish_content()
        except:
            # Close only the application's iterable here.  A full close()
            # would reset status, headers and environ to None, and 'run()'
            # still needs them to send the error response from
            # handle_error().
            if hasattr(self.result, 'close'):
                self.result.close()
            raise
        else:
            # Only reset the handler state once the response went out
            # without an exception.
            self.close()

    def get_scheme(self):
        """Return the URL scheme being used"""
        return guess_scheme(self.environ)

    def set_content_length(self):
        """Compute Content-Length or switch to chunked encoding if possible

        Content-Length is only set when the result reports a length of
        exactly one block, using the byte count recorded by 'write()'.
        """
        try:
            blocks = len(self.result)
        except (TypeError, AttributeError, NotImplementedError):
            # The result has no usable length (e.g. it is a generator).
            pass
        else:
            if blocks == 1:
                self.headers['Content-Length'] = str(self.bytes_sent)
                return
        # XXX Try for chunked encoding if origin server and client is 1.1

    def cleanup_headers(self):
        """Make any necessary header changes or defaults

        Subclasses can extend this to add other defaults.
        """
        if 'Content-Length' not in self.headers:
            self.set_content_length()

    def start_response(self, status, headers, exc_info=None):
        """'start_response()' callable as specified by PEP 3333

        Records *status* and *headers* for the response and returns the
        'write()' callable.  When *exc_info* is given and the headers have
        already been sent, the original exception is re-raised.  Raises
        AssertionError if headers were already set and no *exc_info* was
        supplied, or if a value is not a 'str'.
        """
        if exc_info:
            try:
                if self.headers_sent:
                    # Re-raise original exception if headers sent.  Raise
                    # the caught instance itself rather than constructing
                    # a new one from it, which would wrap the original and
                    # fail for exception classes whose constructor needs
                    # several arguments.
                    exc = exc_info[1]
                    if not isinstance(exc, BaseException):
                        # Callers that passed a plain value still get an
                        # exception of the stated type.
                        exc = exc_info[0](exc)
                    raise exc.with_traceback(exc_info[2])
            finally:
                exc_info = None        # avoid dangling circular ref
        elif self.headers is not None:
            raise AssertionError("Headers already set!")

        self.status = status
        self.headers = self.headers_class(headers)
        status = self._convert_string_type(status, "Status")
        assert len(status) >= 4, "Status must be at least 4 characters"
        assert status[:3].isdigit(), "Status message must begin w/3-digit code"
        assert status[3] == " ", "Status message must have a space after code"

        if __debug__:
            for name, val in headers:
                name = self._convert_string_type(name, "Header name")
                val = self._convert_string_type(val, "Header value")
                assert not is_hop_by_hop(name), \
                    f"Hop-by-hop header, '{name}: {val}', not allowed"

        return self.write

    def _convert_string_type(self, value, title):
        """Convert/check value type.

        Returns *value* unchanged if it is exactly a 'str'; otherwise
        raises AssertionError naming *title*.
        """
        if type(value) is str:
            return value
        raise AssertionError(
            "{0} must be of type str (got {1})".format(title, repr(value))
        )

    def send_preamble(self):
        """Transmit version/status/date/server, via self._write()"""
        if self.origin_server:
            if self.client_is_modern():
                self._write(
                    ('HTTP/%s %s\r\n' % (self.http_version, self.status))
                    .encode('iso-8859-1')
                )
                if 'Date' not in self.headers:
                    self._write(
                        ('Date: %s\r\n' % format_date_time(time.time()))
                        .encode('iso-8859-1')
                    )
                if self.server_software and 'Server' not in self.headers:
                    self._write(
                        ('Server: %s\r\n' % self.server_software)
                        .encode('iso-8859-1')
                    )
        else:
            # Not an origin server: emit a CGI-style 'Status:' header.
            self._write(('Status: %s\r\n' % self.status).encode('iso-8859-1'))

    def write(self, data):
        """'write()' callable as specified by PEP 3333

        Sends the stored headers before the first output, then writes and
        flushes *data*, which must be a 'bytes' instance.
        """
        assert type(data) is bytes, \
            "write() argument must be a bytes instance"

        if not self.status:
            raise AssertionError("write() before start_response()")

        elif not self.headers_sent:
            # Before the first output, send the stored headers
            self.bytes_sent = len(data)    # make sure we know content-length
            self.send_headers()
        else:
            self.bytes_sent += len(data)

        # XXX check Content-Length and truncate if too many bytes written?
        self._write(data)
        self._flush()

    def sendfile(self):
        """Platform-specific file transmission

        Override this method in subclasses to support platform-specific
        file transmission.  It is only called if the application's
        return iterable ('self.result') is an instance of
        'self.wsgi_file_wrapper'.

        This method should return a true value if it was able to actually
        transmit the wrapped file-like object using a platform-specific
        approach.  It should return a false value if normal iteration
        should be used instead.  An exception can be raised to indicate
        that transmission was attempted, but failed.

        NOTE: this method should call 'self.send_headers()' if
        'self.headers_sent' is false and it is going to attempt direct
        transmission of the file.
        """
        return False   # No platform-specific transmission by default

    def finish_content(self):
        """Ensure headers and content have both been sent"""
        if not self.headers_sent:
            # Only zero Content-Length if not set by the application (so
            # that HEAD requests can be satisfied properly, see #3839)
            self.headers.setdefault('Content-Length', "0")
            self.send_headers()
        else:
            pass  # XXX check if content-length was too short?

    def close(self):
        """Close the iterable (if needed) and reset all instance vars

        Subclasses may want to also drop the client connection.
        """
        try:
            if hasattr(self.result, 'close'):
                self.result.close()
        finally:
            # Reset the state even if the iterable's close() raised.
            self.result = self.headers = self.status = self.environ = None
            self.bytes_sent = 0
            self.headers_sent = False

    def send_headers(self):
        """Transmit headers to the client, via self._write()"""
        self.cleanup_headers()
        self.headers_sent = True
        if not self.origin_server or self.client_is_modern():
            self.send_preamble()
            self._write(bytes(self.headers))

    def result_is_file(self):
        """True if 'self.result' is an instance of 'self.wsgi_file_wrapper'"""
        wrapper = self.wsgi_file_wrapper
        return wrapper is not None and isinstance(self.result, wrapper)

    def client_is_modern(self):
        """True if client can accept status and headers"""
        return self.environ['SERVER_PROTOCOL'].upper() != 'HTTP/0.9'

    def log_exception(self, exc_info):
        """Log the 'exc_info' tuple in the server log

        Subclasses may override to retarget the output or change its format.
        """
        try:
            from traceback import print_exception
            stderr = self.get_stderr()
            print_exception(
                exc_info[0], exc_info[1], exc_info[2],
                self.traceback_limit, stderr
            )
            stderr.flush()
        finally:
            # Drop the reference to the traceback.
            exc_info = None

    def handle_error(self):
        """Log current error, and send error output to client if possible"""
        self.log_exception(sys.exc_info())
        if not self.headers_sent:
            self.result = self.error_output(self.environ, self.start_response)
            self.finish_response()
        # XXX else: attempt advanced recovery techniques for HTML or text?

    def error_output(self, environ, start_response):
        """WSGI mini-app to create error output

        By default, this just uses the 'error_status', 'error_headers',
        and 'error_body' attributes to generate an output page.  It can
        be overridden in a subclass to dynamically generate diagnostics,
        choose an appropriate message for the user's preferred language, etc.

        Note, however, that it's not recommended from a security perspective to
        spit out diagnostics to any old user; ideally, you should have to do
        something special to enable diagnostic output, which is why we don't
        include any here!
        """
        # Pass a copy of the header list so the class attribute is never
        # mutated.
        start_response(self.error_status, self.error_headers[:], sys.exc_info())
        return [self.error_body]

    # Pure abstract methods; *must* be overridden in subclasses

    def _write(self, data):
        """Override in subclass to buffer data for send to client

        It's okay if this method actually transmits the data; BaseHandler
        just separates write and flush operations for greater efficiency
        when the underlying system actually has such a distinction.
        """
        raise NotImplementedError

    def _flush(self):
        """Override in subclass to force sending of recent '_write()' calls

        It's okay if this method is a no-op (i.e., if '_write()' actually
        sends the data).
        """
        raise NotImplementedError

    def get_stdin(self):
        """Override in subclass to return suitable 'wsgi.input'"""
        raise NotImplementedError

    def get_stderr(self):
        """Override in subclass to return suitable 'wsgi.errors'"""
        raise NotImplementedError

    def add_cgi_vars(self):
        """Override in subclass to insert CGI variables in 'self.environ'"""
        raise NotImplementedError
423
424
class SimpleHandler(BaseHandler):
    """Handler that's just initialized with streams, environment, etc.

    This handler subclass is intended for synchronous HTTP/1.0 origin servers,
    and handles sending the entire response output, given the correct inputs.

    Usage::

        handler = SimpleHandler(
            inp,out,err,env, multithread=False, multiprocess=True
        )
        handler.run(app)"""

    def __init__(self, stdin, stdout, stderr, environ,
                 multithread=True, multiprocess=False):
        """Store the streams, the base environ and the concurrency flags.

        *stdin* and *stderr* are returned as 'wsgi.input' and
        'wsgi.errors'; *stdout* receives the response bytes; *environ* is
        merged into the request environment by 'add_cgi_vars()'.
        """
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.base_env = environ
        self.wsgi_multithread = multithread
        self.wsgi_multiprocess = multiprocess

    def get_stdin(self):
        """Return the input stream given to the constructor"""
        return self.stdin

    def get_stderr(self):
        """Return the error stream given to the constructor"""
        return self.stderr

    def add_cgi_vars(self):
        """Merge the constructor's environ mapping into 'self.environ'"""
        self.environ.update(self.base_env)

    def _write(self, data):
        """Write all of *data* to 'self.stdout'

        A 'write()' result of None is taken to mean that everything was
        written.  Partial writes are retried until all of *data* has gone
        out, after issuing a DeprecationWarning.
        """
        written = self.stdout.write(data)
        if written is None or written == len(data):
            return
        from warnings import warn
        warn("SimpleHandler.stdout.write() should not do partial writes",
             DeprecationWarning)
        remaining = data[written:]
        while remaining:
            written = self.stdout.write(remaining)
            if written is None:
                # Same convention as above: None means "all written".
                # Slicing with None would resend the same data forever.
                break
            remaining = remaining[written:]

    def _flush(self):
        """Flush 'self.stdout'"""
        self.stdout.flush()
        # After the first call, bind the stream's own flush() on the
        # instance so later calls skip this wrapper.
        self._flush = self.stdout.flush
473
474
class BaseCGIHandler(SimpleHandler):

    """CGI-like systems using input/output/error streams and environ mapping

    Usage::

        handler = BaseCGIHandler(inp,out,err,env)
        handler.run(app)

    This handler class is useful for gateway protocols like ReadyExec and
    FastCGI, that have usable input/output/error streams and an environment
    mapping.  It's also the base class for CGIHandler, which just uses
    sys.stdin, os.environ, and so on.

    The constructor also takes keyword arguments 'multithread' and
    'multiprocess' (defaulting to 'True' and 'False' respectively) to control
    the configuration sent to the application.  It sets 'origin_server' to
    False (to enable CGI-like output), and assumes that 'wsgi.run_once' is
    False.
    """

    # Not an origin server: the response starts with a 'Status:' header
    # instead of an HTTP status line.
    origin_server = False
497
498
class CGIHandler(BaseCGIHandler):

    """CGI-based invocation via sys.stdin/stdout/stderr and os.environ

    Usage::

        CGIHandler().run(app)

    The difference between this class and BaseCGIHandler is that it always
    uses 'wsgi.run_once' of 'True', 'wsgi.multithread' of 'False', and
    'wsgi.multiprocess' of 'True'.  It does not take any initialization
    parameters, but always uses 'sys.stdin', 'os.environ', and friends.

    If you need to override any of these parameters, use BaseCGIHandler
    instead.
    """

    wsgi_run_once = True
    # Do not allow os.environ to leak between requests in Google App Engine
    # and other multi-run CGI use cases.  This is not easily testable.
    # See http://bugs.python.org/issue7250
    os_environ = {}

    def __init__(self):
        """Bind the process's binary stdin/stdout, stderr and environment.

        The environment is read afresh with 'read_environ()' each time a
        handler is created.
        """
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            read_environ(), multithread=False, multiprocess=True
        )
527
528
class IISCGIHandler(BaseCGIHandler):
    """CGI-based invocation with workaround for IIS path bug

    This handler should be used in preference to CGIHandler when deploying on
    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
    or metabase allowPathInfoForScriptMappings (IIS<7).
    """
    wsgi_run_once = True
    os_environ = {}

    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
    # the front, causing problems for WSGI applications that wish to implement
    # routing. This handler strips any such duplicated path.

    # IIS can be configured to pass the correct PATH_INFO, but this causes
    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
    # setting can only be made on a vhost level, affecting all other script
    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
    # rarely uses it because there is still no UI for it.)

    # There is no way for CGI code to tell whether the option was set, so a
    # separate handler class is provided.
    def __init__(self):
        """Read the environment and strip SCRIPT_NAME from PATH_INFO.

        The prefix is only removed when it matches whole path segments
        (both values are compared with a trailing '/'), so a SCRIPT_NAME
        of '/app' does not affect a PATH_INFO of '/application'.
        """
        environ = read_environ()
        path = environ.get('PATH_INFO', '')
        script = environ.get('SCRIPT_NAME', '')
        if (path + '/').startswith(script + '/'):
            environ['PATH_INFO'] = path[len(script):]
        BaseCGIHandler.__init__(
            self, sys.stdin.buffer, sys.stdout.buffer, sys.stderr,
            environ, multithread=False, multiprocess=True
        )