Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1 | """HTTP server classes. |
| 2 | |
| 3 | Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see |
| 4 | SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, |
| 5 | and CGIHTTPRequestHandler for CGI scripts. |
| 6 | |
| 7 | It does, however, optionally implement HTTP/1.1 persistent connections, |
| 8 | as of version 0.3. |
| 9 | |
| 10 | Notes on CGIHTTPRequestHandler |
| 11 | ------------------------------ |
| 12 | |
| 13 | This class implements GET and POST requests to cgi-bin scripts. |
| 14 | |
| 15 | If the os.fork() function is not present (e.g. on Windows), |
Amaury Forgeot d'Arc | cb0d2d7 | 2008-06-18 22:19:22 +0000 | [diff] [blame] | 16 | subprocess.Popen() is used as a fallback, with slightly altered semantics. |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 17 | |
| 18 | In all cases, the implementation is intentionally naive -- all |
| 19 | requests are executed synchronously. |
| 20 | |
| 21 | SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL |
| 22 | -- it may execute arbitrary Python code or external programs. |
| 23 | |
| 24 | Note that status code 200 is sent prior to execution of a CGI script, so |
| 25 | scripts cannot send other status codes such as 302 (redirect). |
| 26 | |
| 27 | XXX To do: |
| 28 | |
| 29 | - log requests even later (to capture byte count) |
| 30 | - log user-agent header and other interesting goodies |
| 31 | - send error log to separate file |
| 32 | """ |
| 33 | |
| 34 | |
| 35 | # See also: |
| 36 | # |
| 37 | # HTTP Working Group T. Berners-Lee |
| 38 | # INTERNET-DRAFT R. T. Fielding |
| 39 | # <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen |
| 40 | # Expires September 8, 1995 March 8, 1995 |
| 41 | # |
| 42 | # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt |
| 43 | # |
| 44 | # and |
| 45 | # |
| 46 | # Network Working Group R. Fielding |
| 47 | # Request for Comments: 2616 et al |
| 48 | # Obsoletes: 2068 June 1999 |
| 49 | # Category: Standards Track |
| 50 | # |
| 51 | # URL: http://www.faqs.org/rfcs/rfc2616.html |
| 52 | |
| 53 | # Log files |
| 54 | # --------- |
| 55 | # |
| 56 | # Here's a quote from the NCSA httpd docs about log file format. |
| 57 | # |
| 58 | # | The logfile format is as follows. Each line consists of: |
| 59 | # | |
| 60 | # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb |
| 61 | # | |
| 62 | # | host: Either the DNS name or the IP number of the remote client |
| 63 | # | rfc931: Any information returned by identd for this person, |
| 64 | # | - otherwise. |
| 65 | # | authuser: If user sent a userid for authentication, the user name, |
| 66 | # | - otherwise. |
| 67 | # | DD: Day |
| 68 | # | Mon: Month (calendar name) |
| 69 | # | YYYY: Year |
| 70 | # | hh: hour (24-hour format, the machine's timezone) |
| 71 | # | mm: minutes |
| 72 | # | ss: seconds |
| 73 | # | request: The first line of the HTTP request as sent by the client. |
| 74 | # | ddd: the status code returned by the server, - if not available. |
| 75 | # | bbbb: the total number of bytes sent, |
| 76 | # | *not including the HTTP/1.0 header*, - if not available |
| 77 | # | |
| 78 | # | You can determine the name of the file accessed through request. |
| 79 | # |
| 80 | # (Actually, the latter is only true if you know the server configuration |
| 81 | # at the time the request was made!) |
| 82 | |
| 83 | __version__ = "0.6" |
| 84 | |
Berker Peksag | 366c570 | 2015-02-13 20:48:15 +0200 | [diff] [blame] | 85 | __all__ = [ |
Géry Ogam | 1cee216 | 2018-05-29 22:10:30 +0200 | [diff] [blame] | 86 | "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler", |
Berker Peksag | 366c570 | 2015-02-13 20:48:15 +0200 | [diff] [blame] | 87 | "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", |
| 88 | ] |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 89 | |
Pierre Quentel | 351adda | 2017-04-02 12:26:12 +0200 | [diff] [blame] | 90 | import copy |
| 91 | import datetime |
Berker Peksag | 04bc5b9 | 2016-03-14 06:06:03 +0200 | [diff] [blame] | 92 | import email.utils |
Georg Brandl | 1f7fffb | 2010-10-15 15:57:45 +0000 | [diff] [blame] | 93 | import html |
Jeremy Hylton | 914ab45 | 2009-03-27 17:16:06 +0000 | [diff] [blame] | 94 | import http.client |
| 95 | import io |
| 96 | import mimetypes |
| 97 | import os |
| 98 | import posixpath |
| 99 | import select |
| 100 | import shutil |
| 101 | import socket # For gethostbyaddr() |
| 102 | import socketserver |
| 103 | import sys |
| 104 | import time |
| 105 | import urllib.parse |
Jason R. Coombs | 7cdc31a | 2020-01-06 07:59:36 -0500 | [diff] [blame] | 106 | import contextlib |
Stéphane Wirtel | a17a2f5 | 2017-05-24 09:29:06 +0200 | [diff] [blame] | 107 | from functools import partial |
Senthil Kumaran | 1251faf | 2012-06-03 16:15:54 +0800 | [diff] [blame] | 108 | |
Serhiy Storchaka | e4db769 | 2014-12-23 16:28:28 +0200 | [diff] [blame] | 109 | from http import HTTPStatus |
| 110 | |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 111 | |
| 112 | # Default error message template |
| 113 | DEFAULT_ERROR_MESSAGE = """\ |
Senthil Kumaran | 1b407fe | 2011-03-20 10:44:30 +0800 | [diff] [blame] | 114 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" |
| 115 | "http://www.w3.org/TR/html4/strict.dtd"> |
Ezio Melotti | ca897e9 | 2011-11-02 19:33:29 +0200 | [diff] [blame] | 116 | <html> |
Senthil Kumaran | b253c9f | 2011-03-17 16:43:22 +0800 | [diff] [blame] | 117 | <head> |
Senthil Kumaran | 1b407fe | 2011-03-20 10:44:30 +0800 | [diff] [blame] | 118 | <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> |
Senthil Kumaran | b253c9f | 2011-03-17 16:43:22 +0800 | [diff] [blame] | 119 | <title>Error response</title> |
| 120 | </head> |
| 121 | <body> |
| 122 | <h1>Error response</h1> |
| 123 | <p>Error code: %(code)d</p> |
| 124 | <p>Message: %(message)s.</p> |
| 125 | <p>Error code explanation: %(code)s - %(explain)s.</p> |
| 126 | </body> |
| 127 | </html> |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 128 | """ |
| 129 | |
| 130 | DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" |
| 131 | |
class HTTPServer(socketserver.TCPServer):
    """TCP server that records its host name and port once it is bound."""

    # Enabled because it seems sensible for testing environments.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind via TCPServer, then store server_name and server_port.

        server_name is the fully qualified name reported by
        socket.getfqdn() for the bound host; server_port is the bound
        port number.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
| 142 | |
| 143 | |
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Request-handling threads are flagged as daemon threads.
    daemon_threads = True
| 146 | |
| 147 | |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 148 | class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): |
| 149 | |
| 150 | """HTTP request handler base class. |
| 151 | |
| 152 | The following explanation of HTTP serves to guide you through the |
| 153 | code as well as to expose any misunderstandings I may have about |
| 154 | HTTP (so you don't need to read the code to figure out I'm wrong |
| 155 | :-). |
| 156 | |
| 157 | HTTP (HyperText Transfer Protocol) is an extensible protocol on |
| 158 | top of a reliable stream transport (e.g. TCP/IP). The protocol |
| 159 | recognizes three parts to a request: |
| 160 | |
| 161 | 1. One line identifying the request type and path |
| 162 | 2. An optional set of RFC-822-style headers |
| 163 | 3. An optional data part |
| 164 | |
| 165 | The headers and data are separated by a blank line. |
| 166 | |
| 167 | The first line of the request has the form |
| 168 | |
| 169 | <command> <path> <version> |
| 170 | |
| 171 | where <command> is a (case-sensitive) keyword such as GET or POST, |
| 172 | <path> is a string containing path information for the request, |
| 173 | and <version> should be the string "HTTP/1.0" or "HTTP/1.1". |
| 174 | <path> is encoded using the URL encoding scheme (using %xx to signify |
| 175 | the ASCII character with hex code xx). |
| 176 | |
| 177 | The specification specifies that lines are separated by CRLF but |
| 178 | for compatibility with the widest range of clients recommends |
| 179 | servers also handle LF. Similarly, whitespace in the request line |
| 180 | is treated sensibly (allowing multiple spaces between components |
| 181 | and allowing trailing whitespace). |
| 182 | |
| 183 | Similarly, for output, lines ought to be separated by CRLF pairs |
| 184 | but most clients grok LF characters just fine. |
| 185 | |
| 186 | If the first line of the request has the form |
| 187 | |
| 188 | <command> <path> |
| 189 | |
| 190 | (i.e. <version> is left out) then this is assumed to be an HTTP |
| 191 | 0.9 request; this form has no optional headers and data part and |
| 192 | the reply consists of just the data. |
| 193 | |
| 194 | The reply form of the HTTP 1.x protocol again has three parts: |
| 195 | |
| 196 | 1. One line giving the response code |
| 197 | 2. An optional set of RFC-822-style headers |
| 198 | 3. The data |
| 199 | |
| 200 | Again, the headers and data are separated by a blank line. |
| 201 | |
| 202 | The response code line has the form |
| 203 | |
| 204 | <version> <responsecode> <responsestring> |
| 205 | |
| 206 | where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), |
| 207 | <responsecode> is a 3-digit response code indicating success or |
| 208 | failure of the request, and <responsestring> is an optional |
| 209 | human-readable string explaining what the response code means. |
| 210 | |
| 211 | This server parses the request and the headers, and then calls a |
| 212 | function specific to the request type (<command>). Specifically, |
| 213 | a request SPAM will be handled by a method do_SPAM(). If no |
| 214 | such method exists the server sends an error response to the |
| 215 | client. If it exists, it is called with no arguments: |
| 216 | |
| 217 | do_SPAM() |
| 218 | |
| 219 | Note that the request name is case sensitive (i.e. SPAM and spam |
| 220 | are different requests). |
| 221 | |
| 222 | The various request details are stored in instance variables: |
| 223 | |
| 224 | - client_address is the client IP address in the form (host, |
| 225 | port); |
| 226 | |
| 227 | - command, path and version are the broken-down request line; |
| 228 | |
Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 229 | - headers is an instance of email.message.Message (or a derived |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 230 | class) containing the header information; |
| 231 | |
| 232 | - rfile is a file object open for reading positioned at the |
| 233 | start of the optional input data part; |
| 234 | |
| 235 | - wfile is a file object open for writing. |
| 236 | |
| 237 | IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! |
| 238 | |
| 239 | The first thing to be written must be the response line. Then |
| 240 | follow 0 or more header lines, then a blank line, and then the |
| 241 | actual data (if any). The meaning of the header lines depends on |
| 242 | the command executed by the server; in most cases, when data is |
| 243 | returned, there should be at least one header line of the form |
| 244 | |
| 245 | Content-type: <type>/<subtype> |
| 246 | |
| 247 | where <type> and <subtype> should be registered MIME types, |
| 248 | e.g. "text/html" or "text/plain". |
| 249 | |
| 250 | """ |
| 251 | |
| 252 | # The Python system version, truncated to its first component. |
| 253 | sys_version = "Python/" + sys.version.split()[0] |
| 254 | |
| 255 | # The server software version. You may want to override this. |
| 256 | # The format is multiple whitespace-separated strings, |
| 257 | # where each string is of the form name[/version]. |
| 258 | server_version = "BaseHTTP/" + __version__ |
| 259 | |
| 260 | error_message_format = DEFAULT_ERROR_MESSAGE |
| 261 | error_content_type = DEFAULT_ERROR_CONTENT_TYPE |
| 262 | |
| 263 | # The default request version. This only affects responses up until |
| 264 | # the point where the request line is parsed, so it mainly decides what |
| 265 | # the client gets back when sending a malformed request line. |
| 266 | # Most web servers default to HTTP 0.9, i.e. don't send a status line. |
| 267 | default_request_version = "HTTP/0.9" |
| 268 | |
def parse_request(self):
    """Parse the request line and headers (internal).

    The raw request line is read from self.raw_requestline; the results
    are stored in self.requestline, self.command, self.path,
    self.request_version, self.headers and self.close_connection.

    Return True for success, False for failure; on failure, any relevant
    error response has already been sent back (a completely blank
    request line is dropped without a response).

    """
    self.command = None  # set in case of error on the first line
    self.request_version = version = self.default_request_version
    self.close_connection = True
    requestline = str(self.raw_requestline, 'iso-8859-1')
    requestline = requestline.rstrip('\r\n')
    self.requestline = requestline
    words = requestline.split()
    if not words:
        return False

    if len(words) >= 3:  # Enough to determine protocol version
        version = words[-1]
        try:
            if not version.startswith('HTTP/'):
                raise ValueError
            base_version_number = version.split('/', 1)[1]
            version_number = base_version_number.split(".")
            # RFC 2145 section 3.1 says there can be only one "." and
            #   - major and minor numbers MUST be treated as
            #      separate integers;
            #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
            #      turn is lower than HTTP/12.3;
            #   - Leading zeros MUST be ignored by recipients.
            if len(version_number) != 2:
                raise ValueError
            # int() alone is too lenient: it accepts signs, underscores
            # and surrounding whitespace ("+1", "1_0"), none of which is
            # a valid HTTP version component.
            if any(not (part.isascii() and part.isdigit())
                   for part in version_number):
                raise ValueError("non digit in http version")
            # Refuse absurdly long components instead of converting them.
            if any(len(part) > 10 for part in version_number):
                raise ValueError("unreasonable length http version")
            version_number = int(version_number[0]), int(version_number[1])
        except (ValueError, IndexError):
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request version (%r)" % version)
            return False
        if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
            self.close_connection = False
        if version_number >= (2, 0):
            self.send_error(
                HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                "Invalid HTTP version (%s)" % base_version_number)
            return False
        self.request_version = version

    if not 2 <= len(words) <= 3:
        self.send_error(
            HTTPStatus.BAD_REQUEST,
            "Bad request syntax (%r)" % requestline)
        return False
    command, path = words[:2]
    if len(words) == 2:
        # No version token: treated as an HTTP/0.9 request, GET only.
        self.close_connection = True
        if command != 'GET':
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad HTTP/0.9 request type (%r)" % command)
            return False
    self.command, self.path = command, path

    # Clients interpret "//host/..." as a scheme-relative URL rather
    # than a path, so a path echoed back in a redirect could send them
    # to another host.  Collapse the leading slashes to a single one.
    if self.path.startswith('//'):
        self.path = '/' + self.path.lstrip('/')

    # Examine the headers and look for a Connection directive.
    try:
        self.headers = http.client.parse_headers(self.rfile,
                                                 _class=self.MessageClass)
    except http.client.LineTooLong as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Line too long",
            str(err))
        return False
    except http.client.HTTPException as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Too many headers",
            str(err)
        )
        return False

    conntype = self.headers.get('Connection', "")
    if conntype.lower() == 'close':
        self.close_connection = True
    elif (conntype.lower() == 'keep-alive' and
          self.protocol_version >= "HTTP/1.1"):
        self.close_connection = False
    # Examine the headers and look for an Expect directive
    expect = self.headers.get('Expect', "")
    if (expect.lower() == "100-continue" and
            self.protocol_version >= "HTTP/1.1" and
            self.request_version >= "HTTP/1.1"):
        if not self.handle_expect_100():
            return False
    return True
| 367 | |
def handle_expect_100(self):
    """Respond to an "Expect: 100-continue" request header.

    A client that sent this header waits for either a 100 Continue or
    a final response before transmitting the request body.  This
    default implementation always answers with 100 Continue.  Override
    it to behave differently, e.g. to reject unauthorized requests.

    An override must either return True (possibly after sending a
    100 Continue response) or send an error response and return False.

    """
    interim_status = HTTPStatus.CONTINUE
    self.send_response_only(interim_status)
    self.end_headers()
    return True
| 385 | |
def handle_one_request(self):
    """Read, parse and dispatch one HTTP request.

    The request line is read from self.rfile, parsed by
    parse_request(), and handed to the do_<COMMAND>() method named
    after the request command.  Overriding this method is normally
    unnecessary; implement do_GET(), do_POST(), ... instead.

    """
    try:
        line = self.rfile.readline(65537)
        self.raw_requestline = line
        if len(line) > 65536:
            # Request line over the limit: reset the fields that the
            # error path reads, then refuse the request.
            self.requestline = self.request_version = self.command = ''
            self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
        elif not line:
            # Nothing was read; mark the connection for closing.
            self.close_connection = True
        elif self.parse_request():
            handler_name = 'do_' + self.command
            if hasattr(self, handler_name):
                getattr(self, handler_name)()
                # actually send the response if not already done.
                self.wfile.flush()
            else:
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
        # else: parse_request() already sent an error code, just exit
    except TimeoutError as exc:
        # a read or a write timed out.  Discard this connection
        self.log_error("Request timed out: %r", exc)
        self.close_connection = True
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 422 | |
def handle(self):
    """Serve requests until the connection is flagged for closing.

    At least one request is always handled; further ones follow only
    while handle_one_request() leaves close_connection false.
    """
    self.close_connection = True

    while True:
        self.handle_one_request()
        if self.close_connection:
            break
| 430 | |
def send_error(self, code, message=None, explain=None):
    """Send and log an error reply.

    Arguments are
    * code:    an HTTP error code
               3 digits
    * message: a simple optional 1 line reason phrase.
               *( HTAB / SP / VCHAR / %x80-FF )
               defaults to the short entry of self.responses[code]
    * explain: a detailed message; defaults to the long entry of
               self.responses[code].

    The error is logged, the status line and headers are sent (so this
    must be called before any other output has been generated), and
    then, where a body is permitted, an HTML page built from
    error_message_format is written.

    """
    try:
        default_short, default_long = self.responses[code]
    except KeyError:
        default_short, default_long = '???', '???'
    message = default_short if message is None else message
    explain = default_long if explain is None else explain

    self.log_error("code %d, message %s", code, message)
    self.send_response(code, message)
    self.send_header('Connection', 'close')

    # Message body is omitted for cases described in:
    #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
    #  - RFC7231: 6.3.6. 205(Reset Content)
    bodiless_codes = (HTTPStatus.NO_CONTENT,
                      HTTPStatus.RESET_CONTENT,
                      HTTPStatus.NOT_MODIFIED)
    body = None
    if code >= 200 and code not in bodiless_codes:
        # HTML encode to prevent Cross Site Scripting attacks
        # (see bug #1100201)
        fields = {
            'code': code,
            'message': html.escape(message, quote=False),
            'explain': html.escape(explain, quote=False),
        }
        body = (self.error_message_format % fields).encode('UTF-8', 'replace')
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Content-Length', str(len(body)))
    self.end_headers()

    # A HEAD request gets the headers only.
    if body and self.command != 'HEAD':
        self.wfile.write(body)
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 483 | |
def send_response(self, code, message=None):
    """Log the response code and queue the status line.

    After the status line, the standard 'Server' and 'Date' headers
    are queued, carrying the server software version and the current
    date respectively.

    """
    self.log_request(code)
    self.send_response_only(code, message)
    server_id = self.version_string()
    self.send_header('Server', server_id)
    current_date = self.date_time_string()
    self.send_header('Date', current_date)
| 496 | |
def send_response_only(self, code, message=None):
    """Queue just the status line in the headers buffer.

    Nothing is queued for HTTP/0.9 requests.  When message is None the
    short phrase from self.responses is used, or an empty string if
    the code is not listed there.
    """
    if self.request_version == 'HTTP/0.9':
        return
    if message is None:
        message = self.responses[code][0] if code in self.responses else ''
    if not hasattr(self, '_headers_buffer'):
        self._headers_buffer = []
    status_line = "%s %d %s\r\n" % (self.protocol_version, code, message)
    self._headers_buffer.append(status_line.encode('latin-1', 'strict'))
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 510 | |
def send_header(self, keyword, value):
    """Queue one MIME header line in the headers buffer.

    Nothing is queued for HTTP/0.9 requests.  A 'Connection' header
    additionally updates close_connection: 'close' sets it, and
    'keep-alive' clears it.
    """
    if self.request_version != 'HTTP/0.9':
        try:
            pending = self._headers_buffer
        except AttributeError:
            pending = self._headers_buffer = []
        header_line = "%s: %s\r\n" % (keyword, value)
        pending.append(header_line.encode('latin-1', 'strict'))

    if keyword.lower() == 'connection':
        token = value.lower()
        if token == 'close':
            self.close_connection = True
        elif token == 'keep-alive':
            self.close_connection = False
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 524 | |
def end_headers(self):
    """Queue the blank line that ends the MIME headers and flush them.

    Does nothing for HTTP/0.9 requests.
    """
    if self.request_version == 'HTTP/0.9':
        return
    self._headers_buffer.append(b"\r\n")
    self.flush_headers()
| 530 | |
def flush_headers(self):
    """Write any queued header bytes to wfile and empty the buffer.

    Does nothing when no headers buffer has been created yet.
    """
    if not hasattr(self, '_headers_buffer'):
        return
    queued = b"".join(self._headers_buffer)
    self.wfile.write(queued)
    self._headers_buffer = []
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 535 | |
def log_request(self, code='-', size='-'):
    """Log an accepted request.

    Called by send_response().  The request line, status code and size
    are passed on to log_message(); an HTTPStatus member is logged as
    its numeric value.

    """
    status = code.value if isinstance(code, HTTPStatus) else code
    self.log_message('"%s" %s %s',
                     self.requestline, str(status), str(size))
| 546 | |
def log_error(self, format, *args):
    """Log an error.

    Used when a request cannot be fulfilled.  The default simply
    forwards its arguments, unchanged, to log_message(), so they have
    the same meaning as there.

    XXX This should go to the separate error log.

    """
    self.log_message(format, *args)
| 560 | |
def log_message(self, format, *args):
    """Log an arbitrary message to sys.stderr.

    This is used by all other logging functions.  Override
    it if you have specific logging wishes.

    The first argument, FORMAT, is a format string for the
    message to be logged.  If the format string contains
    any % escapes requiring parameters, they should be
    specified as subsequent arguments (it's just like
    printf!).

    The client ip and current date/time are prefixed to
    every message.

    The formatted message may contain text supplied by the client
    (the request line, for instance), so C0/C1 control characters and
    DEL are written as \\xHH escapes and a backslash is doubled.  This
    keeps a request from injecting terminal escape sequences or fake
    line breaks into the log.

    """
    # Escape table for the formatted message only; the address and
    # timestamp prefix are produced by the server itself.
    escapes = {
        codepoint: r'\x%02x' % codepoint
        for codepoint in (*range(0x20), *range(0x7f, 0xa0))
    }
    # Double the backslash so an escaped character cannot be confused
    # with text the client sent literally.
    escapes[ord('\\')] = r'\\'

    message = format % args
    sys.stderr.write("%s - - [%s] %s\n" %
                     (self.address_string(),
                      self.log_date_time_string(),
                      message.translate(escapes)))
| 582 | |
def version_string(self):
    """Return server_version and sys_version joined by a single space."""
    return ' '.join((self.server_version, self.sys_version))
| 586 | |
def date_time_string(self, timestamp=None):
    """Return a date/time string formatted for a message header.

    timestamp is a number of seconds since the epoch; the current time
    is used when it is None.  The result is produced by
    email.utils.formatdate() with usegmt=True.
    """
    moment = time.time() if timestamp is None else timestamp
    return email.utils.formatdate(moment, usegmt=True)
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 592 | |
def log_date_time_string(self):
    """Return the current local time formatted for logging.

    The layout is DD/Mon/YYYY HH:MM:SS, with the month abbreviation
    taken from self.monthname.
    """
    local = time.localtime(time.time())
    return "%02d/%3s/%04d %02d:%02d:%02d" % (
        local.tm_mday, self.monthname[local.tm_mon], local.tm_year,
        local.tm_hour, local.tm_min, local.tm_sec)

# Abbreviated day names, Monday first.
weekdayname = 'Mon Tue Wed Thu Fri Sat Sun'.split()

# Abbreviated month names; index 0 is a placeholder so that the table
# can be indexed directly with a 1-based month number.
monthname = [None, *'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()]
| 606 | |
def address_string(self):
    """Return the client address (first item of self.client_address)."""
    peer = self.client_address
    return peer[0]
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 611 | |
# Essentially static class variables

# HTTP protocol version spoken by the handler.
# Set this to HTTP/1.1 to enable automatic keepalive
protocol_version = "HTTP/1.0"

# Class used to parse request headers
MessageClass = http.client.HTTPMessage

# Kept for backwards compatibility: maps every HTTPStatus member to a
# (phrase, description) pair.
responses = {
    status: (status.phrase, status.description)
    for status in HTTPStatus.__members__.values()
}
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 626 | |
| 627 | |
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the configured directory (the current
    directory by default) and any of its subdirectories.  The MIME
    type for files is determined by calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__
    # Extension -> MIME type overrides consulted by guess_type() before
    # falling back to the mimetypes module.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }

    def __init__(self, *args, directory=None, **kwargs):
        """Remember the directory to serve, then run the base initializer.

        DIRECTORY may be any path-like object; None means the current
        working directory at construction time.  It is stored before
        calling the base class because the base initializer is handed
        the request and may already need self.directory.
        """
        if directory is None:
            directory = os.getcwd()
        self.directory = os.fspath(directory)
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            parts = urllib.parse.urlsplit(self.path)
            if not parts.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                new_parts = (parts[0], parts[1], parts[2] + '/',
                             parts[3], parts[4])
                new_url = urllib.parse.urlunsplit(new_parts)
                self.send_header("Location", new_url)
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                # No index file: produce a generated listing instead.
                return self.list_directory(path)
        ctype = self.guess_type(path)
        # check for trailing "/" which should return 404. See Issue17324
        # The test for this was added in test_httpserver.py
        # However, some OS platforms accept a trailing slash as a filename
        # See discussion on python-dev and Issue34711 regarding
        # parsing and rejection of filenames with a trailing slash
        if path.endswith("/"):
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None
        try:
            f = open(path, 'rb')
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None

        try:
            fs = os.fstat(f.fileno())
            # Use browser cache if possible
            if ("If-Modified-Since" in self.headers
                    and "If-None-Match" not in self.headers):
                # compare If-Modified-Since and time of last file modification
                try:
                    ims = email.utils.parsedate_to_datetime(
                        self.headers["If-Modified-Since"])
                except (TypeError, IndexError, OverflowError, ValueError):
                    # ignore ill-formed values
                    pass
                else:
                    if ims.tzinfo is None:
                        # obsolete format with no timezone, cf.
                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                        ims = ims.replace(tzinfo=datetime.timezone.utc)
                    if ims.tzinfo is datetime.timezone.utc:
                        # compare to UTC datetime of last modification
                        last_modif = datetime.datetime.fromtimestamp(
                            fs.st_mtime, datetime.timezone.utc)
                        # remove microseconds, like in If-Modified-Since
                        last_modif = last_modif.replace(microsecond=0)

                        if last_modif <= ims:
                            self.send_response(HTTPStatus.NOT_MODIFIED)
                            self.end_headers()
                            f.close()
                            return None

            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", ctype)
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except:
            # Never leak the open file, whatever went wrong; the
            # exception itself is propagated unchanged.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        PATH is the local directory to list.  Return value is either a
        file object, or None (indicating an error).  In either case, the
        headers are sent, making the interface the same as for
        send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            # Fall back to lenient decoding of the *request* path.  The
            # local filesystem path must never be shown to the client.
            displaypath = urllib.parse.unquote(self.path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = 'Directory listing for %s' % displaypath
        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd">')
        r.append('<html>\n<head>')
        r.append('<meta http-equiv="Content-Type" '
                 'content="text/html; charset=%s">' % enc)
        r.append('<title>%s</title>\n</head>' % title)
        r.append('<body>\n<h1>%s</h1>' % title)
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                     % (urllib.parse.quote(linkname,
                                           errors='surrogatepass'),
                        html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        The result is always rooted at self.directory.

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        path = posixpath.normpath(path)
        words = path.split('/')
        words = filter(None, words)
        path = self.directory
        for word in words:
            if os.path.dirname(word) or word in (os.curdir, os.pardir):
                # Ignore components that are not a simple file/directory name
                continue
            path = os.path.join(path, word)
        if trailing_slash:
            path += '/'
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the OUTPUTFILE
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map (first as written, then
        lower-cased), then asks the mimetypes module, and finally
        uses application/octet-stream as a default; however it would
        be permissible (if slow) to look inside the data to make a
        better guess.

        """
        _, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        guess, _ = mimetypes.guess_type(path)
        if guess:
            return guess
        return 'application/octet-stream'
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 885 | |
| 886 | |
| 887 | # Utilities for CGIHTTPRequestHandler |
| 888 | |
Senthil Kumaran | d70846b | 2012-04-12 02:34:32 +0800 | [diff] [blame] | 889 | def _url_collapse_path(path): |
Benjamin Peterson | ad71f0f | 2009-04-11 20:12:10 +0000 | [diff] [blame] | 890 | """ |
| 891 | Given a URL path, remove extra '/'s and '.' path elements and collapse |
Martin Panter | 9955a37 | 2015-10-07 10:26:23 +0000 | [diff] [blame] | 892 | any '..' references and returns a collapsed path. |
Benjamin Peterson | ad71f0f | 2009-04-11 20:12:10 +0000 | [diff] [blame] | 893 | |
| 894 | Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. |
Senthil Kumaran | d70846b | 2012-04-12 02:34:32 +0800 | [diff] [blame] | 895 | The utility of this function is limited to is_cgi method and helps |
| 896 | preventing some security attacks. |
Benjamin Peterson | ad71f0f | 2009-04-11 20:12:10 +0000 | [diff] [blame] | 897 | |
Martin Panter | cb29e8c | 2015-10-03 05:55:46 +0000 | [diff] [blame] | 898 | Returns: The reconstituted URL, which will always start with a '/'. |
Benjamin Peterson | ad71f0f | 2009-04-11 20:12:10 +0000 | [diff] [blame] | 899 | |
| 900 | Raises: IndexError if too many '..' occur within the path. |
Senthil Kumaran | d70846b | 2012-04-12 02:34:32 +0800 | [diff] [blame] | 901 | |
Benjamin Peterson | ad71f0f | 2009-04-11 20:12:10 +0000 | [diff] [blame] | 902 | """ |
Martin Panter | cb29e8c | 2015-10-03 05:55:46 +0000 | [diff] [blame] | 903 | # Query component should not be involved. |
| 904 | path, _, query = path.partition('?') |
| 905 | path = urllib.parse.unquote(path) |
| 906 | |
Benjamin Peterson | ad71f0f | 2009-04-11 20:12:10 +0000 | [diff] [blame] | 907 | # Similar to os.path.split(os.path.normpath(path)) but specific to URL |
| 908 | # path semantics rather than local operating system semantics. |
Senthil Kumaran | d70846b | 2012-04-12 02:34:32 +0800 | [diff] [blame] | 909 | path_parts = path.split('/') |
| 910 | head_parts = [] |
| 911 | for part in path_parts[:-1]: |
| 912 | if part == '..': |
| 913 | head_parts.pop() # IndexError if more '..' than prior parts |
| 914 | elif part and part != '.': |
| 915 | head_parts.append( part ) |
Benjamin Peterson | ad71f0f | 2009-04-11 20:12:10 +0000 | [diff] [blame] | 916 | if path_parts: |
Senthil Kumaran | dbb369d | 2012-04-11 03:15:28 +0800 | [diff] [blame] | 917 | tail_part = path_parts.pop() |
Senthil Kumaran | d70846b | 2012-04-12 02:34:32 +0800 | [diff] [blame] | 918 | if tail_part: |
| 919 | if tail_part == '..': |
| 920 | head_parts.pop() |
| 921 | tail_part = '' |
| 922 | elif tail_part == '.': |
| 923 | tail_part = '' |
Benjamin Peterson | ad71f0f | 2009-04-11 20:12:10 +0000 | [diff] [blame] | 924 | else: |
| 925 | tail_part = '' |
Senthil Kumaran | d70846b | 2012-04-12 02:34:32 +0800 | [diff] [blame] | 926 | |
Martin Panter | cb29e8c | 2015-10-03 05:55:46 +0000 | [diff] [blame] | 927 | if query: |
| 928 | tail_part = '?'.join((tail_part, query)) |
| 929 | |
Senthil Kumaran | d70846b | 2012-04-12 02:34:32 +0800 | [diff] [blame] | 930 | splitpath = ('/' + '/'.join(head_parts), tail_part) |
| 931 | collapsed_path = "/".join(splitpath) |
| 932 | |
| 933 | return collapsed_path |
| 934 | |
Benjamin Peterson | ad71f0f | 2009-04-11 20:12:10 +0000 | [diff] [blame] | 935 | |
| 936 | |
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module is unavailable.  Otherwise returns
    the uid of the 'nobody' account, or, if there is no such account,
    one more than the largest uid in the password database.  The
    looked-up value is cached in the module-level name ``nobody``.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
| 953 | |
| 954 | |
def executable(path):
    """Test for executable file (os.access() with os.X_OK)."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 958 | |
| 959 | |
| 960 | class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): |
| 961 | |
| 962 | """Complete HTTP server with GET, HEAD and POST commands. |
| 963 | |
| 964 | GET and HEAD also support running CGI scripts. |
| 965 | |
| 966 | The POST command is *only* implemented for CGI scripts. |
| 967 | |
| 968 | """ |
| 969 | |
| 970 | # Determine platform specifics |
| 971 | have_fork = hasattr(os, 'fork') |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 972 | |
| 973 | # Make rfile unbuffered -- we need to read one line and then pass |
| 974 | # the rest to a subprocess, so we can't use buffered input. |
| 975 | rbufsize = 0 |
| 976 | |
def do_POST(self):
    """Serve a POST request.

    POST is only supported for CGI scripts; any other target is
    answered with a 501 (Not Implemented) error.

    """
    if not self.is_cgi():
        self.send_error(
            HTTPStatus.NOT_IMPLEMENTED,
            "Can only POST to CGI scripts")
        return
    self.run_cgi()
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 990 | |
def send_head(self):
    """Version of send_head that supports CGI scripts.

    CGI requests are handed to run_cgi(); everything else is served
    by SimpleHTTPRequestHandler.send_head().
    """
    if not self.is_cgi():
        return SimpleHTTPRequestHandler.send_head(self)
    return self.run_cgi()
| 997 | |
def is_cgi(self):
    """Test whether self.path corresponds to a CGI script.

    Returns True and updates the cgi_info attribute to the tuple
    (dir, rest) if self.path requires running a CGI script.
    Returns False otherwise.

    If any exception is raised, the caller should assume that
    self.path was rejected as invalid and act accordingly.

    The default implementation tests whether the normalized url
    path begins with one of the strings in self.cgi_directories
    (and the next character is a '/').

    """
    collapsed_path = _url_collapse_path(self.path)
    # Try each '/'-terminated prefix in turn, shortest first, until one
    # of them is a configured CGI directory.
    dir_sep = collapsed_path.find('/', 1)
    while dir_sep > 0 and collapsed_path[:dir_sep] not in self.cgi_directories:
        dir_sep = collapsed_path.find('/', dir_sep + 1)
    if dir_sep <= 0:
        return False
    self.cgi_info = collapsed_path[:dir_sep], collapsed_path[dir_sep + 1:]
    return True


# URL path prefixes whose contents are treated as CGI scripts.
cgi_directories = ['/cgi-bin', '/htbin']
| 1025 | |
def is_executable(self, path):
    """Test whether argument path is an executable file.

    Delegates to the module-level executable() helper.
    """
    verdict = executable(path)
    return verdict
| 1029 | |
def is_python(self, path):
    """Test whether argument path is a Python script.

    Decided purely by file extension (.py or .pyw, case-insensitive).
    """
    extension = os.path.splitext(path)[1]
    return extension.lower() in (".py", ".pyw")
| 1034 | |
| 1035 | def run_cgi(self): |
| 1036 | """Execute a CGI script.""" |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1037 | dir, rest = self.cgi_info |
Ned Deily | 915a30f | 2014-07-12 22:06:26 -0700 | [diff] [blame] | 1038 | path = dir + '/' + rest |
| 1039 | i = path.find('/', len(dir)+1) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1040 | while i >= 0: |
Ned Deily | 915a30f | 2014-07-12 22:06:26 -0700 | [diff] [blame] | 1041 | nextdir = path[:i] |
| 1042 | nextrest = path[i+1:] |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1043 | |
| 1044 | scriptdir = self.translate_path(nextdir) |
| 1045 | if os.path.isdir(scriptdir): |
| 1046 | dir, rest = nextdir, nextrest |
Ned Deily | 915a30f | 2014-07-12 22:06:26 -0700 | [diff] [blame] | 1047 | i = path.find('/', len(dir)+1) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1048 | else: |
| 1049 | break |
| 1050 | |
| 1051 | # find an explicit query string, if present. |
Martin Panter | a02e18a | 2015-10-03 05:38:07 +0000 | [diff] [blame] | 1052 | rest, _, query = rest.partition('?') |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1053 | |
| 1054 | # dissect the part after the directory name into a script name & |
| 1055 | # a possible additional path, to be stored in PATH_INFO. |
| 1056 | i = rest.find('/') |
| 1057 | if i >= 0: |
| 1058 | script, rest = rest[:i], rest[i:] |
| 1059 | else: |
| 1060 | script, rest = rest, '' |
| 1061 | |
| 1062 | scriptname = dir + '/' + script |
| 1063 | scriptfile = self.translate_path(scriptname) |
| 1064 | if not os.path.exists(scriptfile): |
Serhiy Storchaka | e4db769 | 2014-12-23 16:28:28 +0200 | [diff] [blame] | 1065 | self.send_error( |
| 1066 | HTTPStatus.NOT_FOUND, |
| 1067 | "No such CGI script (%r)" % scriptname) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1068 | return |
| 1069 | if not os.path.isfile(scriptfile): |
Serhiy Storchaka | e4db769 | 2014-12-23 16:28:28 +0200 | [diff] [blame] | 1070 | self.send_error( |
| 1071 | HTTPStatus.FORBIDDEN, |
| 1072 | "CGI script is not a plain file (%r)" % scriptname) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1073 | return |
| 1074 | ispy = self.is_python(scriptname) |
Victor Stinner | fb25ba9 | 2011-06-20 17:45:54 +0200 | [diff] [blame] | 1075 | if self.have_fork or not ispy: |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1076 | if not self.is_executable(scriptfile): |
Serhiy Storchaka | e4db769 | 2014-12-23 16:28:28 +0200 | [diff] [blame] | 1077 | self.send_error( |
| 1078 | HTTPStatus.FORBIDDEN, |
| 1079 | "CGI script is not executable (%r)" % scriptname) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1080 | return |
| 1081 | |
| 1082 | # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html |
| 1083 | # XXX Much of the following could be prepared ahead of time! |
Senthil Kumaran | 4271372 | 2010-10-03 17:55:45 +0000 | [diff] [blame] | 1084 | env = copy.deepcopy(os.environ) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1085 | env['SERVER_SOFTWARE'] = self.version_string() |
| 1086 | env['SERVER_NAME'] = self.server.server_name |
| 1087 | env['GATEWAY_INTERFACE'] = 'CGI/1.1' |
| 1088 | env['SERVER_PROTOCOL'] = self.protocol_version |
| 1089 | env['SERVER_PORT'] = str(self.server.server_port) |
| 1090 | env['REQUEST_METHOD'] = self.command |
Jeremy Hylton | 1afc169 | 2008-06-18 20:49:58 +0000 | [diff] [blame] | 1091 | uqrest = urllib.parse.unquote(rest) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1092 | env['PATH_INFO'] = uqrest |
| 1093 | env['PATH_TRANSLATED'] = self.translate_path(uqrest) |
| 1094 | env['SCRIPT_NAME'] = scriptname |
Senthil Kumaran | 3ec9d01 | 2020-12-02 19:48:14 -0800 | [diff] [blame] | 1095 | env['QUERY_STRING'] = query |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1096 | env['REMOTE_ADDR'] = self.client_address[0] |
Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 1097 | authorization = self.headers.get("authorization") |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1098 | if authorization: |
| 1099 | authorization = authorization.split() |
| 1100 | if len(authorization) == 2: |
| 1101 | import base64, binascii |
| 1102 | env['AUTH_TYPE'] = authorization[0] |
| 1103 | if authorization[0].lower() == "basic": |
| 1104 | try: |
| 1105 | authorization = authorization[1].encode('ascii') |
Georg Brandl | 706824f | 2009-06-04 09:42:55 +0000 | [diff] [blame] | 1106 | authorization = base64.decodebytes(authorization).\ |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1107 | decode('ascii') |
| 1108 | except (binascii.Error, UnicodeError): |
| 1109 | pass |
| 1110 | else: |
| 1111 | authorization = authorization.split(':') |
| 1112 | if len(authorization) == 2: |
| 1113 | env['REMOTE_USER'] = authorization[0] |
| 1114 | # XXX REMOTE_IDENT |
Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 1115 | if self.headers.get('content-type') is None: |
| 1116 | env['CONTENT_TYPE'] = self.headers.get_content_type() |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1117 | else: |
Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 1118 | env['CONTENT_TYPE'] = self.headers['content-type'] |
| 1119 | length = self.headers.get('content-length') |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1120 | if length: |
| 1121 | env['CONTENT_LENGTH'] = length |
Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 1122 | referer = self.headers.get('referer') |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1123 | if referer: |
| 1124 | env['HTTP_REFERER'] = referer |
Senthil Kumaran | da3d2ab | 2020-12-05 05:26:24 -0800 | [diff] [blame] | 1125 | accept = self.headers.get_all('accept', ()) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1126 | env['HTTP_ACCEPT'] = ','.join(accept) |
Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 1127 | ua = self.headers.get('user-agent') |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1128 | if ua: |
| 1129 | env['HTTP_USER_AGENT'] = ua |
Barry Warsaw | 820c120 | 2008-06-12 04:06:45 +0000 | [diff] [blame] | 1130 | co = filter(None, self.headers.get_all('cookie', [])) |
Georg Brandl | 62e2ca2 | 2010-07-31 21:54:24 +0000 | [diff] [blame] | 1131 | cookie_str = ', '.join(co) |
| 1132 | if cookie_str: |
| 1133 | env['HTTP_COOKIE'] = cookie_str |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1134 | # XXX Other HTTP_* headers |
| 1135 | # Since we're setting the env in the parent, provide empty |
| 1136 | # values to override previously set values |
| 1137 | for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', |
| 1138 | 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): |
| 1139 | env.setdefault(k, "") |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1140 | |
Serhiy Storchaka | e4db769 | 2014-12-23 16:28:28 +0200 | [diff] [blame] | 1141 | self.send_response(HTTPStatus.OK, "Script output follows") |
Senthil Kumaran | c7ae19b | 2011-05-09 23:25:02 +0800 | [diff] [blame] | 1142 | self.flush_headers() |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1143 | |
| 1144 | decoded_query = query.replace('+', ' ') |
| 1145 | |
| 1146 | if self.have_fork: |
| 1147 | # Unix -- fork as we should |
| 1148 | args = [script] |
| 1149 | if '=' not in decoded_query: |
| 1150 | args.append(decoded_query) |
| 1151 | nobody = nobody_uid() |
| 1152 | self.wfile.flush() # Always flush before forking |
| 1153 | pid = os.fork() |
| 1154 | if pid != 0: |
| 1155 | # Parent |
| 1156 | pid, sts = os.waitpid(pid, 0) |
| 1157 | # throw away additional data [see bug #427345] |
| 1158 | while select.select([self.rfile], [], [], 0)[0]: |
| 1159 | if not self.rfile.read(1): |
| 1160 | break |
Victor Stinner | 9a679a0 | 2020-04-02 03:42:05 +0200 | [diff] [blame] | 1161 | exitcode = os.waitstatus_to_exitcode(sts) |
| 1162 | if exitcode: |
| 1163 | self.log_error(f"CGI script exit code {exitcode}") |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1164 | return |
| 1165 | # Child |
| 1166 | try: |
| 1167 | try: |
| 1168 | os.setuid(nobody) |
Andrew Svetlov | ad28c7f | 2012-12-18 22:02:39 +0200 | [diff] [blame] | 1169 | except OSError: |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1170 | pass |
| 1171 | os.dup2(self.rfile.fileno(), 0) |
| 1172 | os.dup2(self.wfile.fileno(), 1) |
Senthil Kumaran | 4271372 | 2010-10-03 17:55:45 +0000 | [diff] [blame] | 1173 | os.execve(scriptfile, args, env) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1174 | except: |
| 1175 | self.server.handle_error(self.request, self.client_address) |
| 1176 | os._exit(127) |
| 1177 | |
Amaury Forgeot d'Arc | cb0d2d7 | 2008-06-18 22:19:22 +0000 | [diff] [blame] | 1178 | else: |
| 1179 | # Non-Unix -- use subprocess |
| 1180 | import subprocess |
Senthil Kumaran | e29cd16 | 2009-11-11 04:17:53 +0000 | [diff] [blame] | 1181 | cmdline = [scriptfile] |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1182 | if self.is_python(scriptfile): |
| 1183 | interp = sys.executable |
| 1184 | if interp.lower().endswith("w.exe"): |
| 1185 | # On Windows, use python.exe, not pythonw.exe |
| 1186 | interp = interp[:-5] + interp[-4:] |
Senthil Kumaran | e29cd16 | 2009-11-11 04:17:53 +0000 | [diff] [blame] | 1187 | cmdline = [interp, '-u'] + cmdline |
| 1188 | if '=' not in query: |
| 1189 | cmdline.append(query) |
| 1190 | self.log_message("command: %s", subprocess.list2cmdline(cmdline)) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1191 | try: |
| 1192 | nbytes = int(length) |
| 1193 | except (TypeError, ValueError): |
| 1194 | nbytes = 0 |
Amaury Forgeot d'Arc | cb0d2d7 | 2008-06-18 22:19:22 +0000 | [diff] [blame] | 1195 | p = subprocess.Popen(cmdline, |
| 1196 | stdin=subprocess.PIPE, |
| 1197 | stdout=subprocess.PIPE, |
Senthil Kumaran | 4271372 | 2010-10-03 17:55:45 +0000 | [diff] [blame] | 1198 | stderr=subprocess.PIPE, |
| 1199 | env = env |
Amaury Forgeot d'Arc | cb0d2d7 | 2008-06-18 22:19:22 +0000 | [diff] [blame] | 1200 | ) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1201 | if self.command.lower() == "post" and nbytes > 0: |
| 1202 | data = self.rfile.read(nbytes) |
Amaury Forgeot d'Arc | cb0d2d7 | 2008-06-18 22:19:22 +0000 | [diff] [blame] | 1203 | else: |
| 1204 | data = None |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1205 | # throw away additional data [see bug #427345] |
| 1206 | while select.select([self.rfile._sock], [], [], 0)[0]: |
| 1207 | if not self.rfile._sock.recv(1): |
| 1208 | break |
Amaury Forgeot d'Arc | cb0d2d7 | 2008-06-18 22:19:22 +0000 | [diff] [blame] | 1209 | stdout, stderr = p.communicate(data) |
| 1210 | self.wfile.write(stdout) |
| 1211 | if stderr: |
| 1212 | self.log_error('%s', stderr) |
Brian Curtin | cbad4df | 2010-11-05 15:04:48 +0000 | [diff] [blame] | 1213 | p.stderr.close() |
| 1214 | p.stdout.close() |
Amaury Forgeot d'Arc | cb0d2d7 | 2008-06-18 22:19:22 +0000 | [diff] [blame] | 1215 | status = p.returncode |
| 1216 | if status: |
| 1217 | self.log_error("CGI script exit status %#x", status) |
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1218 | else: |
| 1219 | self.log_message("CGI script exited OK") |
| 1220 | |
| 1221 | |
def _get_best_family(*address):
    """Return ``(family, sockaddr)`` for the first getaddrinfo() match.

    The positional *address* arguments are forwarded unchanged to
    ``socket.getaddrinfo()``; the lookup is limited to stream sockets and
    requests passive (bindable) addresses.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    # Only the first candidate is consulted; the middle fields of the
    # 5-tuple (socket type, protocol, canonical name) are not needed.
    family, _socktype, _proto, _canonname, sockaddr = next(iter(candidates))
    return family, sockaddr
| 1230 | |
| 1231 | |
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument) and
    serves requests until a KeyboardInterrupt is received, at which point
    the process exits with status 0.

    HandlerClass is the request handler to use; it may be a class or a
    functools.partial wrapping one.  ServerClass is the server class to
    instantiate.  protocol is stored as the handler's protocol_version.
    bind and port are handed to _get_best_family() to pick the address
    family and the socket address to listen on.

    Note that ServerClass.address_family and the handler class's
    protocol_version are modified in place.
    """
    ServerClass.address_family, addr = _get_best_family(bind, port)

    # A functools.partial object accepts arbitrary attributes, so assigning
    # protocol_version to it would succeed yet never reach the handler
    # instances it creates.  Set it on the wrapped class instead.
    handler_target = HandlerClass
    while isinstance(handler_target, partial):
        handler_target = handler_target.func
    handler_target.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound rather than the one requested.
        host, port = httpd.socket.getsockname()[:2]
        # Hosts containing ':' are bracketed so the printed URL is valid.
        url_host = f'[{host}]' if ':' in host else host
        print(
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
Georg Brandl | 2442015 | 2008-05-26 16:32:26 +0000 | [diff] [blame] | 1255 | |
if __name__ == '__main__':
    import argparse

    # Command line: [--cgi] [--bind ADDRESS] [--directory DIR] [port]
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--cgi',
        action='store_true',
        help='Run as CGI Server',
    )
    parser.add_argument(
        '--bind', '-b',
        metavar='ADDRESS',
        help='Specify alternate bind address [default: all interfaces]',
    )
    parser.add_argument(
        '--directory', '-d',
        default=os.getcwd(),
        help='Specify alternative directory [default:current directory]',
    )
    parser.add_argument(
        'port',
        action='store',
        default=8000,
        type=int,
        nargs='?',
        help='Specify alternate port [default: 8000]',
    )
    args = parser.parse_args()

    # --cgi selects the CGI handler as-is; otherwise the simple handler is
    # pre-bound to the requested directory.
    handler_class = (
        CGIHTTPRequestHandler if args.cgi
        else partial(SimpleHTTPRequestHandler, directory=args.directory)
    )

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        """Threading server that clears IPV6_V6ONLY before binding."""

        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
    )