blob: 6c72507c2087de4d83c6cc6db564325b56dc870a [file] [log] [blame]
Benjamin Peterson8c703a02010-03-11 22:05:58 +00001#! /usr/local/bin/python
Guido van Rossum1c9daa81995-09-18 21:52:37 +00002
Guido van Rossum467d7232001-02-13 13:13:33 +00003# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4# intentionally NOT "/usr/bin/env python". On many systems
5# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6# scripts, and /usr/local/bin is the default directory where Python is
7# installed, so /usr/bin/env would be unable to find python. Granted,
8# binary installations by Linux vendors often install Python in
9# /usr/bin. So let those vendors patch cgi.py to match their choice
10# of installation.
11
Guido van Rossum72755611996-03-06 07:20:06 +000012"""Support module for CGI (Common Gateway Interface) scripts.
Guido van Rossum1c9daa81995-09-18 21:52:37 +000013
Guido van Rossum7aee3841996-03-07 18:00:44 +000014This module defines a number of utilities for use by CGI scripts
15written in Python.
Guido van Rossum72755611996-03-06 07:20:06 +000016"""
17
Guido van Rossum98d9fd32000-02-28 15:12:25 +000018# History
19# -------
Tim Peters88869f92001-01-14 23:36:06 +000020#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000021# Michael McLay started this module. Steve Majewski changed the
22# interface to SvFormContentDict and FormContentDict. The multipart
23# parsing was inspired by code submitted by Andreas Paepcke. Guido van
24# Rossum rewrote, reformatted and documented the module and is currently
25# responsible for its maintenance.
Tim Peters88869f92001-01-14 23:36:06 +000026#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000027
Guido van Rossum52b8c292001-06-29 13:06:06 +000028__version__ = "2.6"
Guido van Rossum0147db01996-03-09 03:16:04 +000029
Guido van Rossum72755611996-03-06 07:20:06 +000030
31# Imports
32# =======
33
Victor Stinner5c23b8e2011-01-14 13:05:21 +000034from io import StringIO, BytesIO, TextIOWrapper
Serhiy Storchaka2e576f52017-04-24 09:05:00 +030035from collections.abc import Mapping
Guido van Rossum72755611996-03-06 07:20:06 +000036import sys
37import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +000038import urllib.parse
Victor Stinner5c23b8e2011-01-14 13:05:21 +000039from email.parser import FeedParser
Senthil Kumaranb4cbb922014-01-11 22:20:16 -080040from email.message import Message
Georg Brandl1f7fffb2010-10-15 15:57:45 +000041import html
Victor Stinner5c23b8e2011-01-14 13:05:21 +000042import locale
43import tempfile
Guido van Rossum72755611996-03-06 07:20:06 +000044
INADA Naoki698865d2018-06-19 17:28:50 +090045__all__ = ["MiniFieldStorage", "FieldStorage", "parse", "parse_multipart",
Martin Panter1cd27722016-06-06 01:53:28 +000046 "parse_header", "test", "print_exception", "print_environ",
Guido van Rossuma8423a92001-03-19 13:40:44 +000047 "print_form", "print_directory", "print_arguments",
INADA Naoki698865d2018-06-19 17:28:50 +090048 "print_environ_usage"]
Guido van Rossumc204c701996-09-05 19:07:11 +000049
50# Logging support
51# ===============
52
# Module-level logging state shared by initlog(), dolog() and closelog().
logfile = ""        # Name of the file to append log messages to; "" means none
logfp = None        # Open file object that receives log messages, or None
Guido van Rossumc204c701996-09-05 19:07:11 +000055
def initlog(*allargs):
    """Select the real logging function on first use, then log the message.

    Callers should always go through the module-level name log rather
    than calling this function directly.  log starts out bound to
    initlog; the first call rebinds it to dolog (a log file object is
    available) or to nolog (logging is disabled) and then forwards the
    arguments to whichever was chosen.

    The first argument is a format string and the remaining arguments
    are applied to it with the % operator, so
        log("%s: %s", "a", "b")
    writes "a: b" followed by a newline.

    If the global logfp is set it is used as the destination.
    Otherwise, if the global logfile names a file, that file is opened
    in append mode.  When the file cannot be opened (OSError) logging
    is silently disabled, because there is no safe place to report the
    failure.

    """
    global log, logfile, logfp
    if logfile and not logfp:
        try:
            logfp = open(logfile, "a")
        except OSError:
            # Best effort only: fall through with logfp still unset.
            pass
    log = dolog if logfp else nolog
    log(*allargs)
Guido van Rossumc204c701996-09-05 19:07:11 +000090
def dolog(fmt, *args):
    """Format a message with % and append it, plus a newline, to logfp.

    See initlog() for how log messages are routed here.
    """
    message = fmt % args
    logfp.write(message + "\n")
94
def nolog(*allargs):
    """Discard a log message; log is bound to this when logging is disabled."""
    return None
98
def closelog():
    """Close the log file and reset logging to its initial state.

    Clears the configured file name, closes and forgets the open log
    file object (if any), and rebinds log to initlog so the next log()
    call re-evaluates the logging configuration.
    """
    global log, logfile, logfp
    logfile = ''
    if logfp:
        logfp.close()
        logfp = None
    log = initlog
107
# The current logging function; initlog() rebinds it on the first call.
log = initlog
Guido van Rossumc204c701996-09-05 19:07:11 +0000109
110
Guido van Rossum72755611996-03-06 07:20:06 +0000111# Parsing functions
112# =================
113
# Upper bound on the content length accepted for POST requests.
# A value of 0 disables the check (unlimited input).
maxlen = 0
117
def parse(fp=None, environ=os.environ, keep_blank_values=0,
          strict_parsing=0, separator='&'):
    """Parse a query in the environment or from a file (default stdin).

    Arguments, all optional:

    fp              : file pointer; default: sys.stdin.  A TextIOWrapper
        is replaced by its underlying binary buffer; any other object
        must provide a read() method that returns bytes.  If fp has an
        'encoding' attribute it is used to decode the data, otherwise
        latin-1 is used.

    environ         : environment dictionary; default: os.environ.
        Note that this dictionary is modified: REQUEST_METHOD is set to
        'GET' when missing, and QUERY_STRING is stored back for POST
        requests and when it was taken from sys.argv.

    keep_blank_values: flag indicating whether blank values in
        percent-encoded forms should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    separator: str. The symbol to use for separating the query arguments.
        Defaults to &.  It is also used to join a POST body with the
        query string from the URL.

    Returns a dictionary mapping each field name to a list of values,
    as produced by urllib.parse.parse_qs() (or by parse_multipart() for
    multipart/form-data POST requests).

    Raises ValueError if the module-level maxlen is non-zero and the
    declared CONTENT_LENGTH of a urlencoded POST exceeds it.
    """
    if fp is None:
        fp = sys.stdin

    # field keys and values (except for files) are returned as strings
    # an encoding is required to decode the bytes read from fp
    if hasattr(fp, 'encoding'):
        encoding = fp.encoding
    else:
        encoding = 'latin-1'

    # fp.read() must return bytes
    if isinstance(fp, TextIOWrapper):
        fp = fp.buffer

    if 'REQUEST_METHOD' not in environ:
        environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone
    if environ['REQUEST_METHOD'] == 'POST':
        ctype, pdict = parse_header(environ['CONTENT_TYPE'])
        if ctype == 'multipart/form-data':
            return parse_multipart(fp, pdict, separator=separator)
        elif ctype == 'application/x-www-form-urlencoded':
            clength = int(environ['CONTENT_LENGTH'])
            if maxlen and clength > maxlen:
                raise ValueError('Maximum content length exceeded')
            qs = fp.read(clength).decode(encoding)
        else:
            qs = ''                     # Unknown content-type
        if 'QUERY_STRING' in environ or sys.argv[1:]:
            if 'QUERY_STRING' in environ:
                extra = environ['QUERY_STRING']
            else:
                extra = sys.argv[1]
            if qs:
                # Join with the separator the combined string will be split
                # on; a hard-coded '&' would merge the query string into the
                # last body field whenever a custom separator is used.
                joiner = separator
                if isinstance(joiner, bytes):
                    # parse_qs() coerces a bytes separator the same way.
                    joiner = joiner.decode('ascii')
                if not isinstance(joiner, str) or not joiner:
                    # Invalid separator: keep the historical join and let
                    # parse_qs() report the problem.
                    joiner = '&'
                qs = qs + joiner
            qs = qs + extra
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    elif 'QUERY_STRING' in environ:
        qs = environ['QUERY_STRING']
    else:
        if sys.argv[1:]:
            qs = sys.argv[1]
        else:
            qs = ""
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding, separator=separator)
Guido van Rossume7808771995-08-07 20:12:09 +0000186
187
def parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator='&'):
    """Parse multipart input.

    Arguments:
    fp   : input file
    pdict: dictionary containing other parameters of content-type header.
        Must contain 'boundary', given either as bytes or as str (the
        form returned by parse_header()).  An optional 'CONTENT-LENGTH'
        entry is forwarded as the Content-Length header.
    encoding, errors: request encoding and error handler, passed to
        FieldStorage
    separator: query argument separator, passed to FieldStorage

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field. For non-file fields, the value
    is a list of strings.

    Raises KeyError if pdict has no 'boundary' entry.
    """
    # RFC 2046, Section 5.1 : The "multipart" boundary delimiters are always
    # represented as 7bit US-ASCII.
    boundary = pdict['boundary']
    if isinstance(boundary, bytes):
        boundary = boundary.decode('ascii')
    # A str boundary (what parse_header() yields, and therefore what parse()
    # passes in) is used as is; FieldStorage validates it later.
    ctype = "multipart/form-data; boundary={}".format(boundary)
    headers = Message()
    headers.set_type(ctype)
    try:
        headers['Content-Length'] = pdict['CONTENT-LENGTH']
    except KeyError:
        # No length supplied: FieldStorage reads until the final boundary.
        pass
    fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors,
                      environ={'REQUEST_METHOD': 'POST'}, separator=separator)
    return {k: fs.getlist(k) for k in fs}
Guido van Rossum9a22de11995-01-12 12:29:47 +0000214
Fred Drake9a0a65b2008-12-04 19:24:50 +0000215def _parseparam(s):
216 while s[:1] == ';':
217 s = s[1:]
218 end = s.find(';')
Senthil Kumaran1ef0c032011-10-20 01:05:44 +0800219 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Fred Drake9a0a65b2008-12-04 19:24:50 +0000220 end = s.find(';', end + 1)
221 if end < 0:
222 end = len(s)
223 f = s[:end]
224 yield f.strip()
225 s = s[end:]
226
def parse_header(line):
    """Parse a Content-type like header.

    Return the main value and a dictionary of options.  Option names
    are lower-cased; a value enclosed in double quotes has the quotes
    removed and the escapes \\\\ and \\" undone.  Pieces without '='
    are ignored.

    """
    pieces = _parseparam(';' + line)
    key = next(pieces)
    options = {}
    for piece in pieces:
        raw_name, equals, raw_value = piece.partition('=')
        if not equals:
            continue
        name = raw_name.strip().lower()
        value = raw_value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        options[name] = value
    return key, options
Guido van Rossum72755611996-03-06 07:20:06 +0000246
247
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000248# Classes for field storage
249# =========================
250
class MiniFieldStorage:

    """Like FieldStorage, for use when no file uploads are possible."""

    # Placeholder attributes so instances expose the same names as
    # FieldStorage; only name and value carry real data.
    filename = list = type = file = disposition = None
    type_options = {}
    disposition_options = {}
    headers = {}

    def __init__(self, name, value):
        """Constructor from field name and value."""
        self.name, self.value = name, value

    def __repr__(self):
        """Return printable representation."""
        shown = (self.name, self.value)
        return "MiniFieldStorage(%r, %r)" % shown
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000274
275
276class FieldStorage:
277
Guido van Rossum7aee3841996-03-07 18:00:44 +0000278 """Store a sequence of fields, reading multipart/form-data.
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000279
Guido van Rossum7aee3841996-03-07 18:00:44 +0000280 This class provides naming, typing, files stored on disk, and
281 more. At the top level, it is accessible like a dictionary, whose
282 keys are the field names. (Note: None can occur as a field name.)
283 The items are either a Python list (if there's multiple values) or
284 another FieldStorage or MiniFieldStorage object. If it's a single
285 object, it has the following attributes:
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000286
Guido van Rossum7aee3841996-03-07 18:00:44 +0000287 name: the field name, if specified; otherwise None
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000288
Guido van Rossum7aee3841996-03-07 18:00:44 +0000289 filename: the filename, if specified; otherwise None; this is the
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000290 client side filename, *not* the file name on which it is
291 stored (that's a temporary file you don't deal with)
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000292
Guido van Rossum7aee3841996-03-07 18:00:44 +0000293 value: the value as a *string*; for file uploads, this
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000294 transparently reads the file every time you request the value
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000295 and returns *bytes*
Guido van Rossum7aee3841996-03-07 18:00:44 +0000296
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000297 file: the file(-like) object from which you can read the data *as
298 bytes* ; None if the data is stored a simple string
Guido van Rossum7aee3841996-03-07 18:00:44 +0000299
300 type: the content-type, or None if not specified
301
302 type_options: dictionary of options specified on the content-type
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000303 line
Guido van Rossum7aee3841996-03-07 18:00:44 +0000304
305 disposition: content-disposition, or None if not specified
306
307 disposition_options: dictionary of corresponding options
308
Barry Warsaw596097e2008-06-12 02:38:51 +0000309 headers: a dictionary(-like) object (sometimes email.message.Message or a
Armin Rigo3a703b62005-09-19 09:11:04 +0000310 subclass thereof) containing *all* headers
Guido van Rossum7aee3841996-03-07 18:00:44 +0000311
312 The class is subclassable, mostly for the purpose of overriding
313 the make_file() method, which is called internally to come up with
314 a file open for reading and writing. This makes it possible to
315 override the default choice of storing all files in a temporary
316 directory and unlinking them as soon as they have been opened.
317
318 """
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000319 def __init__(self, fp=None, headers=None, outerboundary=b'',
320 environ=os.environ, keep_blank_values=0, strict_parsing=0,
matthewbelisle-wf20914482018-10-19 05:52:59 -0500321 limit=None, encoding='utf-8', errors='replace',
Adam Goldschmidtfcbe0cb2021-02-15 00:41:57 +0200322 max_num_fields=None, separator='&'):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000323 """Constructor. Read multipart/* until last part.
Guido van Rossum7aee3841996-03-07 18:00:44 +0000324
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000325 Arguments, all optional:
Guido van Rossum7aee3841996-03-07 18:00:44 +0000326
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000327 fp : file pointer; default: sys.stdin.buffer
Guido van Rossumb1b4f941998-05-08 19:55:51 +0000328 (not used when the request method is GET)
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000329 Can be :
330 1. a TextIOWrapper object
331 2. an object whose read() and readline() methods return bytes
Guido van Rossum7aee3841996-03-07 18:00:44 +0000332
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000333 headers : header dictionary-like object; default:
334 taken from environ as per CGI spec
Guido van Rossum7aee3841996-03-07 18:00:44 +0000335
Guido van Rossum773ab271996-07-23 03:46:24 +0000336 outerboundary : terminating multipart boundary
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000337 (for internal use only)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000338
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000339 environ : environment dictionary; default: os.environ
Guido van Rossum773ab271996-07-23 03:46:24 +0000340
341 keep_blank_values: flag indicating whether blank values in
Senthil Kumaran30e86a42010-08-09 20:01:35 +0000342 percent-encoded forms should be treated as blank strings.
Tim Peters88869f92001-01-14 23:36:06 +0000343 A true value indicates that blanks should be retained as
Guido van Rossum773ab271996-07-23 03:46:24 +0000344 blank strings. The default false value indicates that
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000345 blank values are to be ignored and treated as if they were
346 not included.
Guido van Rossum773ab271996-07-23 03:46:24 +0000347
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000348 strict_parsing: flag indicating what to do with parsing errors.
349 If false (the default), errors are silently ignored.
350 If true, errors raise a ValueError exception.
Guido van Rossume08c04c1996-11-11 19:29:11 +0000351
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000352 limit : used internally to read parts of multipart/form-data forms,
353 to exit from the reading loop when reached. It is the difference
354 between the form content-length and the number of bytes already
355 read
356
357 encoding, errors : the encoding and error handler used to decode the
358 binary stream to strings. Must be the same as the charset defined
359 for the page sending the form (content-type : meta http-equiv or
360 header)
361
matthewbelisle-wf20914482018-10-19 05:52:59 -0500362 max_num_fields: int. If set, then __init__ throws a ValueError
363 if there are more than n fields read by parse_qsl().
364
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000365 """
366 method = 'GET'
367 self.keep_blank_values = keep_blank_values
368 self.strict_parsing = strict_parsing
matthewbelisle-wf20914482018-10-19 05:52:59 -0500369 self.max_num_fields = max_num_fields
Adam Goldschmidtfcbe0cb2021-02-15 00:41:57 +0200370 self.separator = separator
Raymond Hettinger54f02222002-06-01 14:18:47 +0000371 if 'REQUEST_METHOD' in environ:
Eric S. Raymond7e9b4f52001-02-09 09:59:10 +0000372 method = environ['REQUEST_METHOD'].upper()
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000373 self.qs_on_post = None
Guido van Rossum01852831998-06-25 02:40:17 +0000374 if method == 'GET' or method == 'HEAD':
Raymond Hettinger54f02222002-06-01 14:18:47 +0000375 if 'QUERY_STRING' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000376 qs = environ['QUERY_STRING']
377 elif sys.argv[1:]:
378 qs = sys.argv[1]
379 else:
380 qs = ""
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000381 qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
382 fp = BytesIO(qs)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000383 if headers is None:
384 headers = {'content-type':
385 "application/x-www-form-urlencoded"}
386 if headers is None:
Guido van Rossumcff311a1998-06-11 14:06:59 +0000387 headers = {}
388 if method == 'POST':
389 # Set default content-type for POST to what's traditional
390 headers['content-type'] = "application/x-www-form-urlencoded"
Raymond Hettinger54f02222002-06-01 14:18:47 +0000391 if 'CONTENT_TYPE' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000392 headers['content-type'] = environ['CONTENT_TYPE']
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000393 if 'QUERY_STRING' in environ:
394 self.qs_on_post = environ['QUERY_STRING']
Raymond Hettinger54f02222002-06-01 14:18:47 +0000395 if 'CONTENT_LENGTH' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000396 headers['content-length'] = environ['CONTENT_LENGTH']
Senthil Kumaranb4cbb922014-01-11 22:20:16 -0800397 else:
398 if not (isinstance(headers, (Mapping, Message))):
399 raise TypeError("headers must be mapping or an instance of "
400 "email.message.Message")
401 self.headers = headers
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000402 if fp is None:
403 self.fp = sys.stdin.buffer
404 # self.fp.read() must return bytes
405 elif isinstance(fp, TextIOWrapper):
406 self.fp = fp.buffer
407 else:
Senthil Kumaranb4cbb922014-01-11 22:20:16 -0800408 if not (hasattr(fp, 'read') and hasattr(fp, 'readline')):
409 raise TypeError("fp must be file pointer")
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000410 self.fp = fp
411
412 self.encoding = encoding
413 self.errors = errors
414
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000415 if not isinstance(outerboundary, bytes):
416 raise TypeError('outerboundary must be bytes, not %s'
417 % type(outerboundary).__name__)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000418 self.outerboundary = outerboundary
Guido van Rossum7aee3841996-03-07 18:00:44 +0000419
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000420 self.bytes_read = 0
421 self.limit = limit
422
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000423 # Process content-disposition header
424 cdisp, pdict = "", {}
Raymond Hettinger54f02222002-06-01 14:18:47 +0000425 if 'content-disposition' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000426 cdisp, pdict = parse_header(self.headers['content-disposition'])
427 self.disposition = cdisp
428 self.disposition_options = pdict
429 self.name = None
Raymond Hettinger54f02222002-06-01 14:18:47 +0000430 if 'name' in pdict:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000431 self.name = pdict['name']
432 self.filename = None
Raymond Hettinger54f02222002-06-01 14:18:47 +0000433 if 'filename' in pdict:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000434 self.filename = pdict['filename']
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000435 self._binary_file = self.filename is not None
Guido van Rossum7aee3841996-03-07 18:00:44 +0000436
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000437 # Process content-type header
Barry Warsaw302331a1999-01-08 17:42:03 +0000438 #
439 # Honor any existing content-type header. But if there is no
440 # content-type header, use some sensible defaults. Assume
441 # outerboundary is "" at the outer level, but something non-false
442 # inside a multi-part. The default for an inner part is text/plain,
443 # but for an outer part it should be urlencoded. This should catch
444 # bogus clients which erroneously forget to include a content-type
445 # header.
446 #
447 # See below for what we do if there does exist a content-type header,
448 # but it happens to be something we don't understand.
Raymond Hettinger54f02222002-06-01 14:18:47 +0000449 if 'content-type' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000450 ctype, pdict = parse_header(self.headers['content-type'])
Guido van Rossumce900de1999-06-02 18:44:22 +0000451 elif self.outerboundary or method != 'POST':
Barry Warsaw302331a1999-01-08 17:42:03 +0000452 ctype, pdict = "text/plain", {}
453 else:
454 ctype, pdict = 'application/x-www-form-urlencoded', {}
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000455 self.type = ctype
456 self.type_options = pdict
Raymond Hettinger54f02222002-06-01 14:18:47 +0000457 if 'boundary' in pdict:
Amber Brown545c9552018-05-14 18:11:55 -0400458 self.innerboundary = pdict['boundary'].encode(self.encoding,
459 self.errors)
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000460 else:
461 self.innerboundary = b""
462
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000463 clen = -1
Raymond Hettinger54f02222002-06-01 14:18:47 +0000464 if 'content-length' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000465 try:
Eric S. Raymond7e9b4f52001-02-09 09:59:10 +0000466 clen = int(self.headers['content-length'])
Skip Montanarodb5d1442002-03-23 05:50:17 +0000467 except ValueError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000468 pass
469 if maxlen and clen > maxlen:
Collin Winterce36ad82007-08-30 01:19:48 +0000470 raise ValueError('Maximum content length exceeded')
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000471 self.length = clen
Pierre Quentel2d7caca2019-09-11 13:05:53 +0200472 if self.limit is None and clen >= 0:
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000473 self.limit = clen
Guido van Rossum7aee3841996-03-07 18:00:44 +0000474
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000475 self.list = self.file = None
476 self.done = 0
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000477 if ctype == 'application/x-www-form-urlencoded':
478 self.read_urlencoded()
479 elif ctype[:10] == 'multipart/':
Guido van Rossumf5745001998-10-20 14:43:02 +0000480 self.read_multi(environ, keep_blank_values, strict_parsing)
Barry Warsaw302331a1999-01-08 17:42:03 +0000481 else:
Guido van Rossum60a3bd81999-06-11 18:26:09 +0000482 self.read_single()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000483
Brett Cannonf79126f2013-08-23 15:15:48 -0400484 def __del__(self):
485 try:
486 self.file.close()
487 except AttributeError:
488 pass
489
Berker Peksagbf5e9602015-02-06 10:21:37 +0200490 def __enter__(self):
491 return self
492
493 def __exit__(self, *args):
494 self.file.close()
495
Guido van Rossum7aee3841996-03-07 18:00:44 +0000496 def __repr__(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000497 """Return a printable representation."""
Walter Dörwald70a6b492004-02-12 17:35:32 +0000498 return "FieldStorage(%r, %r, %r)" % (
499 self.name, self.filename, self.value)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000500
Guido van Rossum4061cbe2002-09-11 18:20:34 +0000501 def __iter__(self):
502 return iter(self.keys())
503
Guido van Rossum7aee3841996-03-07 18:00:44 +0000504 def __getattr__(self, name):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000505 if name != 'value':
Collin Winterce36ad82007-08-30 01:19:48 +0000506 raise AttributeError(name)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000507 if self.file:
508 self.file.seek(0)
509 value = self.file.read()
510 self.file.seek(0)
511 elif self.list is not None:
512 value = self.list
513 else:
514 value = None
515 return value
Guido van Rossum7aee3841996-03-07 18:00:44 +0000516
517 def __getitem__(self, key):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000518 """Dictionary style indexing."""
519 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000520 raise TypeError("not indexable")
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000521 found = []
522 for item in self.list:
523 if item.name == key: found.append(item)
524 if not found:
Collin Winterce36ad82007-08-30 01:19:48 +0000525 raise KeyError(key)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000526 if len(found) == 1:
527 return found[0]
528 else:
529 return found
Guido van Rossum7aee3841996-03-07 18:00:44 +0000530
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000531 def getvalue(self, key, default=None):
532 """Dictionary style get() method, including 'value' lookup."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000533 if key in self:
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000534 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000535 if isinstance(value, list):
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000536 return [x.value for x in value]
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000537 else:
538 return value.value
539 else:
540 return default
541
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000542 def getfirst(self, key, default=None):
543 """ Return the first value received."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000544 if key in self:
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000545 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000546 if isinstance(value, list):
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000547 return value[0].value
548 else:
549 return value.value
550 else:
551 return default
552
553 def getlist(self, key):
554 """ Return list of received values."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000555 if key in self:
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000556 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000557 if isinstance(value, list):
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000558 return [x.value for x in value]
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000559 else:
560 return [value.value]
561 else:
562 return []
563
Guido van Rossum7aee3841996-03-07 18:00:44 +0000564 def keys(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000565 """Dictionary style keys() method."""
566 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000567 raise TypeError("not indexable")
Thomas Wouters8ce81f72007-09-20 18:22:40 +0000568 return list(set(item.name for item in self.list))
Guido van Rossum7aee3841996-03-07 18:00:44 +0000569
Raymond Hettinger54f02222002-06-01 14:18:47 +0000570 def __contains__(self, key):
571 """Dictionary style __contains__ method."""
572 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000573 raise TypeError("not indexable")
Thomas Wouters8ce81f72007-09-20 18:22:40 +0000574 return any(item.name == key for item in self.list)
Raymond Hettinger54f02222002-06-01 14:18:47 +0000575
Guido van Rossum88b85d41997-01-11 19:21:33 +0000576 def __len__(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000577 """Dictionary style len(x) support."""
578 return len(self.keys())
Guido van Rossum88b85d41997-01-11 19:21:33 +0000579
def __bool__(self):
    """Truth value: True when self.list is non-empty.

    Raises TypeError when self.list is None.
    """
    fields = self.list
    if fields is None:
        raise TypeError("Cannot be converted to bool.")
    return True if fields else False
584
def read_urlencoded(self):
    """Internal: read data in query string format.

    Reads self.length bytes from self.fp, decodes them, appends
    self.qs_on_post when set, and stores one MiniFieldStorage per parsed
    (key, value) pair in self.list.  Raises ValueError when self.fp does
    not return bytes.
    """
    raw = self.fp.read(self.length)
    if not isinstance(raw, bytes):
        raise ValueError("%s should return bytes, got %s"
                         % (self.fp, type(raw).__name__))
    text = raw.decode(self.encoding, self.errors)
    if self.qs_on_post:
        text = text + '&' + self.qs_on_post
    pairs = urllib.parse.parse_qsl(
        text, self.keep_blank_values, self.strict_parsing,
        encoding=self.encoding, errors=self.errors,
        max_num_fields=self.max_num_fields, separator=self.separator)
    fields = []
    for name, value in pairs:
        fields.append(MiniFieldStorage(name, value))
    self.list = fields
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000600
Guido van Rossum030d2ec1998-12-09 22:16:46 +0000601 FieldStorageClass = None
602
def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart.

    Validates the inner boundary, seeds self.list with any fields parsed
    from self.qs_on_post, consumes input up to the first inner boundary
    line, then repeatedly reads a block of part headers from self.fp and
    constructs a nested part from it.  The loop ends when no header data
    is left, when a part reports it is done, or when a positive
    self.length has been consumed.  skip_lines() is called last.

    environ, keep_blank_values and strict_parsing are forwarded unchanged
    to each nested part's constructor.

    Raises ValueError if the inner boundary is invalid, if self.fp does
    not return bytes for the first line, or if the max_num_fields budget
    is exceeded.
    """
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
    self.list = []
    if self.qs_on_post:
        # Fields from the query string are added ahead of the body parts.
        query = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors,
            max_num_fields=self.max_num_fields, separator=self.separator)
        self.list.extend(MiniFieldStorage(key, value) for key, value in query)

    # Nested parts use FieldStorageClass when set, else this instance's class.
    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline() # bytes
    if not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s" \
                         % (self.fp, type(first_line).__name__))
    self.bytes_read += len(first_line)

    # Ensure that we consume the file until we've hit our inner boundary
    # (an empty read means EOF and also ends the loop).
    while (first_line.strip() != (b"--" + self.innerboundary) and
            first_line):
        first_line = self.fp.readline()
        self.bytes_read += len(first_line)

    # Propagate max_num_fields into the sub class appropriately:
    # fields already collected from qs_on_post count against the budget.
    max_num_fields = self.max_num_fields
    if max_num_fields is not None:
        max_num_fields -= len(self.list)

    while True:
        parser = FeedParser()
        hdr_text = b""
        # Accumulate header lines up to and including the first blank
        # (or empty, i.e. EOF) line.
        while True:
            data = self.fp.readline()
            hdr_text += data
            if not data.strip():
                break
        if not hdr_text:
            # Nothing at all was read: no further parts.
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()

        # Some clients add Content-Length for part headers, ignore them
        if 'content-length' in headers:
            del headers['content-length']

        # Remaining byte allowance for the nested part, if a limit is set.
        limit = None if self.limit is None \
            else self.limit - self.bytes_read
        part = klass(self.fp, headers, ib, environ, keep_blank_values,
                     strict_parsing, limit,
                     self.encoding, self.errors, max_num_fields, self.separator)

        if max_num_fields is not None:
            # Charge one field for the part itself plus any fields it holds.
            max_num_fields -= 1
            if part.list:
                max_num_fields -= len(part.list)
            if max_num_fields < 0:
                raise ValueError('Max number of fields exceeded')

        self.bytes_read += part.bytes_read
        self.list.append(part)
        # The length test only applies when self.length is positive.
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000671
def read_single(self):
    """Internal: read an atomic part.

    A negative self.length selects line-oriented reading; otherwise the
    data is read via read_binary() followed by skip_lines().  self.file
    is rewound to offset 0 afterwards in both cases.
    """
    if self.length < 0:
        self.read_lines()
    else:
        self.read_binary()
        self.skip_lines()
    self.file.seek(0)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000680
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000681 bufsize = 8*1024 # I/O buffering size for copy to file
Guido van Rossum7aee3841996-03-07 18:00:44 +0000682
def read_binary(self):
    """Internal: read self.length bytes of data from self.fp into self.file.

    The data is copied in chunks of at most self.bufsize bytes into a
    fresh file obtained from make_file().  For binary fields the bytes
    are stored as-is.  For non-binary fields make_file() provides a
    text-mode file, so the bytes are decoded with self.encoding and
    self.errors before being written; writing raw bytes there would
    raise TypeError.

    self.bytes_read is advanced by the number of bytes consumed and
    self.done is set to -1 if the stream ends before self.length bytes
    were read.

    Raises ValueError if self.fp.read() does not return bytes.
    """
    self.file = self.make_file()
    todo = self.length
    decoder = None
    if not self._binary_file:
        # An incremental decoder keeps multi-byte characters intact when
        # they happen to straddle a chunk boundary.
        import codecs
        decoder = codecs.getincrementaldecoder(self.encoding)(self.errors)
    while todo > 0:
        data = self.fp.read(min(todo, self.bufsize))  # bytes
        if not isinstance(data, bytes):
            raise ValueError("%s should return bytes, got %s"
                             % (self.fp, type(data).__name__))
        self.bytes_read += len(data)
        if not data:
            # Premature end of input.
            self.done = -1
            break
        if decoder is None:
            self.file.write(data)
        else:
            self.file.write(decoder.decode(data))
        todo -= len(data)
    if decoder is not None:
        # Flush whatever the decoder is still holding (e.g. a truncated
        # multi-byte sequence), honouring the configured error handler.
        tail = decoder.decode(b"", final=True)
        if tail:
            self.file.write(tail)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000699
def read_lines(self):
    """Internal: read lines until EOF or outerboundary.

    Starts with an in-memory buffer: bytes for binary (file) fields,
    text for other fields.  The same buffer is bound both to self.file
    and to the private handle.
    """
    buffer_type = BytesIO if self._binary_file else StringIO
    self.file = self.__file = buffer_type()
    if self.outerboundary:
        reader = self.read_lines_to_outerboundary
    else:
        reader = self.read_lines_to_eof
    reader()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000710
def __write(self, line):
    """Append *line* (always bytes, never str) to self.file.

    While the in-memory buffer is still in use and this write would take
    it past 1000 bytes/characters, its contents are first moved into a
    file from make_file() and the in-memory handle is dropped.  Binary
    fields store the bytes as-is; other fields store the decoded text.
    """
    spool = self.__file
    if spool is not None and spool.tell() + len(line) > 1000:
        self.file = self.make_file()
        buffered = spool.getvalue()
        self.file.write(buffered)
        self.__file = None
    if self._binary_file:
        # keep bytes
        payload = line
    else:
        # decode to string
        payload = line.decode(self.encoding, self.errors)
    self.file.write(payload)
Guido van Rossum52b8c292001-06-29 13:06:06 +0000725
def read_lines_to_eof(self):
    """Internal: read lines until EOF.

    Each line (read in pieces of at most 1<<16 bytes) is counted in
    self.bytes_read and written out; an empty read sets self.done to -1
    and stops.
    """
    max_chunk = 1 << 16
    while True:
        chunk = self.fp.readline(max_chunk)  # bytes
        self.bytes_read += len(chunk)
        if chunk:
            self.__write(chunk)
            continue
        self.done = -1
        return
Guido van Rossum7aee3841996-03-07 18:00:44 +0000735
def read_lines_to_outerboundary(self):
    """Internal: read lines until outerboundary.

    Data is read as bytes: boundaries and line ends must be converted
    to bytes for comparisons.

    Each line's terminator is held back in ``delim`` and only written
    in front of the *next* line, so the line ending that immediately
    precedes a boundary line is never written.  Reading stops at the
    outer boundary, at the closing boundary (self.done = 1), at EOF
    (self.done = -1), or once self.limit bytes have been read here.
    """
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    delim = b""  # terminator stripped from the previous line, not yet written
    last_line_lfend = True  # did the previous read end with a line feed?
    _read = 0  # bytes consumed by this call, compared against self.limit
    while 1:

        if self.limit is not None and 0 <= self.limit <= _read:
            break
        line = self.fp.readline(1<<16) # bytes
        self.bytes_read += len(line)
        _read += len(line)
        if not line:
            self.done = -1
            break
        if delim == b"\r":
            # The previous read ended on a lone "\r": glue it back so a
            # "\r\n" split across two reads is recognised below.
            line = delim + line
            delim = b""
        # Only a line that begins right after a line feed can be a boundary.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.rstrip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        odelim = delim
        if line.endswith(b"\r\n"):
            delim = b"\r\n"
            line = line[:-2]
            last_line_lfend = True
        elif line.endswith(b"\n"):
            delim = b"\n"
            line = line[:-1]
            last_line_lfend = True
        elif line.endswith(b"\r"):
            # We may interrupt \r\n sequences if they span the 2**16
            # byte boundary
            delim = b"\r"
            line = line[:-1]
            last_line_lfend = False
        else:
            delim = b""
            last_line_lfend = False
        # Write the previous line's terminator followed by this line's text.
        self.__write(odelim + line)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000785
def skip_lines(self):
    """Internal: skip lines until outer boundary if defined.

    Does nothing when there is no outer boundary or when self.done is
    already set.  Otherwise lines are read from self.fp (and counted in
    self.bytes_read) until one of:

    - the outer boundary line ``--<outerboundary>`` is consumed;
    - the closing boundary ``--<outerboundary>--`` is consumed, in which
      case self.done is set to 1;
    - EOF is reached, in which case self.done is set to -1.
    """
    if not self.outerboundary or self.done:
        return
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    last_line_lfend = True
    while True:
        line = self.fp.readline(1<<16)
        self.bytes_read += len(line)
        if not line:
            self.done = -1
            break
        # A boundary line *starts* with "--" and normally ends with a
        # line terminator, so the test must be startswith(): an
        # endswith(b"--") test can never match "--boundary\r\n" and
        # would skip straight past the next part to EOF.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.rstrip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        # A boundary is only valid at the start of a line, i.e. when the
        # previous read ended with a line feed.
        last_line_lfend = line.endswith(b'\n')
Guido van Rossum7aee3841996-03-07 18:00:44 +0000807
def make_file(self):
    """Overridable: return a readable & writable file.

    The returned file is used in three steps:
    - data is written to it
    - seek(0)
    - data is read from it

    Binary (file) fields get a file opened in binary mode; all other
    fields get a text-mode file using self.encoding and '\\n' newlines.

    This implementation hands back an anonymous temporary file from
    tempfile.TemporaryFile().  (On Unix the usual trick applies: the
    file is unlinked right away, so it stays usable here, cannot be
    opened by another process, and disappears when it is closed or the
    process terminates.)

    For a more permanent file, derive a class that overrides this
    method.  For a visible temporary file that is still removed when
    the script terminates, consider a __del__ method in the derived
    class that unlinks the temporary files it created.

    """
    if not self._binary_file:
        return tempfile.TemporaryFile(
            "w+", encoding=self.encoding, newline='\n')
    return tempfile.TemporaryFile("wb+")
Tim Peters88869f92001-01-14 23:36:06 +0000837
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000838
Guido van Rossum72755611996-03-06 07:20:06 +0000839# Test/debug code
840# ===============
Guido van Rossum9a22de11995-01-12 12:29:47 +0000841
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Write minimal HTTP headers and dump all information provided to
    the script in HTML form.

    The dump is produced twice: once as-is (followed by a deliberately
    provoked exception to exercise print_exception()), and once more
    after the module-level ``maxlen`` has been set to 50.

    *environ* is the mapping passed to print_environ(); it defaults to
    os.environ.

    """
    print("Content-type: text/html")
    print()
    # From here on anything written to stderr goes to stdout as well.
    sys.stderr = sys.stdout
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # The exec'd text is not valid Python, so calling g() raises on
        # purpose; the resulting exception is rendered by the handler below.
        def f():
            exec("testing print_exception() -- <I>italics?</I>")
        def g(f=f):
            f()
        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        # Deliberately catch everything: any failure is reported in the page.
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    # NOTE(review): maxlen is a module global defined outside this block;
    # presumably it caps the accepted request size -- confirm.
    global maxlen
    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
Guido van Rossumad164711997-05-13 19:03:23 +0000880
def print_exception(type=None, value=None, tb=None, limit=None):
    """Print an HTML-escaped traceback for an exception.

    When *type* is None the exception currently being handled (as given
    by sys.exc_info()) is used.  *limit* is passed to
    traceback.format_tb().  The final line of the report is set in bold.
    """
    import traceback
    if type is None:
        type, value, tb = sys.exc_info()
    print()
    print("<H3>Traceback (most recent call last):</H3>")
    stack_lines = traceback.format_tb(tb, limit)
    error_lines = traceback.format_exception_only(type, value)
    report = stack_lines + error_lines
    leading = html.escape("".join(report[:-1]))
    final = html.escape(report[-1])
    print("<PRE>%s<B>%s</B></PRE>" % (leading, final))
    del tb
Guido van Rossum9a22de11995-01-12 12:29:47 +0000894
def print_environ(environ=os.environ):
    """Dump the shell environment as HTML.

    Variables are listed in sorted order as an HTML definition list,
    with names and values HTML-escaped.
    """
    names = sorted(environ.keys())
    print()
    print("<H3>Shell Environment:</H3>")
    print("<DL>")
    for name in names:
        term = html.escape(name)
        definition = html.escape(environ[name])
        print("<DT>", term, "<DD>", definition)
    print("</DL>")
    print()
Guido van Rossum72755611996-03-06 07:20:06 +0000905
def print_form(form):
    """Dump the contents of a form as HTML.

    For every key (in sorted order) the key, the type of its value and
    the repr of its value are printed, all HTML-escaped.  A note is
    printed when the form has no keys.
    """
    names = sorted(form.keys())
    print()
    print("<H3>Form Contents:</H3>")
    if not names:
        print("<P>No form fields.")
    print("<DL>")
    for name in names:
        print("<DT>%s:" % html.escape(name), end=' ')
        entry = form[name]
        type_text = html.escape(repr(type(entry)))
        print("<i>%s</i>" % type_text)
        value_text = html.escape(repr(entry))
        print("<DD>%s" % value_text)
    print("</DL>")
    print()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000921
def print_directory():
    """Dump the current directory as HTML.

    Prints the HTML-escaped working directory, or the HTML-escaped
    OSError message when it cannot be determined.
    """
    print()
    print("<H3>Current Working Directory:</H3>")
    try:
        cwd = os.getcwd()
    except OSError as exc:
        report = "OSError: " + html.escape(str(exc))
    else:
        report = html.escape(cwd)
    print(report)
    print()
Guido van Rossum9a22de11995-01-12 12:29:47 +0000933
def print_arguments():
    """Dump the command line arguments (sys.argv) as HTML.

    The list is HTML-escaped before printing, like the output of the
    other dump helpers in this module: the arguments end up inside an
    HTML page, so markup characters in them must not be emitted raw.
    """
    print()
    print("<H3>Command Line Arguments:</H3>")
    print()
    print(html.escape(repr(sys.argv)))
    print()
Guido van Rossuma8738a51996-03-14 21:30:28 +0000940
def print_environ_usage():
    """Dump a list of environment variables used by CGI as HTML.

    The output is a fixed block of HTML: one list of CGI variable names
    followed by a list of common HTTP_* variable names.
    """
    usage_html = """
<H3>These environment variables could have been set:</H3>
<UL>
<LI>AUTH_TYPE
<LI>CONTENT_LENGTH
<LI>CONTENT_TYPE
<LI>DATE_GMT
<LI>DATE_LOCAL
<LI>DOCUMENT_NAME
<LI>DOCUMENT_ROOT
<LI>DOCUMENT_URI
<LI>GATEWAY_INTERFACE
<LI>LAST_MODIFIED
<LI>PATH
<LI>PATH_INFO
<LI>PATH_TRANSLATED
<LI>QUERY_STRING
<LI>REMOTE_ADDR
<LI>REMOTE_HOST
<LI>REMOTE_IDENT
<LI>REMOTE_USER
<LI>REQUEST_METHOD
<LI>SCRIPT_NAME
<LI>SERVER_NAME
<LI>SERVER_PORT
<LI>SERVER_PROTOCOL
<LI>SERVER_ROOT
<LI>SERVER_SOFTWARE
</UL>
In addition, HTTP headers sent by the server may be passed in the
environment as well. Here are some common variable names:
<UL>
<LI>HTTP_ACCEPT
<LI>HTTP_CONNECTION
<LI>HTTP_HOST
<LI>HTTP_PRAGMA
<LI>HTTP_REFERER
<LI>HTTP_USER_AGENT
</UL>
"""
    print(usage_html)
Guido van Rossum9a22de11995-01-12 12:29:47 +0000983
Guido van Rossum9a22de11995-01-12 12:29:47 +0000984
Guido van Rossum72755611996-03-06 07:20:06 +0000985# Utilities
986# =========
Guido van Rossum9a22de11995-01-12 12:29:47 +0000987
def valid_boundary(s):
    """Return a match object if *s* is an acceptable multipart boundary.

    A boundary is 1 to 201 printable ASCII characters (space through
    '~') and must not end with a space.  *s* may be str or bytes.  None
    is returned for anything else.

    The whole string has to match: with a '$' anchor a boundary carrying
    a trailing newline (e.g. "abc\\n") would be accepted, because '$'
    also matches just before a final newline.
    """
    import re
    if isinstance(s, bytes):
        pattern = b"[ -~]{0,200}[!-~]"
    else:
        pattern = "[ -~]{0,200}[!-~]"
    return re.fullmatch(pattern, s)
Guido van Rossum9a22de11995-01-12 12:29:47 +0000995
Guido van Rossum72755611996-03-06 07:20:06 +0000996# Invoke mainline
997# ===============
998
999# Call test() when this file is run as a script (not imported as a module)
Tim Peters88869f92001-01-14 23:36:06 +00001000if __name__ == '__main__':
Guido van Rossum7aee3841996-03-07 18:00:44 +00001001 test()