blob: 6cb8cf28bd66457ef05a8cc19bb53b8f2afbb780 [file] [log] [blame]
Benjamin Peterson8c703a02010-03-11 22:05:58 +00001#! /usr/local/bin/python
Guido van Rossum1c9daa81995-09-18 21:52:37 +00002
Guido van Rossum467d7232001-02-13 13:13:33 +00003# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4# intentionally NOT "/usr/bin/env python". On many systems
5# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6# scripts, and /usr/local/bin is the default directory where Python is
7# installed, so /usr/bin/env would be unable to find python. Granted,
8# binary installations by Linux vendors often install Python in
9# /usr/bin. So let those vendors patch cgi.py to match their choice
10# of installation.
11
Guido van Rossum72755611996-03-06 07:20:06 +000012"""Support module for CGI (Common Gateway Interface) scripts.
Guido van Rossum1c9daa81995-09-18 21:52:37 +000013
Guido van Rossum7aee3841996-03-07 18:00:44 +000014This module defines a number of utilities for use by CGI scripts
15written in Python.
Guido van Rossum72755611996-03-06 07:20:06 +000016"""
17
Guido van Rossum98d9fd32000-02-28 15:12:25 +000018# History
19# -------
Tim Peters88869f92001-01-14 23:36:06 +000020#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000021# Michael McLay started this module. Steve Majewski changed the
22# interface to SvFormContentDict and FormContentDict. The multipart
23# parsing was inspired by code submitted by Andreas Paepcke. Guido van
24# Rossum rewrote, reformatted and documented the module and is currently
25# responsible for its maintenance.
Tim Peters88869f92001-01-14 23:36:06 +000026#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000027
Guido van Rossum52b8c292001-06-29 13:06:06 +000028__version__ = "2.6"
Guido van Rossum0147db01996-03-09 03:16:04 +000029
Guido van Rossum72755611996-03-06 07:20:06 +000030
31# Imports
32# =======
33
Victor Stinner5c23b8e2011-01-14 13:05:21 +000034from io import StringIO, BytesIO, TextIOWrapper
Serhiy Storchaka2e576f52017-04-24 09:05:00 +030035from collections.abc import Mapping
Guido van Rossum72755611996-03-06 07:20:06 +000036import sys
37import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +000038import urllib.parse
Victor Stinner5c23b8e2011-01-14 13:05:21 +000039from email.parser import FeedParser
Senthil Kumaranb4cbb922014-01-11 22:20:16 -080040from email.message import Message
Georg Brandl1f7fffb2010-10-15 15:57:45 +000041import html
Victor Stinner5c23b8e2011-01-14 13:05:21 +000042import locale
43import tempfile
Inada Naokie52ab422021-04-29 11:36:04 +090044import warnings
Guido van Rossum72755611996-03-06 07:20:06 +000045
# Names exported by "from cgi import *".
__all__ = [
    "MiniFieldStorage",
    "FieldStorage",
    "parse",
    "parse_multipart",
    "parse_header",
    "test",
    "print_exception",
    "print_environ",
    "print_form",
    "print_directory",
    "print_arguments",
    "print_environ_usage",
]
Guido van Rossumc204c701996-09-05 19:07:11 +000050
51# Logging support
52# ===============
53
# Filename to log to; logging to a file is only attempted when non-empty.
logfile = ""
# File object to log to; None until a log file has been opened.
logfp = None
Guido van Rossumc204c701996-09-05 19:07:11 +000056
def initlog(*allargs):
    """Write a log message, if there is a log file.

    Callers should always go through the module-level name log() rather
    than calling initlog() directly.  log is rebound as logging state
    changes: it starts out as initlog, becomes dolog once a log file is
    available, or nolog when logging is disabled.

    The first argument is a format string and the remaining arguments
    (if any) are applied to it with the % operator, so
        log("%s: %s", "a", "b")
    writes "a: b" followed by a newline.

    If the global logfp is not None, it is the file object that receives
    the log data.  Otherwise, if the global logfile is a non-empty
    string, it names a file that is opened in append mode (the file
    should be world writable).  If the file can't be opened, logging is
    silently disabled, since there is no safe place to report the error.

    Emits a DeprecationWarning on every call.
    """
    global log, logfile, logfp
    warnings.warn("cgi.log() is deprecated as of 3.10. Use logging instead",
                  DeprecationWarning, stacklevel=2)
    if logfile and not logfp:
        try:
            logfp = open(logfile, "a", encoding="locale")
        except OSError:
            # Deliberately best-effort: fall through with logfp unset.
            pass
    # Rebind log so later calls skip this setup step entirely.
    log = dolog if logfp else nolog
    log(*allargs)
Guido van Rossumc204c701996-09-05 19:07:11 +000093
def dolog(fmt, *args):
    """Format fmt with args and append the line to the global logfp.

    See initlog() for the calling convention.
    """
    message = fmt % args
    logfp.write(message + "\n")
97
def nolog(*allargs):
    """Accept and discard a log message; bound to log when logging is off."""
    return None
101
def closelog():
    """Close the log file and reset the module's logging state.

    Clears logfile, closes and forgets logfp if one is open, and rebinds
    log to initlog so the next log() call starts from scratch.
    """
    global log, logfile, logfp
    logfile = ''
    if logfp:
        logfp.close()
        logfp = None
    log = initlog
110
# The current logging function; rebound by initlog() and closelog().
log = initlog
Guido van Rossumc204c701996-09-05 19:07:11 +0000112
113
Guido van Rossum72755611996-03-06 07:20:06 +0000114# Parsing functions
115# =================
116
# Upper bound on the content length accepted for a request body.
# A value of 0 disables the check (unlimited input).
maxlen = 0
120
def parse(fp=None, environ=os.environ, keep_blank_values=0,
          strict_parsing=0, separator='&'):
    """Parse a query in the environment or from a file (default stdin)

        Arguments, all optional:

        fp              : file pointer; default: sys.stdin.buffer

        environ         : environment dictionary; default: os.environ

        keep_blank_values: flag indicating whether blank values in
            percent-encoded forms should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        separator: str. The symbol to use for separating the query arguments.
            Defaults to &.  It is also used to join a POSTed urlencoded
            body with the query string before parsing.

        Returns the dictionary produced by urllib.parse.parse_qs(), or by
        parse_multipart() for a multipart/form-data POST.

        Raises ValueError if maxlen is set and CONTENT_LENGTH exceeds it.

        Side effects: environ['REQUEST_METHOD'] is set to 'GET' when
        missing, and environ['QUERY_STRING'] is overwritten with the
        combined query string in the POST case and in the no-query case.
    """
    if fp is None:
        fp = sys.stdin

    # Field keys and values (except for files) are returned as strings, so
    # remember the stream's encoding before unwrapping it to bytes.
    encoding = getattr(fp, 'encoding', 'latin-1')

    # fp.read() must return bytes
    if isinstance(fp, TextIOWrapper):
        fp = fp.buffer

    if 'REQUEST_METHOD' not in environ:
        environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone

    if environ['REQUEST_METHOD'] == 'POST':
        ctype, pdict = parse_header(environ['CONTENT_TYPE'])
        if ctype == 'multipart/form-data':
            # parse_header() produces str values, while parse_multipart()
            # decodes the boundary from bytes; hand it bytes.
            boundary = pdict.get('boundary')
            if isinstance(boundary, str):
                pdict['boundary'] = boundary.encode('ascii')
            return parse_multipart(fp, pdict, separator=separator)
        if ctype == 'application/x-www-form-urlencoded':
            clength = int(environ['CONTENT_LENGTH'])
            if maxlen and clength > maxlen:
                raise ValueError('Maximum content length exceeded')
            qs = fp.read(clength).decode(encoding)
        else:
            qs = ''                     # Unknown content-type

        if 'QUERY_STRING' in environ:
            extra = environ['QUERY_STRING']
        elif sys.argv[1:]:
            extra = sys.argv[1]
        else:
            extra = None
        if extra is not None:
            if qs:
                # Join with the separator parse_qs() will split on; a
                # hard-coded '&' would glue two fields together whenever
                # a different separator is requested.
                joiner = separator
                if isinstance(joiner, bytes):
                    joiner = joiner.decode('ascii')
                if not isinstance(joiner, str) or not joiner:
                    joiner = '&'    # invalid separator: parse_qs() reports it
                qs += joiner
            qs += extra
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    elif 'QUERY_STRING' in environ:
        qs = environ['QUERY_STRING']
    else:
        qs = sys.argv[1] if sys.argv[1:] else ""
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really

    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding, separator=separator)
Guido van Rossume7808771995-08-07 20:12:09 +0000189
190
def parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator='&'):
    """Parse multipart input.

    Arguments:
    fp   : input file
    pdict: dictionary containing other parameters of content-type header.
        It must contain 'boundary', given either as bytes or as str (the
        form parse_header() produces); an optional 'CONTENT-LENGTH' entry
        is forwarded as the Content-Length header.
    encoding, errors: request encoding and error handler, passed to
        FieldStorage
    separator: query argument separator, passed to FieldStorage

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field. For non-file fields, the value
    is a list of strings.

    Raises KeyError if pdict has no 'boundary' entry.
    """
    # RFC 2046, Section 5.1 : The "multipart" boundary delimiters are always
    # represented as 7bit US-ASCII.
    boundary = pdict['boundary']
    if isinstance(boundary, bytes):
        boundary = boundary.decode('ascii')
    ctype = "multipart/form-data; boundary={}".format(boundary)
    headers = Message()
    headers.set_type(ctype)
    try:
        content_length = pdict['CONTENT-LENGTH']
    except KeyError:
        # The length is optional; without it no Content-Length is set.
        pass
    else:
        headers['Content-Length'] = content_length
    fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors,
                      environ={'REQUEST_METHOD': 'POST'}, separator=separator)
    return {k: fs.getlist(k) for k in fs}
Guido van Rossum9a22de11995-01-12 12:29:47 +0000217
def _parseparam(s):
    """Yield the stripped ';'-separated pieces of s, one at a time.

    s must start with ';' for anything to be produced.  A ';' that falls
    inside a double-quoted section (an odd number of unescaped '"' before
    it) does not end a piece.
    """
    rest = s
    while rest[:1] == ';':
        rest = rest[1:]
        cut = rest.find(';')
        while cut > 0:
            open_quotes = rest.count('"', 0, cut) - rest.count('\\"', 0, cut)
            if open_quotes % 2 == 0:
                break
            # Still inside a quoted string: try the next ';'.
            cut = rest.find(';', cut + 1)
        if cut < 0:
            cut = len(rest)
        yield rest[:cut].strip()
        rest = rest[cut:]
229
def parse_header(line):
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.  Option
    names are lower-cased; a value wrapped in double quotes has the
    quotes removed and its backslash escapes undone.  Pieces without
    '=' are ignored.

    """
    pieces = _parseparam(';' + line)
    main_value = next(pieces)
    options = {}
    for piece in pieces:
        raw_name, equals, raw_value = piece.partition('=')
        if not equals:
            continue
        name = raw_name.strip().lower()
        value = raw_value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        options[name] = value
    return main_value, options
Guido van Rossum72755611996-03-06 07:20:06 +0000249
250
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000251# Classes for field storage
252# =========================
253
class MiniFieldStorage:

    """Like FieldStorage, for use when no file uploads are possible."""

    # Class-level placeholders; only name and value are set per instance.
    file = None
    filename = None
    list = None
    type = None
    type_options = {}
    disposition = None
    disposition_options = {}
    headers = {}

    def __init__(self, name, value):
        """Store the field name and its value on the instance."""
        self.name, self.value = name, value

    def __repr__(self):
        """Return a printable representation showing name and value."""
        template = "MiniFieldStorage(%r, %r)"
        return template % (self.name, self.value)
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000277
278
279class FieldStorage:
280
Guido van Rossum7aee3841996-03-07 18:00:44 +0000281 """Store a sequence of fields, reading multipart/form-data.
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000282
Guido van Rossum7aee3841996-03-07 18:00:44 +0000283 This class provides naming, typing, files stored on disk, and
284 more. At the top level, it is accessible like a dictionary, whose
285 keys are the field names. (Note: None can occur as a field name.)
286 The items are either a Python list (if there's multiple values) or
287 another FieldStorage or MiniFieldStorage object. If it's a single
288 object, it has the following attributes:
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000289
Guido van Rossum7aee3841996-03-07 18:00:44 +0000290 name: the field name, if specified; otherwise None
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000291
Guido van Rossum7aee3841996-03-07 18:00:44 +0000292 filename: the filename, if specified; otherwise None; this is the
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000293 client side filename, *not* the file name on which it is
294 stored (that's a temporary file you don't deal with)
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000295
Guido van Rossum7aee3841996-03-07 18:00:44 +0000296 value: the value as a *string*; for file uploads, this
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000297 transparently reads the file every time you request the value
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000298 and returns *bytes*
Guido van Rossum7aee3841996-03-07 18:00:44 +0000299
    file: the file(-like) object from which you can read the data *as
        bytes* ; None if the data is stored as a simple string
Guido van Rossum7aee3841996-03-07 18:00:44 +0000302
303 type: the content-type, or None if not specified
304
305 type_options: dictionary of options specified on the content-type
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000306 line
Guido van Rossum7aee3841996-03-07 18:00:44 +0000307
308 disposition: content-disposition, or None if not specified
309
310 disposition_options: dictionary of corresponding options
311
Barry Warsaw596097e2008-06-12 02:38:51 +0000312 headers: a dictionary(-like) object (sometimes email.message.Message or a
Armin Rigo3a703b62005-09-19 09:11:04 +0000313 subclass thereof) containing *all* headers
Guido van Rossum7aee3841996-03-07 18:00:44 +0000314
315 The class is subclassable, mostly for the purpose of overriding
316 the make_file() method, which is called internally to come up with
317 a file open for reading and writing. This makes it possible to
318 override the default choice of storing all files in a temporary
319 directory and unlinking them as soon as they have been opened.
320
321 """
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000322 def __init__(self, fp=None, headers=None, outerboundary=b'',
323 environ=os.environ, keep_blank_values=0, strict_parsing=0,
matthewbelisle-wf20914482018-10-19 05:52:59 -0500324 limit=None, encoding='utf-8', errors='replace',
Adam Goldschmidtfcbe0cb2021-02-15 00:41:57 +0200325 max_num_fields=None, separator='&'):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000326 """Constructor. Read multipart/* until last part.
Guido van Rossum7aee3841996-03-07 18:00:44 +0000327
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000328 Arguments, all optional:
Guido van Rossum7aee3841996-03-07 18:00:44 +0000329
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000330 fp : file pointer; default: sys.stdin.buffer
Guido van Rossumb1b4f941998-05-08 19:55:51 +0000331 (not used when the request method is GET)
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000332 Can be :
333 1. a TextIOWrapper object
334 2. an object whose read() and readline() methods return bytes
Guido van Rossum7aee3841996-03-07 18:00:44 +0000335
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000336 headers : header dictionary-like object; default:
337 taken from environ as per CGI spec
Guido van Rossum7aee3841996-03-07 18:00:44 +0000338
Guido van Rossum773ab271996-07-23 03:46:24 +0000339 outerboundary : terminating multipart boundary
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000340 (for internal use only)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000341
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000342 environ : environment dictionary; default: os.environ
Guido van Rossum773ab271996-07-23 03:46:24 +0000343
344 keep_blank_values: flag indicating whether blank values in
Senthil Kumaran30e86a42010-08-09 20:01:35 +0000345 percent-encoded forms should be treated as blank strings.
Tim Peters88869f92001-01-14 23:36:06 +0000346 A true value indicates that blanks should be retained as
Guido van Rossum773ab271996-07-23 03:46:24 +0000347 blank strings. The default false value indicates that
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000348 blank values are to be ignored and treated as if they were
349 not included.
Guido van Rossum773ab271996-07-23 03:46:24 +0000350
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000351 strict_parsing: flag indicating what to do with parsing errors.
352 If false (the default), errors are silently ignored.
353 If true, errors raise a ValueError exception.
Guido van Rossume08c04c1996-11-11 19:29:11 +0000354
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000355 limit : used internally to read parts of multipart/form-data forms,
356 to exit from the reading loop when reached. It is the difference
357 between the form content-length and the number of bytes already
358 read
359
360 encoding, errors : the encoding and error handler used to decode the
361 binary stream to strings. Must be the same as the charset defined
362 for the page sending the form (content-type : meta http-equiv or
363 header)
364
matthewbelisle-wf20914482018-10-19 05:52:59 -0500365 max_num_fields: int. If set, then __init__ throws a ValueError
366 if there are more than n fields read by parse_qsl().
367
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000368 """
369 method = 'GET'
370 self.keep_blank_values = keep_blank_values
371 self.strict_parsing = strict_parsing
matthewbelisle-wf20914482018-10-19 05:52:59 -0500372 self.max_num_fields = max_num_fields
Adam Goldschmidtfcbe0cb2021-02-15 00:41:57 +0200373 self.separator = separator
Raymond Hettinger54f02222002-06-01 14:18:47 +0000374 if 'REQUEST_METHOD' in environ:
Eric S. Raymond7e9b4f52001-02-09 09:59:10 +0000375 method = environ['REQUEST_METHOD'].upper()
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000376 self.qs_on_post = None
Guido van Rossum01852831998-06-25 02:40:17 +0000377 if method == 'GET' or method == 'HEAD':
Raymond Hettinger54f02222002-06-01 14:18:47 +0000378 if 'QUERY_STRING' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000379 qs = environ['QUERY_STRING']
380 elif sys.argv[1:]:
381 qs = sys.argv[1]
382 else:
383 qs = ""
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000384 qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
385 fp = BytesIO(qs)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000386 if headers is None:
387 headers = {'content-type':
388 "application/x-www-form-urlencoded"}
389 if headers is None:
Guido van Rossumcff311a1998-06-11 14:06:59 +0000390 headers = {}
391 if method == 'POST':
392 # Set default content-type for POST to what's traditional
393 headers['content-type'] = "application/x-www-form-urlencoded"
Raymond Hettinger54f02222002-06-01 14:18:47 +0000394 if 'CONTENT_TYPE' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000395 headers['content-type'] = environ['CONTENT_TYPE']
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000396 if 'QUERY_STRING' in environ:
397 self.qs_on_post = environ['QUERY_STRING']
Raymond Hettinger54f02222002-06-01 14:18:47 +0000398 if 'CONTENT_LENGTH' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000399 headers['content-length'] = environ['CONTENT_LENGTH']
Senthil Kumaranb4cbb922014-01-11 22:20:16 -0800400 else:
401 if not (isinstance(headers, (Mapping, Message))):
402 raise TypeError("headers must be mapping or an instance of "
403 "email.message.Message")
404 self.headers = headers
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000405 if fp is None:
406 self.fp = sys.stdin.buffer
407 # self.fp.read() must return bytes
408 elif isinstance(fp, TextIOWrapper):
409 self.fp = fp.buffer
410 else:
Senthil Kumaranb4cbb922014-01-11 22:20:16 -0800411 if not (hasattr(fp, 'read') and hasattr(fp, 'readline')):
412 raise TypeError("fp must be file pointer")
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000413 self.fp = fp
414
415 self.encoding = encoding
416 self.errors = errors
417
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000418 if not isinstance(outerboundary, bytes):
419 raise TypeError('outerboundary must be bytes, not %s'
420 % type(outerboundary).__name__)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000421 self.outerboundary = outerboundary
Guido van Rossum7aee3841996-03-07 18:00:44 +0000422
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000423 self.bytes_read = 0
424 self.limit = limit
425
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000426 # Process content-disposition header
427 cdisp, pdict = "", {}
Raymond Hettinger54f02222002-06-01 14:18:47 +0000428 if 'content-disposition' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000429 cdisp, pdict = parse_header(self.headers['content-disposition'])
430 self.disposition = cdisp
431 self.disposition_options = pdict
432 self.name = None
Raymond Hettinger54f02222002-06-01 14:18:47 +0000433 if 'name' in pdict:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000434 self.name = pdict['name']
435 self.filename = None
Raymond Hettinger54f02222002-06-01 14:18:47 +0000436 if 'filename' in pdict:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000437 self.filename = pdict['filename']
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000438 self._binary_file = self.filename is not None
Guido van Rossum7aee3841996-03-07 18:00:44 +0000439
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000440 # Process content-type header
Barry Warsaw302331a1999-01-08 17:42:03 +0000441 #
442 # Honor any existing content-type header. But if there is no
443 # content-type header, use some sensible defaults. Assume
444 # outerboundary is "" at the outer level, but something non-false
445 # inside a multi-part. The default for an inner part is text/plain,
446 # but for an outer part it should be urlencoded. This should catch
447 # bogus clients which erroneously forget to include a content-type
448 # header.
449 #
450 # See below for what we do if there does exist a content-type header,
451 # but it happens to be something we don't understand.
Raymond Hettinger54f02222002-06-01 14:18:47 +0000452 if 'content-type' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000453 ctype, pdict = parse_header(self.headers['content-type'])
Guido van Rossumce900de1999-06-02 18:44:22 +0000454 elif self.outerboundary or method != 'POST':
Barry Warsaw302331a1999-01-08 17:42:03 +0000455 ctype, pdict = "text/plain", {}
456 else:
457 ctype, pdict = 'application/x-www-form-urlencoded', {}
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000458 self.type = ctype
459 self.type_options = pdict
Raymond Hettinger54f02222002-06-01 14:18:47 +0000460 if 'boundary' in pdict:
Amber Brown545c9552018-05-14 18:11:55 -0400461 self.innerboundary = pdict['boundary'].encode(self.encoding,
462 self.errors)
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000463 else:
464 self.innerboundary = b""
465
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000466 clen = -1
Raymond Hettinger54f02222002-06-01 14:18:47 +0000467 if 'content-length' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000468 try:
Eric S. Raymond7e9b4f52001-02-09 09:59:10 +0000469 clen = int(self.headers['content-length'])
Skip Montanarodb5d1442002-03-23 05:50:17 +0000470 except ValueError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000471 pass
472 if maxlen and clen > maxlen:
Collin Winterce36ad82007-08-30 01:19:48 +0000473 raise ValueError('Maximum content length exceeded')
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000474 self.length = clen
Pierre Quentel2d7caca2019-09-11 13:05:53 +0200475 if self.limit is None and clen >= 0:
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000476 self.limit = clen
Guido van Rossum7aee3841996-03-07 18:00:44 +0000477
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000478 self.list = self.file = None
479 self.done = 0
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000480 if ctype == 'application/x-www-form-urlencoded':
481 self.read_urlencoded()
482 elif ctype[:10] == 'multipart/':
Guido van Rossumf5745001998-10-20 14:43:02 +0000483 self.read_multi(environ, keep_blank_values, strict_parsing)
Barry Warsaw302331a1999-01-08 17:42:03 +0000484 else:
Guido van Rossum60a3bd81999-06-11 18:26:09 +0000485 self.read_single()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000486
Brett Cannonf79126f2013-08-23 15:15:48 -0400487 def __del__(self):
488 try:
489 self.file.close()
490 except AttributeError:
491 pass
492
Berker Peksagbf5e9602015-02-06 10:21:37 +0200493 def __enter__(self):
494 return self
495
496 def __exit__(self, *args):
497 self.file.close()
498
Guido van Rossum7aee3841996-03-07 18:00:44 +0000499 def __repr__(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000500 """Return a printable representation."""
Walter Dörwald70a6b492004-02-12 17:35:32 +0000501 return "FieldStorage(%r, %r, %r)" % (
502 self.name, self.filename, self.value)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000503
Guido van Rossum4061cbe2002-09-11 18:20:34 +0000504 def __iter__(self):
505 return iter(self.keys())
506
Guido van Rossum7aee3841996-03-07 18:00:44 +0000507 def __getattr__(self, name):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000508 if name != 'value':
Collin Winterce36ad82007-08-30 01:19:48 +0000509 raise AttributeError(name)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000510 if self.file:
511 self.file.seek(0)
512 value = self.file.read()
513 self.file.seek(0)
514 elif self.list is not None:
515 value = self.list
516 else:
517 value = None
518 return value
Guido van Rossum7aee3841996-03-07 18:00:44 +0000519
520 def __getitem__(self, key):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000521 """Dictionary style indexing."""
522 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000523 raise TypeError("not indexable")
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000524 found = []
525 for item in self.list:
526 if item.name == key: found.append(item)
527 if not found:
Collin Winterce36ad82007-08-30 01:19:48 +0000528 raise KeyError(key)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000529 if len(found) == 1:
530 return found[0]
531 else:
532 return found
Guido van Rossum7aee3841996-03-07 18:00:44 +0000533
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000534 def getvalue(self, key, default=None):
535 """Dictionary style get() method, including 'value' lookup."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000536 if key in self:
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000537 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000538 if isinstance(value, list):
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000539 return [x.value for x in value]
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000540 else:
541 return value.value
542 else:
543 return default
544
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000545 def getfirst(self, key, default=None):
546 """ Return the first value received."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000547 if key in self:
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000548 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000549 if isinstance(value, list):
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000550 return value[0].value
551 else:
552 return value.value
553 else:
554 return default
555
556 def getlist(self, key):
557 """ Return list of received values."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000558 if key in self:
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000559 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000560 if isinstance(value, list):
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000561 return [x.value for x in value]
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000562 else:
563 return [value.value]
564 else:
565 return []
566
Guido van Rossum7aee3841996-03-07 18:00:44 +0000567 def keys(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000568 """Dictionary style keys() method."""
569 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000570 raise TypeError("not indexable")
Thomas Wouters8ce81f72007-09-20 18:22:40 +0000571 return list(set(item.name for item in self.list))
Guido van Rossum7aee3841996-03-07 18:00:44 +0000572
Raymond Hettinger54f02222002-06-01 14:18:47 +0000573 def __contains__(self, key):
574 """Dictionary style __contains__ method."""
575 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000576 raise TypeError("not indexable")
Thomas Wouters8ce81f72007-09-20 18:22:40 +0000577 return any(item.name == key for item in self.list)
Raymond Hettinger54f02222002-06-01 14:18:47 +0000578
def __len__(self):
    """Dictionary style len(x) support: the number of entries in keys()."""
    names = self.keys()
    return len(names)
Guido van Rossum88b85d41997-01-11 19:21:33 +0000582
def __bool__(self):
    """Truth value: true when ``self.list`` is non-empty.

    Raises TypeError if ``self.list`` is None.
    """
    fields = self.list
    if fields is None:
        raise TypeError("Cannot be converted to bool.")
    return True if fields else False
587
def read_urlencoded(self):
    """Internal: read data in query string format.

    Reads ``self.length`` bytes from ``self.fp``, decodes them, appends
    ``self.qs_on_post`` (if set) after an ``&``, parses the result with
    ``urllib.parse.parse_qsl`` and stores one MiniFieldStorage per
    (key, value) pair in ``self.list``.  Raises ValueError if the stream
    does not return bytes.
    """
    raw = self.fp.read(self.length)
    if not isinstance(raw, bytes):
        raise ValueError("%s should return bytes, got %s"
                         % (self.fp, type(raw).__name__))
    text = raw.decode(self.encoding, self.errors)
    if self.qs_on_post:
        text = text + '&' + self.qs_on_post
    pairs = urllib.parse.parse_qsl(
        text, self.keep_blank_values, self.strict_parsing,
        encoding=self.encoding, errors=self.errors,
        max_num_fields=self.max_num_fields, separator=self.separator)
    fields = []
    for name, value in pairs:
        fields.append(MiniFieldStorage(name, value))
    self.list = fields
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000603
# Class used by read_multi() to build each sub-part; while this is None
# (or otherwise falsy) read_multi() falls back to self.__class__.
FieldStorageClass = None
605
def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart.

    Resets ``self.list``, seeds it with the fields parsed from
    ``self.qs_on_post`` (if any), then reads sub-parts from ``self.fp``
    one after another and appends each to ``self.list``.
    ``self.bytes_read`` is advanced by everything consumed here and by
    each sub-part.

    Raises ValueError if the inner boundary is invalid, if the stream
    does not return bytes, or if ``max_num_fields`` is exceeded.
    """
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
    self.list = []
    if self.qs_on_post:
        query = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors,
            max_num_fields=self.max_num_fields, separator=self.separator)
        self.list.extend(MiniFieldStorage(key, value) for key, value in query)

    # Sub-parts are built with FieldStorageClass when set, else with the
    # class of this instance.
    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline() # bytes
    if not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s" \
                         % (self.fp, type(first_line).__name__))
    self.bytes_read += len(first_line)

    # Ensure that we consume the file until we've hit our inner boundary
    # (an empty read, i.e. end of input, also ends the loop)
    while (first_line.strip() != (b"--" + self.innerboundary) and
            first_line):
        first_line = self.fp.readline()
        self.bytes_read += len(first_line)

    # Propagate max_num_fields into the sub class appropriately:
    # fields already taken from qs_on_post count against the budget.
    max_num_fields = self.max_num_fields
    if max_num_fields is not None:
        max_num_fields -= len(self.list)

    while True:
        parser = FeedParser()
        hdr_text = b""
        # Accumulate this part's header lines up to and including the
        # first blank (or empty) line.
        while True:
            data = self.fp.readline()
            hdr_text += data
            if not data.strip():
                break
        # Nothing at all was read: the input is exhausted.
        if not hdr_text:
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()

        # Some clients add Content-Length for part headers, ignore them
        if 'content-length' in headers:
            del headers['content-length']

        # Whatever remains of our own limit is handed to the sub-part.
        limit = None if self.limit is None \
            else self.limit - self.bytes_read
        part = klass(self.fp, headers, ib, environ, keep_blank_values,
                     strict_parsing, limit,
                     self.encoding, self.errors, max_num_fields, self.separator)

        # Charge the part itself plus any fields nested inside it.
        if max_num_fields is not None:
            max_num_fields -= 1
            if part.list:
                max_num_fields -= len(part.list)
            if max_num_fields < 0:
                raise ValueError('Max number of fields exceeded')

        self.bytes_read += part.bytes_read
        self.list.append(part)
        # Stop when the part reports it is done, or when a positive
        # self.length has been fully consumed.
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000674
def read_single(self):
    """Internal: read an atomic part.

    A negative ``self.length`` means the part is read line by line;
    otherwise exactly that many bytes are read and the remaining lines
    are skipped.  The resulting file is rewound afterwards.
    """
    if self.length < 0:
        self.read_lines()
    else:
        self.read_binary()
        self.skip_lines()
    self.file.seek(0)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000683
# read_binary() asks self.fp for at most this many bytes per read() call.
bufsize = 8*1024            # I/O buffering size for copy to file
Guido van Rossum7aee3841996-03-07 18:00:44 +0000685
def read_binary(self):
    """Internal: read binary data.

    Copies up to ``self.length`` bytes from ``self.fp`` into a fresh file
    obtained from ``make_file()``, at most ``self.bufsize`` bytes per
    read, adding what was read to ``self.bytes_read``.  An empty read
    before the length is reached sets ``self.done = -1``.  Raises
    ValueError if the stream does not return bytes.
    """
    self.file = self.make_file()
    remaining = self.length
    while remaining > 0:
        chunk = self.fp.read(min(remaining, self.bufsize)) # bytes
        if not isinstance(chunk, bytes):
            raise ValueError("%s should return bytes, got %s"
                             % (self.fp, type(chunk).__name__))
        self.bytes_read += len(chunk)
        if not chunk:
            # Input ended before the announced length was reached.
            self.done = -1
            break
        self.file.write(chunk)
        remaining -= len(chunk)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000702
def read_lines(self):
    """Internal: read lines until EOF or outerboundary.

    Starts with an in-memory buffer (bytes for binary files, text
    otherwise) and then delegates to the reader matching whether an
    outer boundary is set.
    """
    # store data as bytes for files, as strings for other fields
    buffer = BytesIO() if self._binary_file else StringIO()
    self.file = self.__file = buffer
    if not self.outerboundary:
        self.read_lines_to_eof()
    else:
        self.read_lines_to_outerboundary()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000713
def __write(self, line):
    """Write *line* (always bytes, not string) to the part's file.

    While the in-memory buffer is still in use and this write would take
    it past 1000 bytes/characters, its contents are moved to a file from
    ``make_file()`` and the buffer is dropped.  Text fields get the line
    decoded with ``self.encoding`` / ``self.errors`` before writing.
    """
    memory_buffer = self.__file
    if memory_buffer is not None and memory_buffer.tell() + len(line) > 1000:
        self.file = self.make_file()
        self.file.write(memory_buffer.getvalue())
        self.__file = None
    if self._binary_file:
        # keep bytes
        payload = line
    else:
        # decode to string
        payload = line.decode(self.encoding, self.errors)
    self.file.write(payload)
Guido van Rossum52b8c292001-06-29 13:06:06 +0000728
def read_lines_to_eof(self):
    """Internal: read lines until EOF.

    Each line (read in pieces of at most 1<<16 bytes) is counted in
    ``self.bytes_read`` and written out; an empty read sets
    ``self.done = -1`` and stops.
    """
    while True:
        chunk = self.fp.readline(1<<16) # bytes
        self.bytes_read += len(chunk)
        if chunk:
            self.__write(chunk)
            continue
        self.done = -1
        break
Guido van Rossum7aee3841996-03-07 18:00:44 +0000738
def read_lines_to_outerboundary(self):
    """Internal: read lines until outerboundary.
    Data is read as bytes: boundaries and line ends must be converted
    to bytes for comparisons.

    Stops at the next-part delimiter (``done`` untouched), at the
    closing delimiter (``done = 1``), at end of input (``done = -1``),
    or once a non-negative ``self.limit`` has been reached.  The line
    ending of each data line is held back and written in front of the
    following line, so the ending that precedes a delimiter is never
    written.
    """
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    delim = b""                 # line ending held back from the previous line
    last_line_lfend = True      # did the previous line end with a newline?
    _read = 0                   # bytes consumed by this call
    while 1:

        if self.limit is not None and 0 <= self.limit <= _read:
            break
        line = self.fp.readline(1<<16) # bytes
        self.bytes_read += len(line)
        _read += len(line)
        if not line:
            self.done = -1
            break
        # A lone "\r" held back from the previous read is put back in
        # front of this line before any further inspection.
        if delim == b"\r":
            line = delim + line
            delim = b""
        # Only a line that follows a complete line can be a delimiter.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.rstrip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        odelim = delim
        if line.endswith(b"\r\n"):
            delim = b"\r\n"
            line = line[:-2]
            last_line_lfend = True
        elif line.endswith(b"\n"):
            delim = b"\n"
            line = line[:-1]
            last_line_lfend = True
        elif line.endswith(b"\r"):
            # We may interrupt \r\n sequences if they span the 2**16
            # byte boundary
            delim = b"\r"
            line = line[:-1]
            last_line_lfend = False
        else:
            delim = b""
            last_line_lfend = False
        # Emit the previous line's ending followed by this line's content.
        self.__write(odelim + line)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000788
def skip_lines(self):
    """Internal: skip lines until outer boundary if defined.

    Does nothing when there is no outer boundary or when ``self.done``
    is already set.  Otherwise lines are read from ``self.fp`` (their
    lengths are added to ``self.bytes_read``) and discarded until one of:

    - the next-part delimiter ``--<outerboundary>``: stop, ``done``
      left untouched;
    - the closing delimiter ``--<outerboundary>--``: stop with
      ``done = 1``;
    - end of input: stop with ``done = -1``.
    """
    if not self.outerboundary or self.done:
        return
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    last_line_lfend = True
    while True:
        line = self.fp.readline(1<<16)
        self.bytes_read += len(line)
        if not line:
            self.done = -1
            break
        # Delimiter lines *start* with "--".  Testing the end of the line
        # instead can never match a newline-terminated delimiter, which
        # made this loop run to end of input and swallow later parts.
        # Only a line that follows a complete line can be a delimiter.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.strip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        last_line_lfend = line.endswith(b'\n')
Guido van Rossum7aee3841996-03-07 18:00:44 +0000810
def make_file(self):
    """Overridable: return a readable & writable file.

    The returned file is used in this order:
    - data is written to it
    - seek(0)
    - data is read from it

    Binary mode is used for files, text mode (with ``self.encoding`` and
    '\\n' newlines) for other fields.

    This implementation returns a ``tempfile.TemporaryFile``: a
    temporary file opened for reading and writing that is immediately
    deleted (unlinked).  The trick (on Unix!) is that such a file stays
    usable but cannot be opened by another process, and it disappears
    automatically when closed or when the current process terminates.

    Derive a class overriding this method if you want a more permanent
    file.  For a visible temporary file that is still removed when the
    script terminates, try a ``__del__`` method in the derived class
    that unlinks the temporary files you created.

    """
    if not self._binary_file:
        return tempfile.TemporaryFile("w+",
            encoding=self.encoding, newline = '\n')
    return tempfile.TemporaryFile("wb+")
Tim Peters88869f92001-01-14 23:36:06 +0000840
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000841
Guido van Rossum72755611996-03-06 07:20:06 +0000842# Test/debug code
843# ===============
Guido van Rossum9a22de11995-01-12 12:29:47 +0000844
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Write minimal HTTP headers and dump all information provided to
    the script in HTML form.

    The dump is produced twice: once as-is (followed by a deliberately
    provoked exception to exercise print_exception()), and once more
    after setting the module global ``maxlen`` to 50.  *environ* is only
    passed on to print_environ().

    """
    print("Content-type: text/html")
    print()
    # From here on anything written to stderr lands in the response body.
    sys.stderr = sys.stdout
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # The text handed to exec() is not valid Python, so calling f()
        # raises; g() adds one more frame to the resulting traceback.
        def f():
            exec("testing print_exception() -- <I>italics?</I>")
        def g(f=f):
            f()
        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        # Deliberately catches everything: any failure is reported in
        # the page instead of aborting the script.
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    global maxlen
    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
Guido van Rossumad164711997-05-13 19:03:23 +0000883
def print_exception(type=None, value=None, tb=None, limit=None):
    """Print an exception and its traceback as HTML.

    When *type* is None the exception currently being handled (as
    reported by ``sys.exc_info()``) is used.  *limit* is passed to
    ``traceback.format_tb``.  All text is HTML-escaped; the final line
    of the report is wrapped in ``<B>``.
    """
    import traceback
    if type is None:
        type, value, tb = sys.exc_info()
    print()
    print("<H3>Traceback (most recent call last):</H3>")
    report = traceback.format_tb(tb, limit)
    report = report + traceback.format_exception_only(type, value)
    leading = html.escape("".join(report[:-1]))
    final = html.escape(report[-1])
    print("<PRE>%s<B>%s</B></PRE>" % (leading, final))
    # Drop the local reference to the traceback object.
    del tb
Guido van Rossum9a22de11995-01-12 12:29:47 +0000897
def print_environ(environ=os.environ):
    """Dump the shell environment as HTML.

    Prints one ``<DT>``/``<DD>`` pair per variable in *environ*, sorted
    by name, with names and values HTML-escaped.
    """
    names = sorted(environ.keys())
    print()
    print("<H3>Shell Environment:</H3>")
    print("<DL>")
    for name in names:
        setting = environ[name]
        print("<DT>", html.escape(name), "<DD>", html.escape(setting))
    print("</DL>")
    print()
Guido van Rossum72755611996-03-06 07:20:06 +0000908
def print_form(form):
    """Dump the contents of a form as HTML.

    For every key of *form* (sorted) the key, the type of the stored
    entry and the entry's repr() are printed, all HTML-escaped.  A note
    is printed first when the form has no keys.
    """
    field_names = sorted(form.keys())
    print()
    print("<H3>Form Contents:</H3>")
    if not field_names:
        print("<P>No form fields.")
    print("<DL>")
    for name in field_names:
        print("<DT>" + html.escape(name) + ":", end=' ')
        entry = form[name]
        type_text = html.escape(repr(type(entry)))
        print("<i>" + type_text + "</i>")
        print("<DD>" + html.escape(repr(entry)))
    print("</DL>")
    print()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000924
def print_directory():
    """Dump the current directory as HTML.

    Prints the HTML-escaped result of ``os.getcwd()``, or the escaped
    error message prefixed with "OSError:" if that call fails.
    """
    print()
    print("<H3>Current Working Directory:</H3>")
    try:
        cwd = os.getcwd()
    except OSError as err:
        reason = str(err)
        print("OSError:", html.escape(reason))
    else:
        print(html.escape(cwd))
    print()
Guido van Rossum9a22de11995-01-12 12:29:47 +0000936
def print_arguments():
    """Dump the command line arguments (``sys.argv``) as HTML.

    The list is HTML-escaped before printing, like the output of the
    other dump helpers in this module, so that markup contained in an
    argument is shown as text instead of being injected into the page.
    """
    print()
    print("<H3>Command Line Arguments:</H3>")
    print()
    # repr() of the list is exactly what print(sys.argv) would show.
    print(html.escape(repr(sys.argv)))
    print()
Guido van Rossuma8738a51996-03-14 21:30:28 +0000943
def print_environ_usage():
    """Dump a list of environment variables used by CGI as HTML.

    The text is fixed: it does not inspect the actual environment.
    """
    usage_html = """
<H3>These environment variables could have been set:</H3>
<UL>
<LI>AUTH_TYPE
<LI>CONTENT_LENGTH
<LI>CONTENT_TYPE
<LI>DATE_GMT
<LI>DATE_LOCAL
<LI>DOCUMENT_NAME
<LI>DOCUMENT_ROOT
<LI>DOCUMENT_URI
<LI>GATEWAY_INTERFACE
<LI>LAST_MODIFIED
<LI>PATH
<LI>PATH_INFO
<LI>PATH_TRANSLATED
<LI>QUERY_STRING
<LI>REMOTE_ADDR
<LI>REMOTE_HOST
<LI>REMOTE_IDENT
<LI>REMOTE_USER
<LI>REQUEST_METHOD
<LI>SCRIPT_NAME
<LI>SERVER_NAME
<LI>SERVER_PORT
<LI>SERVER_PROTOCOL
<LI>SERVER_ROOT
<LI>SERVER_SOFTWARE
</UL>
In addition, HTTP headers sent by the server may be passed in the
environment as well.  Here are some common variable names:
<UL>
<LI>HTTP_ACCEPT
<LI>HTTP_CONNECTION
<LI>HTTP_HOST
<LI>HTTP_PRAGMA
<LI>HTTP_REFERER
<LI>HTTP_USER_AGENT
</UL>
"""
    print(usage_html)
Guido van Rossum9a22de11995-01-12 12:29:47 +0000986
Guido van Rossum9a22de11995-01-12 12:29:47 +0000987
Guido van Rossum72755611996-03-06 07:20:06 +0000988# Utilities
989# =========
Guido van Rossum9a22de11995-01-12 12:29:47 +0000990
def valid_boundary(s):
    """Check whether *s* (str or bytes) is an acceptable multipart boundary.

    Returns the ``re`` match object (truthy) when *s* consists of 1 to
    201 characters in the range space..'~' and the last one is not a
    space; returns None otherwise.  Note that ``$`` also matches just
    before a single trailing newline.
    """
    import re
    _vb_pattern = (b"^[ -~]{0,200}[!-~]$" if isinstance(s, bytes)
                   else "^[ -~]{0,200}[!-~]$")
    return re.match(_vb_pattern, s)
Guido van Rossum9a22de11995-01-12 12:29:47 +0000998
Guido van Rossum72755611996-03-06 07:20:06 +0000999# Invoke mainline
1000# ===============
1001
# Call test() when this file is run as a script (not imported as a module);
# test() is invoked with its default argument, i.e. the process environment.
if __name__ == '__main__':
    test()