blob: 8cf668718ded35d296b717cfedf2659a0ef4febb [file] [log] [blame]
#! /usr/local/bin/python

# NOTE: the above "/usr/local/bin/python" is NOT a mistake.  It is
# intentionally NOT "/usr/bin/env python".  On many systems
# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
# scripts, and /usr/local/bin is the default directory where Python is
# installed, so /usr/bin/env would be unable to find python.  Granted,
# binary installations by Linux vendors often install Python in
# /usr/bin.  So let those vendors patch cgi.py to match their choice
# of installation.
11
"""Support module for CGI (Common Gateway Interface) scripts.

This module defines a number of utilities for use by CGI scripts
written in Python.
"""
17
# History
# -------
#
# Michael McLay started this module.  Steve Majewski changed the
# interface to SvFormContentDict and FormContentDict.  The multipart
# parsing was inspired by code submitted by Andreas Paepcke.  Guido van
# Rossum rewrote, reformatted and documented the module and is currently
# responsible for its maintenance.
#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000027
# Version string of this module.
__version__ = "2.6"
Guido van Rossum0147db01996-03-09 03:16:04 +000029
Guido van Rossum72755611996-03-06 07:20:06 +000030
31# Imports
32# =======
33
Victor Stinner5c23b8e2011-01-14 13:05:21 +000034from io import StringIO, BytesIO, TextIOWrapper
Serhiy Storchaka2e576f52017-04-24 09:05:00 +030035from collections.abc import Mapping
Guido van Rossum72755611996-03-06 07:20:06 +000036import sys
37import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +000038import urllib.parse
Victor Stinner5c23b8e2011-01-14 13:05:21 +000039from email.parser import FeedParser
Senthil Kumaranb4cbb922014-01-11 22:20:16 -080040from email.message import Message
Facundo Batistac469d4c2008-09-03 22:49:01 +000041from warnings import warn
Georg Brandl1f7fffb2010-10-15 15:57:45 +000042import html
Victor Stinner5c23b8e2011-01-14 13:05:21 +000043import locale
44import tempfile
Guido van Rossum72755611996-03-06 07:20:06 +000045
# Names exported by ``from cgi import *``.
__all__ = ["MiniFieldStorage", "FieldStorage",
           "parse", "parse_qs", "parse_qsl", "parse_multipart",
           "parse_header", "test", "print_exception", "print_environ",
           "print_form", "print_directory", "print_arguments",
           "print_environ_usage", "escape"]
Guido van Rossumc204c701996-09-05 19:07:11 +000051
52# Logging support
53# ===============
54
# Module-level logging state, read and rebound by initlog() and closelog().
logfile = ""            # Filename to log to, if not empty
logfp = None            # File object to log to, if not None
Guido van Rossumc204c701996-09-05 19:07:11 +000057
58def initlog(*allargs):
59 """Write a log message, if there is a log file.
60
61 Even though this function is called initlog(), you should always
62 use log(); log is a variable that is set either to initlog
63 (initially), to dolog (once the log file has been opened), or to
64 nolog (when logging is disabled).
65
66 The first argument is a format string; the remaining arguments (if
67 any) are arguments to the % operator, so e.g.
68 log("%s: %s", "a", "b")
69 will write "a: b" to the log file, followed by a newline.
70
71 If the global logfp is not None, it should be a file object to
72 which log data is written.
73
74 If the global logfp is None, the global logfile may be a string
75 giving a filename to open, in append mode. This file should be
76 world writable!!! If the file can't be opened, logging is
77 silently disabled (since there is no safe place where we could
78 send an error message).
79
80 """
Victor Stinnerd33344a2011-07-14 22:28:36 +020081 global log, logfile, logfp
Guido van Rossumc204c701996-09-05 19:07:11 +000082 if logfile and not logfp:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +000083 try:
84 logfp = open(logfile, "a")
Andrew Svetlovf7a17b42012-12-25 16:47:37 +020085 except OSError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +000086 pass
Guido van Rossumc204c701996-09-05 19:07:11 +000087 if not logfp:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +000088 log = nolog
Guido van Rossumc204c701996-09-05 19:07:11 +000089 else:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +000090 log = dolog
Guido van Rossum68468eb2003-02-27 20:14:51 +000091 log(*allargs)
Guido van Rossumc204c701996-09-05 19:07:11 +000092
def dolog(fmt, *args):
    """Format one message and append it to the log file.

    fmt is a %-style format string and args are its operands; a newline
    is added after the formatted text.  The global logfp must already be
    a writable file object.  See initlog() for the full description.
    """
    message = fmt % args
    logfp.write(message + "\n")
96
def nolog(*allargs):
    """Discard the message; log is bound to this when logging is disabled."""
    return None
100
def closelog():
    """Close the log file and reset the logging state.

    Clears the global logfile name, closes and forgets the global logfp
    if one is open, and rebinds log to initlog so that a later log() call
    goes through initialisation again.
    """
    global log, logfile, logfp
    logfile = ''
    if logfp:
        logfp.close()
        logfp = None
    log = initlog
109
# Starts as initlog; initlog() rebinds it to dolog or nolog on first use.
log = initlog           # The current logging function
Guido van Rossumc204c701996-09-05 19:07:11 +0000111
112
Guido van Rossum72755611996-03-06 07:20:06 +0000113# Parsing functions
114# =================
115
# Maximum input we will accept when REQUEST_METHOD is POST
# 0 ==> unlimited input
# Exceeding it raises ValueError('Maximum content length exceeded').
maxlen = 0
119
def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
    """Parse a query in the environment or from a file (default stdin).

    Arguments, all optional:

    fp              : file pointer; default: sys.stdin.buffer

    environ         : environment dictionary; default: os.environ.
        Note that this function stores 'REQUEST_METHOD' (when missing)
        and, in some branches, 'QUERY_STRING' back into it.

    keep_blank_values: flag indicating whether blank values in
        percent-encoded forms should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Returns a dictionary mapping each field name to a list of values.

    Raises ValueError if a urlencoded POST body is longer than the
    module-level maxlen (when maxlen is non-zero).
    """
    if fp is None:
        fp = sys.stdin

    # Field keys and values (except for files) are returned as strings;
    # an encoding is required to decode the bytes read from fp.
    if hasattr(fp, 'encoding'):
        encoding = fp.encoding
    else:
        encoding = 'latin-1'

    # fp.read() must return bytes
    if isinstance(fp, TextIOWrapper):
        fp = fp.buffer

    if 'REQUEST_METHOD' not in environ:
        environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone
    if environ['REQUEST_METHOD'] == 'POST':
        ctype, pdict = parse_header(environ['CONTENT_TYPE'])
        if ctype == 'multipart/form-data':
            # parse_header() yields str values under lower-cased keys,
            # whereas parse_multipart() decodes the boundary from bytes
            # and reads the body length from pdict['CONTENT-LENGTH'].
            # Adapt pdict so this branch no longer fails unconditionally.
            boundary = pdict.get('boundary')
            if isinstance(boundary, str):
                pdict['boundary'] = boundary.encode('ascii')
            if 'CONTENT_LENGTH' in environ:
                pdict['CONTENT-LENGTH'] = environ['CONTENT_LENGTH']
            return parse_multipart(fp, pdict)
        elif ctype == 'application/x-www-form-urlencoded':
            clength = int(environ['CONTENT_LENGTH'])
            if maxlen and clength > maxlen:
                raise ValueError('Maximum content length exceeded')
            qs = fp.read(clength).decode(encoding)
        else:
            qs = ''                     # Unknown content-type
        # A POST may also carry a query string; append it to the body.
        if 'QUERY_STRING' in environ:
            if qs:
                qs = qs + '&'
            qs = qs + environ['QUERY_STRING']
        elif sys.argv[1:]:
            if qs:
                qs = qs + '&'
            qs = qs + sys.argv[1]
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    elif 'QUERY_STRING' in environ:
        qs = environ['QUERY_STRING']
    else:
        # Stand-alone use: take the query from the command line, if any.
        if sys.argv[1:]:
            qs = sys.argv[1]
        else:
            qs = ""
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding)
Guido van Rossume7808771995-08-07 20:12:09 +0000184
185
# The query-string parsers now live in urllib.parse.  The two wrappers
# below only emit a DeprecationWarning and delegate to them; they are
# kept in order to maintain backward compatibility.
Facundo Batistac469d4c2008-09-03 22:49:01 +0000188
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument (deprecated).

    Emits a DeprecationWarning attributed to the caller and returns the
    result of urllib.parse.parse_qs() for the same arguments.
    """
    warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
         category=DeprecationWarning, stacklevel=2)
    parsed = urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
    return parsed
Guido van Rossum1946f0d1999-06-04 17:54:39 +0000194
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument (deprecated).

    Emits a DeprecationWarning attributed to the caller and returns the
    result of urllib.parse.parse_qsl() for the same arguments.
    """
    warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
         category=DeprecationWarning, stacklevel=2)
    pairs = urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
    return pairs
Guido van Rossum9a22de11995-01-12 12:29:47 +0000200
def parse_multipart(fp, pdict, encoding="utf-8", errors="replace"):
    """Parse multipart input.

    Arguments:
    fp   : input file
    pdict: dictionary containing other parameters of content-type header.
        It must provide 'boundary' (bytes, or str as produced by
        parse_header()) and 'CONTENT-LENGTH' (the length of the body).
    encoding, errors: request encoding and error handler, passed to
        FieldStorage

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field. For non-file fields, the value
    is a list of strings.

    Raises KeyError if pdict lacks 'boundary' or 'CONTENT-LENGTH'.
    """
    # RFC 2046, Section 5.1 : The "multipart" boundary delimiters are always
    # represented as 7bit US-ASCII.
    boundary = pdict['boundary']
    if isinstance(boundary, bytes):
        boundary = boundary.decode('ascii')
    # A str boundary (what parse_header() returns) is used as-is instead
    # of failing with AttributeError on .decode().
    ctype = "multipart/form-data; boundary={}".format(boundary)
    headers = Message()
    headers.set_type(ctype)
    headers['Content-Length'] = pdict['CONTENT-LENGTH']
    fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors,
                      environ={'REQUEST_METHOD': 'POST'})
    return {k: fs.getlist(k) for k in fs}
Guido van Rossum9a22de11995-01-12 12:29:47 +0000224
def _parseparam(s):
    """Yield the stripped ';'-separated pieces of s, one at a time.

    s is expected to start with ';'.  A ';' that falls inside a
    double-quoted string (an odd number of unescaped quotes precedes it)
    does not end a piece.
    """
    while s[:1] == ';':
        s = s[1:]
        cut = s.find(';')
        while cut > 0:
            # Quotes written as \" do not open or close a quoted string.
            open_quotes = s.count('"', 0, cut) - s.count('\\"', 0, cut)
            if open_quotes % 2 == 0:
                break
            cut = s.find(';', cut + 1)
        if cut < 0:
            cut = len(s)
        piece = s[:cut]
        yield piece.strip()
        s = s[cut:]


def parse_header(line):
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.  Option
    names are lower-cased; a value wrapped in double quotes has the
    quotes removed and its backslash escapes (\\\\ and \\") undone.
    Pieces without an '=' are ignored.

    """
    pieces = _parseparam(';' + line)
    key = next(pieces)
    pdict = {}
    for piece in pieces:
        raw_name, sep, raw_value = piece.partition('=')
        if not sep:
            continue
        name = raw_name.strip().lower()
        value = raw_value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        pdict[name] = value
    return key, pdict
Guido van Rossum72755611996-03-06 07:20:06 +0000256
257
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000258# Classes for field storage
259# =========================
260
class MiniFieldStorage:

    """Like FieldStorage, for use when no file uploads are possible.

    Holds just a field name and its value; the remaining attributes
    exist as class-level placeholders.
    """

    # Placeholder attributes (shared at class level, never set per instance)
    filename = None
    list = None
    type = None
    file = None
    type_options = {}
    disposition = None
    disposition_options = {}
    headers = {}

    def __init__(self, name, value):
        """Constructor from field name and value."""
        self.name, self.value = name, value

    def __repr__(self):
        """Return printable representation."""
        shown = (self.name, self.value)
        return "MiniFieldStorage(%r, %r)" % shown
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000284
285
286class FieldStorage:
287
Guido van Rossum7aee3841996-03-07 18:00:44 +0000288 """Store a sequence of fields, reading multipart/form-data.
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000289
Guido van Rossum7aee3841996-03-07 18:00:44 +0000290 This class provides naming, typing, files stored on disk, and
291 more. At the top level, it is accessible like a dictionary, whose
292 keys are the field names. (Note: None can occur as a field name.)
293 The items are either a Python list (if there's multiple values) or
294 another FieldStorage or MiniFieldStorage object. If it's a single
295 object, it has the following attributes:
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000296
Guido van Rossum7aee3841996-03-07 18:00:44 +0000297 name: the field name, if specified; otherwise None
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000298
Guido van Rossum7aee3841996-03-07 18:00:44 +0000299 filename: the filename, if specified; otherwise None; this is the
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000300 client side filename, *not* the file name on which it is
301 stored (that's a temporary file you don't deal with)
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000302
Guido van Rossum7aee3841996-03-07 18:00:44 +0000303 value: the value as a *string*; for file uploads, this
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000304 transparently reads the file every time you request the value
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000305 and returns *bytes*
Guido van Rossum7aee3841996-03-07 18:00:44 +0000306
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000307 file: the file(-like) object from which you can read the data *as
308 bytes* ; None if the data is stored a simple string
Guido van Rossum7aee3841996-03-07 18:00:44 +0000309
310 type: the content-type, or None if not specified
311
312 type_options: dictionary of options specified on the content-type
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000313 line
Guido van Rossum7aee3841996-03-07 18:00:44 +0000314
315 disposition: content-disposition, or None if not specified
316
317 disposition_options: dictionary of corresponding options
318
Barry Warsaw596097e2008-06-12 02:38:51 +0000319 headers: a dictionary(-like) object (sometimes email.message.Message or a
Armin Rigo3a703b62005-09-19 09:11:04 +0000320 subclass thereof) containing *all* headers
Guido van Rossum7aee3841996-03-07 18:00:44 +0000321
322 The class is subclassable, mostly for the purpose of overriding
323 the make_file() method, which is called internally to come up with
324 a file open for reading and writing. This makes it possible to
325 override the default choice of storing all files in a temporary
326 directory and unlinking them as soon as they have been opened.
327
328 """
def __init__(self, fp=None, headers=None, outerboundary=b'',
             environ=os.environ, keep_blank_values=0, strict_parsing=0,
             limit=None, encoding='utf-8', errors='replace',
             max_num_fields=None):
    """Constructor.  Read multipart/* until last part.

    Arguments, all optional:

    fp              : file pointer; default: sys.stdin.buffer
        (not used when the request method is GET)
        Can be :
        1. a TextIOWrapper object
        2. an object whose read() and readline() methods return bytes

    headers         : header dictionary-like object; default:
        taken from environ as per CGI spec

    outerboundary   : terminating multipart boundary
        (for internal use only)

    environ         : environment dictionary; default: os.environ

    keep_blank_values: flag indicating whether blank values in
        percent-encoded forms should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    limit : used internally to read parts of multipart/form-data forms,
        to exit from the reading loop when reached. It is the difference
        between the form content-length and the number of bytes already
        read

    encoding, errors : the encoding and error handler used to decode the
        binary stream to strings. Must be the same as the charset defined
        for the page sending the form (content-type : meta http-equiv or
        header)

    max_num_fields: int. If set, then __init__ throws a ValueError
        if there are more than n fields read by parse_qsl().

    Raises TypeError if headers is neither a mapping nor an
    email.message.Message, if fp lacks read()/readline(), or if
    outerboundary is not bytes.  Raises ValueError if the declared
    content length exceeds the module-level maxlen (when non-zero).

    """
    method = 'GET'
    self.keep_blank_values = keep_blank_values
    self.strict_parsing = strict_parsing
    self.max_num_fields = max_num_fields
    if 'REQUEST_METHOD' in environ:
        method = environ['REQUEST_METHOD'].upper()
    self.qs_on_post = None
    if method == 'GET' or method == 'HEAD':
        # No request body: the query string (or argv[1]) is the form
        # data, so wrap it in a bytes stream and parse it like a body.
        if 'QUERY_STRING' in environ:
            qs = environ['QUERY_STRING']
        elif sys.argv[1:]:
            qs = sys.argv[1]
        else:
            qs = ""
        qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
        fp = BytesIO(qs)
        if headers is None:
            headers = {'content-type':
                       "application/x-www-form-urlencoded"}
    if headers is None:
        # Build the headers from the CGI environment variables.
        headers = {}
        if method == 'POST':
            # Set default content-type for POST to what's traditional
            headers['content-type'] = "application/x-www-form-urlencoded"
        if 'CONTENT_TYPE' in environ:
            headers['content-type'] = environ['CONTENT_TYPE']
        if 'QUERY_STRING' in environ:
            # Remembered so read_urlencoded() can append it to the body.
            self.qs_on_post = environ['QUERY_STRING']
        if 'CONTENT_LENGTH' in environ:
            headers['content-length'] = environ['CONTENT_LENGTH']
    else:
        if not (isinstance(headers, (Mapping, Message))):
            raise TypeError("headers must be mapping or an instance of "
                            "email.message.Message")
    self.headers = headers
    if fp is None:
        self.fp = sys.stdin.buffer
    # self.fp.read() must return bytes
    elif isinstance(fp, TextIOWrapper):
        self.fp = fp.buffer
    else:
        if not (hasattr(fp, 'read') and hasattr(fp, 'readline')):
            raise TypeError("fp must be file pointer")
        self.fp = fp

    self.encoding = encoding
    self.errors = errors

    if not isinstance(outerboundary, bytes):
        raise TypeError('outerboundary must be bytes, not %s'
                        % type(outerboundary).__name__)
    self.outerboundary = outerboundary

    self.bytes_read = 0
    self.limit = limit

    # Process content-disposition header
    cdisp, pdict = "", {}
    if 'content-disposition' in self.headers:
        cdisp, pdict = parse_header(self.headers['content-disposition'])
    self.disposition = cdisp
    self.disposition_options = pdict
    self.name = None
    if 'name' in pdict:
        self.name = pdict['name']
    self.filename = None
    if 'filename' in pdict:
        self.filename = pdict['filename']
    # A part that carries a filename is flagged as binary.
    self._binary_file = self.filename is not None

    # Process content-type header
    #
    # Honor any existing content-type header.  But if there is no
    # content-type header, use some sensible defaults.  Assume
    # outerboundary is "" at the outer level, but something non-false
    # inside a multi-part.  The default for an inner part is text/plain,
    # but for an outer part it should be urlencoded.  This should catch
    # bogus clients which erroneously forget to include a content-type
    # header.
    #
    # See below for what we do if there does exist a content-type header,
    # but it happens to be something we don't understand.
    if 'content-type' in self.headers:
        ctype, pdict = parse_header(self.headers['content-type'])
    elif self.outerboundary or method != 'POST':
        ctype, pdict = "text/plain", {}
    else:
        ctype, pdict = 'application/x-www-form-urlencoded', {}
    self.type = ctype
    self.type_options = pdict
    if 'boundary' in pdict:
        self.innerboundary = pdict['boundary'].encode(self.encoding,
                                                      self.errors)
    else:
        self.innerboundary = b""

    # clen stays -1 when there is no usable content-length header.
    clen = -1
    if 'content-length' in self.headers:
        try:
            clen = int(self.headers['content-length'])
        except ValueError:
            pass
        if maxlen and clen > maxlen:
            raise ValueError('Maximum content length exceeded')
    self.length = clen
    # NOTE(review): -1 is truthy, so a missing content-length sets
    # self.limit to -1 while a length of 0 leaves it None -- confirm
    # this is what the reading methods expect before changing it.
    if self.limit is None and clen:
        self.limit = clen

    self.list = self.file = None
    self.done = 0
    # Dispatch on the content type; anything unrecognised is read as a
    # single value.
    if ctype == 'application/x-www-form-urlencoded':
        self.read_urlencoded()
    elif ctype[:10] == 'multipart/':
        self.read_multi(environ, keep_blank_values, strict_parsing)
    else:
        self.read_single()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000492
def __del__(self):
    """Close the underlying file, if any, when the object is destroyed."""
    try:
        self.file.close()
    except AttributeError:
        # self.file is None or was never set; nothing to close.
        pass
498
def __enter__(self):
    """Context-manager entry: return the FieldStorage itself."""
    return self
501
502 def __exit__(self, *args):
503 self.file.close()
504
Guido van Rossum7aee3841996-03-07 18:00:44 +0000505 def __repr__(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000506 """Return a printable representation."""
Walter Dörwald70a6b492004-02-12 17:35:32 +0000507 return "FieldStorage(%r, %r, %r)" % (
508 self.name, self.filename, self.value)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000509
def __iter__(self):
    """Iterate over the field names, as returned by keys()."""
    return iter(self.keys())
512
Guido van Rossum7aee3841996-03-07 18:00:44 +0000513 def __getattr__(self, name):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000514 if name != 'value':
Collin Winterce36ad82007-08-30 01:19:48 +0000515 raise AttributeError(name)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000516 if self.file:
517 self.file.seek(0)
518 value = self.file.read()
519 self.file.seek(0)
520 elif self.list is not None:
521 value = self.list
522 else:
523 value = None
524 return value
Guido van Rossum7aee3841996-03-07 18:00:44 +0000525
526 def __getitem__(self, key):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000527 """Dictionary style indexing."""
528 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000529 raise TypeError("not indexable")
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000530 found = []
531 for item in self.list:
532 if item.name == key: found.append(item)
533 if not found:
Collin Winterce36ad82007-08-30 01:19:48 +0000534 raise KeyError(key)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000535 if len(found) == 1:
536 return found[0]
537 else:
538 return found
Guido van Rossum7aee3841996-03-07 18:00:44 +0000539
def getvalue(self, key, default=None):
    """Dictionary style get() method, including 'value' lookup.

    Returns the field's value, a list of values when the name occurs
    more than once, or default when the name is absent.
    """
    if key not in self:
        return default
    found = self[key]
    if isinstance(found, list):
        return [field.value for field in found]
    return found.value
550
def getfirst(self, key, default=None):
    """Return the first value received for key, or default if absent."""
    if key not in self:
        return default
    found = self[key]
    first = found[0] if isinstance(found, list) else found
    return first.value
561
def getlist(self, key):
    """Return the list of values received for key (empty if absent)."""
    if key not in self:
        return []
    found = self[key]
    if not isinstance(found, list):
        return [found.value]
    return [field.value for field in found]
572
def keys(self):
    """Dictionary style keys() method.

    Returns the distinct field names as a list, in no particular order.
    Raises TypeError if there is no field list.
    """
    if self.list is None:
        raise TypeError("not indexable")
    distinct = {item.name for item in self.list}
    return list(distinct)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000578
def __contains__(self, key):
    """Dictionary style __contains__ method.

    True when at least one entry of ``self.list`` is named *key*.
    Raises TypeError when ``self.list`` is None.
    """
    if self.list is None:
        raise TypeError("not indexable")
    for field in self.list:
        if field.name == key:
            return True
    return False
Raymond Hettinger54f02222002-06-01 14:18:47 +0000584
def __len__(self):
    """Dictionary style len(x) support: the number of names from keys()."""
    names = self.keys()
    return len(names)
Guido van Rossum88b85d41997-01-11 19:21:33 +0000588
def __bool__(self):
    """Truth value: True when ``self.list`` is non-empty.

    Raises TypeError when ``self.list`` is None.
    """
    fields = self.list
    if fields is None:
        raise TypeError("Cannot be converted to bool.")
    return True if fields else False
593
def read_urlencoded(self):
    """Internal: read data in query string format.

    Reads ``self.length`` bytes from ``self.fp``, decodes them, appends
    ``self.qs_on_post`` when set, and stores one MiniFieldStorage per
    parsed (name, value) pair in ``self.list``.  Raises ValueError when
    the stream does not return bytes.
    """
    raw = self.fp.read(self.length)
    if not isinstance(raw, bytes):
        raise ValueError("%s should return bytes, got %s"
                         % (self.fp, type(raw).__name__))
    text = raw.decode(self.encoding, self.errors)
    if self.qs_on_post:
        text = text + '&' + self.qs_on_post
    pairs = urllib.parse.parse_qsl(
        text, self.keep_blank_values, self.strict_parsing,
        encoding=self.encoding, errors=self.errors,
        max_num_fields=self.max_num_fields)
    self.list = [MiniFieldStorage(name, value) for name, value in pairs]
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000609
# Class used to build nested parts; when falsy, read_multi() falls back
# to self.__class__.
FieldStorageClass = None

def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart.

    Fills ``self.list`` with any fields parsed from ``self.qs_on_post``
    followed by one nested part per section found in ``self.fp``, and
    keeps ``self.bytes_read`` up to date.

    Raises ValueError when the inner boundary is invalid, when the
    stream does not return bytes, or when ``max_num_fields`` is
    exceeded.
    """
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
    self.list = []
    if self.qs_on_post:
        query = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors,
            max_num_fields=self.max_num_fields)
        self.list.extend(MiniFieldStorage(key, value) for key, value in query)

    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline()  # bytes
    if not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s"
                         % (self.fp, type(first_line).__name__))
    self.bytes_read += len(first_line)

    # Ensure that we consume the file until we've hit our inner boundary
    opening_boundary = b"--" + ib
    while first_line.strip() != opening_boundary and first_line:
        first_line = self.fp.readline()
        self.bytes_read += len(first_line)

    # Propagate max_num_fields into the sub class appropriately
    max_num_fields = self.max_num_fields
    if max_num_fields is not None:
        max_num_fields -= len(self.list)

    while True:
        parser = FeedParser()
        # Collect the header lines up to and including the first blank
        # line (or EOF), then join once instead of growing a bytes
        # object line by line.
        hdr_lines = []
        while True:
            data = self.fp.readline()
            hdr_lines.append(data)
            if not data.strip():
                break
        hdr_text = b"".join(hdr_lines)
        if not hdr_text:
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()

        # Some clients add Content-Length for part headers, ignore them
        if 'content-length' in headers:
            del headers['content-length']

        # A limit of None cannot take part in arithmetic; hand it to the
        # nested part unchanged instead of raising TypeError.
        if self.limit is None:
            limit = None
        else:
            limit = self.limit - self.bytes_read
        part = klass(self.fp, headers, ib, environ, keep_blank_values,
                     strict_parsing, limit,
                     self.encoding, self.errors, max_num_fields)

        if max_num_fields is not None:
            # The part itself counts as one field, plus whatever it holds.
            max_num_fields -= 1
            if part.list:
                max_num_fields -= len(part.list)
            if max_num_fields < 0:
                raise ValueError('Max number of fields exceeded')

        self.bytes_read += part.bytes_read
        self.list.append(part)
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000678
def read_single(self):
    """Internal: read an atomic part.

    A negative ``self.length`` selects line-oriented reading; otherwise
    the body is read as binary data and the remaining lines are
    skipped.  ``self.file`` is rewound afterwards in both cases.
    """
    if self.length < 0:
        self.read_lines()
    else:
        self.read_binary()
        self.skip_lines()
    self.file.seek(0)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000687
bufsize = 8 * 1024          # I/O buffering size for copy to file

def read_binary(self):
    """Internal: read binary data.

    Copies up to ``self.length`` bytes from ``self.fp`` into a file
    obtained from ``make_file()``, at most ``self.bufsize`` bytes per
    read.  ``self.done`` is set to -1 if the stream ends early.  Raises
    ValueError when the stream does not return bytes.
    """
    self.file = self.make_file()
    remaining = self.length
    # A zero or negative length means there is nothing to copy.
    while remaining > 0:
        chunk = self.fp.read(min(remaining, self.bufsize))  # bytes
        if not isinstance(chunk, bytes):
            raise ValueError("%s should return bytes, got %s"
                             % (self.fp, type(chunk).__name__))
        self.bytes_read += len(chunk)
        if not chunk:
            self.done = -1
            break
        self.file.write(chunk)
        remaining -= len(chunk)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000706
def read_lines(self):
    """Internal: read lines until EOF or outerboundary.

    Starts with an in-memory buffer (bytes for binary fields, text
    otherwise) bound to both ``self.file`` and ``self.__file``, then
    delegates to the reader matching whether an outer boundary is set.
    """
    if self._binary_file:
        buffer = BytesIO()      # store data as bytes for files
    else:
        buffer = StringIO()     # as strings for other fields
    self.file = buffer
    self.__file = buffer
    if self.outerboundary:
        self.read_lines_to_outerboundary()
    else:
        self.read_lines_to_eof()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000717
def __write(self, line):
    """Append *line* (always bytes, not string) to ``self.file``.

    While the in-memory buffer is still active and this write would
    push it past 1000, its contents are first moved into a file from
    ``make_file()`` and the buffer is dropped.  Text fields receive the
    decoded line; binary fields receive the bytes unchanged.
    """
    pending = self.__file
    if pending is not None and pending.tell() + len(line) > 1000:
        self.file = self.make_file()
        self.file.write(pending.getvalue())
        self.__file = None
    if self._binary_file:
        # keep bytes
        payload = line
    else:
        # decode to string
        payload = line.decode(self.encoding, self.errors)
    self.file.write(payload)
Guido van Rossum52b8c292001-06-29 13:06:06 +0000732
def read_lines_to_eof(self):
    """Internal: read lines until EOF.

    Every line (read in pieces of at most 1<<16 bytes) is passed to
    ``self.__write``; ``self.done`` is set to -1 once the stream is
    exhausted.
    """
    while True:
        chunk = self.fp.readline(1 << 16)  # bytes
        self.bytes_read += len(chunk)
        if chunk:
            self.__write(chunk)
            continue
        self.done = -1
        return
Guido van Rossum7aee3841996-03-07 18:00:44 +0000742
def read_lines_to_outerboundary(self):
    """Internal: read lines until outerboundary.

    Data is read as bytes: boundaries and line ends must be converted
    to bytes for comparisons.

    Each line is passed to ``self.__write`` with the *previous* line's
    terminator in front of it, so the terminator directly before the
    boundary is never written.  ``self.done`` becomes 1 on the closing
    boundary and -1 on EOF.

    Reading also stops once ``self.limit`` bytes have been consumed.  A
    limit of None means "no limit" instead of raising TypeError.
    NOTE(review): a negative limit is also treated as "no limit",
    following the negative-means-unknown convention read_single applies
    to ``self.length``; confirm against the constructor.
    """
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    delim = b""
    last_line_lfend = True
    _read = 0
    while True:
        limit = self.limit
        if limit is not None and 0 <= limit <= _read:
            break
        line = self.fp.readline(1 << 16)  # bytes
        self.bytes_read += len(line)
        _read += len(line)
        if not line:
            self.done = -1
            break
        if delim == b"\r":
            # Re-attach a "\r" held back from the previous chunk so a
            # "\r\n" split across two reads is seen as one terminator.
            line = delim + line
            delim = b""
        # A boundary only counts at the start of a real line.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.rstrip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        odelim = delim
        if line.endswith(b"\r\n"):
            delim = b"\r\n"
            line = line[:-2]
            last_line_lfend = True
        elif line.endswith(b"\n"):
            delim = b"\n"
            line = line[:-1]
            last_line_lfend = True
        elif line.endswith(b"\r"):
            # We may interrupt \r\n sequences if they span the 2**16
            # byte boundary
            delim = b"\r"
            line = line[:-1]
            last_line_lfend = False
        else:
            delim = b""
            last_line_lfend = False
        self.__write(odelim + line)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000791
def skip_lines(self):
    """Internal: skip lines until outer boundary if defined.

    Does nothing when no outer boundary is set or ``self.done`` is
    already truthy.  Otherwise lines are consumed (and counted in
    ``self.bytes_read``) until the next boundary, the closing boundary
    (``self.done = 1``) or EOF (``self.done = -1``).
    """
    if not self.outerboundary or self.done:
        return
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    last_line_lfend = True
    while True:
        line = self.fp.readline(1 << 16)
        self.bytes_read += len(line)
        if not line:
            self.done = -1
            break
        # A boundary line *starts* with "--".  Testing the end of the
        # line never matches a line that still carries its newline, so
        # the next-part boundary was missed and everything up to EOF
        # was swallowed.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.strip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        last_line_lfend = line.endswith(b'\n')
Guido van Rossum7aee3841996-03-07 18:00:44 +0000813
def make_file(self):
    """Overridable: return a readable & writable file.

    The file will be used as follows:
    - data is written to it
    - seek(0)
    - data is read from it

    Binary fields get a file opened in binary mode; all other fields
    get a text-mode file using ``self.encoding`` with '\\n' newlines.

    This version returns a ``tempfile.TemporaryFile``, i.e. a temporary
    file that is cleaned up automatically once it is closed.

    Override this method in a derived class if you want a more
    permanent file, or a visible temporary file whose cleanup you
    manage yourself.

    """
    if not self._binary_file:
        return tempfile.TemporaryFile("w+",
                                      encoding=self.encoding, newline='\n')
    return tempfile.TemporaryFile("wb+")
Tim Peters88869f92001-01-14 23:36:06 +0000843
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000844
Guido van Rossum72755611996-03-06 07:20:06 +0000845# Test/debug code
846# ===============
Guido van Rossum9a22de11995-01-12 12:29:47 +0000847
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Emits a minimal HTTP header, then dumps the form, the working
    directory, the command line and the environment as HTML.  The dump
    is done twice: once normally (ending with a deliberately provoked
    exception to exercise print_exception()) and once after setting the
    module-level ``maxlen`` to 50.

    """
    global maxlen

    print("Content-type: text/html")
    print()
    # From here on, error output goes into the page as well.
    sys.stderr = sys.stdout
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # f() and g() exist only to give the provoked exception a
        # traceback with a couple of frames in it.
        def f():
            exec("testing print_exception() -- <I>italics?</I>")
        def g(f=f):
            f()
        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        # Deliberately catch everything: this script must always
        # produce a page.
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
Guido van Rossumad164711997-05-13 19:03:23 +0000886
def print_exception(type=None, value=None, tb=None, limit=None):
    """Print an exception and its traceback as HTML.

    When *type* is None the exception currently being handled is used.
    The traceback lines are HTML-escaped inside a <PRE> block and the
    final line is wrapped in <B>.
    """
    import traceback
    if type is None:
        type, value, tb = sys.exc_info()
    print()
    print("<H3>Traceback (most recent call last):</H3>")
    lines = traceback.format_tb(tb, limit)
    lines.extend(traceback.format_exception_only(type, value))
    frames_html = html.escape("".join(lines[:-1]))
    summary_html = html.escape(lines[-1])
    print("<PRE>%s<B>%s</B></PRE>" % (frames_html, summary_html))
    del tb
Guido van Rossum9a22de11995-01-12 12:29:47 +0000900
def print_environ(environ=os.environ):
    """Dump the shell environment as HTML.

    Writes one <DT>/<DD> pair per variable, sorted by name, with both
    name and value HTML-escaped.
    """
    names = sorted(environ.keys())
    print()
    print("<H3>Shell Environment:</H3>")
    print("<DL>")
    for name in names:
        escaped_name = html.escape(name)
        escaped_value = html.escape(environ[name])
        print("<DT>", escaped_name, "<DD>", escaped_value)
    print("</DL>")
    print()
Guido van Rossum72755611996-03-06 07:20:06 +0000911
def print_form(form):
    """Dump the contents of a form as HTML.

    For every key (sorted) the escaped key, the escaped repr of the
    value's type and the escaped repr of the value are written.  A
    notice is printed first when the form has no keys.
    """
    names = sorted(form.keys())
    print()
    print("<H3>Form Contents:</H3>")
    if not names:
        print("<P>No form fields.")
    print("<DL>")
    for name in names:
        print("<DT>" + html.escape(name) + ":", end=' ')
        field = form[name]
        type_html = html.escape(repr(type(field)))
        value_html = html.escape(repr(field))
        print("<i>" + type_html + "</i>")
        print("<DD>" + value_html)
    print("</DL>")
    print()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000927
def print_directory():
    """Dump the current directory as HTML.

    Prints the escaped working directory, or the escaped OSError
    message when it cannot be determined.
    """
    print()
    print("<H3>Current Working Directory:</H3>")
    try:
        report = html.escape(os.getcwd())
    except OSError as exc:
        report = "OSError: " + html.escape(str(exc))
    print(report)
    print()
Guido van Rossum9a22de11995-01-12 12:29:47 +0000939
def print_arguments():
    """Dump the command line arguments as HTML.

    The textual form of ``sys.argv`` is HTML-escaped before it is
    written, like the output of the other dump helpers, so that
    argument text cannot inject markup into the page.
    """
    print()
    print("<H3>Command Line Arguments:</H3>")
    print()
    print(html.escape(str(sys.argv)))
    print()
Guido van Rossuma8738a51996-03-14 21:30:28 +0000946
def print_environ_usage():
    """Dump a list of environment variables used by CGI as HTML.

    The output is static text: two <UL> lists, the first with the CGI
    variables and the second with common HTTP_* header variables.
    """
    cgi_names = (
        "AUTH_TYPE", "CONTENT_LENGTH", "CONTENT_TYPE", "DATE_GMT",
        "DATE_LOCAL", "DOCUMENT_NAME", "DOCUMENT_ROOT", "DOCUMENT_URI",
        "GATEWAY_INTERFACE", "LAST_MODIFIED", "PATH", "PATH_INFO",
        "PATH_TRANSLATED", "QUERY_STRING", "REMOTE_ADDR", "REMOTE_HOST",
        "REMOTE_IDENT", "REMOTE_USER", "REQUEST_METHOD", "SCRIPT_NAME",
        "SERVER_NAME", "SERVER_PORT", "SERVER_PROTOCOL", "SERVER_ROOT",
        "SERVER_SOFTWARE",
    )
    header_names = (
        "HTTP_ACCEPT", "HTTP_CONNECTION", "HTTP_HOST", "HTTP_PRAGMA",
        "HTTP_REFERER", "HTTP_USER_AGENT",
    )
    cgi_items = "".join("<LI>" + name + "\n" for name in cgi_names)
    header_items = "".join("<LI>" + name + "\n" for name in header_names)
    print(
        "\n"
        "<H3>These environment variables could have been set:</H3>\n"
        "<UL>\n"
        + cgi_items +
        "</UL>\n"
        "In addition, HTTP headers sent by the server may be passed in the\n"
        "environment as well. Here are some common variable names:\n"
        "<UL>\n"
        + header_items +
        "</UL>\n"
    )
Guido van Rossum9a22de11995-01-12 12:29:47 +0000989
Guido van Rossum9a22de11995-01-12 12:29:47 +0000990
Guido van Rossum72755611996-03-06 07:20:06 +0000991# Utilities
992# =========
Guido van Rossum9a22de11995-01-12 12:29:47 +0000993
def escape(s, quote=None):
    """Deprecated API.

    Emits a DeprecationWarning, then replaces "&", "<" and ">" in *s*
    with HTML entities; the double quote is replaced too when *quote*
    is true.
    """
    warn("cgi.escape is deprecated, use html.escape instead",
         DeprecationWarning, stacklevel=2)
    # "&" must be done first so the entities added later stay intact.
    replacements = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    if quote:
        replacements.append(('"', "&quot;"))
    for raw, entity in replacements:
        s = s.replace(raw, entity)
    return s
Guido van Rossum9a22de11995-01-12 12:29:47 +00001004
Georg Brandl1f7fffb2010-10-15 15:57:45 +00001005
def valid_boundary(s):
    """Check *s* (str or bytes) against the multipart boundary pattern.

    Returns the ``re`` match object when *s* consists of up to 200
    characters in the range space..'~' followed by one character in the
    range '!'..'~', and None otherwise.
    """
    import re
    # The pattern type has to match the subject type.
    _vb_pattern = (b"^[ -~]{0,200}[!-~]$" if isinstance(s, bytes)
                   else "^[ -~]{0,200}[!-~]$")
    return re.compile(_vb_pattern).match(s)
Guido van Rossum9a22de11995-01-12 12:29:47 +00001013
Guido van Rossum72755611996-03-06 07:20:06 +00001014# Invoke mainline
1015# ===============
1016
# Run the self-test only when this file is executed as a script;
# importing it as a module does not trigger it.
if __name__ == "__main__":
    test()