blob: e198ed8653f20db60370f94f7fe439f76cf0efc8 [file] [log] [blame]
Benjamin Peterson8c703a02010-03-11 22:05:58 +00001#! /usr/local/bin/python
Guido van Rossum1c9daa81995-09-18 21:52:37 +00002
Guido van Rossum467d7232001-02-13 13:13:33 +00003# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4# intentionally NOT "/usr/bin/env python". On many systems
5# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6# scripts, and /usr/local/bin is the default directory where Python is
7# installed, so /usr/bin/env would be unable to find python. Granted,
8# binary installations by Linux vendors often install Python in
9# /usr/bin. So let those vendors patch cgi.py to match their choice
10# of installation.
11
Guido van Rossum72755611996-03-06 07:20:06 +000012"""Support module for CGI (Common Gateway Interface) scripts.
Guido van Rossum1c9daa81995-09-18 21:52:37 +000013
Guido van Rossum7aee3841996-03-07 18:00:44 +000014This module defines a number of utilities for use by CGI scripts
15written in Python.
Guido van Rossum72755611996-03-06 07:20:06 +000016"""
17
Guido van Rossum98d9fd32000-02-28 15:12:25 +000018# History
19# -------
Tim Peters88869f92001-01-14 23:36:06 +000020#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000021# Michael McLay started this module. Steve Majewski changed the
22# interface to SvFormContentDict and FormContentDict. The multipart
23# parsing was inspired by code submitted by Andreas Paepcke. Guido van
24# Rossum rewrote, reformatted and documented the module and is currently
25# responsible for its maintenance.
Tim Peters88869f92001-01-14 23:36:06 +000026#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000027
Guido van Rossum52b8c292001-06-29 13:06:06 +000028__version__ = "2.6"
Guido van Rossum0147db01996-03-09 03:16:04 +000029
Guido van Rossum72755611996-03-06 07:20:06 +000030
31# Imports
32# =======
33
Victor Stinner5c23b8e2011-01-14 13:05:21 +000034from io import StringIO, BytesIO, TextIOWrapper
Guido van Rossum72755611996-03-06 07:20:06 +000035import sys
36import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +000037import urllib.parse
Victor Stinner5c23b8e2011-01-14 13:05:21 +000038from email.parser import FeedParser
Facundo Batistac469d4c2008-09-03 22:49:01 +000039from warnings import warn
Georg Brandl1f7fffb2010-10-15 15:57:45 +000040import html
Victor Stinner5c23b8e2011-01-14 13:05:21 +000041import locale
42import tempfile
Guido van Rossum72755611996-03-06 07:20:06 +000043
Georg Brandl49d1b4f2008-05-11 21:42:51 +000044__all__ = ["MiniFieldStorage", "FieldStorage",
Guido van Rossuma8423a92001-03-19 13:40:44 +000045 "parse", "parse_qs", "parse_qsl", "parse_multipart",
46 "parse_header", "print_exception", "print_environ",
47 "print_form", "print_directory", "print_arguments",
48 "print_environ_usage", "escape"]
Guido van Rossumc204c701996-09-05 19:07:11 +000049
50# Logging support
51# ===============
52
logfile = ""            # Name of the file to append log output to ("" = none)
logfp = None            # Open file object receiving log output (None = none)


def initlog(*allargs):
    """Write a log message, if there is a log file.

    Although this function is named initlog(), callers should always go
    through log(): log is a module variable bound to initlog (initially),
    to dolog (after a log file object is available) or to nolog (when
    logging is disabled).

    The first argument is a format string; any remaining arguments are
    applied to it with the % operator, so e.g.
        log("%s: %s", "a", "b")
    writes "a: b" to the log file, followed by a newline.

    If the global logfp is not None, it should be a file object to which
    log data is written.

    If the global logfp is None, the global logfile may name a file to
    open in append mode.  This file should be world writable!!!  If the
    file cannot be opened, logging is silently disabled (since there is
    no safe place where an error message could be sent).

    """
    global logfp, log
    if not logfp and logfile:
        try:
            logfp = open(logfile, "a")
        except IOError:
            # Best effort by design: fall through and disable logging.
            pass
    # Rebind log so that later calls skip this set-up step entirely.
    log = dolog if logfp else nolog
    log(*allargs)


def dolog(fmt, *args):
    """Write a log message to the log file.  See initlog() for docs."""
    message = fmt % args
    logfp.write(message + "\n")


def nolog(*allargs):
    """Dummy function, assigned to log when logging is disabled."""
    return None


log = initlog           # The current logging function
Guido van Rossumc204c701996-09-05 19:07:11 +0000100
101
Guido van Rossum72755611996-03-06 07:20:06 +0000102# Parsing functions
103# =================
104
# Maximum input we will accept when REQUEST_METHOD is POST
# 0 ==> unlimited input
maxlen = 0


def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
    """Parse a query in the environment or from a file (default stdin)

    Arguments, all optional:

    fp              : file pointer; default: sys.stdin (its binary buffer
        is used when it is a TextIOWrapper)

    environ         : environment dictionary; default: os.environ.
        Note that REQUEST_METHOD and QUERY_STRING may be written back
        into this mapping.

    keep_blank_values: flag indicating whether blank values in
        percent-encoded forms should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Raises ValueError when a POSTed urlencoded body announces a
    CONTENT_LENGTH larger than a non-zero maxlen.
    """
    if fp is None:
        fp = sys.stdin

    # Field keys and values (except for files) are returned as strings, so
    # an encoding is needed to decode the bytes read from fp.
    encoding = getattr(fp, 'encoding', 'latin-1')

    # fp.read() must return bytes
    if isinstance(fp, TextIOWrapper):
        fp = fp.buffer

    if 'REQUEST_METHOD' not in environ:
        environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone

    if environ['REQUEST_METHOD'] == 'POST':
        ctype, pdict = parse_header(environ['CONTENT_TYPE'])
        if ctype == 'multipart/form-data':
            return parse_multipart(fp, pdict)
        if ctype == 'application/x-www-form-urlencoded':
            clength = int(environ['CONTENT_LENGTH'])
            if maxlen and clength > maxlen:
                raise ValueError('Maximum content length exceeded')
            qs = fp.read(clength).decode(encoding)
        else:
            qs = ''                     # Unknown content-type

        # A query string (or, failing that, the first command-line
        # argument) is appended to whatever the body supplied.
        if 'QUERY_STRING' in environ:
            extra = environ['QUERY_STRING']
        elif sys.argv[1:]:
            extra = sys.argv[1]
        else:
            extra = None
        if extra is not None:
            if qs:
                qs = qs + '&'
            qs = qs + extra
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    elif 'QUERY_STRING' in environ:
        qs = environ['QUERY_STRING']
    else:
        qs = sys.argv[1] if sys.argv[1:] else ""
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really

    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding)
Guido van Rossume7808771995-08-07 20:12:09 +0000173
174
# Query-string parsing is implemented in urllib.parse; the two wrappers
# below exist only to maintain backward compatibility.
177
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Deprecated wrapper: emits a DeprecationWarning attributed to the
    caller and returns the result of urllib.parse.parse_qs().
    """
    message = "cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead"
    warn(message, DeprecationWarning, stacklevel=2)
    result = urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
    return result
Guido van Rossum1946f0d1999-06-04 17:54:39 +0000183
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Deprecated wrapper: emits a DeprecationWarning attributed to the
    caller and returns the result of urllib.parse.parse_qsl().
    """
    message = "cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead"
    warn(message, DeprecationWarning, stacklevel=2)
    pairs = urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
    return pairs
Guido van Rossum9a22de11995-01-12 12:29:47 +0000189
def parse_multipart(fp, pdict):
    """Parse multipart input.

    Arguments:
    fp   : input file; its read() and readline() must return bytes
    pdict: dictionary containing other parameters of content-type header;
           the 'boundary' entry may be a str (as produced by
           parse_header()) or bytes

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field.  Because the input is a binary
    stream, the values are returned as bytes.  This is easy to use but not
    much good if you are expecting megabytes to be uploaded -- in that case,
    use the FieldStorage class instead which is much more flexible.

    Raises ValueError if the boundary is rejected by valid_boundary() or if
    a part announces a content-length larger than a non-zero maxlen.

    XXX This does not parse nested multipart parts -- use FieldStorage for
    that.

    XXX This should really be subsumed by FieldStorage altogether -- no
    point in having two implementations of the same parsing algorithm.
    Also, FieldStorage protects itself better against certain DoS attacks
    by limiting the size of the data read in one chunk.  The API here
    does not support that kind of protection.  This also affects parse()
    since it can call parse_multipart().

    """
    import http.client

    boundary = pdict['boundary'] if 'boundary' in pdict else ""
    if not valid_boundary(boundary):
        raise ValueError('Invalid boundary in multipart form: %r'
                         % (boundary,))
    # The stream is binary (http.client.parse_headers() below requires it),
    # so every delimiter compared against it has to be bytes as well.
    if isinstance(boundary, str):
        boundary = boundary.encode('ascii')

    nextpart = b"--" + boundary
    lastpart = b"--" + boundary + b"--"
    partdict = {}
    terminator = b""

    while terminator != lastpart:
        nbytes = -1             # -1: this part announced no usable length
        data = None             # None: still in the preamble, nothing to keep
        if terminator:
            # At start of next part.  Read headers first.
            headers = http.client.parse_headers(fp)
            clength = headers.get('content-length')
            if clength:
                try:
                    nbytes = int(clength)
                except ValueError:
                    pass
            if nbytes > 0:
                if maxlen and nbytes > maxlen:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(nbytes)
            else:
                data = b""
        # Read lines until end of part.
        lines = []
        while True:
            line = fp.readline()
            if not line:
                terminator = lastpart   # End outer loop
                break
            if line.startswith(b"--"):
                terminator = line.rstrip()
                if terminator in (nextpart, lastpart):
                    break
            lines.append(line)
        # Done with part.
        if data is None:
            continue
        if nbytes < 0:
            if lines:
                # Strip final line terminator
                last = lines[-1]
                if last[-2:] == b"\r\n":
                    last = last[:-2]
                elif last[-1:] == b"\n":
                    last = last[:-1]
                lines[-1] = last
                data = b"".join(lines)
        disposition = headers['content-disposition']
        if not disposition:
            continue
        key, params = parse_header(disposition)
        if key != 'form-data':
            continue
        if 'name' not in params:
            continue
        partdict.setdefault(params['name'], []).append(data)

    return partdict
Guido van Rossum9a22de11995-01-12 12:29:47 +0000288
289
def _parseparam(s):
    """Yield the stripped ';'-separated fields of the header value *s*.

    *s* is expected to start with ';' (parse_header() prepends one).  A ';'
    that appears inside a double-quoted string does not end a field.
    """
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        # An odd number of *unescaped* quotes before the ';' means we are
        # still inside a quoted string.  Backslash-escaped quotes (\") are
        # part of the value and must not flip the parity.
        while end > 0 and (s.count('"', 0, end) -
                           s.count('\\"', 0, end)) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        yield s[:end].strip()
        s = s[end:]


def parse_header(line):
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.  Option names
    are lower-cased; option values have surrounding double quotes removed
    and the escapes \\\\ and \\" undone.  Fields without '=' are ignored.

    """
    parts = _parseparam(';' + line)
    key = next(parts)           # always present: the input starts with ';'
    pdict = {}
    for field in parts:
        name, sep, value = field.partition('=')
        if not sep:
            continue
        name = name.strip().lower()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        pdict[name] = value
    return key, pdict
Guido van Rossum72755611996-03-06 07:20:06 +0000321
322
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000323# Classes for field storage
324# =========================
325
class MiniFieldStorage:

    """Like FieldStorage, for use when no file uploads are possible."""

    # Placeholder attributes, defined at class level so that an instance
    # offers the same attribute names as a FieldStorage.
    filename = list = type = file = disposition = None
    type_options = {}
    disposition_options = {}
    headers = {}

    def __init__(self, name, value):
        """Constructor from field name and value."""
        self.name, self.value = name, value

    def __repr__(self):
        """Return printable representation."""
        shown = (self.name, self.value)
        return "MiniFieldStorage(%r, %r)" % shown
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000349
350
351class FieldStorage:
352
Guido van Rossum7aee3841996-03-07 18:00:44 +0000353 """Store a sequence of fields, reading multipart/form-data.
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000354
Guido van Rossum7aee3841996-03-07 18:00:44 +0000355 This class provides naming, typing, files stored on disk, and
356 more. At the top level, it is accessible like a dictionary, whose
357 keys are the field names. (Note: None can occur as a field name.)
358 The items are either a Python list (if there's multiple values) or
359 another FieldStorage or MiniFieldStorage object. If it's a single
360 object, it has the following attributes:
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000361
Guido van Rossum7aee3841996-03-07 18:00:44 +0000362 name: the field name, if specified; otherwise None
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000363
Guido van Rossum7aee3841996-03-07 18:00:44 +0000364 filename: the filename, if specified; otherwise None; this is the
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000365 client side filename, *not* the file name on which it is
366 stored (that's a temporary file you don't deal with)
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000367
Guido van Rossum7aee3841996-03-07 18:00:44 +0000368 value: the value as a *string*; for file uploads, this
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000369 transparently reads the file every time you request the value
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000370 and returns *bytes*
Guido van Rossum7aee3841996-03-07 18:00:44 +0000371
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000372 file: the file(-like) object from which you can read the data *as
373 bytes* ; None if the data is stored a simple string
Guido van Rossum7aee3841996-03-07 18:00:44 +0000374
375 type: the content-type, or None if not specified
376
377 type_options: dictionary of options specified on the content-type
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000378 line
Guido van Rossum7aee3841996-03-07 18:00:44 +0000379
380 disposition: content-disposition, or None if not specified
381
382 disposition_options: dictionary of corresponding options
383
Barry Warsaw596097e2008-06-12 02:38:51 +0000384 headers: a dictionary(-like) object (sometimes email.message.Message or a
Armin Rigo3a703b62005-09-19 09:11:04 +0000385 subclass thereof) containing *all* headers
Guido van Rossum7aee3841996-03-07 18:00:44 +0000386
387 The class is subclassable, mostly for the purpose of overriding
388 the make_file() method, which is called internally to come up with
389 a file open for reading and writing. This makes it possible to
390 override the default choice of storing all files in a temporary
391 directory and unlinking them as soon as they have been opened.
392
393 """
def __init__(self, fp=None, headers=None, outerboundary=b'',
             environ=os.environ, keep_blank_values=0, strict_parsing=0,
             limit=None, encoding='utf-8', errors='replace'):
    """Constructor.  Read multipart/* until last part.

    Arguments, all optional:

    fp              : file pointer; default: sys.stdin.buffer
        (not used when the request method is GET)
        Can be :
        1. a TextIOWrapper object
        2. an object whose read() and readline() methods return bytes

    headers         : header dictionary-like object; default:
        taken from environ as per CGI spec

    outerboundary   : terminating multipart boundary, as bytes
        (for internal use only)

    environ         : environment dictionary; default: os.environ

    keep_blank_values: flag indicating whether blank values in
        percent-encoded forms should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    limit : used internally to read parts of multipart/form-data forms,
        to exit from the reading loop when reached. It is the difference
        between the form content-length and the number of bytes already
        read

    encoding, errors : the encoding and error handler used to decode the
        binary stream to strings. Must be the same as the charset defined
        for the page sending the form (content-type : meta http-equiv or
        header)

    Raises TypeError if outerboundary is not bytes, and ValueError if the
    announced content-length exceeds a non-zero maxlen.

    """
    method = 'GET'
    self.keep_blank_values = keep_blank_values
    self.strict_parsing = strict_parsing
    if 'REQUEST_METHOD' in environ:
        method = environ['REQUEST_METHOD'].upper()
    self.qs_on_post = None
    if method == 'GET' or method == 'HEAD':
        # The "body" of a GET/HEAD request is its query string (or, for
        # stand-alone testing, the first command-line argument).
        if 'QUERY_STRING' in environ:
            qs = environ['QUERY_STRING']
        elif sys.argv[1:]:
            qs = sys.argv[1]
        else:
            qs = ""
        qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
        fp = BytesIO(qs)
        if headers is None:
            headers = {'content-type':
                       "application/x-www-form-urlencoded"}
    if headers is None:
        headers = {}
        if method == 'POST':
            # Set default content-type for POST to what's traditional
            headers['content-type'] = "application/x-www-form-urlencoded"
        if 'CONTENT_TYPE' in environ:
            headers['content-type'] = environ['CONTENT_TYPE']
        if 'QUERY_STRING' in environ:
            self.qs_on_post = environ['QUERY_STRING']
        if 'CONTENT_LENGTH' in environ:
            headers['content-length'] = environ['CONTENT_LENGTH']
    if fp is None:
        self.fp = sys.stdin.buffer
    # self.fp.read() must return bytes
    elif isinstance(fp, TextIOWrapper):
        self.fp = fp.buffer
    else:
        self.fp = fp

    self.encoding = encoding
    self.errors = errors

    self.headers = headers
    if not isinstance(outerboundary, bytes):
        raise TypeError('outerboundary must be bytes, not %s'
                        % type(outerboundary).__name__)
    self.outerboundary = outerboundary

    self.bytes_read = 0
    self.limit = limit

    # Process content-disposition header
    cdisp, pdict = "", {}
    if 'content-disposition' in self.headers:
        cdisp, pdict = parse_header(self.headers['content-disposition'])
    self.disposition = cdisp
    self.disposition_options = pdict
    self.name = None
    if 'name' in pdict:
        self.name = pdict['name']
    self.filename = None
    if 'filename' in pdict:
        self.filename = pdict['filename']
    self._binary_file = self.filename is not None

    # Process content-type header
    #
    # Honor any existing content-type header.  But if there is no
    # content-type header, use some sensible defaults.  Assume
    # outerboundary is "" at the outer level, but something non-false
    # inside a multi-part.  The default for an inner part is text/plain,
    # but for an outer part it should be urlencoded.  This should catch
    # bogus clients which erroneously forget to include a content-type
    # header.
    #
    # See below for what we do if there does exist a content-type header,
    # but it happens to be something we don't understand.
    if 'content-type' in self.headers:
        ctype, pdict = parse_header(self.headers['content-type'])
    elif self.outerboundary or method != 'POST':
        ctype, pdict = "text/plain", {}
    else:
        ctype, pdict = 'application/x-www-form-urlencoded', {}
    self.type = ctype
    self.type_options = pdict
    if 'boundary' in pdict:
        self.innerboundary = pdict['boundary'].encode(self.encoding)
    else:
        self.innerboundary = b""

    clen = -1                   # -1: no (usable) content-length header
    if 'content-length' in self.headers:
        try:
            clen = int(self.headers['content-length'])
        except ValueError:
            pass
        if maxlen and clen > maxlen:
            raise ValueError('Maximum content length exceeded')
    self.length = clen
    # Only a real length may become the read limit.  Testing the truth of
    # clen would turn the -1 sentinel into a limit of -1 and would skip a
    # genuine length of 0.
    if self.limit is None and clen >= 0:
        self.limit = clen

    self.list = self.file = None
    self.done = 0
    if ctype == 'application/x-www-form-urlencoded':
        self.read_urlencoded()
    elif ctype[:10] == 'multipart/':
        self.read_multi(environ, keep_blank_values, strict_parsing)
    else:
        self.read_single()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000545
546 def __repr__(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000547 """Return a printable representation."""
Walter Dörwald70a6b492004-02-12 17:35:32 +0000548 return "FieldStorage(%r, %r, %r)" % (
549 self.name, self.filename, self.value)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000550
Guido van Rossum4061cbe2002-09-11 18:20:34 +0000551 def __iter__(self):
552 return iter(self.keys())
553
Guido van Rossum7aee3841996-03-07 18:00:44 +0000554 def __getattr__(self, name):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000555 if name != 'value':
Collin Winterce36ad82007-08-30 01:19:48 +0000556 raise AttributeError(name)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000557 if self.file:
558 self.file.seek(0)
559 value = self.file.read()
560 self.file.seek(0)
561 elif self.list is not None:
562 value = self.list
563 else:
564 value = None
565 return value
Guido van Rossum7aee3841996-03-07 18:00:44 +0000566
567 def __getitem__(self, key):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000568 """Dictionary style indexing."""
569 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000570 raise TypeError("not indexable")
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000571 found = []
572 for item in self.list:
573 if item.name == key: found.append(item)
574 if not found:
Collin Winterce36ad82007-08-30 01:19:48 +0000575 raise KeyError(key)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000576 if len(found) == 1:
577 return found[0]
578 else:
579 return found
Guido van Rossum7aee3841996-03-07 18:00:44 +0000580
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000581 def getvalue(self, key, default=None):
582 """Dictionary style get() method, including 'value' lookup."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000583 if key in self:
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000584 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000585 if isinstance(value, list):
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000586 return [x.value for x in value]
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000587 else:
588 return value.value
589 else:
590 return default
591
def getfirst(self, key, default=None):
    """Return the first value received for *key*, or *default* if absent."""
    if key not in self:
        return default
    found = self[key]
    first = found[0] if isinstance(found, list) else found
    return first.value
602
def getlist(self, key):
    """Return the list of values received for *key* (empty if absent)."""
    if key not in self:
        return []
    found = self[key]
    fields = found if isinstance(found, list) else [found]
    return [field.value for field in fields]
613
def keys(self):
    """Dictionary style keys() method.

    Return a list of the distinct field names.  Raise TypeError when
    there is no field list.
    """
    if self.list is None:
        raise TypeError("not indexable")
    distinct = {field.name for field in self.list}
    return list(distinct)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000619
def __contains__(self, key):
    """Dictionary style __contains__ method.

    Return True when some field is named *key*.  Raise TypeError when
    there is no field list.
    """
    if self.list is None:
        raise TypeError("not indexable")
    for field in self.list:
        if field.name == key:
            return True
    return False
Raymond Hettinger54f02222002-06-01 14:18:47 +0000625
def __len__(self):
    """Dictionary style len(x) support: the number of names from keys()."""
    names = self.keys()
    return len(names)
Guido van Rossum88b85d41997-01-11 19:21:33 +0000629
def __bool__(self):
    """Truth value: True when the field list is non-empty.

    Python 3 looks up ``__bool__`` for truth testing and never calls
    the Python 2 ``__nonzero__`` hook, so this method must carry the
    ``__bool__`` name for the definition to take effect.  A missing
    field list (``self.list is None``) is falsy.
    """
    return bool(self.list)

# Keep the Python 2 spelling available for code that calls it directly.
__nonzero__ = __bool__
632
def read_urlencoded(self):
    """Internal: read data in query string format.

    Reads ``self.length`` bytes from ``self.fp``, decodes them, appends
    ``self.qs_on_post`` when set, and stores one MiniFieldStorage per
    parsed (name, value) pair in ``self.list``.  Raises ValueError when
    the stream does not return bytes.
    """
    raw = self.fp.read(self.length)
    if not isinstance(raw, bytes):
        raise ValueError("%s should return bytes, got %s"
                         % (self.fp, type(raw).__name__))
    text = raw.decode(self.encoding, self.errors)
    if self.qs_on_post:
        text = text + '&' + self.qs_on_post
    # Reset the list before parsing so it is empty even if parsing raises.
    self.list = []
    pairs = urllib.parse.parse_qsl(
        text, self.keep_blank_values, self.strict_parsing,
        encoding=self.encoding, errors=self.errors)
    for name, field_value in pairs:
        self.list.append(MiniFieldStorage(name, field_value))
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000649
# Optional class used by read_multi() to build each sub-part; while it
# is None, read_multi() falls back to self.__class__.
FieldStorageClass = None
651
def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart.

    Fills ``self.list`` with one sub-part object per body part found on
    ``self.fp``, preceded by MiniFieldStorage entries for any fields in
    ``self.qs_on_post``.

    *environ*, *keep_blank_values* and *strict_parsing* are forwarded
    unchanged to the constructor of every sub-part.

    Raises ValueError when the inner boundary is not valid or when the
    stream does not return bytes.
    """
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
    self.list = []
    if self.qs_on_post:
        query = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors)
        for key, value in query:
            self.list.append(MiniFieldStorage(key, value))

    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline()  # bytes
    if not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s"
                         % (self.fp, type(first_line).__name__))
    self.bytes_read += len(first_line)
    # first line holds boundary ; ignore it, or check that
    # b"--" + ib == first_line.strip() ?
    while True:
        parser = FeedParser()
        # Collect the part's header block: every line up to and
        # including the first blank line (or EOF).
        hdr_text = b""
        while True:
            data = self.fp.readline()
            hdr_text += data
            if not data.strip():
                break
        if not hdr_text:
            # Nothing left on the stream.
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()
        # A limit of None means "no cap"; only subtract when one is set,
        # otherwise the arithmetic would raise TypeError.
        if self.limit is None:
            remaining = None
        else:
            remaining = self.limit - self.bytes_read
        part = klass(self.fp, headers, ib, environ, keep_blank_values,
                     strict_parsing, remaining,
                     self.encoding, self.errors)
        self.bytes_read += part.bytes_read
        self.list.append(part)
        # Stop once the part reported the closing boundary or EOF, or
        # once a known (positive) length has been consumed.  A negative
        # length means "unknown" and must not end the loop by itself.
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000696
def read_single(self):
    """Internal: read an atomic part.

    A negative length means the size is unknown, so the part is read
    line by line; otherwise exactly ``length`` bytes are read and any
    trailing lines are skipped.  The resulting file is rewound.
    """
    if self.length < 0:
        self.read_lines()
    else:
        self.read_binary()
        self.skip_lines()
    self.file.seek(0)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000705
# Upper bound, in bytes, on each fp.read() call issued by read_binary().
bufsize = 8*1024        # I/O buffering size for copy to file
Guido van Rossum7aee3841996-03-07 18:00:44 +0000707
def read_binary(self):
    """Internal: read binary data.

    Copies up to ``self.length`` bytes from ``self.fp`` into a fresh
    file from make_file(), at most ``self.bufsize`` bytes per read.
    Sets ``self.done`` to -1 if the stream ends early.  Raises
    ValueError when the stream does not return bytes.
    """
    self.file = self.make_file()
    remaining = self.length
    while remaining > 0:
        chunk = self.fp.read(min(remaining, self.bufsize))  # bytes
        if not isinstance(chunk, bytes):
            raise ValueError("%s should return bytes, got %s"
                             % (self.fp, type(chunk).__name__))
        self.bytes_read += len(chunk)
        if not chunk:
            # Premature end of input.
            self.done = -1
            break
        self.file.write(chunk)
        remaining -= len(chunk)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000724
def read_lines(self):
    """Internal: read lines until EOF or outerboundary.

    Starts with an in-memory buffer (bytes for files, text for other
    fields) and then reads up to the outer boundary when one is set,
    else up to EOF.
    """
    # store data as bytes for files, as strings for other fields
    buffer_type = BytesIO if self._binary_file else StringIO
    self.file = self.__file = buffer_type()
    if not self.outerboundary:
        self.read_lines_to_eof()
    else:
        self.read_lines_to_outerboundary()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000735
def __write(self, line):
    """Append *line* (always bytes, not string) to the field's file.

    While the data still lives in the in-memory buffer, a write that
    would push it past 1000 moves the buffered content into a file from
    make_file() first.  Text fields are decoded before being written.
    """
    memory_buffer = self.__file
    if memory_buffer is not None and memory_buffer.tell() + len(line) > 1000:
        # Spill: switch to a make_file() file and carry the content over.
        self.file = self.make_file()
        self.file.write(memory_buffer.getvalue())
        self.__file = None
    if not self._binary_file:
        # decode to string
        self.file.write(line.decode(self.encoding, self.errors))
    else:
        # keep bytes
        self.file.write(line)
Guido van Rossum52b8c292001-06-29 13:06:06 +0000750
def read_lines_to_eof(self):
    """Internal: read lines until EOF.

    Every line is counted in ``self.bytes_read`` and passed to the
    private writer; ``self.done`` becomes -1 once the stream is empty.
    """
    while True:
        line = self.fp.readline(1<<16)  # bytes
        self.bytes_read += len(line)
        if line:
            self.__write(line)
            continue
        self.done = -1
        return
Guido van Rossum7aee3841996-03-07 18:00:44 +0000760
def read_lines_to_outerboundary(self):
    """Internal: read lines until outerboundary.

    Data is read as bytes: boundaries and line ends must be converted
    to bytes for comparisons.

    Reading stops at the next boundary (``self.done`` unchanged), at
    the closing boundary (``self.done = 1``), at EOF
    (``self.done = -1``), or once ``self.limit`` bytes have been read.
    A limit of None is treated as "no cap" instead of raising
    TypeError on the comparison.
    """
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    delim = b""
    last_line_lfend = True
    _read = 0
    while True:
        if self.limit is not None and _read >= self.limit:
            break
        line = self.fp.readline(1<<16)  # bytes
        self.bytes_read += len(line)
        _read += len(line)
        if not line:
            self.done = -1
            break
        # A boundary only counts when it starts a fresh line, i.e. the
        # previous chunk ended with a line feed.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.rstrip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        # The line ending is held back and written together with the
        # NEXT line, so the terminator right before a boundary is never
        # written to the field.
        odelim = delim
        if line.endswith(b"\r\n"):
            delim = b"\r\n"
            line = line[:-2]
            last_line_lfend = True
        elif line.endswith(b"\n"):
            delim = b"\n"
            line = line[:-1]
            last_line_lfend = True
        else:
            delim = b""
            last_line_lfend = False
        self.__write(odelim + line)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000800
def skip_lines(self):
    """Internal: skip lines until outer boundary if defined.

    Does nothing when there is no outer boundary or when ``self.done``
    is already set.  Otherwise lines are consumed (and counted in
    ``self.bytes_read``) up to and including the next boundary line.
    ``self.done`` becomes 1 at the closing boundary and -1 at EOF.
    """
    if not self.outerboundary or self.done:
        return
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    last_line_lfend = True
    while True:
        line = self.fp.readline(1<<16)
        self.bytes_read += len(line)
        if not line:
            self.done = -1
            break
        # Boundary lines BEGIN with "--".  Testing the end of the line
        # would miss every boundary that carries a line terminator and
        # make this loop swallow the rest of the input.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.strip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        last_line_lfend = line.endswith(b'\n')
Guido van Rossum7aee3841996-03-07 18:00:44 +0000822
def make_file(self):
    """Overridable: return a readable & writable file.

    The returned file is used in three steps:
    - data is written to it
    - seek(0)
    - data is read from it

    Binary mode is used for files, text mode (with ``self.encoding``
    and '\\n' newlines) for other fields.

    This implementation hands back a ``tempfile.TemporaryFile``.
    Derive a class and override this method if you want a more
    permanent file, or a visible temporary file that you clean up
    yourself (for example from a __del__ method of the derived class).

    """
    if not self._binary_file:
        return tempfile.TemporaryFile("w+",
                                      encoding=self.encoding, newline='\n')
    return tempfile.TemporaryFile("wb+")
Tim Peters88869f92001-01-14 23:36:06 +0000852
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000853
Guido van Rossum72755611996-03-06 07:20:06 +0000854# Test/debug code
855# ===============
Guido van Rossum9a22de11995-01-12 12:29:47 +0000856
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Write minimal HTTP headers and dump all information provided to
    the script in HTML form.

    The dump is produced twice: once as-is, and once after setting the
    module-level ``maxlen`` to 50.  Any exception raised during either
    pass is rendered with print_exception() instead of propagating.

    """
    print("Content-type: text/html")
    print()
    # From here on, anything written to stderr goes to stdout as well.
    sys.stderr = sys.stdout
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # The string below is not valid Python, so exec() raises; the
        # f/g nesting gives the resulting traceback more than one frame.
        def f():
            exec("testing print_exception() -- <I>italics?</I>")
        def g(f=f):
            f()
        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        # Deliberately broad: this pass exists to show how failures render.
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    global maxlen
    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
Guido van Rossumad164711997-05-13 19:03:23 +0000895
def print_exception(type=None, value=None, tb=None, limit=None):
    """Print an exception and its traceback as HTML.

    When *type* is None the exception currently being handled is used
    (from sys.exc_info()).  *limit* is passed to traceback.format_tb().
    The final line of the report is wrapped in <B>; all text is
    HTML-escaped.
    """
    import traceback
    if type is None:
        type, value, tb = sys.exc_info()
    print()
    print("<H3>Traceback (most recent call last):</H3>")
    report = traceback.format_tb(tb, limit)
    report = report + traceback.format_exception_only(type, value)
    stack_text = html.escape("".join(report[:-1]))
    summary_text = html.escape(report[-1])
    print("<PRE>%s<B>%s</B></PRE>" % (stack_text, summary_text))
    del tb
Guido van Rossum9a22de11995-01-12 12:29:47 +0000909
def print_environ(environ=os.environ):
    """Dump the shell environment as HTML.

    Emits one <DT>/<DD> pair per variable, in sorted name order, with
    names and values HTML-escaped.
    """
    names = sorted(environ.keys())
    print()
    print("<H3>Shell Environment:</H3>")
    print("<DL>")
    for name in names:
        escaped_name = html.escape(name)
        escaped_value = html.escape(environ[name])
        print("<DT>", escaped_name, "<DD>", escaped_value)
    print("</DL>")
    print()
Guido van Rossum72755611996-03-06 07:20:06 +0000920
def print_form(form):
    """Dump the contents of a form as HTML.

    For each key (in sorted order) prints the key, the type of the
    stored object and its repr, all HTML-escaped.  A note is printed
    when the form has no keys.
    """
    names = sorted(form.keys())
    print()
    print("<H3>Form Contents:</H3>")
    if len(names) == 0:
        print("<P>No form fields.")
    print("<DL>")
    for name in names:
        print("<DT>" + html.escape(name) + ":", end=' ')
        entry = form[name]
        type_text = html.escape(repr(type(entry)))
        print("<i>" + type_text + "</i>")
        print("<DD>" + html.escape(repr(entry)))
    print("</DL>")
    print()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000936
def print_directory():
    """Dump the current directory as HTML.

    Prints the HTML-escaped working directory, or an ``os.error:`` line
    with the escaped message when it cannot be determined.
    """
    print()
    print("<H3>Current Working Directory:</H3>")
    try:
        shown = (html.escape(os.getcwd()),)
    except os.error as err:
        shown = ("os.error:", html.escape(str(err)))
    print(*shown)
    print()
Guido van Rossum9a22de11995-01-12 12:29:47 +0000948
def print_arguments():
    """Dump the command line arguments as HTML.

    Prints the list form of ``sys.argv``, HTML-escaped so that markup
    characters inside an argument are shown literally instead of being
    interpreted by the browser.
    """
    print()
    print("<H3>Command Line Arguments:</H3>")
    print()
    # Arguments may contain externally supplied text; escape it before
    # it is embedded in the HTML page.
    print(html.escape(str(sys.argv)))
    print()
Guido van Rossuma8738a51996-03-14 21:30:28 +0000955
def print_environ_usage():
    """Dump a list of environment variables used by CGI as HTML."""
    usage_html = """
<H3>These environment variables could have been set:</H3>
<UL>
<LI>AUTH_TYPE
<LI>CONTENT_LENGTH
<LI>CONTENT_TYPE
<LI>DATE_GMT
<LI>DATE_LOCAL
<LI>DOCUMENT_NAME
<LI>DOCUMENT_ROOT
<LI>DOCUMENT_URI
<LI>GATEWAY_INTERFACE
<LI>LAST_MODIFIED
<LI>PATH
<LI>PATH_INFO
<LI>PATH_TRANSLATED
<LI>QUERY_STRING
<LI>REMOTE_ADDR
<LI>REMOTE_HOST
<LI>REMOTE_IDENT
<LI>REMOTE_USER
<LI>REQUEST_METHOD
<LI>SCRIPT_NAME
<LI>SERVER_NAME
<LI>SERVER_PORT
<LI>SERVER_PROTOCOL
<LI>SERVER_ROOT
<LI>SERVER_SOFTWARE
</UL>
In addition, HTTP headers sent by the server may be passed in the
environment as well. Here are some common variable names:
<UL>
<LI>HTTP_ACCEPT
<LI>HTTP_CONNECTION
<LI>HTTP_HOST
<LI>HTTP_PRAGMA
<LI>HTTP_REFERER
<LI>HTTP_USER_AGENT
</UL>
"""
    print(usage_html)
Guido van Rossum9a22de11995-01-12 12:29:47 +0000998
Guido van Rossum9a22de11995-01-12 12:29:47 +0000999
Guido van Rossum72755611996-03-06 07:20:06 +00001000# Utilities
1001# =========
Guido van Rossum9a22de11995-01-12 12:29:47 +00001002
def escape(s, quote=None):
    """Deprecated API.

    Replace '&', '<' and '>' in *s* with HTML entities; when *quote* is
    true, '"' is replaced as well.  Emits a PendingDeprecationWarning
    on every call.
    """
    warn("cgi.escape is deprecated, use html.escape instead",
         PendingDeprecationWarning, stacklevel=2)
    # "&" goes first so the entities inserted afterwards are left intact.
    substitutions = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    if quote:
        substitutions.append(('"', "&quot;"))
    for char, entity in substitutions:
        s = s.replace(char, entity)
    return s
Guido van Rossum9a22de11995-01-12 12:29:47 +00001013
Georg Brandl1f7fffb2010-10-15 15:57:45 +00001014
def valid_boundary(s, _vb_pattern=None):
    """Match *s* (str or bytes) against the multipart boundary pattern.

    Returns the re match object on success and None otherwise.  Any
    value passed as *_vb_pattern* is ignored: the pattern is always
    chosen here to agree with the type of *s*.
    """
    import re
    _vb_pattern = (b"^[ -~]{0,200}[!-~]$" if isinstance(s, bytes)
                   else "^[ -~]{0,200}[!-~]$")
    return re.match(_vb_pattern, s)
Guido van Rossum9a22de11995-01-12 12:29:47 +00001022
Guido van Rossum72755611996-03-06 07:20:06 +00001023# Invoke mainline
1024# ===============
1025
# Call test() when this file is run as a script (not imported as a
# module); test() prints an HTML dump of the request to stdout.
if __name__ == '__main__':
    test()