blob: 96b1f5721d4494317a502fdcc18d6d4e9ca29259 [file] [log] [blame]
Benjamin Peterson8c703a02010-03-11 22:05:58 +00001#! /usr/local/bin/python
Guido van Rossum1c9daa81995-09-18 21:52:37 +00002
Guido van Rossum467d7232001-02-13 13:13:33 +00003# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4# intentionally NOT "/usr/bin/env python". On many systems
5# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6# scripts, and /usr/local/bin is the default directory where Python is
7# installed, so /usr/bin/env would be unable to find python. Granted,
8# binary installations by Linux vendors often install Python in
9# /usr/bin. So let those vendors patch cgi.py to match their choice
10# of installation.
11
Guido van Rossum72755611996-03-06 07:20:06 +000012"""Support module for CGI (Common Gateway Interface) scripts.
Guido van Rossum1c9daa81995-09-18 21:52:37 +000013
Guido van Rossum7aee3841996-03-07 18:00:44 +000014This module defines a number of utilities for use by CGI scripts
15written in Python.
Guido van Rossum72755611996-03-06 07:20:06 +000016"""
17
Guido van Rossum98d9fd32000-02-28 15:12:25 +000018# History
19# -------
Tim Peters88869f92001-01-14 23:36:06 +000020#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000021# Michael McLay started this module. Steve Majewski changed the
22# interface to SvFormContentDict and FormContentDict. The multipart
23# parsing was inspired by code submitted by Andreas Paepcke. Guido van
24# Rossum rewrote, reformatted and documented the module and is currently
25# responsible for its maintenance.
Tim Peters88869f92001-01-14 23:36:06 +000026#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000027
# Version string of this module.
__version__ = "2.6"
Guido van Rossum0147db01996-03-09 03:16:04 +000029
Guido van Rossum72755611996-03-06 07:20:06 +000030
31# Imports
32# =======
33
Victor Stinner5c23b8e2011-01-14 13:05:21 +000034from io import StringIO, BytesIO, TextIOWrapper
Guido van Rossum72755611996-03-06 07:20:06 +000035import sys
36import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +000037import urllib.parse
Victor Stinner5c23b8e2011-01-14 13:05:21 +000038from email.parser import FeedParser
Facundo Batistac469d4c2008-09-03 22:49:01 +000039from warnings import warn
Georg Brandl1f7fffb2010-10-15 15:57:45 +000040import html
Victor Stinner5c23b8e2011-01-14 13:05:21 +000041import locale
42import tempfile
Guido van Rossum72755611996-03-06 07:20:06 +000043
# Public names exported by "from cgi import *".
__all__ = ["MiniFieldStorage", "FieldStorage",
           "parse", "parse_qs", "parse_qsl", "parse_multipart",
           "parse_header", "print_exception", "print_environ",
           "print_form", "print_directory", "print_arguments",
           "print_environ_usage", "escape"]
Guido van Rossumc204c701996-09-05 19:07:11 +000049
50# Logging support
51# ===============
52
# Module-level logging state, read and rebound by initlog() and closelog().
logfile = ""            # Filename to log to, if not empty
logfp = None            # File object to log to, if not None
Guido van Rossumc204c701996-09-05 19:07:11 +000055
def initlog(*allargs):
    """Set up logging on first use, then write the log message.

    Callers should always go through the module-level name log rather
    than calling initlog() directly.  log starts out bound to initlog;
    the first call rebinds it to dolog (a log file is available) or to
    nolog (logging is disabled) and then forwards the message there.

    The arguments are a format string followed by the values for the %
    operator, so e.g.
        log("%s: %s", "a", "b")
    writes "a: b" and a newline to the log file.

    If the global logfp is not None, it is the file object that receives
    the log data.

    If the global logfp is None, the global logfile may name a file to
    open in append mode.  This file should be world writable!!!  If the
    file can't be opened, logging is silently disabled (since there is
    no safe place where we could send an error message).

    """
    global log, logfile, logfp
    if logfile and not logfp:
        try:
            logfp = open(logfile, "a")
        except IOError:
            # Deliberate best effort: fall through to nolog below.
            pass
    # Pick the real logger once; later log() calls bypass initlog().
    log = dolog if logfp else nolog
    log(*allargs)
Guido van Rossumc204c701996-09-05 19:07:11 +000090
def dolog(fmt, *args):
    """Format fmt with args and append the line to the global logfp.

    See initlog() for the calling convention.
    """
    message = fmt % args
    logfp.write(message + "\n")
94
def nolog(*allargs):
    """Discard the message; log is bound to this when logging is off."""
    return None
98
def closelog():
    """Close the log file and return logging to its initial state.

    Clears the global logfile name, closes and forgets the global logfp
    if one is open, and rebinds log to initlog.
    """
    global log, logfile, logfp
    logfile = ''
    handle = logfp
    if handle:
        handle.close()
        logfp = None
    log = initlog
106 log = initlog
107
# Starts as initlog; initlog() rebinds it to dolog or nolog on first use,
# and closelog() resets it to initlog.
log = initlog           # The current logging function
Guido van Rossumc204c701996-09-05 19:07:11 +0000109
110
Guido van Rossum72755611996-03-06 07:20:06 +0000111# Parsing functions
112# =================
113
# Maximum input we will accept when REQUEST_METHOD is POST
# 0 ==> unlimited input
# A declared content length above a non-zero maxlen raises ValueError.
maxlen = 0
117
def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
    """Parse a query in the environment or from a file (default stdin)

        Arguments, all optional:

        fp              : file pointer; default: sys.stdin (its underlying
            binary buffer is used when it is a TextIOWrapper)

        environ         : environment dictionary; default: os.environ.
            Note that REQUEST_METHOD and QUERY_STRING may be written
            back into this dictionary.

        keep_blank_values: flag indicating whether blank values in
            percent-encoded forms should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        Returns the dictionary built by parse_multipart() for a
        multipart/form-data POST, otherwise the dictionary built by
        urllib.parse.parse_qs().
    """
    stream = sys.stdin if fp is None else fp

    # Field keys and values (except for files) are returned as strings,
    # so an encoding is needed to decode the bytes read from the stream.
    encoding = getattr(stream, 'encoding', 'latin-1')

    # stream.read() must return bytes
    if isinstance(stream, TextIOWrapper):
        stream = stream.buffer

    if 'REQUEST_METHOD' not in environ:
        environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone

    if environ['REQUEST_METHOD'] == 'POST':
        ctype, pdict = parse_header(environ['CONTENT_TYPE'])
        if ctype == 'multipart/form-data':
            return parse_multipart(stream, pdict)
        if ctype == 'application/x-www-form-urlencoded':
            clength = int(environ['CONTENT_LENGTH'])
            if maxlen and clength > maxlen:
                raise ValueError('Maximum content length exceeded')
            qs = stream.read(clength).decode(encoding)
        else:
            qs = ''                     # Unknown content-type

        # A POST may carry an additional query string (from the
        # environment, else from the command line); append it to the body.
        if 'QUERY_STRING' in environ:
            extra = environ['QUERY_STRING']
        elif sys.argv[1:]:
            extra = sys.argv[1]
        else:
            extra = None
        if extra is not None:
            if qs:
                qs = qs + '&'
            qs = qs + extra
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    elif 'QUERY_STRING' in environ:
        qs = environ['QUERY_STRING']
    else:
        qs = sys.argv[1] if sys.argv[1:] else ""
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really

    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding)
Guido van Rossume7808771995-08-07 20:12:09 +0000182
183
# Query-string parsing functions that delegate to urllib.parse;
# they are kept here only to maintain backward compatibility.
186
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Deprecated wrapper: parse a query given as a string argument.

    Emits a DeprecationWarning attributed to the caller and returns the
    result of urllib.parse.parse_qs() for the same arguments.
    """
    warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
         category=DeprecationWarning, stacklevel=2)
    return urllib.parse.parse_qs(qs,
                                 keep_blank_values=keep_blank_values,
                                 strict_parsing=strict_parsing)
Guido van Rossum1946f0d1999-06-04 17:54:39 +0000192
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Deprecated wrapper: parse a query given as a string argument.

    Emits a DeprecationWarning attributed to the caller and returns the
    list of (name, value) pairs from urllib.parse.parse_qsl().
    """
    warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
         category=DeprecationWarning, stacklevel=2)
    return urllib.parse.parse_qsl(qs,
                                  keep_blank_values=keep_blank_values,
                                  strict_parsing=strict_parsing)
Guido van Rossum9a22de11995-01-12 12:29:47 +0000198
def parse_multipart(fp, pdict):
    """Parse multipart input.

    Arguments:
    fp   : input file; its read() and readline() must return bytes
    pdict: dictionary containing other parameters of content-type header.
           Its 'boundary' entry, if present, may be bytes or str (the
           form produced by parse_header()); a str boundary is encoded
           as ASCII so it can be compared with the bytes read from fp.

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field (as bytes).  This is easy to
    use but not much good if you are expecting megabytes to be uploaded --
    in that case, use the FieldStorage class instead which is much more
    flexible.

    Raises ValueError if the boundary is rejected by valid_boundary(), or
    if a part declares a Content-Length larger than a non-zero maxlen.

    XXX This does not parse nested multipart parts -- use FieldStorage for
    that.

    XXX This should really be subsumed by FieldStorage altogether -- no
    point in having two implementations of the same parsing algorithm.
    Also, FieldStorage protects itself better against certain DoS attacks
    by limiting the size of the data read in one chunk.  The API here
    does not support that kind of protection.  This also affects parse()
    since it can call parse_multipart().

    """
    import http.client

    boundary = b""
    if 'boundary' in pdict:
        boundary = pdict['boundary']
    if not valid_boundary(boundary):
        raise ValueError('Invalid boundary in multipart form: %r'
                         % (boundary,))
    # parse() hands over the str value produced by parse_header(), while
    # everything read from fp is bytes; normalise so that the
    # concatenations and comparisons below do not raise TypeError.
    if isinstance(boundary, str):
        boundary = boundary.encode('ascii')

    nextpart = b"--" + boundary
    lastpart = b"--" + boundary + b"--"
    partdict = {}
    terminator = b""

    while terminator != lastpart:
        nbytes = -1
        data = None
        if terminator:
            # At start of next part.  Read headers first.
            headers = http.client.parse_headers(fp)
            clength = headers.get('content-length')
            if clength:
                try:
                    nbytes = int(clength)
                except ValueError:
                    # Unparseable length: fall back to reading up to the
                    # next boundary line.
                    pass
            if nbytes > 0:
                if maxlen and nbytes > maxlen:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(nbytes)
            else:
                data = b""
        # Read lines until end of part.
        lines = []
        while True:
            line = fp.readline()
            if not line:
                terminator = lastpart   # End outer loop
                break
            if line.startswith(b"--"):
                terminator = line.rstrip()
                if terminator in (nextpart, lastpart):
                    break
            lines.append(line)
        # Done with part.
        if data is None:
            # Text before the first boundary: no headers, nothing to store.
            continue
        if nbytes < 0:
            if lines:
                # Strip final line terminator
                line = lines[-1]
                if line[-2:] == b"\r\n":
                    line = line[:-2]
                elif line[-1:] == b"\n":
                    line = line[:-1]
                lines[-1] = line
            data = b"".join(lines)
        line = headers['content-disposition']
        if not line:
            continue
        key, params = parse_header(line)
        if key != 'form-data':
            continue
        if 'name' not in params:
            continue
        partdict.setdefault(params['name'], []).append(data)

    return partdict
Guido van Rossum9a22de11995-01-12 12:29:47 +0000297
298
Fred Drake9a0a65b2008-12-04 19:24:50 +0000299def _parseparam(s):
300 while s[:1] == ';':
301 s = s[1:]
302 end = s.find(';')
Senthil Kumaran1ef0c032011-10-20 01:05:44 +0800303 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Fred Drake9a0a65b2008-12-04 19:24:50 +0000304 end = s.find(';', end + 1)
305 if end < 0:
306 end = len(s)
307 f = s[:end]
308 yield f.strip()
309 s = s[end:]
310
def parse_header(line):
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.  Option
    names are lower-cased; a value wrapped in double quotes has the
    quotes removed and its backslash escapes for '\\' and '"' undone.
    Pieces without an '=' are ignored.

    """
    pieces = _parseparam(';' + line)
    key = next(pieces)
    options = {}
    for piece in pieces:
        name, sep, value = piece.partition('=')
        if not sep:
            continue
        name = name.strip().lower()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            unquoted = value[1:-1]
            value = unquoted.replace('\\\\', '\\').replace('\\"', '"')
        options[name] = value
    return key, options
Guido van Rossum72755611996-03-06 07:20:06 +0000330
331
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000332# Classes for field storage
333# =========================
334
class MiniFieldStorage:

    """Like FieldStorage, for use when no file uploads are possible."""

    # Placeholder attributes so instances offer the same attribute names
    # as FieldStorage; only name and value are set per instance.
    filename = list = type = file = disposition = None
    type_options = {}
    disposition_options = {}
    headers = {}

    def __init__(self, name, value):
        """Constructor from field name and value."""
        self.name, self.value = name, value

    def __repr__(self):
        """Return printable representation."""
        pair = (self.name, self.value)
        return "MiniFieldStorage(%r, %r)" % pair
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000358
359
360class FieldStorage:
361
Guido van Rossum7aee3841996-03-07 18:00:44 +0000362 """Store a sequence of fields, reading multipart/form-data.
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000363
Guido van Rossum7aee3841996-03-07 18:00:44 +0000364 This class provides naming, typing, files stored on disk, and
365 more. At the top level, it is accessible like a dictionary, whose
366 keys are the field names. (Note: None can occur as a field name.)
367 The items are either a Python list (if there's multiple values) or
368 another FieldStorage or MiniFieldStorage object. If it's a single
369 object, it has the following attributes:
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000370
Guido van Rossum7aee3841996-03-07 18:00:44 +0000371 name: the field name, if specified; otherwise None
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000372
Guido van Rossum7aee3841996-03-07 18:00:44 +0000373 filename: the filename, if specified; otherwise None; this is the
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000374 client side filename, *not* the file name on which it is
375 stored (that's a temporary file you don't deal with)
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000376
Guido van Rossum7aee3841996-03-07 18:00:44 +0000377 value: the value as a *string*; for file uploads, this
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000378 transparently reads the file every time you request the value
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000379 and returns *bytes*
Guido van Rossum7aee3841996-03-07 18:00:44 +0000380
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000381 file: the file(-like) object from which you can read the data *as
382 bytes* ; None if the data is stored a simple string
Guido van Rossum7aee3841996-03-07 18:00:44 +0000383
384 type: the content-type, or None if not specified
385
386 type_options: dictionary of options specified on the content-type
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000387 line
Guido van Rossum7aee3841996-03-07 18:00:44 +0000388
389 disposition: content-disposition, or None if not specified
390
391 disposition_options: dictionary of corresponding options
392
Barry Warsaw596097e2008-06-12 02:38:51 +0000393 headers: a dictionary(-like) object (sometimes email.message.Message or a
Armin Rigo3a703b62005-09-19 09:11:04 +0000394 subclass thereof) containing *all* headers
Guido van Rossum7aee3841996-03-07 18:00:44 +0000395
396 The class is subclassable, mostly for the purpose of overriding
397 the make_file() method, which is called internally to come up with
398 a file open for reading and writing. This makes it possible to
399 override the default choice of storing all files in a temporary
400 directory and unlinking them as soon as they have been opened.
401
402 """
    def __init__(self, fp=None, headers=None, outerboundary=b'',
                 environ=os.environ, keep_blank_values=0, strict_parsing=0,
                 limit=None, encoding='utf-8', errors='replace'):
        """Constructor.  Read multipart/* until last part.

        Arguments, all optional:

        fp              : file pointer; default: sys.stdin.buffer
            (not used when the request method is GET)
            Can be :
            1. a TextIOWrapper object
            2. an object whose read() and readline() methods return bytes

        headers         : header dictionary-like object; default:
            taken from environ as per CGI spec

        outerboundary   : terminating multipart boundary
            (for internal use only); must be bytes

        environ         : environment dictionary; default: os.environ

        keep_blank_values: flag indicating whether blank values in
            percent-encoded forms should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        limit : used internally to read parts of multipart/form-data forms,
            to exit from the reading loop when reached. It is the difference
            between the form content-length and the number of bytes already
            read

        encoding, errors : the encoding and error handler used to decode the
            binary stream to strings. Must be the same as the charset defined
            for the page sending the form (content-type : meta http-equiv or
            header)

        Raises TypeError if outerboundary is not bytes, and ValueError if
        the declared content length exceeds a non-zero module-level maxlen.

        """
        # The request method defaults to GET when the environment has none.
        method = 'GET'
        self.keep_blank_values = keep_blank_values
        self.strict_parsing = strict_parsing
        if 'REQUEST_METHOD' in environ:
            method = environ['REQUEST_METHOD'].upper()
        self.qs_on_post = None
        if method == 'GET' or method == 'HEAD':
            # No request body: take the query string from the environment
            # (or the first command-line argument) and wrap it in a BytesIO
            # that replaces any fp passed in.
            if 'QUERY_STRING' in environ:
                qs = environ['QUERY_STRING']
            elif sys.argv[1:]:
                qs = sys.argv[1]
            else:
                qs = ""
            qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
            fp = BytesIO(qs)
            if headers is None:
                headers = {'content-type':
                           "application/x-www-form-urlencoded"}
        if headers is None:
            # Build the headers from the CGI environment variables.
            headers = {}
            if method == 'POST':
                # Set default content-type for POST to what's traditional
                headers['content-type'] = "application/x-www-form-urlencoded"
            if 'CONTENT_TYPE' in environ:
                headers['content-type'] = environ['CONTENT_TYPE']
            if 'QUERY_STRING' in environ:
                # Remember the query string that came with a non-GET request.
                self.qs_on_post = environ['QUERY_STRING']
            if 'CONTENT_LENGTH' in environ:
                headers['content-length'] = environ['CONTENT_LENGTH']
        if fp is None:
            self.fp = sys.stdin.buffer
        # self.fp.read() must return bytes
        elif isinstance(fp, TextIOWrapper):
            self.fp = fp.buffer
        else:
            self.fp = fp

        self.encoding = encoding
        self.errors = errors

        self.headers = headers
        if not isinstance(outerboundary, bytes):
            raise TypeError('outerboundary must be bytes, not %s'
                            % type(outerboundary).__name__)
        self.outerboundary = outerboundary

        self.bytes_read = 0
        self.limit = limit

        # Process content-disposition header
        cdisp, pdict = "", {}
        if 'content-disposition' in self.headers:
            cdisp, pdict = parse_header(self.headers['content-disposition'])
        self.disposition = cdisp
        self.disposition_options = pdict
        self.name = None
        if 'name' in pdict:
            self.name = pdict['name']
        self.filename = None
        if 'filename' in pdict:
            self.filename = pdict['filename']
        # True exactly when the content-disposition carried a filename.
        self._binary_file = self.filename is not None

        # Process content-type header
        #
        # Honor any existing content-type header.  But if there is no
        # content-type header, use some sensible defaults.  Assume
        # outerboundary is "" at the outer level, but something non-false
        # inside a multi-part.  The default for an inner part is text/plain,
        # but for an outer part it should be urlencoded.  This should catch
        # bogus clients which erroneously forget to include a content-type
        # header.
        #
        # See below for what we do if there does exist a content-type header,
        # but it happens to be something we don't understand.
        if 'content-type' in self.headers:
            ctype, pdict = parse_header(self.headers['content-type'])
        elif self.outerboundary or method != 'POST':
            ctype, pdict = "text/plain", {}
        else:
            ctype, pdict = 'application/x-www-form-urlencoded', {}
        self.type = ctype
        self.type_options = pdict
        if 'boundary' in pdict:
            # parse_header() returns str; boundaries are kept as bytes.
            self.innerboundary = pdict['boundary'].encode(self.encoding)
        else:
            self.innerboundary = b""

        # clen stays -1 when content-length is absent or not an integer.
        clen = -1
        if 'content-length' in self.headers:
            try:
                clen = int(self.headers['content-length'])
            except ValueError:
                pass
            if maxlen and clen > maxlen:
                raise ValueError('Maximum content length exceeded')
        self.length = clen
        # NOTE(review): clen is truthy when it is -1, so without an explicit
        # limit a missing content-length sets self.limit to -1, while a
        # content-length of exactly 0 leaves it None -- confirm this is what
        # the read_* methods expect.
        if self.limit is None and clen:
            self.limit = clen

        self.list = self.file = None
        self.done = 0
        # Hand over to the reader matching the content type; the read_*
        # methods are defined elsewhere in this class.
        if ctype == 'application/x-www-form-urlencoded':
            self.read_urlencoded()
        elif ctype[:10] == 'multipart/':
            self.read_multi(environ, keep_blank_values, strict_parsing)
        else:
            self.read_single()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000554
555 def __repr__(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000556 """Return a printable representation."""
Walter Dörwald70a6b492004-02-12 17:35:32 +0000557 return "FieldStorage(%r, %r, %r)" % (
558 self.name, self.filename, self.value)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000559
Guido van Rossum4061cbe2002-09-11 18:20:34 +0000560 def __iter__(self):
561 return iter(self.keys())
562
Guido van Rossum7aee3841996-03-07 18:00:44 +0000563 def __getattr__(self, name):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000564 if name != 'value':
Collin Winterce36ad82007-08-30 01:19:48 +0000565 raise AttributeError(name)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000566 if self.file:
567 self.file.seek(0)
568 value = self.file.read()
569 self.file.seek(0)
570 elif self.list is not None:
571 value = self.list
572 else:
573 value = None
574 return value
Guido van Rossum7aee3841996-03-07 18:00:44 +0000575
576 def __getitem__(self, key):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000577 """Dictionary style indexing."""
578 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000579 raise TypeError("not indexable")
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000580 found = []
581 for item in self.list:
582 if item.name == key: found.append(item)
583 if not found:
Collin Winterce36ad82007-08-30 01:19:48 +0000584 raise KeyError(key)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000585 if len(found) == 1:
586 return found[0]
587 else:
588 return found
Guido van Rossum7aee3841996-03-07 18:00:44 +0000589
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000590 def getvalue(self, key, default=None):
591 """Dictionary style get() method, including 'value' lookup."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000592 if key in self:
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000593 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000594 if isinstance(value, list):
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000595 return [x.value for x in value]
Moshe Zadkaa1a4b592000-08-25 21:47:56 +0000596 else:
597 return value.value
598 else:
599 return default
600
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000601 def getfirst(self, key, default=None):
602 """ Return the first value received."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000603 if key in self:
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000604 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000605 if isinstance(value, list):
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000606 return value[0].value
607 else:
608 return value.value
609 else:
610 return default
611
612 def getlist(self, key):
613 """ Return list of received values."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000614 if key in self:
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000615 value = self[key]
Victor Stinnerf1c7ca92011-01-14 13:08:27 +0000616 if isinstance(value, list):
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000617 return [x.value for x in value]
Guido van Rossum1bfb3882001-09-05 19:45:34 +0000618 else:
619 return [value.value]
620 else:
621 return []
622
Guido van Rossum7aee3841996-03-07 18:00:44 +0000623 def keys(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000624 """Dictionary style keys() method."""
625 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000626 raise TypeError("not indexable")
Thomas Wouters8ce81f72007-09-20 18:22:40 +0000627 return list(set(item.name for item in self.list))
Guido van Rossum7aee3841996-03-07 18:00:44 +0000628
Raymond Hettinger54f02222002-06-01 14:18:47 +0000629 def __contains__(self, key):
630 """Dictionary style __contains__ method."""
631 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000632 raise TypeError("not indexable")
Thomas Wouters8ce81f72007-09-20 18:22:40 +0000633 return any(item.name == key for item in self.list)
Raymond Hettinger54f02222002-06-01 14:18:47 +0000634
Guido van Rossum88b85d41997-01-11 19:21:33 +0000635 def __len__(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000636 """Dictionary style len(x) support."""
637 return len(self.keys())
Guido van Rossum88b85d41997-01-11 19:21:33 +0000638
Thomas Wouters8ce81f72007-09-20 18:22:40 +0000639 def __nonzero__(self):
640 return bool(self.list)
641
Guido van Rossum7aee3841996-03-07 18:00:44 +0000642 def read_urlencoded(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000643 """Internal: read data in query string format."""
644 qs = self.fp.read(self.length)
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000645 if not isinstance(qs, bytes):
646 raise ValueError("%s should return bytes, got %s" \
647 % (self.fp, type(qs).__name__))
648 qs = qs.decode(self.encoding, self.errors)
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000649 if self.qs_on_post:
650 qs += '&' + self.qs_on_post
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000651 self.list = []
652 query = urllib.parse.parse_qsl(
653 qs, self.keep_blank_values, self.strict_parsing,
654 encoding=self.encoding, errors=self.errors)
655 for key, value in query:
656 self.list.append(MiniFieldStorage(key, value))
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000657 self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000658
Guido van Rossum030d2ec1998-12-09 22:16:46 +0000659 FieldStorageClass = None
660
Guido van Rossumf5745001998-10-20 14:43:02 +0000661 def read_multi(self, environ, keep_blank_values, strict_parsing):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000662 """Internal: read a part that is itself multipart."""
Guido van Rossum2e441f72001-07-25 21:00:19 +0000663 ib = self.innerboundary
664 if not valid_boundary(ib):
Collin Winterce36ad82007-08-30 01:19:48 +0000665 raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000666 self.list = []
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000667 if self.qs_on_post:
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000668 query = urllib.parse.parse_qsl(
669 self.qs_on_post, self.keep_blank_values, self.strict_parsing,
670 encoding=self.encoding, errors=self.errors)
671 for key, value in query:
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000672 self.list.append(MiniFieldStorage(key, value))
673 FieldStorageClass = None
674
Guido van Rossum030d2ec1998-12-09 22:16:46 +0000675 klass = self.FieldStorageClass or self.__class__
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000676 first_line = self.fp.readline() # bytes
677 if not isinstance(first_line, bytes):
678 raise ValueError("%s should return bytes, got %s" \
679 % (self.fp, type(first_line).__name__))
680 self.bytes_read += len(first_line)
681 # first line holds boundary ; ignore it, or check that
682 # b"--" + ib == first_line.strip() ?
683 while True:
684 parser = FeedParser()
685 hdr_text = b""
686 while True:
687 data = self.fp.readline()
688 hdr_text += data
689 if not data.strip():
690 break
691 if not hdr_text:
692 break
693 # parser takes strings, not bytes
694 self.bytes_read += len(hdr_text)
695 parser.feed(hdr_text.decode(self.encoding, self.errors))
696 headers = parser.close()
697 part = klass(self.fp, headers, ib, environ, keep_blank_values,
698 strict_parsing,self.limit-self.bytes_read,
699 self.encoding, self.errors)
700 self.bytes_read += part.bytes_read
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000701 self.list.append(part)
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000702 if self.bytes_read >= self.length:
703 break
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000704 self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000705
706 def read_single(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000707 """Internal: read an atomic part."""
708 if self.length >= 0:
709 self.read_binary()
710 self.skip_lines()
711 else:
712 self.read_lines()
713 self.file.seek(0)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000714
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000715 bufsize = 8*1024 # I/O buffering size for copy to file
Guido van Rossum7aee3841996-03-07 18:00:44 +0000716
717 def read_binary(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000718 """Internal: read binary data."""
Guido van Rossuma1a68522007-08-28 03:11:34 +0000719 self.file = self.make_file()
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000720 todo = self.length
721 if todo >= 0:
722 while todo > 0:
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000723 data = self.fp.read(min(todo, self.bufsize)) # bytes
724 if not isinstance(data, bytes):
725 raise ValueError("%s should return bytes, got %s"
726 % (self.fp, type(data).__name__))
727 self.bytes_read += len(data)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000728 if not data:
729 self.done = -1
730 break
731 self.file.write(data)
732 todo = todo - len(data)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000733
734 def read_lines(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000735 """Internal: read lines until EOF or outerboundary."""
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000736 if self._binary_file:
737 self.file = self.__file = BytesIO() # store data as bytes for files
738 else:
739 self.file = self.__file = StringIO() # as strings for other fields
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000740 if self.outerboundary:
741 self.read_lines_to_outerboundary()
742 else:
743 self.read_lines_to_eof()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000744
Guido van Rossum52b8c292001-06-29 13:06:06 +0000745 def __write(self, line):
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000746 """line is always bytes, not string"""
Guido van Rossum52b8c292001-06-29 13:06:06 +0000747 if self.__file is not None:
748 if self.__file.tell() + len(line) > 1000:
Guido van Rossuma1a68522007-08-28 03:11:34 +0000749 self.file = self.make_file()
750 data = self.__file.getvalue()
751 self.file.write(data)
Guido van Rossum52b8c292001-06-29 13:06:06 +0000752 self.__file = None
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000753 if self._binary_file:
754 # keep bytes
755 self.file.write(line)
756 else:
757 # decode to string
758 self.file.write(line.decode(self.encoding, self.errors))
Guido van Rossum52b8c292001-06-29 13:06:06 +0000759
Guido van Rossum7aee3841996-03-07 18:00:44 +0000760 def read_lines_to_eof(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000761 """Internal: read lines until EOF."""
762 while 1:
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000763 line = self.fp.readline(1<<16) # bytes
764 self.bytes_read += len(line)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000765 if not line:
766 self.done = -1
767 break
Guido van Rossum52b8c292001-06-29 13:06:06 +0000768 self.__write(line)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000769
770 def read_lines_to_outerboundary(self):
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000771 """Internal: read lines until outerboundary.
772 Data is read as bytes: boundaries and line ends must be converted
773 to bytes for comparisons.
774 """
775 next_boundary = b"--" + self.outerboundary
776 last_boundary = next_boundary + b"--"
777 delim = b""
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000778 last_line_lfend = True
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000779 _read = 0
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000780 while 1:
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000781 if _read >= self.limit:
782 break
783 line = self.fp.readline(1<<16) # bytes
784 self.bytes_read += len(line)
785 _read += len(line)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000786 if not line:
787 self.done = -1
788 break
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000789 if line.startswith(b"--") and last_line_lfend:
790 strippedline = line.rstrip()
791 if strippedline == next_boundary:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000792 break
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000793 if strippedline == last_boundary:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000794 self.done = 1
795 break
796 odelim = delim
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000797 if line.endswith(b"\r\n"):
798 delim = b"\r\n"
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000799 line = line[:-2]
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000800 last_line_lfend = True
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000801 elif line.endswith(b"\n"):
802 delim = b"\n"
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000803 line = line[:-1]
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000804 last_line_lfend = True
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000805 else:
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000806 delim = b""
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000807 last_line_lfend = False
Guido van Rossum52b8c292001-06-29 13:06:06 +0000808 self.__write(odelim + line)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000809
810 def skip_lines(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000811 """Internal: skip lines until outer boundary if defined."""
812 if not self.outerboundary or self.done:
813 return
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000814 next_boundary = b"--" + self.outerboundary
815 last_boundary = next_boundary + b"--"
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000816 last_line_lfend = True
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000817 while True:
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000818 line = self.fp.readline(1<<16)
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000819 self.bytes_read += len(line)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000820 if not line:
821 self.done = -1
822 break
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000823 if line.endswith(b"--") and last_line_lfend:
Eric S. Raymond7e9b4f52001-02-09 09:59:10 +0000824 strippedline = line.strip()
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000825 if strippedline == next_boundary:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000826 break
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000827 if strippedline == last_boundary:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000828 self.done = 1
829 break
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000830 last_line_lfend = line.endswith(b'\n')
Guido van Rossum7aee3841996-03-07 18:00:44 +0000831
Guido van Rossuma1a68522007-08-28 03:11:34 +0000832 def make_file(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000833 """Overridable: return a readable & writable file.
Guido van Rossum7aee3841996-03-07 18:00:44 +0000834
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000835 The file will be used as follows:
836 - data is written to it
837 - seek(0)
838 - data is read from it
Guido van Rossum7aee3841996-03-07 18:00:44 +0000839
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000840 The file is opened in binary mode for files, in text mode
841 for other fields
Guido van Rossum7aee3841996-03-07 18:00:44 +0000842
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000843 This version opens a temporary file for reading and writing,
844 and immediately deletes (unlinks) it. The trick (on Unix!) is
845 that the file can still be used, but it can't be opened by
846 another process, and it will automatically be deleted when it
847 is closed or when the current process terminates.
Guido van Rossum4032c2c1996-03-09 04:04:35 +0000848
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000849 If you want a more permanent file, you derive a class which
850 overrides this method. If you want a visible temporary file
851 that is nevertheless automatically deleted when the script
852 terminates, try defining a __del__ method in a derived class
853 which unlinks the temporary files you have created.
Guido van Rossum7aee3841996-03-07 18:00:44 +0000854
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000855 """
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000856 if self._binary_file:
857 return tempfile.TemporaryFile("wb+")
858 else:
859 return tempfile.TemporaryFile("w+",
860 encoding=self.encoding, newline = '\n')
Tim Peters88869f92001-01-14 23:36:06 +0000861
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000862
Guido van Rossum72755611996-03-06 07:20:06 +0000863# Test/debug code
864# ===============
Guido van Rossum9a22de11995-01-12 12:29:47 +0000865
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Write minimal HTTP headers and dump all information provided to
    the script in HTML form.

    The dump is produced twice: once normally, then again after setting
    the module-level maxlen to 50.  Any exception raised along the way is
    reported through print_exception() rather than propagated.

    """
    print("Content-type: text/html")
    print()
    # Send error output into the page as well.
    sys.stderr = sys.stdout
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # The exec'd text is not valid Python, so calling f() raises on
        # purpose; the handler below then demonstrates print_exception().
        def f():
            exec("testing print_exception() -- <I>italics?</I>")
        def g(f=f):
            f()
        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        # Deliberately catches everything: this is a diagnostic script.
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    global maxlen
    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
Guido van Rossumad164711997-05-13 19:03:23 +0000904
def print_exception(type=None, value=None, tb=None, limit=None):
    """Print an exception and its traceback as escaped HTML.

    When *type* is None the exception currently being handled is used.
    *limit* is passed through to traceback.format_tb().
    """
    if type is None:
        type, value, tb = sys.exc_info()
    import traceback
    print()
    print("<H3>Traceback (most recent call last):</H3>")
    report = traceback.format_tb(tb, limit)
    report += traceback.format_exception_only(type, value)
    # Everything but the final line goes in plain <PRE>; the final line
    # (the exception itself) is set in bold.
    frames_html = html.escape("".join(report[:-1]))
    summary_html = html.escape(report[-1])
    print("<PRE>%s<B>%s</B></PRE>" % (frames_html, summary_html))
    del tb
Guido van Rossum9a22de11995-01-12 12:29:47 +0000918
def print_environ(environ=os.environ):
    """Dump the shell environment as HTML.

    Names are listed in sorted order; names and values are HTML-escaped.
    """
    names = sorted(environ.keys())
    print()
    print("<H3>Shell Environment:</H3>")
    print("<DL>")
    for name in names:
        term = html.escape(name)
        definition = html.escape(environ[name])
        print("<DT> %s <DD> %s" % (term, definition))
    print("</DL>")
    print()
Guido van Rossum72755611996-03-06 07:20:06 +0000929
def print_form(form):
    """Dump the contents of a form as HTML.

    For each key (in sorted order) the key, the type of its value and the
    repr of its value are printed, all HTML-escaped.
    """
    names = sorted(form.keys())
    print()
    print("<H3>Form Contents:</H3>")
    if not names:
        print("<P>No form fields.")
    print("<DL>")
    for name in names:
        # The key is printed before the lookup, as a line prefix.
        print("<DT>%s:" % html.escape(name), end=' ')
        field = form[name]
        print("<i>%s</i>" % html.escape(repr(type(field))))
        print("<DD>%s" % html.escape(repr(field)))
    print("</DL>")
    print()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000945
def print_directory():
    """Dump the current directory as HTML.

    If the directory cannot be determined, the error text is printed in
    its place.
    """
    print()
    print("<H3>Current Working Directory:</H3>")
    try:
        report = html.escape(os.getcwd())
    except os.error as exc:
        report = "os.error: " + html.escape(str(exc))
    print(report)
    print()
Guido van Rossum9a22de11995-01-12 12:29:47 +0000957
def print_arguments():
    """Dump the command line arguments as HTML.

    The argument list is HTML-escaped before it is printed, like the
    output of the other dump helpers in this file.
    """
    print()
    print("<H3>Command Line Arguments:</H3>")
    print()
    # NOTE(review): under CGI the command line can be derived from the
    # request's query string, so it is treated as untrusted here.
    print(html.escape(str(sys.argv)))
    print()
Guido van Rossuma8738a51996-03-14 21:30:28 +0000964
def print_environ_usage():
    """Dump a list of environment variables used by CGI as HTML."""
    cgi_names = (
        "AUTH_TYPE", "CONTENT_LENGTH", "CONTENT_TYPE", "DATE_GMT",
        "DATE_LOCAL", "DOCUMENT_NAME", "DOCUMENT_ROOT", "DOCUMENT_URI",
        "GATEWAY_INTERFACE", "LAST_MODIFIED", "PATH", "PATH_INFO",
        "PATH_TRANSLATED", "QUERY_STRING", "REMOTE_ADDR", "REMOTE_HOST",
        "REMOTE_IDENT", "REMOTE_USER", "REQUEST_METHOD", "SCRIPT_NAME",
        "SERVER_NAME", "SERVER_PORT", "SERVER_PROTOCOL", "SERVER_ROOT",
        "SERVER_SOFTWARE",
    )
    header_names = (
        "HTTP_ACCEPT", "HTTP_CONNECTION", "HTTP_HOST", "HTTP_PRAGMA",
        "HTTP_REFERER", "HTTP_USER_AGENT",
    )

    def bullets(names):
        # One "<LI>NAME" line per entry, each terminated by a newline.
        return "".join("<LI>%s\n" % name for name in names)

    print(
        "\n<H3>These environment variables could have been set:</H3>\n"
        "<UL>\n" + bullets(cgi_names) + "</UL>\n"
        "In addition, HTTP headers sent by the server may be passed in the\n"
        "environment as well. Here are some common variable names:\n"
        "<UL>\n" + bullets(header_names) + "</UL>\n")
Guido van Rossum9a22de11995-01-12 12:29:47 +00001007
Guido van Rossum9a22de11995-01-12 12:29:47 +00001008
Guido van Rossum72755611996-03-06 07:20:06 +00001009# Utilities
1010# =========
Guido van Rossum9a22de11995-01-12 12:29:47 +00001011
def escape(s, quote=None):
    """Deprecated API.

    Replace '&', '<' and '>' in *s* with HTML entities; the double quote
    is replaced too when *quote* is true.  Emits a DeprecationWarning.
    """
    warn("cgi.escape is deprecated, use html.escape instead",
         DeprecationWarning, stacklevel=2)
    # "&" must be handled first so the entities added below are not
    # escaped a second time.
    replacements = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    if quote:
        replacements.append(('"', "&quot;"))
    for raw, entity in replacements:
        s = s.replace(raw, entity)
    return s
Guido van Rossum9a22de11995-01-12 12:29:47 +00001022
Georg Brandl1f7fffb2010-10-15 15:57:45 +00001023
def valid_boundary(s, _vb_pattern=None):
    """Return a match object if *s* is a valid multipart boundary, else None.

    *s* may be str or bytes.  A valid boundary consists of 1 to 201
    characters in the range space through '~' and does not end in a
    space.  The _vb_pattern argument is accepted but not used; it is kept
    so existing callers keep working.
    """
    import re
    # \Z anchors at the very end of the string.  "$" would also match
    # just before a trailing newline and so accept "boundary\n".
    if isinstance(s, bytes):
        _vb_pattern = br"^[ -~]{0,200}[!-~]\Z"
    else:
        _vb_pattern = r"^[ -~]{0,200}[!-~]\Z"
    return re.match(_vb_pattern, s)
Guido van Rossum9a22de11995-01-12 12:29:47 +00001031
Guido van Rossum72755611996-03-06 07:20:06 +00001032# Invoke mainline
1033# ===============
1034
1035# Call test() when this file is run as a script (not imported as a module)
Tim Peters88869f92001-01-14 23:36:06 +00001036if __name__ == '__main__':
Guido van Rossum7aee3841996-03-07 18:00:44 +00001037 test()