blob: 1d8040cc7ab976710a35698c07abe7f633c1a1cf [file] [log] [blame]
Benjamin Peterson8c703a02010-03-11 22:05:58 +00001#! /usr/local/bin/python
Guido van Rossum1c9daa81995-09-18 21:52:37 +00002
# NOTE: the above "/usr/local/bin/python" is NOT a mistake.  It is
# intentionally NOT "/usr/bin/env python".  On many systems
# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
# scripts, and /usr/local/bin is the default directory where Python is
# installed, so /usr/bin/env would be unable to find python.  Granted,
# binary installations by Linux vendors often install Python in
# /usr/bin.  So let those vendors patch cgi.py to match their choice
# of installation.
11
"""Support module for CGI (Common Gateway Interface) scripts.

This module defines a number of utilities for use by CGI scripts
written in Python.
"""
17
# History
# -------
#
# Michael McLay started this module.  Steve Majewski changed the
# interface to SvFormContentDict and FormContentDict.  The multipart
# parsing was inspired by code submitted by Andreas Paepcke.  Guido van
# Rossum rewrote, reformatted and documented the module and is currently
# responsible for its maintenance.
#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000027
# Version string of this module.
__version__ = "2.6"
Guido van Rossum0147db01996-03-09 03:16:04 +000029
Guido van Rossum72755611996-03-06 07:20:06 +000030
31# Imports
32# =======
33
Victor Stinner5c23b8e2011-01-14 13:05:21 +000034from io import StringIO, BytesIO, TextIOWrapper
Guido van Rossum72755611996-03-06 07:20:06 +000035import sys
36import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +000037import urllib.parse
Victor Stinner5c23b8e2011-01-14 13:05:21 +000038from email.parser import FeedParser
Facundo Batistac469d4c2008-09-03 22:49:01 +000039from warnings import warn
Georg Brandl1f7fffb2010-10-15 15:57:45 +000040import html
Victor Stinner5c23b8e2011-01-14 13:05:21 +000041import locale
42import tempfile
Guido van Rossum72755611996-03-06 07:20:06 +000043
# Names exported by "from cgi import *".
__all__ = [
    "MiniFieldStorage", "FieldStorage",
    "parse", "parse_qs", "parse_qsl", "parse_multipart",
    "parse_header", "print_exception", "print_environ",
    "print_form", "print_directory", "print_arguments",
    "print_environ_usage", "escape",
]
Guido van Rossumc204c701996-09-05 19:07:11 +000049
# Logging support
# ===============
52
# Name of the file to log to; logging to a named file is off while empty.
logfile = ""
# Open file object that log lines are written to; None until one is opened.
logfp = None
Guido van Rossumc204c701996-09-05 19:07:11 +000055
def initlog(*allargs):
    """Set up logging on first use, then write the message (if possible).

    Callers should always go through the module-level name ``log``
    rather than calling initlog() directly.  ``log`` starts out bound to
    initlog; this function rebinds it to dolog (a log file is available)
    or to nolog (logging is disabled) and then forwards the call.

    The first argument is a format string and the remaining arguments
    (if any) are applied to it with the % operator, so e.g.
        log("%s: %s", "a", "b")
    writes "a: b" to the log file, followed by a newline.

    If the global logfp is not None, it should be a file object to
    which log data is written.

    If the global logfp is None, the global logfile may name a file,
    which is opened in append mode.  That file should be world
    writable!  If it cannot be opened, logging is silently disabled
    (there is no safe place where an error message could be sent).

    """
    global log, logfp
    if logfile and not logfp:
        try:
            logfp = open(logfile, "a")
        except OSError:
            # Deliberately best effort: fall through with logfp unset so
            # that logging is switched off below.
            pass
    log = dolog if logfp else nolog
    log(*allargs)
Guido van Rossumc204c701996-09-05 19:07:11 +000090
def dolog(fmt, *args):
    """Format *fmt* with *args* (% operator) and append it to the log file.

    The line is written to the global logfp with a trailing newline.
    See initlog() for the full description of the logging scheme.
    """
    message = fmt % args
    logfp.write(message + "\n")
94
def nolog(*allargs):
    """Accept and discard a log call; bound to ``log`` while logging is off."""
    return None
98
def closelog():
    """Close the log file and return the logging machinery to its initial state.

    Clears the global logfile name, closes and forgets logfp if one is
    set, and rebinds ``log`` to initlog.
    """
    global log, logfile, logfp
    logfile = ""
    if logfp:
        logfp.close()
        logfp = None
    # The next log() call goes through initlog() again.
    log = initlog
107
# The current logging function; rebound by initlog() and closelog().
log = initlog
Guido van Rossumc204c701996-09-05 19:07:11 +0000109
110
# Parsing functions
# =================

# Maximum input we will accept when REQUEST_METHOD is POST
# 0 ==> unlimited input
maxlen = 0      # 0 means "no limit"
117
def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
    """Parse a query in the environment or from a file (default stdin).

    Arguments, all optional:

    fp              : file pointer; default: sys.stdin.  A TextIOWrapper
        is replaced by its underlying binary buffer; any other object
        must have a read() method returning bytes.

    environ         : environment dictionary; default: os.environ.
        Note that it is modified: REQUEST_METHOD is set to 'GET' when
        missing, and QUERY_STRING is written back on the POST path and
        when it was taken from sys.argv.

    keep_blank_values: flag indicating whether blank values in
        percent-encoded forms should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Returns the dictionary produced by urllib.parse.parse_qs(), or the
    result of parse_multipart() for a multipart/form-data POST.

    Raises ValueError when a urlencoded POST body is longer than the
    module-level maxlen (if non-zero).  For a POST request
    environ['CONTENT_TYPE'] is looked up unconditionally, and
    environ['CONTENT_LENGTH'] likewise for a urlencoded body.
    """
    if fp is None:
        fp = sys.stdin

    # Field keys and values (except for files) are returned as strings,
    # so an encoding is required to decode the bytes read from fp.
    if hasattr(fp, 'encoding'):
        encoding = fp.encoding
    else:
        encoding = 'latin-1'

    # fp.read() must return bytes
    if isinstance(fp, TextIOWrapper):
        fp = fp.buffer

    if 'REQUEST_METHOD' not in environ:
        environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone
    if environ['REQUEST_METHOD'] == 'POST':
        ctype, pdict = parse_header(environ['CONTENT_TYPE'])
        if ctype == 'multipart/form-data':
            # parse_header() returns its parameters as str, whereas
            # parse_multipart() joins the boundary with bytes literals and
            # compares it with bytes lines read from fp.  Hand it bytes.
            # A non-ASCII boundary raises UnicodeEncodeError (a ValueError).
            boundary = pdict.get('boundary')
            if isinstance(boundary, str):
                pdict['boundary'] = boundary.encode('ascii')
            return parse_multipart(fp, pdict)
        elif ctype == 'application/x-www-form-urlencoded':
            clength = int(environ['CONTENT_LENGTH'])
            if maxlen and clength > maxlen:
                raise ValueError('Maximum content length exceeded')
            qs = fp.read(clength).decode(encoding)
        else:
            qs = ''                     # Unknown content-type
        # Anything in the URL's query string is appended to the body.
        if 'QUERY_STRING' in environ:
            if qs:
                qs = qs + '&'
            qs = qs + environ['QUERY_STRING']
        elif sys.argv[1:]:
            if qs:
                qs = qs + '&'
            qs = qs + sys.argv[1]
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    elif 'QUERY_STRING' in environ:
        qs = environ['QUERY_STRING']
    else:
        if sys.argv[1:]:
            qs = sys.argv[1]
        else:
            qs = ""
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding)
Guido van Rossume7808771995-08-07 20:12:09 +0000182
183
# The query-string parsing functions now live in urllib.parse; the
# wrappers below are kept only to maintain backward compatibility.
186
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Deprecated alias: parse a query given as a string argument.

    Emits a DeprecationWarning attributed to the caller and returns the
    result of urllib.parse.parse_qs() for the same arguments.
    """
    warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
         DeprecationWarning, stacklevel=2)
    parsed = urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
    return parsed
Guido van Rossum1946f0d1999-06-04 17:54:39 +0000192
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Deprecated alias: parse a query given as a string argument.

    Emits a DeprecationWarning attributed to the caller and returns the
    result of urllib.parse.parse_qsl() for the same arguments.
    """
    warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
         DeprecationWarning, stacklevel=2)
    pairs = urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
    return pairs
Guido van Rossum9a22de11995-01-12 12:29:47 +0000198
def parse_multipart(fp, pdict):
    """Parse multipart input.

    Arguments:
    fp   : input file; its read() and readline() must return bytes
    pdict: dictionary containing other parameters of content-type header;
        the 'boundary' entry may be bytes or str (a str boundary, as
        produced by parse_header(), is encoded as ASCII)

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field (here the values are bytes).
    This is easy to use but not much good if you are expecting megabytes to
    be uploaded -- in that case, use the FieldStorage class instead which is
    much more flexible.

    Raises ValueError if the boundary is rejected by valid_boundary(), or
    if a part announces a content-length larger than the module-level
    maxlen (when maxlen is non-zero).

    XXX This does not parse nested multipart parts -- use FieldStorage for
    that.

    XXX This should really be subsumed by FieldStorage altogether -- no
    point in having two implementations of the same parsing algorithm.
    Also, FieldStorage protects itself better against certain DoS attacks
    by limiting the size of the data read in one chunk.  The API here
    does not support that kind of protection.  This also affects parse()
    since it can call parse_multipart().

    """
    import http.client

    boundary = pdict.get('boundary', b"")
    if not valid_boundary(boundary):
        raise ValueError('Invalid boundary in multipart form: %r'
                         % (boundary,))
    if isinstance(boundary, str):
        # Everything below works on bytes read from fp; a str boundary
        # would make the concatenations raise TypeError.
        boundary = boundary.encode('ascii')

    nextpart = b"--" + boundary
    lastpart = b"--" + boundary + b"--"
    partdict = {}
    terminator = b""

    while terminator != lastpart:
        nbytes = -1         # body length announced by the part, if any
        data = None
        if terminator:
            # At start of next part.  Read headers first.
            headers = http.client.parse_headers(fp)
            clength = headers.get('content-length')
            if clength:
                try:
                    nbytes = int(clength)
                except ValueError:
                    # Unusable content-length: fall back to reading lines.
                    pass
            if nbytes > 0:
                if maxlen and nbytes > maxlen:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(nbytes)
            else:
                data = b""
        # Read lines until end of part.
        lines = []
        while True:
            line = fp.readline()
            if not line:
                terminator = lastpart   # End outer loop
                break
            if line.startswith(b"--"):
                terminator = line.rstrip()
                if terminator in (nextpart, lastpart):
                    break
            lines.append(line)
        # Done with part.
        if data is None:
            # Text before the first boundary: nothing to store.
            continue
        if nbytes < 0:
            if lines:
                # Strip final line terminator
                line = lines[-1]
                if line[-2:] == b"\r\n":
                    line = line[:-2]
                elif line[-1:] == b"\n":
                    line = line[:-1]
                lines[-1] = line
                data = b"".join(lines)
        line = headers['content-disposition']
        if not line:
            continue
        key, params = parse_header(line)
        if key != 'form-data':
            continue
        if 'name' not in params:
            continue
        partdict.setdefault(params['name'], []).append(data)

    return partdict
Guido van Rossum9a22de11995-01-12 12:29:47 +0000297
298
def _parseparam(s):
    """Generate the ';'-introduced pieces of *s*, each stripped of whitespace.

    A ';' is not taken as the end of a piece while the number of '"'
    characters before it, minus the number of '\\"' sequences before it,
    is odd.
    """
    while s[:1] == ';':
        s = s[1:]
        cut = s.find(';')
        while cut > 0:
            quotes = s.count('"', 0, cut) - s.count('\\"', 0, cut)
            if quotes % 2 == 0:
                break
            cut = s.find(';', cut + 1)
        if cut < 0:
            cut = len(s)
        piece, s = s[:cut], s[cut:]
        yield piece.strip()
310
def parse_header(line):
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    Option names are stripped and lower-cased.  An option value that is
    enclosed in double quotes has the quotes removed and the escapes
    '\\\\' and '\\"' replaced by a backslash and a double quote.  Pieces
    without an '=' are ignored.

    """
    parts = _parseparam(';' + line)
    key = next(parts)
    pdict = {}
    for part in parts:
        name, sep, value = part.partition('=')
        if not sep:
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        pdict[name.strip().lower()] = value
    return key, pdict
Guido van Rossum72755611996-03-06 07:20:06 +0000330
331
# Classes for field storage
# =========================
334
class MiniFieldStorage:

    """Like FieldStorage, for use when no file uploads are possible."""

    # Dummy attributes: class-level placeholders for the attribute names
    # FieldStorage sets per instance.
    filename = list = type = file = disposition = None
    type_options = {}
    disposition_options = {}
    headers = {}

    def __init__(self, name, value):
        """Constructor from field name and value."""
        self.name, self.value = name, value

    def __repr__(self):
        """Return printable representation."""
        fields = (self.name, self.value)
        return "MiniFieldStorage(%r, %r)" % fields
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000358
359
360class FieldStorage:
361
    """Store a sequence of fields, reading multipart/form-data.

    This class provides naming, typing, files stored on disk, and
    more.  At the top level, it is accessible like a dictionary, whose
    keys are the field names.  (Note: None can occur as a field name.)
    The items are either a Python list (if there are multiple values) or
    another FieldStorage or MiniFieldStorage object.  If it's a single
    object, it has the following attributes:

    name: the field name, if specified; otherwise None

    filename: the filename, if specified; otherwise None; this is the
        client side filename, *not* the file name on which it is
        stored (that's a temporary file you don't deal with)

    value: the value as a *string*; for file uploads, this
        transparently reads the file every time you request the value
        and returns *bytes*

    file: the file(-like) object from which you can read the data *as
        bytes*; None if the data is stored as a simple string

    type: the content-type, or None if not specified

    type_options: dictionary of options specified on the content-type
        line

    disposition: content-disposition, or None if not specified

    disposition_options: dictionary of corresponding options

    headers: a dictionary(-like) object (sometimes email.message.Message or a
        subclass thereof) containing *all* headers

    The class is subclassable, mostly for the purpose of overriding
    the make_file() method, which is called internally to come up with
    a file open for reading and writing.  This makes it possible to
    override the default choice of storing all files in a temporary
    directory and unlinking them as soon as they have been opened.

    """
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000403 def __init__(self, fp=None, headers=None, outerboundary=b'',
404 environ=os.environ, keep_blank_values=0, strict_parsing=0,
405 limit=None, encoding='utf-8', errors='replace'):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000406 """Constructor. Read multipart/* until last part.
Guido van Rossum7aee3841996-03-07 18:00:44 +0000407
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000408 Arguments, all optional:
Guido van Rossum7aee3841996-03-07 18:00:44 +0000409
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000410 fp : file pointer; default: sys.stdin.buffer
Guido van Rossumb1b4f941998-05-08 19:55:51 +0000411 (not used when the request method is GET)
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000412 Can be :
413 1. a TextIOWrapper object
414 2. an object whose read() and readline() methods return bytes
Guido van Rossum7aee3841996-03-07 18:00:44 +0000415
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000416 headers : header dictionary-like object; default:
417 taken from environ as per CGI spec
Guido van Rossum7aee3841996-03-07 18:00:44 +0000418
Guido van Rossum773ab271996-07-23 03:46:24 +0000419 outerboundary : terminating multipart boundary
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000420 (for internal use only)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000421
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000422 environ : environment dictionary; default: os.environ
Guido van Rossum773ab271996-07-23 03:46:24 +0000423
424 keep_blank_values: flag indicating whether blank values in
Senthil Kumaran30e86a42010-08-09 20:01:35 +0000425 percent-encoded forms should be treated as blank strings.
Tim Peters88869f92001-01-14 23:36:06 +0000426 A true value indicates that blanks should be retained as
Guido van Rossum773ab271996-07-23 03:46:24 +0000427 blank strings. The default false value indicates that
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000428 blank values are to be ignored and treated as if they were
429 not included.
Guido van Rossum773ab271996-07-23 03:46:24 +0000430
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000431 strict_parsing: flag indicating what to do with parsing errors.
432 If false (the default), errors are silently ignored.
433 If true, errors raise a ValueError exception.
Guido van Rossume08c04c1996-11-11 19:29:11 +0000434
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000435 limit : used internally to read parts of multipart/form-data forms,
436 to exit from the reading loop when reached. It is the difference
437 between the form content-length and the number of bytes already
438 read
439
440 encoding, errors : the encoding and error handler used to decode the
441 binary stream to strings. Must be the same as the charset defined
442 for the page sending the form (content-type : meta http-equiv or
443 header)
444
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000445 """
446 method = 'GET'
447 self.keep_blank_values = keep_blank_values
448 self.strict_parsing = strict_parsing
Raymond Hettinger54f02222002-06-01 14:18:47 +0000449 if 'REQUEST_METHOD' in environ:
Eric S. Raymond7e9b4f52001-02-09 09:59:10 +0000450 method = environ['REQUEST_METHOD'].upper()
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000451 self.qs_on_post = None
Guido van Rossum01852831998-06-25 02:40:17 +0000452 if method == 'GET' or method == 'HEAD':
Raymond Hettinger54f02222002-06-01 14:18:47 +0000453 if 'QUERY_STRING' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000454 qs = environ['QUERY_STRING']
455 elif sys.argv[1:]:
456 qs = sys.argv[1]
457 else:
458 qs = ""
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000459 qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
460 fp = BytesIO(qs)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000461 if headers is None:
462 headers = {'content-type':
463 "application/x-www-form-urlencoded"}
464 if headers is None:
Guido van Rossumcff311a1998-06-11 14:06:59 +0000465 headers = {}
466 if method == 'POST':
467 # Set default content-type for POST to what's traditional
468 headers['content-type'] = "application/x-www-form-urlencoded"
Raymond Hettinger54f02222002-06-01 14:18:47 +0000469 if 'CONTENT_TYPE' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000470 headers['content-type'] = environ['CONTENT_TYPE']
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000471 if 'QUERY_STRING' in environ:
472 self.qs_on_post = environ['QUERY_STRING']
Raymond Hettinger54f02222002-06-01 14:18:47 +0000473 if 'CONTENT_LENGTH' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000474 headers['content-length'] = environ['CONTENT_LENGTH']
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000475 if fp is None:
476 self.fp = sys.stdin.buffer
477 # self.fp.read() must return bytes
478 elif isinstance(fp, TextIOWrapper):
479 self.fp = fp.buffer
480 else:
481 self.fp = fp
482
483 self.encoding = encoding
484 self.errors = errors
485
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000486 self.headers = headers
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000487 if not isinstance(outerboundary, bytes):
488 raise TypeError('outerboundary must be bytes, not %s'
489 % type(outerboundary).__name__)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000490 self.outerboundary = outerboundary
Guido van Rossum7aee3841996-03-07 18:00:44 +0000491
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000492 self.bytes_read = 0
493 self.limit = limit
494
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000495 # Process content-disposition header
496 cdisp, pdict = "", {}
Raymond Hettinger54f02222002-06-01 14:18:47 +0000497 if 'content-disposition' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000498 cdisp, pdict = parse_header(self.headers['content-disposition'])
499 self.disposition = cdisp
500 self.disposition_options = pdict
501 self.name = None
Raymond Hettinger54f02222002-06-01 14:18:47 +0000502 if 'name' in pdict:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000503 self.name = pdict['name']
504 self.filename = None
Raymond Hettinger54f02222002-06-01 14:18:47 +0000505 if 'filename' in pdict:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000506 self.filename = pdict['filename']
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000507 self._binary_file = self.filename is not None
Guido van Rossum7aee3841996-03-07 18:00:44 +0000508
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000509 # Process content-type header
Barry Warsaw302331a1999-01-08 17:42:03 +0000510 #
511 # Honor any existing content-type header. But if there is no
512 # content-type header, use some sensible defaults. Assume
513 # outerboundary is "" at the outer level, but something non-false
514 # inside a multi-part. The default for an inner part is text/plain,
515 # but for an outer part it should be urlencoded. This should catch
516 # bogus clients which erroneously forget to include a content-type
517 # header.
518 #
519 # See below for what we do if there does exist a content-type header,
520 # but it happens to be something we don't understand.
Raymond Hettinger54f02222002-06-01 14:18:47 +0000521 if 'content-type' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000522 ctype, pdict = parse_header(self.headers['content-type'])
Guido van Rossumce900de1999-06-02 18:44:22 +0000523 elif self.outerboundary or method != 'POST':
Barry Warsaw302331a1999-01-08 17:42:03 +0000524 ctype, pdict = "text/plain", {}
525 else:
526 ctype, pdict = 'application/x-www-form-urlencoded', {}
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000527 self.type = ctype
528 self.type_options = pdict
Raymond Hettinger54f02222002-06-01 14:18:47 +0000529 if 'boundary' in pdict:
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000530 self.innerboundary = pdict['boundary'].encode(self.encoding)
531 else:
532 self.innerboundary = b""
533
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000534 clen = -1
Raymond Hettinger54f02222002-06-01 14:18:47 +0000535 if 'content-length' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000536 try:
Eric S. Raymond7e9b4f52001-02-09 09:59:10 +0000537 clen = int(self.headers['content-length'])
Skip Montanarodb5d1442002-03-23 05:50:17 +0000538 except ValueError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000539 pass
540 if maxlen and clen > maxlen:
Collin Winterce36ad82007-08-30 01:19:48 +0000541 raise ValueError('Maximum content length exceeded')
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000542 self.length = clen
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000543 if self.limit is None and clen:
544 self.limit = clen
Guido van Rossum7aee3841996-03-07 18:00:44 +0000545
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000546 self.list = self.file = None
547 self.done = 0
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000548 if ctype == 'application/x-www-form-urlencoded':
549 self.read_urlencoded()
550 elif ctype[:10] == 'multipart/':
Guido van Rossumf5745001998-10-20 14:43:02 +0000551 self.read_multi(environ, keep_blank_values, strict_parsing)
Barry Warsaw302331a1999-01-08 17:42:03 +0000552 else:
Guido van Rossum60a3bd81999-06-11 18:26:09 +0000553 self.read_single()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000554
Brett Cannonf79126f2013-08-23 15:15:48 -0400555 def __del__(self):
556 try:
557 self.file.close()
558 except AttributeError:
559 pass
560
Guido van Rossum7aee3841996-03-07 18:00:44 +0000561 def __repr__(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000562 """Return a printable representation."""
Walter Dörwald70a6b492004-02-12 17:35:32 +0000563 return "FieldStorage(%r, %r, %r)" % (
564 self.name, self.filename, self.value)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000565
Guido van Rossum4061cbe2002-09-11 18:20:34 +0000566 def __iter__(self):
567 return iter(self.keys())
568
Guido van Rossum7aee3841996-03-07 18:00:44 +0000569 def __getattr__(self, name):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000570 if name != 'value':
Collin Winterce36ad82007-08-30 01:19:48 +0000571 raise AttributeError(name)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000572 if self.file:
573 self.file.seek(0)
574 value = self.file.read()
575 self.file.seek(0)
576 elif self.list is not None:
577 value = self.list
578 else:
579 value = None
580 return value
Guido van Rossum7aee3841996-03-07 18:00:44 +0000581
582 def __getitem__(self, key):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000583 """Dictionary style indexing."""
584 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000585 raise TypeError("not indexable")
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000586 found = []
587 for item in self.list:
588 if item.name == key: found.append(item)
589 if not found:
Collin Winterce36ad82007-08-30 01:19:48 +0000590 raise KeyError(key)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000591 if len(found) == 1:
592 return found[0]
593 else:
594 return found
Guido van Rossum7aee3841996-03-07 18:00:44 +0000595
def getvalue(self, key, default=None):
    """Dictionary style get() method, including 'value' lookup.

    Returns the field's value, a list of values when the key maps to
    several fields, or default when the key is absent.
    """
    if key not in self:
        return default
    found = self[key]
    if isinstance(found, list):
        return [entry.value for entry in found]
    return found.value
606
def getfirst(self, key, default=None):
    """Return the first value received for key, or default when absent."""
    if key not in self:
        return default
    found = self[key]
    if isinstance(found, list):
        return found[0].value
    return found.value
617
def getlist(self, key):
    """Return the list of values received for key (empty when absent)."""
    if key not in self:
        return []
    found = self[key]
    if isinstance(found, list):
        return [entry.value for entry in found]
    return [found.value]
628
def keys(self):
    """Dictionary style keys() method.

    Returns the distinct field names as a list; raises TypeError when this
    object holds no field list.
    """
    if self.list is None:
        raise TypeError("not indexable")
    unique_names = {item.name for item in self.list}
    return list(unique_names)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000634
Raymond Hettinger54f02222002-06-01 14:18:47 +0000635 def __contains__(self, key):
636 """Dictionary style __contains__ method."""
637 if self.list is None:
Collin Winterce36ad82007-08-30 01:19:48 +0000638 raise TypeError("not indexable")
Thomas Wouters8ce81f72007-09-20 18:22:40 +0000639 return any(item.name == key for item in self.list)
Raymond Hettinger54f02222002-06-01 14:18:47 +0000640
Guido van Rossum88b85d41997-01-11 19:21:33 +0000641 def __len__(self):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000642 """Dictionary style len(x) support."""
643 return len(self.keys())
Guido van Rossum88b85d41997-01-11 19:21:33 +0000644
Thomas Wouters8ce81f72007-09-20 18:22:40 +0000645 def __nonzero__(self):
646 return bool(self.list)
647
def read_urlencoded(self):
    """Internal: read data in query string format.

    Reads self.length bytes from self.fp, decodes them, appends the query
    string captured in self.qs_on_post (if any), and stores one
    MiniFieldStorage per parsed key/value pair in self.list.
    """
    raw = self.fp.read(self.length)
    if not isinstance(raw, bytes):
        raise ValueError("%s should return bytes, got %s" \
                         % (self.fp, type(raw).__name__))
    text = raw.decode(self.encoding, self.errors)
    if self.qs_on_post:
        text = text + '&' + self.qs_on_post
    # Reset the list before parsing so a parse error leaves it empty.
    self.list = []
    pairs = urllib.parse.parse_qsl(
        text, self.keep_blank_values, self.strict_parsing,
        encoding=self.encoding, errors=self.errors)
    self.list.extend(MiniFieldStorage(key, value) for key, value in pairs)
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000664
# Class used by read_multi() to build each nested part; when left as None,
# read_multi() falls back to self.__class__.
FieldStorageClass = None
666
def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart.

    Validates the inner boundary, seeds self.list with any fields from
    self.qs_on_post, then repeatedly reads a header block from self.fp and
    builds one nested part per header block until a part reports that it
    is done, the declared length has been consumed, or no more header
    text can be read.

    environ, keep_blank_values and strict_parsing are passed through to
    every nested part.

    Raises ValueError when the boundary is invalid or when self.fp does
    not return bytes for the first line.
    """
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
    self.list = []
    if self.qs_on_post:
        query = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors)
        for key, value in query:
            self.list.append(MiniFieldStorage(key, value))

    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline() # bytes
    if not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s" \
                         % (self.fp, type(first_line).__name__))
    self.bytes_read += len(first_line)
    # The first line holds the opening boundary; it is consumed without
    # being compared against b"--" + ib.
    while True:
        parser = FeedParser()
        hdr_text = b""
        # Collect header lines up to and including the first blank line
        # (an empty read at EOF also ends the header block).
        while True:
            data = self.fp.readline()
            hdr_text += data
            if not data.strip():
                break
        if not hdr_text:
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()
        # self.limit may still be None here; subtracting from None would
        # raise TypeError, so hand "no limit" down unchanged instead.
        if self.limit is None:
            remaining = None
        else:
            remaining = self.limit - self.bytes_read
        part = klass(self.fp, headers, ib, environ, keep_blank_values,
                     strict_parsing, remaining,
                     self.encoding, self.errors)
        self.bytes_read += part.bytes_read
        self.list.append(part)
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000710
def read_single(self):
    """Internal: read an atomic part.

    A non-negative self.length selects a fixed-size binary read followed
    by skipping to the next boundary; otherwise the part is read line by
    line.  The resulting file is rewound in both cases.
    """
    length_known = self.length >= 0
    if length_known:
        self.read_binary()
        self.skip_lines()
    else:
        self.read_lines()
    self.file.seek(0)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000719
# read_binary() asks self.fp for at most this many bytes per read() call.
bufsize = 8*1024            # I/O buffering size for copy to file
Guido van Rossum7aee3841996-03-07 18:00:44 +0000721
def read_binary(self):
    """Internal: read binary data.

    Copies up to self.length bytes from self.fp into a file obtained from
    make_file(), at most self.bufsize bytes per read.  An empty read
    before the length is reached sets self.done to -1.  Raises ValueError
    when self.fp returns something other than bytes.
    """
    self.file = self.make_file()
    remaining = self.length
    # A negative length never enters the loop, so nothing is copied.
    while remaining > 0:
        chunk = self.fp.read(min(remaining, self.bufsize)) # bytes
        if not isinstance(chunk, bytes):
            raise ValueError("%s should return bytes, got %s"
                             % (self.fp, type(chunk).__name__))
        self.bytes_read += len(chunk)
        if not chunk:
            self.done = -1
            break
        self.file.write(chunk)
        remaining -= len(chunk)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000738
def read_lines(self):
    """Internal: read lines until EOF or outerboundary.

    Starts with an in-memory buffer: bytes for file uploads, text for
    other fields.
    """
    buffer_type = BytesIO if self._binary_file else StringIO
    self.file = self.__file = buffer_type()
    if not self.outerboundary:
        self.read_lines_to_eof()
    else:
        self.read_lines_to_outerboundary()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000749
Guido van Rossum52b8c292001-06-29 13:06:06 +0000750 def __write(self, line):
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000751 """line is always bytes, not string"""
Guido van Rossum52b8c292001-06-29 13:06:06 +0000752 if self.__file is not None:
753 if self.__file.tell() + len(line) > 1000:
Guido van Rossuma1a68522007-08-28 03:11:34 +0000754 self.file = self.make_file()
755 data = self.__file.getvalue()
756 self.file.write(data)
Guido van Rossum52b8c292001-06-29 13:06:06 +0000757 self.__file = None
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000758 if self._binary_file:
759 # keep bytes
760 self.file.write(line)
761 else:
762 # decode to string
763 self.file.write(line.decode(self.encoding, self.errors))
Guido van Rossum52b8c292001-06-29 13:06:06 +0000764
def read_lines_to_eof(self):
    """Internal: read lines until EOF.

    Each line (capped at 2**16 bytes per read) is counted in
    self.bytes_read and written out; an empty read sets self.done to -1.
    """
    max_line = 1 << 16
    while True:
        line = self.fp.readline(max_line) # bytes
        self.bytes_read += len(line)
        if line:
            self.__write(line)
            continue
        self.done = -1
        break
Guido van Rossum7aee3841996-03-07 18:00:44 +0000774
def read_lines_to_outerboundary(self):
    """Internal: read lines until outerboundary.

    Data is read as bytes: boundaries and line ends must be converted
    to bytes for comparisons.

    Reading stops at the next boundary line, at the closing boundary line
    (self.done becomes 1), at EOF (self.done becomes -1), or once
    self.limit bytes have been consumed.  The line terminator in front of
    a boundary belongs to the boundary and is not written to the field.
    """
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    delim = b""
    last_line_lfend = True
    _read = 0
    while 1:
        # Only a non-negative limit is a real byte budget.  None means no
        # limit was given (comparing an int with None raises TypeError),
        # and a negative value stands for an unknown length, which must
        # not stop the loop before any data has been read.
        if self.limit is not None and 0 <= self.limit <= _read:
            break
        line = self.fp.readline(1<<16) # bytes
        self.bytes_read += len(line)
        _read += len(line)
        if not line:
            self.done = -1
            break
        if delim == b"\r":
            # Re-attach a "\r" that the previous capped read cut off.
            line = delim + line
            delim = b""
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.rstrip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        # Hold back this line's terminator: it is only written once the
        # following line turns out not to be a boundary.
        odelim = delim
        if line.endswith(b"\r\n"):
            delim = b"\r\n"
            line = line[:-2]
            last_line_lfend = True
        elif line.endswith(b"\n"):
            delim = b"\n"
            line = line[:-1]
            last_line_lfend = True
        elif line.endswith(b"\r"):
            # We may interrupt \r\n sequences if they span the 2**16
            # byte boundary
            delim = b"\r"
            line = line[:-1]
            last_line_lfend = False
        else:
            delim = b""
            last_line_lfend = False
        self.__write(odelim + line)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000823
def skip_lines(self):
    """Internal: skip lines until outer boundary if defined.

    Does nothing when there is no outer boundary or when self.done is
    already set.  Otherwise lines are read and discarded (but counted in
    self.bytes_read) until the next boundary line, the closing boundary
    line (self.done becomes 1) or EOF (self.done becomes -1).
    """
    if not self.outerboundary or self.done:
        return
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    last_line_lfend = True
    while True:
        line = self.fp.readline(1<<16)
        self.bytes_read += len(line)
        if not line:
            self.done = -1
            break
        # A boundary line *starts* with b"--".  Testing the end of the
        # line can never match a normal b"--boundary\r\n" line, which
        # made this method run to EOF instead of stopping at the boundary.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.strip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        # A boundary only counts when the previous line was complete.
        last_line_lfend = line.endswith(b'\n')
Guido van Rossum7aee3841996-03-07 18:00:44 +0000845
def make_file(self):
    """Overridable: return a readable & writable file.

    The returned file is used in this order:
    - data is written to it
    - seek(0)
    - data is read from it

    File uploads get a binary-mode file; all other fields get a text-mode
    file that uses self.encoding and '\\n' newlines.

    This implementation returns a tempfile.TemporaryFile, i.e. an
    anonymous temporary file that goes away once it is closed.  Override
    this method in a subclass if the data should live in a more permanent
    or a visible file; such a subclass is then responsible for removing
    whatever files it creates.

    """
    if not self._binary_file:
        return tempfile.TemporaryFile("w+",
            encoding=self.encoding, newline = '\n')
    return tempfile.TemporaryFile("wb+")
Tim Peters88869f92001-01-14 23:36:06 +0000875
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000876
Guido van Rossum72755611996-03-06 07:20:06 +0000877# Test/debug code
878# ===============
Guido van Rossum9a22de11995-01-12 12:29:47 +0000879
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Emits a minimal HTTP header, then dumps everything handed to the
    script (working directory, arguments, form, environment) as HTML.
    The whole dump is produced twice: once as-is and once after lowering
    the module-level maxlen to 50.  Errors in either pass are reported in
    the page through print_exception() rather than propagated.

    """
    global maxlen
    print("Content-type: text/html")
    print()
    # Send error output to the page as well.
    sys.stderr = sys.stdout
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # Deliberately fail two frames deep to exercise print_exception().
        def f():
            exec("testing print_exception() -- <I>italics?</I>")
        def g(f=f):
            f()
        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
Guido van Rossumad164711997-05-13 19:03:23 +0000918
def print_exception(type=None, value=None, tb=None, limit=None):
    """Print an exception and its traceback as HTML.

    When type is None the exception currently being handled is used.
    limit is handed to traceback.format_tb().  All text is HTML-escaped
    and the final exception line is set in bold.
    """
    if type is None:
        type, value, tb = sys.exc_info()
    import traceback
    print()
    print("<H3>Traceback (most recent call last):</H3>")
    frames = traceback.format_tb(tb, limit)
    summary = traceback.format_exception_only(type, value)
    lines = frames + summary
    print("<PRE>%s<B>%s</B></PRE>" % (
        html.escape("".join(lines[:-1])),
        html.escape(lines[-1]),
        ))
    del tb
Guido van Rossum9a22de11995-01-12 12:29:47 +0000932
def print_environ(environ=os.environ):
    """Dump the shell environment as HTML (a definition list sorted by name)."""
    names = sorted(environ.keys())
    print()
    print("<H3>Shell Environment:</H3>")
    print("<DL>")
    for name in names:
        term = html.escape(name)
        definition = html.escape(environ[name])
        print("<DT>", term, "<DD>", definition)
    print("</DL>")
    print()
Guido van Rossum72755611996-03-06 07:20:06 +0000943
def print_form(form):
    """Dump the contents of a form as HTML.

    For every key, in sorted order, the key, the type of its entry and the
    entry's repr() are printed, all HTML-escaped.
    """
    names = sorted(form.keys())
    print()
    print("<H3>Form Contents:</H3>")
    if not names:
        print("<P>No form fields.")
    print("<DL>")
    for name in names:
        print("<DT>" + html.escape(name) + ":", end=' ')
        entry = form[name]
        type_text = html.escape(repr(type(entry)))
        print("<i>" + type_text + "</i>")
        print("<DD>" + html.escape(repr(entry)))
    print("</DL>")
    print()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000959
def print_directory():
    """Dump the current directory as HTML, or the OSError raised while asking."""
    print()
    print("<H3>Current Working Directory:</H3>")
    try:
        cwd = os.getcwd()
    except OSError as err:
        print("OSError:", html.escape(str(err)))
    else:
        print(html.escape(cwd))
    print()
Guido van Rossum9a22de11995-01-12 12:29:47 +0000971
def print_arguments():
    """Dump the command line arguments (sys.argv) as HTML.

    The list is HTML-escaped like the output of the other dump helpers in
    this module, so argument text containing markup characters is shown
    literally instead of being interpreted by the browser.
    """
    print()
    print("<H3>Command Line Arguments:</H3>")
    print()
    # NOTE(review): under CGI, argv can presumably carry request-derived
    # text (indexed queries), so it is treated as untrusted here.
    print(html.escape(str(sys.argv)))
    print()
Guido van Rossuma8738a51996-03-14 21:30:28 +0000978
def print_environ_usage():
    """Dump a list of environment variables used by CGI as HTML."""
    usage = """
<H3>These environment variables could have been set:</H3>
<UL>
<LI>AUTH_TYPE
<LI>CONTENT_LENGTH
<LI>CONTENT_TYPE
<LI>DATE_GMT
<LI>DATE_LOCAL
<LI>DOCUMENT_NAME
<LI>DOCUMENT_ROOT
<LI>DOCUMENT_URI
<LI>GATEWAY_INTERFACE
<LI>LAST_MODIFIED
<LI>PATH
<LI>PATH_INFO
<LI>PATH_TRANSLATED
<LI>QUERY_STRING
<LI>REMOTE_ADDR
<LI>REMOTE_HOST
<LI>REMOTE_IDENT
<LI>REMOTE_USER
<LI>REQUEST_METHOD
<LI>SCRIPT_NAME
<LI>SERVER_NAME
<LI>SERVER_PORT
<LI>SERVER_PROTOCOL
<LI>SERVER_ROOT
<LI>SERVER_SOFTWARE
</UL>
In addition, HTTP headers sent by the server may be passed in the
environment as well. Here are some common variable names:
<UL>
<LI>HTTP_ACCEPT
<LI>HTTP_CONNECTION
<LI>HTTP_HOST
<LI>HTTP_PRAGMA
<LI>HTTP_REFERER
<LI>HTTP_USER_AGENT
</UL>
"""
    print(usage)
Guido van Rossum9a22de11995-01-12 12:29:47 +00001021
Guido van Rossum9a22de11995-01-12 12:29:47 +00001022
Guido van Rossum72755611996-03-06 07:20:06 +00001023# Utilities
1024# =========
Guido van Rossum9a22de11995-01-12 12:29:47 +00001025
def escape(s, quote=None):
    """Deprecated API.

    Emits a DeprecationWarning, then replaces "&", "<" and ">" in s with
    HTML entities; the double quote is replaced too when quote is true.
    """
    warn("cgi.escape is deprecated, use html.escape instead",
         DeprecationWarning, stacklevel=2)
    # "&" must come first, or the entities produced by the later
    # replacements would themselves be escaped.
    replacements = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    if quote:
        replacements.append(('"', "&quot;"))
    for raw, entity in replacements:
        s = s.replace(raw, entity)
    return s
Guido van Rossum9a22de11995-01-12 12:29:47 +00001036
Georg Brandl1f7fffb2010-10-15 15:57:45 +00001037
def valid_boundary(s, _vb_pattern=None):
    """Return a match object if s looks like a valid boundary, else None.

    s may be bytes or str.  It is accepted when it consists of up to 200
    characters in the range space..tilde followed by one final character
    in the range "!"..tilde.  The _vb_pattern argument is ignored: the
    pattern is always chosen from the type of s.
    """
    import re
    _vb_pattern = (b"^[ -~]{0,200}[!-~]$" if isinstance(s, bytes)
                   else "^[ -~]{0,200}[!-~]$")
    return re.match(_vb_pattern, s)
Guido van Rossum9a22de11995-01-12 12:29:47 +00001045
Guido van Rossum72755611996-03-06 07:20:06 +00001046# Invoke mainline
1047# ===============
1048
1049# Call test() when this file is run as a script (not imported as a module)
if __name__ == '__main__':
    # Executed as a script: run the self-test CGI page.
    test()