blob: 233a496e8170abb3f8744881835a47516c76c497 [file] [log] [blame]
Benjamin Peterson8c703a02010-03-11 22:05:58 +00001#! /usr/local/bin/python
Guido van Rossum1c9daa81995-09-18 21:52:37 +00002
Guido van Rossum467d7232001-02-13 13:13:33 +00003# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is
4# intentionally NOT "/usr/bin/env python". On many systems
5# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
6# scripts, and /usr/local/bin is the default directory where Python is
7# installed, so /usr/bin/env would be unable to find python. Granted,
8# binary installations by Linux vendors often install Python in
9# /usr/bin. So let those vendors patch cgi.py to match their choice
10# of installation.
11
Guido van Rossum72755611996-03-06 07:20:06 +000012"""Support module for CGI (Common Gateway Interface) scripts.
Guido van Rossum1c9daa81995-09-18 21:52:37 +000013
Guido van Rossum7aee3841996-03-07 18:00:44 +000014This module defines a number of utilities for use by CGI scripts
15written in Python.
Guido van Rossum72755611996-03-06 07:20:06 +000016"""
17
Guido van Rossum98d9fd32000-02-28 15:12:25 +000018# History
19# -------
Tim Peters88869f92001-01-14 23:36:06 +000020#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000021# Michael McLay started this module. Steve Majewski changed the
22# interface to SvFormContentDict and FormContentDict. The multipart
23# parsing was inspired by code submitted by Andreas Paepcke. Guido van
24# Rossum rewrote, reformatted and documented the module and is currently
25# responsible for its maintenance.
Tim Peters88869f92001-01-14 23:36:06 +000026#
Guido van Rossum98d9fd32000-02-28 15:12:25 +000027
Guido van Rossum52b8c292001-06-29 13:06:06 +000028__version__ = "2.6"
Guido van Rossum0147db01996-03-09 03:16:04 +000029
Guido van Rossum72755611996-03-06 07:20:06 +000030
31# Imports
32# =======
33
Victor Stinner5c23b8e2011-01-14 13:05:21 +000034from io import StringIO, BytesIO, TextIOWrapper
Senthil Kumaranb4cbb922014-01-11 22:20:16 -080035from collections import Mapping
Guido van Rossum72755611996-03-06 07:20:06 +000036import sys
37import os
Jeremy Hylton1afc1692008-06-18 20:49:58 +000038import urllib.parse
Victor Stinner5c23b8e2011-01-14 13:05:21 +000039from email.parser import FeedParser
Senthil Kumaranb4cbb922014-01-11 22:20:16 -080040from email.message import Message
Facundo Batistac469d4c2008-09-03 22:49:01 +000041from warnings import warn
Georg Brandl1f7fffb2010-10-15 15:57:45 +000042import html
Victor Stinner5c23b8e2011-01-14 13:05:21 +000043import locale
44import tempfile
Guido van Rossum72755611996-03-06 07:20:06 +000045
Georg Brandl49d1b4f2008-05-11 21:42:51 +000046__all__ = ["MiniFieldStorage", "FieldStorage",
Guido van Rossuma8423a92001-03-19 13:40:44 +000047 "parse", "parse_qs", "parse_qsl", "parse_multipart",
Martin Panter1cd27722016-06-06 01:53:28 +000048 "parse_header", "test", "print_exception", "print_environ",
Guido van Rossuma8423a92001-03-19 13:40:44 +000049 "print_form", "print_directory", "print_arguments",
50 "print_environ_usage", "escape"]
Guido van Rossumc204c701996-09-05 19:07:11 +000051
52# Logging support
53# ===============
54
Guido van Rossum45e2fbc1998-03-26 21:13:24 +000055logfile = "" # Filename to log to, if not empty
56logfp = None # File object to log to, if not None
Guido van Rossumc204c701996-09-05 19:07:11 +000057
def initlog(*allargs):
    """Select the real logging function, then log the first message.

    Callers should always go through the module-level name ``log``.  It
    starts out bound to initlog(); the first call rebinds it to dolog()
    (a log file is available) or to nolog() (logging is disabled) and
    then forwards the arguments to whichever one was chosen.

    The first argument is a format string and the remaining arguments
    (if any) are fed to the % operator, so e.g.
        log("%s: %s", "a", "b")
    writes "a: b" followed by a newline.

    If the global logfp is not None, it must be a file object that
    receives the log data.  Otherwise, if the global logfile is a
    non-empty string, it names a file that is opened in append mode.
    This file should be world writable!!!  If it cannot be opened,
    logging is silently disabled (there is no safe place to report the
    failure).

    """
    global log, logfile, logfp
    if logfile and not logfp:
        try:
            logfp = open(logfile, "a")
        except OSError:
            # Deliberate best effort: fall through with logfp unset.
            pass
    log = dolog if logfp else nolog
    log(*allargs)
Guido van Rossumc204c701996-09-05 19:07:11 +000092
def dolog(fmt, *args):
    """Format a message and append it to the open log file.

    See initlog() for the calling convention.
    """
    message = fmt % args
    logfp.write(message + "\n")
96
def nolog(*allargs):
    """Discard the message; bound to ``log`` while logging is disabled."""
    return None
100
def closelog():
    """Close the log file and return logging to its initial state.

    Clears the configured file name, closes the open file object (if
    any) and rebinds ``log`` to initlog().
    """
    global log, logfile, logfp
    logfile = ''
    if logfp:
        logfp.close()
        logfp = None
    log = initlog
109
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000110log = initlog # The current logging function
Guido van Rossumc204c701996-09-05 19:07:11 +0000111
112
Guido van Rossum72755611996-03-06 07:20:06 +0000113# Parsing functions
114# =================
115
Guido van Rossumad164711997-05-13 19:03:23 +0000116# Maximum input we will accept when REQUEST_METHOD is POST
117# 0 ==> unlimited input
118maxlen = 0
119
def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
    """Parse a query in the environment or from a file (default stdin).

    Arguments, all optional:

    fp              : file pointer; default: sys.stdin

    environ         : environment dictionary; default: os.environ.
        Note that 'REQUEST_METHOD' and, in some cases, 'QUERY_STRING'
        are written back into this mapping.

    keep_blank_values: flag indicating whether blank values in
        percent-encoded forms should be treated as blank strings.
        A true value keeps them as blank strings.  The default false
        value drops them as if they had not been sent.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    Returns the dictionary produced by urllib.parse.parse_qs(), or the
    result of parse_multipart() for a multipart/form-data POST.
    """
    if fp is None:
        fp = sys.stdin

    # Keys and values are returned as strings, so an encoding is needed
    # to decode the bytes read from fp.
    encoding = getattr(fp, 'encoding', 'latin-1')

    # fp.read() must return bytes
    if isinstance(fp, TextIOWrapper):
        fp = fp.buffer

    if 'REQUEST_METHOD' not in environ:
        environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone

    if environ['REQUEST_METHOD'] == 'POST':
        ctype, pdict = parse_header(environ['CONTENT_TYPE'])
        if ctype == 'multipart/form-data':
            return parse_multipart(fp, pdict)
        if ctype == 'application/x-www-form-urlencoded':
            clength = int(environ['CONTENT_LENGTH'])
            if maxlen and clength > maxlen:
                raise ValueError('Maximum content length exceeded')
            qs = fp.read(clength).decode(encoding)
        else:
            qs = ''                     # Unknown content-type

        # Append the URL query string (or argv[1] when run stand-alone)
        # to whatever was read from the request body.
        have_extra = True
        if 'QUERY_STRING' in environ:
            extra = environ['QUERY_STRING']
        elif sys.argv[1:]:
            extra = sys.argv[1]
        else:
            have_extra = False
        if have_extra:
            if qs:
                qs = qs + '&'
            qs = qs + extra
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
    elif 'QUERY_STRING' in environ:
        qs = environ['QUERY_STRING']
    else:
        qs = sys.argv[1] if sys.argv[1:] else ""
        environ['QUERY_STRING'] = qs    # XXX Shouldn't, really

    return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding)
Guido van Rossume7808771995-08-07 20:12:09 +0000184
185
Facundo Batistac469d4c2008-09-03 22:49:01 +0000186# parse query string function called from urlparse,
Martin Panter46f50722016-05-26 05:35:26 +0000187# this is done in order to maintain backward compatibility.
Facundo Batistac469d4c2008-09-03 22:49:01 +0000188
def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Deprecated alias for urllib.parse.parse_qs().

    Emits a DeprecationWarning attributed to the caller and returns the
    result of parsing the query string *qs*.
    """
    warn("cgi.parse_qs is deprecated, use urllib.parse.parse_qs instead",
         category=DeprecationWarning, stacklevel=2)
    parsed = urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing)
    return parsed
Guido van Rossum1946f0d1999-06-04 17:54:39 +0000194
def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Deprecated alias for urllib.parse.parse_qsl().

    Emits a DeprecationWarning attributed to the caller and returns the
    list of (name, value) pairs parsed from the query string *qs*.
    """
    warn("cgi.parse_qsl is deprecated, use urllib.parse.parse_qsl instead",
         category=DeprecationWarning, stacklevel=2)
    pairs = urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
    return pairs
Guido van Rossum9a22de11995-01-12 12:29:47 +0000200
def parse_multipart(fp, pdict):
    """Parse multipart input.

    Arguments:
    fp   : input file; its read() and readline() must return bytes
    pdict: dictionary containing other parameters of content-type header.
           The 'boundary' entry may be bytes or str; a str (which is what
           parse_header() produces) is encoded as ASCII.

    Returns a dictionary just like parse_qs(): keys are the field names, each
    value is a list of values for that field (as bytes).  This is easy to use
    but not much good if you are expecting megabytes to be uploaded -- in that
    case, use the FieldStorage class instead which is much more flexible.

    Raises ValueError if the boundary is rejected by valid_boundary() or if
    a part announces a content-length larger than the module-level maxlen.

    XXX This does not parse nested multipart parts -- use FieldStorage for
    that.

    XXX This should really be subsumed by FieldStorage altogether -- no
    point in having two implementations of the same parsing algorithm.
    Also, FieldStorage protects itself better against certain DoS attacks
    by limiting the size of the data read in one chunk.  The API here
    does not support that kind of protection.  This also affects parse()
    since it can call parse_multipart().

    """
    import http.client

    boundary = b""
    if 'boundary' in pdict:
        boundary = pdict['boundary']
    if not valid_boundary(boundary):
        raise ValueError('Invalid boundary in multipart form: %r'
                            % (boundary,))
    if isinstance(boundary, str):
        # The delimiters below are built as bytes; without this a str
        # boundary (the parse_header() output) fails with TypeError.
        # NOTE(review): assumes valid_boundary() only admits ASCII text;
        # otherwise encode() raises UnicodeEncodeError (a ValueError).
        boundary = boundary.encode('ascii')

    nextpart = b"--" + boundary
    lastpart = b"--" + boundary + b"--"
    partdict = {}
    terminator = b""

    while terminator != lastpart:
        part_length = -1    # announced content-length, -1 if unknown
        data = None         # stays None for the preamble before part 1
        if terminator:
            # At start of next part.  Read headers first.
            headers = http.client.parse_headers(fp)
            clength = headers.get('content-length')
            if clength:
                try:
                    part_length = int(clength)
                except ValueError:
                    # Unparsable length: treat it as not given.
                    pass
            if part_length > 0:
                if maxlen and part_length > maxlen:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(part_length)
            else:
                data = b""
        # Read lines until end of part.
        lines = []
        while True:
            line = fp.readline()
            if not line:
                terminator = lastpart   # End outer loop
                break
            if line.startswith(b"--"):
                terminator = line.rstrip()
                if terminator in (nextpart, lastpart):
                    break
            lines.append(line)
        # Done with part.
        if data is None:
            continue
        if part_length < 0:
            if lines:
                # Strip final line terminator
                line = lines[-1]
                if line[-2:] == b"\r\n":
                    line = line[:-2]
                elif line[-1:] == b"\n":
                    line = line[:-1]
                lines[-1] = line
            data = b"".join(lines)
        line = headers['content-disposition']
        if not line:
            continue
        key, params = parse_header(line)
        if key != 'form-data':
            continue
        if 'name' not in params:
            continue
        partdict.setdefault(params['name'], []).append(data)

    return partdict
Guido van Rossum9a22de11995-01-12 12:29:47 +0000299
300
Fred Drake9a0a65b2008-12-04 19:24:50 +0000301def _parseparam(s):
302 while s[:1] == ';':
303 s = s[1:]
304 end = s.find(';')
Senthil Kumaran1ef0c032011-10-20 01:05:44 +0800305 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
Fred Drake9a0a65b2008-12-04 19:24:50 +0000306 end = s.find(';', end + 1)
307 if end < 0:
308 end = len(s)
309 f = s[:end]
310 yield f.strip()
311 s = s[end:]
312
def parse_header(line):
    """Parse a Content-type like header.

    Return a (key, pdict) tuple: the main value (e.g. the content type)
    and a dictionary of the options that follow it.  Option names are
    lower-cased; a value wrapped in double quotes has the quotes removed
    and its backslash escapes for '\\' and '"' undone.  Pieces without
    an '=' are ignored.

    """
    pieces = _parseparam(';' + line)
    key = next(pieces)
    pdict = {}
    for piece in pieces:
        name, sep, value = piece.partition('=')
        if not sep:
            continue
        name = name.strip().lower()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        pdict[name] = value
    return key, pdict
Guido van Rossum72755611996-03-06 07:20:06 +0000332
333
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000334# Classes for field storage
335# =========================
336
class MiniFieldStorage:

    """Like FieldStorage, for use when no file uploads are possible."""

    # Placeholder attributes so instances expose the same attribute
    # names as FieldStorage; only name and value are set per instance.
    filename = None
    list = None
    type = None
    file = None
    type_options = {}
    disposition = None
    disposition_options = {}
    headers = {}

    def __init__(self, name, value):
        """Store the field *name* and its *value*."""
        self.name, self.value = name, value

    def __repr__(self):
        """Return printable representation."""
        return "MiniFieldStorage({!r}, {!r})".format(self.name, self.value)
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000360
361
362class FieldStorage:
363
Guido van Rossum7aee3841996-03-07 18:00:44 +0000364 """Store a sequence of fields, reading multipart/form-data.
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000365
Guido van Rossum7aee3841996-03-07 18:00:44 +0000366 This class provides naming, typing, files stored on disk, and
367 more. At the top level, it is accessible like a dictionary, whose
368 keys are the field names. (Note: None can occur as a field name.)
369 The items are either a Python list (if there's multiple values) or
370 another FieldStorage or MiniFieldStorage object. If it's a single
371 object, it has the following attributes:
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000372
Guido van Rossum7aee3841996-03-07 18:00:44 +0000373 name: the field name, if specified; otherwise None
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000374
Guido van Rossum7aee3841996-03-07 18:00:44 +0000375 filename: the filename, if specified; otherwise None; this is the
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000376 client side filename, *not* the file name on which it is
377 stored (that's a temporary file you don't deal with)
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000378
Guido van Rossum7aee3841996-03-07 18:00:44 +0000379 value: the value as a *string*; for file uploads, this
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000380 transparently reads the file every time you request the value
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000381 and returns *bytes*
Guido van Rossum7aee3841996-03-07 18:00:44 +0000382
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000383 file: the file(-like) object from which you can read the data *as
384 bytes* ; None if the data is stored a simple string
Guido van Rossum7aee3841996-03-07 18:00:44 +0000385
386 type: the content-type, or None if not specified
387
388 type_options: dictionary of options specified on the content-type
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000389 line
Guido van Rossum7aee3841996-03-07 18:00:44 +0000390
391 disposition: content-disposition, or None if not specified
392
393 disposition_options: dictionary of corresponding options
394
Barry Warsaw596097e2008-06-12 02:38:51 +0000395 headers: a dictionary(-like) object (sometimes email.message.Message or a
Armin Rigo3a703b62005-09-19 09:11:04 +0000396 subclass thereof) containing *all* headers
Guido van Rossum7aee3841996-03-07 18:00:44 +0000397
398 The class is subclassable, mostly for the purpose of overriding
399 the make_file() method, which is called internally to come up with
400 a file open for reading and writing. This makes it possible to
401 override the default choice of storing all files in a temporary
402 directory and unlinking them as soon as they have been opened.
403
404 """
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000405 def __init__(self, fp=None, headers=None, outerboundary=b'',
406 environ=os.environ, keep_blank_values=0, strict_parsing=0,
407 limit=None, encoding='utf-8', errors='replace'):
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000408 """Constructor. Read multipart/* until last part.
Guido van Rossum7aee3841996-03-07 18:00:44 +0000409
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000410 Arguments, all optional:
Guido van Rossum7aee3841996-03-07 18:00:44 +0000411
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000412 fp : file pointer; default: sys.stdin.buffer
Guido van Rossumb1b4f941998-05-08 19:55:51 +0000413 (not used when the request method is GET)
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000414 Can be :
415 1. a TextIOWrapper object
416 2. an object whose read() and readline() methods return bytes
Guido van Rossum7aee3841996-03-07 18:00:44 +0000417
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000418 headers : header dictionary-like object; default:
419 taken from environ as per CGI spec
Guido van Rossum7aee3841996-03-07 18:00:44 +0000420
Guido van Rossum773ab271996-07-23 03:46:24 +0000421 outerboundary : terminating multipart boundary
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000422 (for internal use only)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000423
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000424 environ : environment dictionary; default: os.environ
Guido van Rossum773ab271996-07-23 03:46:24 +0000425
426 keep_blank_values: flag indicating whether blank values in
Senthil Kumaran30e86a42010-08-09 20:01:35 +0000427 percent-encoded forms should be treated as blank strings.
Tim Peters88869f92001-01-14 23:36:06 +0000428 A true value indicates that blanks should be retained as
Guido van Rossum773ab271996-07-23 03:46:24 +0000429 blank strings. The default false value indicates that
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000430 blank values are to be ignored and treated as if they were
431 not included.
Guido van Rossum773ab271996-07-23 03:46:24 +0000432
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000433 strict_parsing: flag indicating what to do with parsing errors.
434 If false (the default), errors are silently ignored.
435 If true, errors raise a ValueError exception.
Guido van Rossume08c04c1996-11-11 19:29:11 +0000436
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000437 limit : used internally to read parts of multipart/form-data forms,
438 to exit from the reading loop when reached. It is the difference
439 between the form content-length and the number of bytes already
440 read
441
442 encoding, errors : the encoding and error handler used to decode the
443 binary stream to strings. Must be the same as the charset defined
444 for the page sending the form (content-type : meta http-equiv or
445 header)
446
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000447 """
448 method = 'GET'
449 self.keep_blank_values = keep_blank_values
450 self.strict_parsing = strict_parsing
Raymond Hettinger54f02222002-06-01 14:18:47 +0000451 if 'REQUEST_METHOD' in environ:
Eric S. Raymond7e9b4f52001-02-09 09:59:10 +0000452 method = environ['REQUEST_METHOD'].upper()
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000453 self.qs_on_post = None
Guido van Rossum01852831998-06-25 02:40:17 +0000454 if method == 'GET' or method == 'HEAD':
Raymond Hettinger54f02222002-06-01 14:18:47 +0000455 if 'QUERY_STRING' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000456 qs = environ['QUERY_STRING']
457 elif sys.argv[1:]:
458 qs = sys.argv[1]
459 else:
460 qs = ""
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000461 qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
462 fp = BytesIO(qs)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000463 if headers is None:
464 headers = {'content-type':
465 "application/x-www-form-urlencoded"}
466 if headers is None:
Guido van Rossumcff311a1998-06-11 14:06:59 +0000467 headers = {}
468 if method == 'POST':
469 # Set default content-type for POST to what's traditional
470 headers['content-type'] = "application/x-www-form-urlencoded"
Raymond Hettinger54f02222002-06-01 14:18:47 +0000471 if 'CONTENT_TYPE' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000472 headers['content-type'] = environ['CONTENT_TYPE']
Benjamin Petersondcf97b92008-07-02 17:30:14 +0000473 if 'QUERY_STRING' in environ:
474 self.qs_on_post = environ['QUERY_STRING']
Raymond Hettinger54f02222002-06-01 14:18:47 +0000475 if 'CONTENT_LENGTH' in environ:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000476 headers['content-length'] = environ['CONTENT_LENGTH']
Senthil Kumaranb4cbb922014-01-11 22:20:16 -0800477 else:
478 if not (isinstance(headers, (Mapping, Message))):
479 raise TypeError("headers must be mapping or an instance of "
480 "email.message.Message")
481 self.headers = headers
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000482 if fp is None:
483 self.fp = sys.stdin.buffer
484 # self.fp.read() must return bytes
485 elif isinstance(fp, TextIOWrapper):
486 self.fp = fp.buffer
487 else:
Senthil Kumaranb4cbb922014-01-11 22:20:16 -0800488 if not (hasattr(fp, 'read') and hasattr(fp, 'readline')):
489 raise TypeError("fp must be file pointer")
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000490 self.fp = fp
491
492 self.encoding = encoding
493 self.errors = errors
494
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000495 if not isinstance(outerboundary, bytes):
496 raise TypeError('outerboundary must be bytes, not %s'
497 % type(outerboundary).__name__)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000498 self.outerboundary = outerboundary
Guido van Rossum7aee3841996-03-07 18:00:44 +0000499
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000500 self.bytes_read = 0
501 self.limit = limit
502
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000503 # Process content-disposition header
504 cdisp, pdict = "", {}
Raymond Hettinger54f02222002-06-01 14:18:47 +0000505 if 'content-disposition' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000506 cdisp, pdict = parse_header(self.headers['content-disposition'])
507 self.disposition = cdisp
508 self.disposition_options = pdict
509 self.name = None
Raymond Hettinger54f02222002-06-01 14:18:47 +0000510 if 'name' in pdict:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000511 self.name = pdict['name']
512 self.filename = None
Raymond Hettinger54f02222002-06-01 14:18:47 +0000513 if 'filename' in pdict:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000514 self.filename = pdict['filename']
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000515 self._binary_file = self.filename is not None
Guido van Rossum7aee3841996-03-07 18:00:44 +0000516
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000517 # Process content-type header
Barry Warsaw302331a1999-01-08 17:42:03 +0000518 #
519 # Honor any existing content-type header. But if there is no
520 # content-type header, use some sensible defaults. Assume
521 # outerboundary is "" at the outer level, but something non-false
522 # inside a multi-part. The default for an inner part is text/plain,
523 # but for an outer part it should be urlencoded. This should catch
524 # bogus clients which erroneously forget to include a content-type
525 # header.
526 #
527 # See below for what we do if there does exist a content-type header,
528 # but it happens to be something we don't understand.
Raymond Hettinger54f02222002-06-01 14:18:47 +0000529 if 'content-type' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000530 ctype, pdict = parse_header(self.headers['content-type'])
Guido van Rossumce900de1999-06-02 18:44:22 +0000531 elif self.outerboundary or method != 'POST':
Barry Warsaw302331a1999-01-08 17:42:03 +0000532 ctype, pdict = "text/plain", {}
533 else:
534 ctype, pdict = 'application/x-www-form-urlencoded', {}
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000535 self.type = ctype
536 self.type_options = pdict
Raymond Hettinger54f02222002-06-01 14:18:47 +0000537 if 'boundary' in pdict:
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000538 self.innerboundary = pdict['boundary'].encode(self.encoding)
539 else:
540 self.innerboundary = b""
541
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000542 clen = -1
Raymond Hettinger54f02222002-06-01 14:18:47 +0000543 if 'content-length' in self.headers:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000544 try:
Eric S. Raymond7e9b4f52001-02-09 09:59:10 +0000545 clen = int(self.headers['content-length'])
Skip Montanarodb5d1442002-03-23 05:50:17 +0000546 except ValueError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000547 pass
548 if maxlen and clen > maxlen:
Collin Winterce36ad82007-08-30 01:19:48 +0000549 raise ValueError('Maximum content length exceeded')
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000550 self.length = clen
Victor Stinner5c23b8e2011-01-14 13:05:21 +0000551 if self.limit is None and clen:
552 self.limit = clen
Guido van Rossum7aee3841996-03-07 18:00:44 +0000553
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000554 self.list = self.file = None
555 self.done = 0
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000556 if ctype == 'application/x-www-form-urlencoded':
557 self.read_urlencoded()
558 elif ctype[:10] == 'multipart/':
Guido van Rossumf5745001998-10-20 14:43:02 +0000559 self.read_multi(environ, keep_blank_values, strict_parsing)
Barry Warsaw302331a1999-01-08 17:42:03 +0000560 else:
Guido van Rossum60a3bd81999-06-11 18:26:09 +0000561 self.read_single()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000562
Brett Cannonf79126f2013-08-23 15:15:48 -0400563 def __del__(self):
564 try:
565 self.file.close()
566 except AttributeError:
567 pass
568
Berker Peksagbf5e9602015-02-06 10:21:37 +0200569 def __enter__(self):
570 return self
571
572 def __exit__(self, *args):
573 self.file.close()
574
def __repr__(self):
    """Return a printable representation."""
    # Show the field name, the upload filename and the current value.
    shown = (self.name, self.filename, self.value)
    return "FieldStorage(%r, %r, %r)" % shown
Guido van Rossum7aee3841996-03-07 18:00:44 +0000579
def __iter__(self):
    """Iterate over the field names, as returned by keys()."""
    names = self.keys()
    return iter(names)
582
def __getattr__(self, name):
    """Compute the ``value`` attribute on demand.

    Only ``value`` is synthesized here; any other missing attribute
    raises AttributeError.  The value is the full content of the
    backing file when there is one, otherwise the list of sub-fields,
    otherwise None.
    """
    if name != 'value':
        raise AttributeError(name)
    backing = self.file
    if backing:
        # Read everything, then rewind so the file can be read again.
        backing.seek(0)
        contents = backing.read()
        backing.seek(0)
        return contents
    if self.list is not None:
        return self.list
    return None
Guido van Rossum7aee3841996-03-07 18:00:44 +0000595
def __getitem__(self, key):
    """Dictionary style indexing.

    Return the single field named *key*, or a list of fields when the
    name occurs more than once.  Raise KeyError when no field has that
    name and TypeError when this object holds no field list.
    """
    if self.list is None:
        raise TypeError("not indexable")
    matches = [item for item in self.list if item.name == key]
    if not matches:
        raise KeyError(key)
    return matches[0] if len(matches) == 1 else matches
Guido van Rossum7aee3841996-03-07 18:00:44 +0000609
def getvalue(self, key, default=None):
    """Dictionary style get() method, including 'value' lookup.

    Return the field's value, a list of values when the name is
    repeated, or *default* when the field is absent.
    """
    if key not in self:
        return default
    found = self[key]
    if isinstance(found, list):
        return [field.value for field in found]
    return found.value
620
def getfirst(self, key, default=None):
    """ Return the first value received.

    When the field is absent, *default* is returned instead.
    """
    if key not in self:
        return default
    found = self[key]
    first = found[0] if isinstance(found, list) else found
    return first.value
631
def getlist(self, key):
    """ Return list of received values.

    The result is always a list; it is empty when the field is absent.
    """
    if key not in self:
        return []
    found = self[key]
    if not isinstance(found, list):
        found = [found]
    return [field.value for field in found]
642
def keys(self):
    """Dictionary style keys() method.

    Return the distinct field names as a list (in no particular order).
    Raise TypeError when this object holds no field list.
    """
    fields = self.list
    if fields is None:
        raise TypeError("not indexable")
    unique_names = {item.name for item in fields}
    return list(unique_names)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000648
def __contains__(self, key):
    """Dictionary style __contains__ method.

    Raise TypeError when this object holds no field list.
    """
    if self.list is None:
        raise TypeError("not indexable")
    for item in self.list:
        if item.name == key:
            return True
    return False
Raymond Hettinger54f02222002-06-01 14:18:47 +0000654
def __len__(self):
    """Dictionary style len(x) support: the number of distinct names."""
    names = self.keys()
    return len(names)
Guido van Rossum88b85d41997-01-11 19:21:33 +0000658
def __bool__(self):
    """Return True when the field list is non-empty.

    Raise TypeError when this object holds no field list at all.
    """
    fields = self.list
    if fields is None:
        raise TypeError("Cannot be converted to bool.")
    return bool(fields)
663
def read_urlencoded(self):
    """Internal: read data in query string format.

    The body is read from ``self.fp`` as bytes, decoded, combined with
    any query string stored in ``self.qs_on_post`` and parsed into
    ``self.list``.
    """
    raw = self.fp.read(self.length)
    if not isinstance(raw, bytes):
        raise ValueError("%s should return bytes, got %s" \
                         % (self.fp, type(raw).__name__))
    text = raw.decode(self.encoding, self.errors)
    if self.qs_on_post:
        text += '&' + self.qs_on_post
    # Reset the list before parsing so a parse error leaves it empty.
    self.list = []
    pairs = urllib.parse.parse_qsl(
        text, self.keep_blank_values, self.strict_parsing,
        encoding=self.encoding, errors=self.errors)
    self.list.extend(MiniFieldStorage(key, value) for key, value in pairs)
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000680
# Class used to build nested parts; when it is None (or otherwise falsy)
# read_multi() falls back to the class of the current instance.
FieldStorageClass = None

def read_multi(self, environ, keep_blank_values, strict_parsing):
    """Internal: read a part that is itself multipart.

    Each nested part is parsed into its own storage object and appended
    to ``self.list``; fields from ``self.qs_on_post`` are added first.
    Raises ValueError for an invalid inner boundary or when ``self.fp``
    does not return bytes.
    """
    ib = self.innerboundary
    if not valid_boundary(ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
    self.list = []
    if self.qs_on_post:
        query = urllib.parse.parse_qsl(
            self.qs_on_post, self.keep_blank_values, self.strict_parsing,
            encoding=self.encoding, errors=self.errors)
        for key, value in query:
            self.list.append(MiniFieldStorage(key, value))

    klass = self.FieldStorageClass or self.__class__
    first_line = self.fp.readline() # bytes
    if not isinstance(first_line, bytes):
        raise ValueError("%s should return bytes, got %s" \
                         % (self.fp, type(first_line).__name__))
    self.bytes_read += len(first_line)

    # Ensure that we consume the file until we've hit our inner boundary
    while (first_line.strip() != (b"--" + self.innerboundary) and
            first_line):
        first_line = self.fp.readline()
        self.bytes_read += len(first_line)

    while True:
        parser = FeedParser()
        hdr_text = b""
        # Collect the part headers up to and including the blank line.
        while True:
            data = self.fp.readline()
            hdr_text += data
            if not data.strip():
                break
        if not hdr_text:
            break
        # parser takes strings, not bytes
        self.bytes_read += len(hdr_text)
        parser.feed(hdr_text.decode(self.encoding, self.errors))
        headers = parser.close()

        # Some clients add Content-Length for part headers, ignore them
        if 'content-length' in headers:
            del headers['content-length']

        # self.limit may be None (no limit recorded); only compute a
        # remaining budget when there is a number to subtract from.
        if self.limit is None:
            limit = None
        else:
            limit = self.limit - self.bytes_read
        part = klass(self.fp, headers, ib, environ, keep_blank_values,
                     strict_parsing, limit,
                     self.encoding, self.errors)
        self.bytes_read += part.bytes_read
        self.list.append(part)
        if part.done or self.bytes_read >= self.length > 0:
            break
    self.skip_lines()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000736
def read_single(self):
    """Internal: read an atomic part.

    A known (non-negative) length is read as binary data followed by a
    skip to the next boundary; otherwise the part is read line by line.
    The resulting file is rewound afterwards.
    """
    if self.length < 0:
        self.read_lines()
    else:
        self.read_binary()
        self.skip_lines()
    self.file.seek(0)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000745
bufsize = 8*1024            # I/O buffering size for copy to file

def read_binary(self):
    """Internal: read binary data.

    Copies up to ``self.length`` bytes from ``self.fp`` into a fresh
    file from make_file(), in chunks of at most ``self.bufsize`` bytes.
    ``self.done`` is set to -1 if the input ends early.
    """
    self.file = self.make_file()
    remaining = self.length
    # A negative length never enters the loop, so nothing is copied.
    while remaining > 0:
        chunk = self.fp.read(min(remaining, self.bufsize)) # bytes
        if not isinstance(chunk, bytes):
            raise ValueError("%s should return bytes, got %s"
                             % (self.fp, type(chunk).__name__))
        self.bytes_read += len(chunk)
        if not chunk:
            self.done = -1
            break
        self.file.write(chunk)
        remaining -= len(chunk)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000764
def read_lines(self):
    """Internal: read lines until EOF or outerboundary.

    Starts with an in-memory buffer: bytes for files, text for other
    fields.
    """
    buffer_type = BytesIO if self._binary_file else StringIO
    self.file = self.__file = buffer_type()
    reader = (self.read_lines_to_outerboundary if self.outerboundary
              else self.read_lines_to_eof)
    reader()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000775
def __write(self, line):
    """line is always bytes, not string

    While the data still lives in the in-memory buffer, a write that
    would take it past 1000 units first moves the buffered content
    into a file from make_file().
    """
    if self.__file is not None and self.__file.tell() + len(line) > 1000:
        self.file = self.make_file()
        self.file.write(self.__file.getvalue())
        self.__file = None
    if self._binary_file:
        # keep bytes
        payload = line
    else:
        # decode to string
        payload = line.decode(self.encoding, self.errors)
    self.file.write(payload)
Guido van Rossum52b8c292001-06-29 13:06:06 +0000790
def read_lines_to_eof(self):
    """Internal: read lines until EOF.

    Every line is passed on to the internal writer; ``self.done`` is
    set to -1 once the input is exhausted.
    """
    while True:
        chunk = self.fp.readline(1<<16) # bytes
        self.bytes_read += len(chunk)
        if chunk:
            self.__write(chunk)
            continue
        self.done = -1
        return
Guido van Rossum7aee3841996-03-07 18:00:44 +0000800
def read_lines_to_outerboundary(self):
    """Internal: read lines until outerboundary.

    Data is read as bytes: boundaries and line ends must be converted
    to bytes for comparisons.

    Reading stops at the next boundary line, at the closing boundary
    (``self.done`` is set to 1), at EOF (``self.done`` is set to -1) or
    once a non-negative ``self.limit`` has been consumed.  The line end
    that precedes a boundary is not written to the output.
    """
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    delim = b""
    last_line_lfend = True
    _read = 0
    while True:
        # A limit of None, or a negative one (the constructor stores -1
        # when no Content-Length header was given), means there is no
        # known limit; comparing blindly would raise TypeError for None
        # and stop before reading anything for -1.
        if self.limit is not None and 0 <= self.limit <= _read:
            break
        line = self.fp.readline(1<<16) # bytes
        self.bytes_read += len(line)
        _read += len(line)
        if not line:
            self.done = -1
            break
        if delim == b"\r":
            # Re-attach a "\r" that was split off the previous chunk.
            line = delim + line
            delim = b""
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.rstrip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        # The previous line end is written only now, once we know the
        # current line is not a boundary.
        odelim = delim
        if line.endswith(b"\r\n"):
            delim = b"\r\n"
            line = line[:-2]
            last_line_lfend = True
        elif line.endswith(b"\n"):
            delim = b"\n"
            line = line[:-1]
            last_line_lfend = True
        elif line.endswith(b"\r"):
            # We may interrupt \r\n sequences if they span the 2**16
            # byte boundary
            delim = b"\r"
            line = line[:-1]
            last_line_lfend = False
        else:
            delim = b""
            last_line_lfend = False
        self.__write(odelim + line)
Guido van Rossum7aee3841996-03-07 18:00:44 +0000849
def skip_lines(self):
    """Internal: skip lines until outer boundary if defined.

    Does nothing when there is no outer boundary or when ``self.done``
    is already set.  Otherwise lines are discarded until the next
    boundary line, the closing boundary (``self.done`` is set to 1) or
    EOF (``self.done`` is set to -1).
    """
    if not self.outerboundary or self.done:
        return
    next_boundary = b"--" + self.outerboundary
    last_boundary = next_boundary + b"--"
    last_line_lfend = True
    while True:
        line = self.fp.readline(1<<16)
        self.bytes_read += len(line)
        if not line:
            self.done = -1
            break
        # Boundary lines *start* with "--".  Testing the end of the
        # line can never match a boundary that is followed by a newline,
        # which made this loop swallow all following parts.
        if line.startswith(b"--") and last_line_lfend:
            strippedline = line.strip()
            if strippedline == next_boundary:
                break
            if strippedline == last_boundary:
                self.done = 1
                break
        last_line_lfend = line.endswith(b'\n')
Guido van Rossum7aee3841996-03-07 18:00:44 +0000871
def make_file(self):
    """Overridable: return a readable & writable file.

    The returned file is used in this order:
    - data is written to it
    - seek(0)
    - data is read from it

    Files get a binary-mode file, other fields a text-mode file that
    uses self.encoding.

    This implementation returns a tempfile.TemporaryFile, i.e. an
    anonymous temporary file that goes away once it is closed.

    Override this method in a derived class if you want a more
    permanent or a visible file; in that case the derived class is
    responsible for removing the files it created (for example from
    a __del__ method).

    """
    if not self._binary_file:
        return tempfile.TemporaryFile("w+",
            encoding=self.encoding, newline = '\n')
    return tempfile.TemporaryFile("wb+")
Tim Peters88869f92001-01-14 23:36:06 +0000901
Guido van Rossum243ddcd1996-03-07 06:33:07 +0000902
Guido van Rossum72755611996-03-06 07:20:06 +0000903# Test/debug code
904# ===============
Guido van Rossum9a22de11995-01-12 12:29:47 +0000905
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Write minimal HTTP headers and dump all information provided to
    the script in HTML form.

    The dump is done twice: once normally and once after setting the
    module-level maxlen to 50.
    """
    global maxlen

    print("Content-type: text/html")
    print()
    # Send error output to the same stream as the page itself.
    sys.stderr = sys.stdout

    # Catch everything on purpose: whatever goes wrong is reported as HTML.
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()

        # The exec() below fails on purpose to exercise print_exception().
        def f():
            exec("testing print_exception() -- <I>italics?</I>")

        def g(f=f):
            f()

        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
Guido van Rossumad164711997-05-13 19:03:23 +0000944
def print_exception(type=None, value=None, tb=None, limit=None):
    """Print an exception and its traceback as HTML.

    When *type* is None the exception currently being handled is used.
    The final line of the report is rendered in bold.
    """
    if type is None:
        type, value, tb = sys.exc_info()
    import traceback
    print()
    print("<H3>Traceback (most recent call last):</H3>")
    report = traceback.format_tb(tb, limit)
    report = report + traceback.format_exception_only(type, value)
    stack_html = html.escape("".join(report[:-1]))
    final_html = html.escape(report[-1])
    print("<PRE>%s<B>%s</B></PRE>" % (stack_html, final_html))
    del tb
Guido van Rossum9a22de11995-01-12 12:29:47 +0000958
def print_environ(environ=os.environ):
    """Dump the shell environment as HTML.

    Variables are listed in sorted order; names and values are escaped.
    """
    names = sorted(environ.keys())
    print()
    print("<H3>Shell Environment:</H3>")
    print("<DL>")
    for name in names:
        row = ("<DT>", html.escape(name), "<DD>", html.escape(environ[name]))
        print(" ".join(row))
    print("</DL>")
    print()
Guido van Rossum72755611996-03-06 07:20:06 +0000969
def print_form(form):
    """Dump the contents of a form as HTML.

    For every field name (in sorted order) the type and the repr of
    the stored object are shown, both escaped.
    """
    field_names = sorted(form.keys())
    print()
    print("<H3>Form Contents:</H3>")
    if not field_names:
        print("<P>No form fields.")
    print("<DL>")
    for field_name in field_names:
        print("<DT>%s:" % html.escape(field_name), end=' ')
        field = form[field_name]
        print("<i>%s</i>" % html.escape(repr(type(field))))
        print("<DD>%s" % html.escape(repr(field)))
    print("</DL>")
    print()
Guido van Rossum7aee3841996-03-07 18:00:44 +0000985
def print_directory():
    """Dump the current directory as HTML.

    If the directory cannot be determined, the OSError text is shown
    instead.
    """
    print()
    print("<H3>Current Working Directory:</H3>")
    try:
        cwd = os.getcwd()
    except OSError as err:
        report = ("OSError:", html.escape(str(err)))
    else:
        report = (html.escape(cwd),)
    print(*report)
    print()
Guido van Rossum9a22de11995-01-12 12:29:47 +0000997
def print_arguments():
    """Dump the command line arguments (sys.argv) under an HTML heading."""
    # Empty strings produce the same blank lines as bare print() calls.
    for chunk in ("", "<H3>Command Line Arguments:</H3>", "", sys.argv, ""):
        print(chunk)
Guido van Rossuma8738a51996-03-14 21:30:28 +00001004
def print_environ_usage():
    """Dump a list of environment variables used by CGI as HTML."""
    usage_html = """
<H3>These environment variables could have been set:</H3>
<UL>
<LI>AUTH_TYPE
<LI>CONTENT_LENGTH
<LI>CONTENT_TYPE
<LI>DATE_GMT
<LI>DATE_LOCAL
<LI>DOCUMENT_NAME
<LI>DOCUMENT_ROOT
<LI>DOCUMENT_URI
<LI>GATEWAY_INTERFACE
<LI>LAST_MODIFIED
<LI>PATH
<LI>PATH_INFO
<LI>PATH_TRANSLATED
<LI>QUERY_STRING
<LI>REMOTE_ADDR
<LI>REMOTE_HOST
<LI>REMOTE_IDENT
<LI>REMOTE_USER
<LI>REQUEST_METHOD
<LI>SCRIPT_NAME
<LI>SERVER_NAME
<LI>SERVER_PORT
<LI>SERVER_PROTOCOL
<LI>SERVER_ROOT
<LI>SERVER_SOFTWARE
</UL>
In addition, HTTP headers sent by the server may be passed in the
environment as well. Here are some common variable names:
<UL>
<LI>HTTP_ACCEPT
<LI>HTTP_CONNECTION
<LI>HTTP_HOST
<LI>HTTP_PRAGMA
<LI>HTTP_REFERER
<LI>HTTP_USER_AGENT
</UL>
"""
    print(usage_html)
Guido van Rossum9a22de11995-01-12 12:29:47 +00001047
Guido van Rossum9a22de11995-01-12 12:29:47 +00001048
Guido van Rossum72755611996-03-06 07:20:06 +00001049# Utilities
1050# =========
Guido van Rossum9a22de11995-01-12 12:29:47 +00001051
def escape(s, quote=None):
    """Deprecated API.

    Replace "&", "<" and ">" in *s* by HTML entities; the double quote
    is replaced as well when *quote* is true.  Always emits a
    DeprecationWarning.
    """
    warn("cgi.escape is deprecated, use html.escape instead",
         DeprecationWarning, stacklevel=2)
    # "&" must be done first, or the entities themselves get re-escaped.
    replacements = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    if quote:
        replacements.append(('"', "&quot;"))
    for raw, entity in replacements:
        s = s.replace(raw, entity)
    return s
Guido van Rossum9a22de11995-01-12 12:29:47 +00001062
Georg Brandl1f7fffb2010-10-15 15:57:45 +00001063
def valid_boundary(s):
    """Check *s* (str or bytes) against the multipart boundary pattern.

    Returns the match object when *s* consists of printable ASCII
    characters, is at most 201 characters long and does not end with a
    space; returns None otherwise.
    """
    import re
    # The pattern must have the same type (bytes or str) as the input.
    pattern = (b"^[ -~]{0,200}[!-~]$" if isinstance(s, bytes)
               else "^[ -~]{0,200}[!-~]$")
    return re.match(pattern, s)
Guido van Rossum9a22de11995-01-12 12:29:47 +00001071
Guido van Rossum72755611996-03-06 07:20:06 +00001072# Invoke mainline
1073# ===============
1074
1075# Call test() when this file is run as a script (not imported as a module)
# Run the self-test only when executed as a script, not on import.
if __name__ == "__main__":
    test()