Guido van Rossum | 9a22de1 | 1995-01-12 12:29:47 +0000 | [diff] [blame] | 1 | #!/usr/local/bin/python |
Guido van Rossum | 1c9daa8 | 1995-09-18 21:52:37 +0000 | [diff] [blame] | 2 | |
# XXX TODO
# - proper doc strings instead of this rambling dialogue style
# - more utilities, e.g.
#   - print_header(type="text/html", blankline=1) -- print MIME header
#   - utility to format a nice error message in HTML
#   - utility to format a Location: ... response, including HTML
#   - utility to catch errors and display traceback
| 10 | |
Guido van Rossum | 9a22de1 | 1995-01-12 12:29:47 +0000 | [diff] [blame] | 11 | # |
| 12 | # A class for wrapping the WWW Forms Common Gateway Interface (CGI) |
| 13 | # Michael McLay, NIST mclay@eeel.nist.gov 6/14/94 |
| 14 | # |
| 15 | # modified by Steve Majewski <sdm7g@Virginia.EDU> 12/5/94 |
| 16 | # |
Guido van Rossum | e780877 | 1995-08-07 20:12:09 +0000 | [diff] [blame] | 17 | # now maintained as part of the Python distribution |
Guido van Rossum | 9a22de1 | 1995-01-12 12:29:47 +0000 | [diff] [blame] | 18 | |
| 19 | # Several classes to parse the name/value pairs that are passed to |
| 20 | # a server's CGI by GET, POST or PUT methods by a WWW FORM. This |
| 21 | # module is based on Mike McLay's original cgi.py after discussing |
| 22 | # changes with him and others on the comp.lang.python newsgroup, and |
| 23 | # at the NIST Python workshop. |
| 24 | # |
| 25 | # The rationale for changes was: |
| 26 | # The original FormContent class was almost, but not quite like |
| 27 | # a dictionary object. Besides adding some extra access methods, |
| 28 | # it had a values() method with different arguments and semantics |
| 29 | # from the standard values() method of a mapping object. Also, |
| 30 | # it provided several different access methods that may be necessary |
| 31 | # or useful, but made it a little more confusing to figure out how |
| 32 | # to use. Also, we wanted to make the most typical cases the simplest |
# and most convenient access methods. (Most form fields just return
# a single value, and in practice, a lot of code was just assuming
# a single value and ignoring all others. On the other hand, the
# protocol allows multiple values to be returned.)
| 37 | # |
| 38 | # The new base class (FormContentDict) is just like a dictionary. |
| 39 | # In fact, if you just want a dictionary, all of the stuff that was |
| 40 | # in __init__ has been extracted into a cgi.parse() function that will |
| 41 | # return the "raw" dictionary, but having a class allows you to customize |
| 42 | # it further. |
| 43 | # Mike McLay's original FormContent class is reimplemented as a |
| 44 | # subclass of FormContentDict. |
| 45 | # There are two additional sub-classes, but I'm not yet too sure |
| 46 | # whether they are what I want. |
| 47 | # |
| 48 | |
| 49 | import string,regsub,sys,os,urllib |
| 50 | # since os.environ may often be used in cgi code, we name it in this module. |
| 51 | from os import environ |
| 52 | |
| 53 | |
def parse():
    """Parse the query passed in the environment or on stdin.

    For a POST request the query is read from sys.stdin, using the
    CONTENT_LENGTH environment variable as the amount to read.  For any
    other request the QUERY_STRING environment variable is used, or the
    empty string when it is not set.

    Side effect: environ['QUERY_STRING'] is always left holding the
    string that was parsed, so callers can inspect it afterwards.

    Returns the dictionary built by parse_qs().
    Raises ValueError if CONTENT_LENGTH is set but is not an integer.
    """
    # A missing REQUEST_METHOD (e.g. when the script is run by hand) is
    # treated like a non-POST request instead of raising KeyError.
    if environ.get('REQUEST_METHOD') == 'POST':
        # A missing or empty CONTENT_LENGTH means there is no body to read.
        length = int(environ.get('CONTENT_LENGTH') or 0)
        qs = sys.stdin.read(length)
    else:
        qs = environ.get('QUERY_STRING', '')
    environ['QUERY_STRING'] = qs
    return parse_qs(qs)
| 64 | |
| 65 | |
def parse_qs(qs):
    """Parse a query given as a string argument.

    qs is a URL-encoded query string of the form 'name=value&name=value'.

    Returns a dictionary mapping each decoded field name to the list of
    its decoded values, in the order they appear in qs.  Pairs without an
    '=' and pairs whose value is empty are skipped.
    """
    # Imported here so the function works whether unquote lives in
    # urllib.parse (Python 3) or directly in urllib (Python 2).
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    result = {}
    for name_value in qs.split('&'):
        # Split on the first '=' only, so a value that itself contains
        # '=' (e.g. base64 padding) is kept instead of being discarded.
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            continue
        # Names are encoded the same way as values, so decode both.
        name = unquote(nv[0].replace('+', ' '))
        value = unquote(nv[1].replace('+', ' '))
        if value:
            result.setdefault(name, []).append(value)
    return result
| 82 | |
| 83 | |
| 84 | |
| 85 | # The FormContent constructor creates a dictionary from the name/value pairs |
| 86 | # passed through the CGI interface. |
| 87 | |
| 88 | |
| 89 | # |
| 90 | # form['key'] |
| 91 | # form.__getitem__('key') |
| 92 | # form.has_key('key') |
| 93 | # form.keys() |
| 94 | # form.values() |
| 95 | # form.items() |
| 96 | # form.dict |
| 97 | |
class FormContentDict:
    """Dictionary-like, read-only wrapper around the parsed form.

    Attributes set by the constructor:
      dict          the mapping returned by parse()
      query_string  the value of environ['QUERY_STRING'] after parse()

    Supports form[key], len(form), key in form, iteration over keys,
    and the mapping methods keys(), has_key(), values() and items().
    """

    def __init__(self):
        self.dict = parse()
        self.query_string = environ['QUERY_STRING']

    def __getitem__(self, key):
        """Return the value stored for key; raise KeyError if absent."""
        return self.dict[key]

    def __contains__(self, key):
        """Support 'key in form'.

        Without this, the 'in' operator falls back to calling
        __getitem__ with 0, 1, 2, ... and fails with KeyError.
        """
        return key in self.dict

    def __iter__(self):
        """Iterate over the field names, like a dictionary does."""
        return iter(self.dict)

    def __len__(self):
        """Return the number of distinct field names."""
        return len(self.dict)

    def keys(self):
        """Return a list of the field names."""
        # A real list, so callers may sort it in place.
        return list(self.dict.keys())

    def has_key(self, key):
        """Return true if the form contains a field named key."""
        return key in self.dict

    def values(self):
        """Return a list of the stored values, one entry per field."""
        return list(self.dict.values())

    def items(self):
        """Return a list of (name, value) pairs."""
        return list(self.dict.items())
| 114 | |
| 115 | |
# This is the "strict" single-value expecting version.
# If you only expect a single value for each field, then form[key]
# will return that single value (the [0]-th), and raise an
# IndexError if that expectation is not true.
# If you expect a field to have possibly multiple values, then you
# can use form.getlist(key) to get all of the values.
# values() and items() are a compromise: they return single strings
# where there is a single value, and lists of strings otherwise.
| 124 | |
class SvFormContentDict(FormContentDict):
    """Form wrapper for callers that expect one value per field.

    form[key] returns the single value stored for key and raises
    IndexError when more than one value is stored.  getlist(key) always
    returns the full list.  values() and items() unwrap one-element
    lists and leave every other list untouched.
    """

    def __getitem__(self, key):
        """Return the only value for key; raise IndexError if several."""
        found = self.dict[key]
        if len(found) > 1:
            raise IndexError('expecting a single value')
        return found[0]

    def getlist(self, key):
        """Return the list of all values stored for key."""
        return self.dict[key]

    def values(self):
        """Return the stored values, unwrapping one-element lists."""
        return [found[0] if len(found) == 1 else found
                for found in self.dict.values()]

    def items(self):
        """Return (name, value) pairs, unwrapping one-element lists."""
        return [(name, found[0] if len(found) == 1 else found)
                for name, found in self.dict.items()]
| 146 | |
| 147 | |
# And this sub-class is similar to the above, but it will attempt to
# interpret numerical values. This is here mostly as an example,
# but I think the real way to handle typed data from a form may be
# to make an additional table-driven parsing stage that has a table
# of allowed input patterns and the output conversion types - it
# would signal type errors on parse, not on access.
class InterpFormContentDict(SvFormContentDict):
    """Single-value form wrapper that converts numeric-looking values.

    form[key] returns an int when the stripped value parses as an
    integer, a float when it parses as a floating point number, and the
    stripped string otherwise.  values() and items() apply the same
    conversion, falling back to the raw list of strings for fields that
    hold more than one value.
    """

    def __getitem__(self, key):
        """Return the single value for key, converted when numeric.

        Raises IndexError (from SvFormContentDict) if key holds more
        than one value.
        """
        # Strip before looking at the first character, so surrounding
        # whitespace never decides whether a value counts as a number.
        text = SvFormContentDict.__getitem__(self, key).strip()
        if text and text[0] in string.digits + '+-.':
            try:
                return int(text)
            except ValueError:
                try:
                    return float(text)
                except ValueError:
                    pass  # looks numeric but is not; return it as text
        return text

    def _value_or_list(self, key):
        """Return self[key], or the raw list when key is multi-valued."""
        try:
            return self[key]
        except IndexError:
            return self.dict[key]

    def values(self):
        """Return the converted value (or raw list) for every field."""
        return [self._value_or_list(key) for key in self.keys()]

    def items(self):
        """Return (name, converted value or raw list) for every field."""
        return [(key, self._value_or_list(key)) for key in self.keys()]
| 179 | |
| 180 | |
| 181 | # class FormContent parses the name/value pairs that are passed to a |
| 182 | # server's CGI by GET, POST, or PUT methods by a WWW FORM. several |
| 183 | # specialized FormContent dictionary access methods have been added |
| 184 | # for convenience. |
| 185 | |
# function                          return value
#
# form.keys()                       all keys in dictionary
# form.has_key('key')               test key's existence
# form[key]                         returns list associated with key
# form.values('key')                key's list (same as form[key])
# form.indexed_value('key', index)  nth element in key's value list
# form.value(key)                   key's unstripped value
# form.length(key)                  number of elements in key's list
# form.stripped(key)                key's value with whitespace stripped
# form.pars()                       full dictionary
| 197 | |
| 198 | |
| 199 | |
class FormContent(FormContentDict):
    """Form wrapper with the original FormContent access methods.

    Apart from length(), every accessor returns None instead of raising
    when the requested field is not present in the form.
    """

    # This is the original FormContent semantics of values,
    # not the dictionary like semantics.
    def values(self, key):
        """Return the list of values for key, or None if key is absent."""
        if key not in self.dict:
            return None
        return self.dict[key]

    def indexed_value(self, key, location):
        """Return the value at index location in key's list.

        Returns None when key is absent or location is out of range in
        either direction; a negative location counts from the end.
        """
        if key not in self.dict:
            return None
        try:
            return self.dict[key][location]
        except IndexError:
            # Covers indexes that are too large and too negative alike.
            return None

    def value(self, key):
        """Return the first value for key unstripped, or None if absent."""
        if key not in self.dict:
            return None
        return self.dict[key][0]

    def length(self, key):
        """Return the number of values for key; raise KeyError if absent."""
        return len(self.dict[key])

    def stripped(self, key):
        """Return the first value for key with surrounding whitespace
        removed, or None if key is absent."""
        if key not in self.dict:
            return None
        return self.dict[key][0].strip()

    def pars(self):
        """Return the underlying dictionary itself (not a copy)."""
        return self.dict
| 222 | |
| 223 | |
| 224 | |
| 225 | |
| 226 | |
| 227 | |
def print_environ_usage():
    """Print an HTML list naming the environment variables that could
    have been set for the script."""
    usage = """
<H3>These operating system environment variables could have been
set:</H3> <UL>
<LI>AUTH_TYPE
<LI>CONTENT_LENGTH
<LI>CONTENT_TYPE
<LI>DATE_GMT
<LI>DATE_LOCAL
<LI>DOCUMENT_NAME
<LI>DOCUMENT_ROOT
<LI>DOCUMENT_URI
<LI>GATEWAY_INTERFACE
<LI>LAST_MODIFIED
<LI>PATH
<LI>PATH_INFO
<LI>PATH_TRANSLATED
<LI>QUERY_STRING
<LI>REMOTE_ADDR
<LI>REMOTE_HOST
<LI>REMOTE_IDENT
<LI>REMOTE_USER
<LI>REQUEST_METHOD
<LI>SCRIPT_NAME
<LI>SERVER_NAME
<LI>SERVER_PORT
<LI>SERVER_PROTOCOL
<LI>SERVER_ROOT
<LI>SERVER_SOFTWARE
</UL>
"""
    print(usage)
| 259 | |
def print_environ():
    """Print every environment variable, sorted by name, as an HTML
    definition list with names and values passed through escape()."""
    names = sorted(environ.keys())
    print('<h3> The following environment variables '
          'were set by the CGI script: </h3>')
    print('<dl>')
    for name in names:
        print('<dt> %s <dd> %s' % (escape(name), escape(environ[name])))
    print('</dl>')
| 269 | |
def print_form(form):
    """Print the fields of form, sorted by name, as an HTML definition
    list showing each field's name, the repr of its value's type, and
    the repr of the value, all passed through escape()."""
    names = sorted(form.keys())
    print('<h3> The following name/value pairs '
          'were entered in the form: </h3>')
    print('<dl>')
    emit = sys.stdout.write
    for name in names:
        # Written piece by piece so that whatever precedes a failing
        # form[name] lookup has already reached the output.
        emit('<dt> %s : ' % escape(name))
        emit('<i> %s </i> ' % escape(repr(type(form[name]))))
        emit('<dd> %s\n' % escape(repr(form[name])))
    print('</dl>')
| 281 | |
def escape(s):
    """Return s with the HTML special characters replaced by entities.

    '&' becomes '&amp;', '<' becomes '&lt;' and '>' becomes '&gt;', so
    the result can be embedded in HTML text without being interpreted
    as markup.  Quote characters are left unchanged.
    """
    s = s.replace('&', '&amp;')  # Must be done first
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    return s
Guido van Rossum | 9a22de1 | 1995-01-12 12:29:47 +0000 | [diff] [blame] | 287 | |
def test(what):
    """Print a complete HTML test page for one form class.

    what is called with no arguments to build the form object.  The
    page shows the escaped str() of what as its title and heading,
    followed by the output of print_form(), print_environ() and
    print_environ_usage().
    """
    label = escape(str(what))
    print('Content-type: text/html\n\n')
    # The document head element is <HEAD>; <HEADER> is not the head, so
    # the title inside it would not be treated as the page title.
    print('<HEAD>\n<TITLE>' + label + '</TITLE>\n</HEAD>\n')
    print('<BODY>\n')
    print("<H1>" + label + "</H1>\n")
    form = what()
    print_form(form)
    print_environ()
    print_environ_usage()
    print('</body>')
| 299 | |
if __name__ == '__main__':
    # By default, test compatibility with the old version (FormContent);
    # change `selected` to exercise one of the other classes.
    test_classes = (
        FormContent,
        FormContentDict,
        SvFormContentDict,
        InterpFormContentDict,
    )
    selected = 0
    test(test_classes[selected])