Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 1 | """Guess the MIME type of a file. |
| 2 | |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 3 | This module defines two useful functions: |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 4 | |
Barry Warsaw | 107771a | 2001-10-25 21:49:18 +0000 | [diff] [blame] | 5 | guess_type(url, strict=1) -- guess the MIME type and encoding of a URL. |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 6 | |
Barry Warsaw | 107771a | 2001-10-25 21:49:18 +0000 | [diff] [blame] | 7 | guess_extension(type, strict=1) -- guess the extension for a given MIME type. |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 8 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 9 | It also contains the following, for tuning the behavior: |
| 10 | |
| 11 | Data: |
| 12 | |
| 13 | knownfiles -- list of files to parse |
| 14 | inited -- flag set when init() has been called |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 15 | suffix_map -- dictionary mapping suffixes to suffixes |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 16 | encodings_map -- dictionary mapping suffixes to encodings |
| 17 | types_map -- dictionary mapping suffixes to types |
| 18 | |
| 19 | Functions: |
| 20 | |
| 21 | init([files]) -- parse a list of files, default knownfiles |
| 22 | read_mime_types(file) -- parse one file, return a dictionary or None |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 23 | """ |
| 24 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 25 | import os |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 26 | import posixpath |
Guido van Rossum | 1c5fb1c | 1998-10-12 15:12:28 +0000 | [diff] [blame] | 27 | import urllib |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 28 | |
# Names exported by ``from mimetypes import *``.
__all__ = [
    "guess_type",
    "guess_extension",
    "read_mime_types",
    "init",
]
| 30 | |
# Candidate mime.types files.  init() parses the ones that exist, in list
# order, so an entry read from a later file overrides the same suffix read
# from an earlier one.  Each path is listed once: the Apache 1.2 location
# used to appear both before and after the Netscape file, which only caused
# it to be parsed twice (the later occurrence always won).
knownfiles = [
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Flag set to 1 by init() once it has been called.
inited = 0
| 39 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 40 | |
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Every instance starts out with copies of the module-level tables
    (encodings_map, suffix_map, types_map and common_types) and then
    layers the contents of any files it is asked to read on top.
    """

    def __init__(self, filenames=()):
        """Copy the module-level tables, then read each file in `filenames'.

        The module-level init() is run first if it has not been called yet.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = types_map.copy()
        self.common_types = common_types.copy()
        for name in filenames:
            self.read(name)

    def guess_type(self, url, strict=1):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                mediatype = url[:semi]
            else:
                mediatype = url[:comma]
            # A leading parameter ("charset=...") or a missing "/" means no
            # explicit type/subtype was given, so the default applies.
            if '=' in mediatype or '/' not in mediatype:
                mediatype = 'text/plain'
            return mediatype, None  # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. .tgz -> .tar.gz) until none is left.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        # Peel off a trailing encoding suffix (.gz, .Z) if there is one.
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        # The standard table is always consulted; the table of non-standard
        # types is only a fallback when the caller is not strict.
        tables = [self.types_map]
        if not strict:
            tables.append(self.common_types)
        lowered = ext.lower()
        for table in tables:
            if ext in table:
                return table[ext], encoding
            if lowered in table:
                return table[lowered], encoding
        return None, encoding

    def guess_extension(self, type, strict=1):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.  Those are taken from this instance's own
        common_types table, the same one guess_type() consults.
        """
        wanted = type.lower()
        for ext, stype in self.types_map.items():
            if wanted == stype:
                return ext
        if not strict:
            for ext, stype in self.common_types.items():
                if wanted == stype:
                    return ext
        return None

    def read(self, filename):
        """Read a single mime.types-format file, specified by pathname.

        Errors from open() propagate to the caller.  The file is closed
        even if parsing raises.
        """
        fp = open(filename)
        try:
            self.readfp(fp)
        finally:
            fp.close()

    def readfp(self, fp):
        """Read a single mime.types-format file.

        `fp' only needs a readline() method.  Each line holds a MIME type
        followed by zero or more suffixes (written without the leading
        dot); a word starting with '#' begins a comment that runs to the
        end of the line.  Every suffix found is stored in self.types_map,
        replacing any previous entry for that suffix.
        """
        table = self.types_map
        while 1:
            line = fp.readline()
            if not line:
                break
            # Keep the words that precede the first comment marker.
            words = []
            for word in line.split():
                if word[0] == '#':
                    break
                words.append(word)
            if not words:
                continue
            mimetype, suffixes = words[0], words[1:]
            for suff in suffixes:
                table['.' + suff] = mimetype
| 167 | |
| 168 | |
def guess_type(url, strict=1):
    """Guess the MIME type and encoding of a file from its URL.

    Returns a (type, encoding) tuple.  `type' is a 'type/subtype' string
    suitable for a MIME Content-type header, or None when the suffix is
    missing or unknown.  `encoding' is the name of the program used to
    encode the data (e.g. compress or gzip), or None when no encoding
    suffix is present.  All mappings are table driven.  Encoding suffixes
    are matched case sensitively; type suffixes are tried case
    sensitively first and then case insensitively.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all treated as
    ".tar.gz"; this is driven by the suffix_map dictionary.

    When the optional `strict' argument is false, a number of commonly
    found but non-standard types are considered as well.
    """
    # init() rebinds the module-global name guess_type to a bound method of
    # the database it builds, so the global lookup below must happen after
    # the call; it then resolves to that method instead of this function.
    init()
    lookup = guess_type
    return lookup(url, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 189 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 190 | |
def guess_extension(type, strict=1):
    """Guess a filename extension for the MIME type `type'.

    Returns the extension as a string including its leading dot ('.'),
    or None when nothing can be guessed.  The extension is not
    guaranteed to have been associated with any particular data stream,
    but guess_type() would map it back to `type'.

    When the optional `strict' argument is false, a number of commonly
    found but non-standard types are considered as well.
    """
    # init() rebinds the module-global name guess_extension to a bound
    # method of the database it builds, so the global lookup below must
    # happen after the call.
    init()
    lookup = guess_extension
    return lookup(type, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 205 | |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 206 | |
def init(files=None):
    """Build the default database and publish it at module level.

    `files' is a sequence of mime.types-format pathnames; when it is None
    the module-level knownfiles list is used.  Entries that are not
    existing regular files are skipped.

    A MimeTypes instance is created, the files are read into it, and the
    module globals encodings_map, suffix_map, types_map and common_types
    are rebound to that instance's tables.  The module-level names
    guess_type and guess_extension are rebound to its bound methods.
    """
    global guess_extension, guess_type
    global suffix_map, types_map, encodings_map, common_types
    global inited
    # The flag has to be set before MimeTypes() is created: its __init__
    # calls init() whenever the flag is still false.
    inited = 1
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for filename in files:
        if os.path.isfile(filename):
            # read() closes the file once it has been parsed.
            db.read(filename)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map
    guess_extension = db.guess_extension
    guess_type = db.guess_type
    common_types = db.common_types
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 224 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 225 | |
def read_mime_types(file):
    """Parse one mime.types-format file and return a suffix-to-type mapping.

    `file' is the pathname of the file to read.  If it cannot be opened
    (IOError), None is returned.  Otherwise the file is read into a new
    MimeTypes instance and that instance's types_map dictionary is
    returned; the file is closed before returning, even if parsing fails.
    """
    try:
        f = open(file)
    except IOError:
        return None
    try:
        db = MimeTypes()
        db.readfp(f)
    finally:
        f.close()
    return db.types_map
| 234 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 235 | |
# Shorthand suffixes and the full suffix chain each one stands for.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}

# Suffixes that denote an encoding, mapped to the name of that encoding.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
}
| 246 | |
# Before adding new types, make sure they are either registered with IANA, at
# https://www.iana.org/assignments/media-types/
# or extensions, i.e. using the x- prefix

# If you add to these, please keep them sorted!
#
# Each suffix must appear exactly once: in a dict display a repeated key
# silently keeps only the last value.  Two suffixes used to be listed twice;
# the value that was being discarded is recorded next to the surviving entry.
types_map = {
    '.a'      : 'application/octet-stream',
    '.ai'     : 'application/postscript',
    '.aif'    : 'audio/x-aiff',
    '.aifc'   : 'audio/x-aiff',
    '.aiff'   : 'audio/x-aiff',
    '.au'     : 'audio/basic',
    '.avi'    : 'video/x-msvideo',
    '.bat'    : 'text/plain',
    '.bcpio'  : 'application/x-bcpio',
    '.bin'    : 'application/octet-stream',
    '.bmp'    : 'image/x-ms-bmp',
    '.c'      : 'text/plain',
    # Also seen as 'application/x-cdf'; that earlier duplicate never took
    # effect.
    '.cdf'    : 'application/x-netcdf',
    '.cpio'   : 'application/x-cpio',
    '.csh'    : 'application/x-csh',
    '.css'    : 'text/css',
    '.dll'    : 'application/octet-stream',
    '.doc'    : 'application/msword',
    '.dot'    : 'application/msword',
    '.dvi'    : 'application/x-dvi',
    '.eml'    : 'message/rfc822',
    '.eps'    : 'application/postscript',
    '.etx'    : 'text/x-setext',
    '.exe'    : 'application/octet-stream',
    '.gif'    : 'image/gif',
    '.gtar'   : 'application/x-gtar',
    '.h'      : 'text/plain',
    '.hdf'    : 'application/x-hdf',
    '.htm'    : 'text/html',
    '.html'   : 'text/html',
    '.ief'    : 'image/ief',
    '.jpe'    : 'image/jpeg',
    '.jpeg'   : 'image/jpeg',
    '.jpg'    : 'image/jpeg',
    '.js'     : 'application/x-javascript',
    '.ksh'    : 'text/plain',
    '.latex'  : 'application/x-latex',
    '.m1v'    : 'video/mpeg',
    '.man'    : 'application/x-troff-man',
    '.me'     : 'application/x-troff-me',
    '.mht'    : 'message/rfc822',
    '.mhtml'  : 'message/rfc822',
    '.mif'    : 'application/x-mif',
    '.mov'    : 'video/quicktime',
    '.movie'  : 'video/x-sgi-movie',
    '.mp2'    : 'audio/mpeg',
    '.mp3'    : 'audio/mpeg',
    '.mpa'    : 'video/mpeg',
    '.mpe'    : 'video/mpeg',
    '.mpeg'   : 'video/mpeg',
    '.mpg'    : 'video/mpeg',
    '.ms'     : 'application/x-troff-ms',
    '.nc'     : 'application/x-netcdf',
    '.nws'    : 'message/rfc822',
    '.o'      : 'application/octet-stream',
    '.obj'    : 'application/octet-stream',
    '.oda'    : 'application/oda',
    '.p12'    : 'application/x-pkcs12',
    '.p7c'    : 'application/pkcs7-mime',
    '.pbm'    : 'image/x-portable-bitmap',
    '.pdf'    : 'application/pdf',
    '.pfx'    : 'application/x-pkcs12',
    '.pgm'    : 'image/x-portable-graymap',
    '.pl'     : 'text/plain',
    '.png'    : 'image/png',
    '.pnm'    : 'image/x-portable-anymap',
    '.pot'    : 'application/vnd.ms-powerpoint',
    '.ppa'    : 'application/vnd.ms-powerpoint',
    '.ppm'    : 'image/x-portable-pixmap',
    '.pps'    : 'application/vnd.ms-powerpoint',
    '.ppt'    : 'application/vnd.ms-powerpoint',
    '.ps'     : 'application/postscript',
    '.pwz'    : 'application/vnd.ms-powerpoint',
    '.py'     : 'text/x-python',
    '.pyc'    : 'application/x-python-code',
    '.pyo'    : 'application/x-python-code',
    '.qt'     : 'video/quicktime',
    '.ra'     : 'audio/x-pn-realaudio',
    '.ram'    : 'application/x-pn-realaudio',
    '.ras'    : 'image/x-cmu-raster',
    '.rdf'    : 'application/xml',
    '.rgb'    : 'image/x-rgb',
    '.roff'   : 'application/x-troff',
    '.rtx'    : 'text/richtext',
    '.sgm'    : 'text/x-sgml',
    '.sgml'   : 'text/x-sgml',
    '.sh'     : 'application/x-sh',
    '.shar'   : 'application/x-shar',
    '.snd'    : 'audio/basic',
    '.so'     : 'application/octet-stream',
    '.src'    : 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc' : 'application/x-sv4crc',
    '.t'      : 'application/x-troff',
    '.tar'    : 'application/x-tar',
    '.tcl'    : 'application/x-tcl',
    '.tex'    : 'application/x-tex',
    '.texi'   : 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif'    : 'image/tiff',
    '.tiff'   : 'image/tiff',
    '.tr'     : 'application/x-troff',
    '.tsv'    : 'text/tab-separated-values',
    '.txt'    : 'text/plain',
    '.ustar'  : 'application/x-ustar',
    '.vcf'    : 'text/x-vcard',
    '.wav'    : 'audio/x-wav',
    '.wiz'    : 'application/msword',
    '.xbm'    : 'image/x-xbitmap',
    '.xlb'    : 'application/vnd.ms-excel',
    # Also seen as 'application/excel'; that earlier duplicate never took
    # effect.
    '.xls'    : 'application/vnd.ms-excel',
    '.xml'    : 'text/xml',
    '.xpm'    : 'image/x-xpixmap',
    '.xsl'    : 'application/xml',
    '.xwd'    : 'image/x-xwindowdump',
    '.zip'    : 'application/zip',
    }
Eric S. Raymond | 51cc3bc | 2001-02-09 09:44:47 +0000 | [diff] [blame] | 374 | |
# Non-standard types that are nevertheless commonly found in the wild.
# They are only consulted when the API is called with strict=0.

# Please keep these sorted as well.
common_types = {
    '.jpg': 'image/jpg',
    '.mid': 'audio/midi',
    '.midi': 'audio/midi',
    '.pct': 'image/pict',
    '.pic': 'image/pict',
    '.pict': 'image/pict',
    '.rtf': 'application/rtf',
    '.xul': 'text/xul',
}
| 389 | |
| 390 | |
Eric S. Raymond | 51cc3bc | 2001-02-09 09:44:47 +0000 | [diff] [blame] | 391 | if __name__ == '__main__': |
| 392 | import sys |
Barry Warsaw | 107771a | 2001-10-25 21:49:18 +0000 | [diff] [blame] | 393 | import getopt |
| 394 | |
Fred Drake | 698da02 | 2001-12-05 15:58:29 +0000 | [diff] [blame] | 395 | USAGE = """\ |
| 396 | Usage: mimetypes.py [options] type |
| 397 | |
| 398 | Options: |
| 399 | --help / -h -- print this message and exit |
| 400 | --lenient / -l -- additionally search of some common, but non-standard |
| 401 | types. |
| 402 | --extension / -e -- guess extension instead of type |
| 403 | |
| 404 | More than one type argument may be given. |
| 405 | """ |
| 406 | |
| 407 | def usage(code, msg=''): |
| 408 | print USAGE |
| 409 | if msg: print msg |
| 410 | sys.exit(code) |
| 411 | |
Barry Warsaw | 107771a | 2001-10-25 21:49:18 +0000 | [diff] [blame] | 412 | try: |
| 413 | opts, args = getopt.getopt(sys.argv[1:], 'hle', |
| 414 | ['help', 'lenient', 'extension']) |
| 415 | except getopt.error, msg: |
| 416 | usage(1, msg) |
| 417 | |
| 418 | strict = 1 |
| 419 | extension = 0 |
| 420 | for opt, arg in opts: |
| 421 | if opt in ('-h', '--help'): |
| 422 | usage(0) |
| 423 | elif opt in ('-l', '--lenient'): |
| 424 | strict = 0 |
| 425 | elif opt in ('-e', '--extension'): |
| 426 | extension = 1 |
| 427 | for gtype in args: |
| 428 | if extension: |
| 429 | guess = guess_extension(gtype, strict) |
| 430 | if not guess: print "I don't know anything about type", gtype |
| 431 | else: print guess |
| 432 | else: |
| 433 | guess, encoding = guess_type(gtype, strict) |
| 434 | if not guess: print "I don't know anything about type", gtype |
| 435 | else: print 'type:', guess, 'encoding:', encoding |