Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 1 | """Guess the MIME type of a file. |
| 2 | |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 3 | This module defines two useful functions: |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 4 | |
Barry Warsaw | 107771a | 2001-10-25 21:49:18 +0000 | [diff] [blame] | 5 | guess_type(url, strict=1) -- guess the MIME type and encoding of a URL. |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 6 | |
Barry Warsaw | 107771a | 2001-10-25 21:49:18 +0000 | [diff] [blame] | 7 | guess_extension(type, strict=1) -- guess the extension for a given MIME type. |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 8 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 9 | It also contains the following, for tuning the behavior: |
| 10 | |
| 11 | Data: |
| 12 | |
| 13 | knownfiles -- list of files to parse |
| 14 | inited -- flag set when init() has been called |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 15 | suffix_map -- dictionary mapping suffixes to suffixes |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 16 | encodings_map -- dictionary mapping suffixes to encodings |
| 17 | types_map -- dictionary mapping suffixes to types |
| 18 | |
| 19 | Functions: |
| 20 | |
| 21 | init([files]) -- parse a list of files, default knownfiles |
| 22 | read_mime_types(file) -- parse one file, return a dictionary or None |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 23 | """ |
| 24 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 25 | import os |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 26 | import posixpath |
Guido van Rossum | 1c5fb1c | 1998-10-12 15:12:28 +0000 | [diff] [blame] | 27 | import urllib |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 28 | |
# Public names picked up by a star-import of this module.
__all__ = [
    "guess_type",
    "guess_extension",
    "guess_all_extensions",
    "add_type",
    "read_mime_types",
    "init",
    ]
Skip Montanaro | 03d9014 | 2001-01-25 15:29:22 +0000 | [diff] [blame] | 33 | |
# mime.types files that init() parses by default, in this order.  When two
# files give a type to the same suffix, the file listed later wins.  Each
# path appears only once so that no file is opened and parsed twice.
knownfiles = [
    "/etc/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Flag set to True by init() once it has been called.
inited = False
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 43 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 44 | |
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Instance attributes:

    encodings_map -- dictionary mapping suffixes to encodings
    suffix_map -- dictionary mapping suffixes to suffixes
    types_map -- pair of dictionaries mapping suffixes to types; index
                 False holds the non-standard types, index True the
                 standard ones
    types_map_inv -- pair of dictionaries mapping types to lists of
                     suffixes, indexed the same way
    """

    def __init__(self, filenames=(), strict=True):
        """Create a datastore seeded from the module-level tables.

        The module is initialized first if init() has not run yet.
        Every file named in `filenames' is then read with read(), and
        `strict' is passed on to it.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {}) # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for (ext, mimetype) in types_map.items():
            self.add_type(mimetype, ext, True)
        for (ext, mimetype) in common_types.items():
            self.add_type(mimetype, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.  The extension is
        not removed from the list kept for its previous type.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # The tables live in a (non-strict, strict) pair; normalize the
        # flag so that any true or false value selects the right one.
        strict = bool(strict)
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                mimetype = url[:semi]
            else:
                mimetype = url[:comma]
            if '=' in mimetype or '/' not in mimetype:
                mimetype = 'text/plain'
            return mimetype, None       # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        # Expand suffix aliases (e.g. '.tgz' -> '.tar.gz') until none is left.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        # An encoding suffix is peeled off so the type suffix under it shows.
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        # The standard table is always consulted first; the non-standard
        # one only takes part in a lenient lookup.
        tables = [self.types_map[True]]
        if not strict:
            tables.append(self.types_map[False])
        for table in tables:
            if ext in table:
                return table[ext], encoding
            lowered = ext.lower()
            if lowered in table:
                return table[lowered], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().  The
        list is empty if no extension is known for `type'; it is a new
        list that the caller may modify freely.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Work on a copy: appending the non-standard extensions to the
        # stored list would leak them into every later strict lookup.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        fp = open(filename)
        try:
            self.readfp(fp, strict)
        finally:
            # Close the file even when parsing fails part-way through.
            fp.close()

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' is an open file object; it is read with readline() until
        an empty string is returned and is not closed here.  On each
        line the first word is the type and the remaining words are
        suffixes written without the leading dot.  A word starting
        with '#' begins a comment that runs to the end of the line.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while 1:
            line = fp.readline()
            if not line:
                break
            # Keep the words in front of the first one that starts a comment.
            words = []
            for word in line.split():
                if word[0] == '#':
                    break
                words.append(word)
            if not words:
                continue
            mimetype, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(mimetype, '.' + suff, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 217 | |
def guess_type(url, strict=True):
    """Guess the MIME type and encoding of a file from its URL.

    The result is a tuple (type, encoding).  The type is a string of
    the form type/subtype, usable for a MIME Content-type header, or
    None when it can't be guessed (no or unknown suffix).  The encoding
    is the name of the program used to encode (e.g. compress or gzip),
    or None for no encoding.  The mappings are table driven.  Encoding
    suffixes are case sensitive; type suffixes are first tried case
    sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    # init() rebinds the module-level name guess_type to the bound method
    # of the database it builds, so the call below reaches that method and
    # not this function again.  Code that kept a direct reference to this
    # function runs init() on every call.
    init()
    return guess_type(url, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 238 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 239 | |
def guess_all_extensions(type, strict=True):
    """Guess all the extensions for a file from its MIME type.

    The result is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  An extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  The list is empty if no extension can be guessed
    for `type'.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # init() rebinds the module-level name guess_all_extensions to the bound
    # method of the database it builds, so the call below reaches that
    # method and not this function again.
    init()
    return guess_all_extensions(type, strict)
| 255 | |
def guess_extension(type, strict=True):
    """Guess one extension for a file from its MIME type.

    The result is a string giving a filename extension, including the
    leading dot ('.'), or None if no extension can be guessed for
    `type'.  The extension is not guaranteed to have been associated
    with any particular data stream, but would be mapped to the MIME
    type `type' by guess_type().

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # init() rebinds the module-level name guess_extension to the bound
    # method of the database it builds, so the call below reaches that
    # method and not this function again.
    init()
    return guess_extension(type, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 270 | |
def add_type(type, ext, strict=True):
    """Register a mapping between a MIME type and an extension.

    An extension that is already known gets the new type in
    place of the old one.  A type that is already known gets
    the extension added to its list of known extensions.

    If strict is true, information will be added to
    list of standard types, else to the list of non-standard
    types.
    """
    # init() rebinds the module-level name add_type to the bound method of
    # the database it builds, so the call below reaches that method and not
    # this function again.
    init()
    return add_type(type, ext, strict)
| 285 | |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 286 | |
def init(files=None):
    """Build the default MIME-types database and publish it module-wide.

    `files' is a list of mime.types-format files to parse on top of the
    built-in tables; it defaults to knownfiles.  Entries that are not
    existing regular files are skipped.

    Afterwards the module-level tables (encodings_map, suffix_map,
    types_map, common_types) refer to the new database's dictionaries,
    and the module-level functions guess_type, guess_extension,
    guess_all_extensions and add_type are rebound to its methods.
    """
    global guess_all_extensions, guess_extension, guess_type
    global suffix_map, types_map, encodings_map, common_types
    global add_type, inited
    # The flag has to be set before MimeTypes() is created: the
    # constructor calls init() itself while the flag is still false.
    inited = True
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for path in files:
        if os.path.isfile(path):
            # read() opens the file and closes it again, so no file
            # object is left open behind.
            db.read(path)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    guess_all_extensions = db.guess_all_extensions
    guess_extension = db.guess_extension
    guess_type = db.guess_type
    add_type = db.add_type
    common_types = db.types_map[False]
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 306 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 307 | |
def read_mime_types(file):
    """Parse one mime.types-format file, given by pathname.

    Return a dictionary mapping suffixes to types, or None if the file
    cannot be opened.  The dictionary is the standard-types table of a
    new MimeTypes instance, so it holds the types that instance starts
    out with as well as the ones read from the file.
    """
    try:
        f = open(file)
    except IOError:
        return None
    try:
        db = MimeTypes()
        db.readfp(f, True)
    finally:
        # The file was opened here, so it is closed here as well.
        f.close()
    return db.types_map[True]
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 316 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 317 | |
# Suffix aliases: a key found at the end of a name is replaced by its
# value before any other table is consulted.
suffix_map = {
    '.taz': '.tar.gz',
    '.tgz': '.tar.gz',
    '.tz': '.tar.gz',
    }

# Suffixes that name an encoding wrapped around the real file type.
encodings_map = {
    '.Z': 'compress',
    '.gz': 'gzip',
    }
| 328 | |
# Before adding new types, make sure they are either registered with IANA, at
# http://www.isi.edu/in-notes/iana/assignments/media-types
# or extensions, i.e. using the x- prefix

# If you add to these, please keep them sorted!
# Each suffix may appear only once: when a key is repeated in a dictionary
# display, the last value silently replaces the earlier ones.
types_map = {
    '.a'      : 'application/octet-stream',
    '.ai'     : 'application/postscript',
    '.aif'    : 'audio/x-aiff',
    '.aifc'   : 'audio/x-aiff',
    '.aiff'   : 'audio/x-aiff',
    '.au'     : 'audio/basic',
    '.avi'    : 'video/x-msvideo',
    '.bat'    : 'text/plain',
    '.bcpio'  : 'application/x-bcpio',
    '.bin'    : 'application/octet-stream',
    '.bmp'    : 'image/x-ms-bmp',
    '.c'      : 'text/plain',
    # '.cdf' is also used for 'application/x-cdf'; only one type can be kept
    '.cdf'    : 'application/x-netcdf',
    '.cpio'   : 'application/x-cpio',
    '.csh'    : 'application/x-csh',
    '.css'    : 'text/css',
    '.dll'    : 'application/octet-stream',
    '.doc'    : 'application/msword',
    '.dot'    : 'application/msword',
    '.dvi'    : 'application/x-dvi',
    '.eml'    : 'message/rfc822',
    '.eps'    : 'application/postscript',
    '.etx'    : 'text/x-setext',
    '.exe'    : 'application/octet-stream',
    '.gif'    : 'image/gif',
    '.gtar'   : 'application/x-gtar',
    '.h'      : 'text/plain',
    '.hdf'    : 'application/x-hdf',
    '.htm'    : 'text/html',
    '.html'   : 'text/html',
    '.ief'    : 'image/ief',
    '.jpe'    : 'image/jpeg',
    '.jpeg'   : 'image/jpeg',
    '.jpg'    : 'image/jpeg',
    '.js'     : 'application/x-javascript',
    '.ksh'    : 'text/plain',
    '.latex'  : 'application/x-latex',
    '.m1v'    : 'video/mpeg',
    '.man'    : 'application/x-troff-man',
    '.me'     : 'application/x-troff-me',
    '.mht'    : 'message/rfc822',
    '.mhtml'  : 'message/rfc822',
    '.mif'    : 'application/x-mif',
    '.mov'    : 'video/quicktime',
    '.movie'  : 'video/x-sgi-movie',
    '.mp2'    : 'audio/mpeg',
    '.mp3'    : 'audio/mpeg',
    '.mpa'    : 'video/mpeg',
    '.mpe'    : 'video/mpeg',
    '.mpeg'   : 'video/mpeg',
    '.mpg'    : 'video/mpeg',
    '.ms'     : 'application/x-troff-ms',
    '.nc'     : 'application/x-netcdf',
    '.nws'    : 'message/rfc822',
    '.o'      : 'application/octet-stream',
    '.obj'    : 'application/octet-stream',
    '.oda'    : 'application/oda',
    '.p12'    : 'application/x-pkcs12',
    '.p7c'    : 'application/pkcs7-mime',
    '.pbm'    : 'image/x-portable-bitmap',
    '.pdf'    : 'application/pdf',
    '.pfx'    : 'application/x-pkcs12',
    '.pgm'    : 'image/x-portable-graymap',
    '.pl'     : 'text/plain',
    '.png'    : 'image/png',
    '.pnm'    : 'image/x-portable-anymap',
    '.pot'    : 'application/vnd.ms-powerpoint',
    '.ppa'    : 'application/vnd.ms-powerpoint',
    '.ppm'    : 'image/x-portable-pixmap',
    '.pps'    : 'application/vnd.ms-powerpoint',
    '.ppt'    : 'application/vnd.ms-powerpoint',
    '.ps'     : 'application/postscript',
    '.pwz'    : 'application/vnd.ms-powerpoint',
    '.py'     : 'text/x-python',
    '.pyc'    : 'application/x-python-code',
    '.pyo'    : 'application/x-python-code',
    '.qt'     : 'video/quicktime',
    '.ra'     : 'audio/x-pn-realaudio',
    '.ram'    : 'application/x-pn-realaudio',
    '.ras'    : 'image/x-cmu-raster',
    '.rdf'    : 'application/xml',
    '.rgb'    : 'image/x-rgb',
    '.roff'   : 'application/x-troff',
    '.rtx'    : 'text/richtext',
    '.sgm'    : 'text/x-sgml',
    '.sgml'   : 'text/x-sgml',
    '.sh'     : 'application/x-sh',
    '.shar'   : 'application/x-shar',
    '.snd'    : 'audio/basic',
    '.so'     : 'application/octet-stream',
    '.src'    : 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc' : 'application/x-sv4crc',
    '.swf'    : 'application/x-shockwave-flash',
    '.t'      : 'application/x-troff',
    '.tar'    : 'application/x-tar',
    '.tcl'    : 'application/x-tcl',
    '.tex'    : 'application/x-tex',
    '.texi'   : 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif'    : 'image/tiff',
    '.tiff'   : 'image/tiff',
    '.tr'     : 'application/x-troff',
    '.tsv'    : 'text/tab-separated-values',
    '.txt'    : 'text/plain',
    '.ustar'  : 'application/x-ustar',
    '.vcf'    : 'text/x-vcard',
    '.wav'    : 'audio/x-wav',
    '.wiz'    : 'application/msword',
    '.xbm'    : 'image/x-xbitmap',
    '.xlb'    : 'application/vnd.ms-excel',
    # '.xls' is also used for 'application/excel'; only one type can be kept
    '.xls'    : 'application/vnd.ms-excel',
    '.xml'    : 'text/xml',
    '.xpm'    : 'image/x-xpixmap',
    '.xsl'    : 'application/xml',
    '.xwd'    : 'image/x-xwindowdump',
    '.zip'    : 'application/zip',
    }
Eric S. Raymond | 51cc3bc | 2001-02-09 09:44:47 +0000 | [diff] [blame] | 457 | |
# Non-standard types that are nevertheless commonly found in the wild.  They
# are only matched when the API methods are called with a false strict flag.

# Please keep these sorted as well.
common_types = {
    '.jpg':  'image/jpg',
    '.mid':  'audio/midi',
    '.midi': 'audio/midi',
    '.pct':  'image/pict',
    '.pic':  'image/pict',
    '.pict': 'image/pict',
    '.rtf':  'application/rtf',
    '.xul':  'text/xul',
    }
| 472 | |
| 473 | |
if __name__ == '__main__':
    # Command-line front end: guess the type of each argument or, with -e,
    # the extension for each argument.
    import sys
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def emit(*items):
        # Write the items to standard output the way a print statement
        # does: str() of each one, single spaces in between, then a newline.
        sys.stdout.write(' '.join([str(item) for item in items]) + '\n')

    def usage(code, msg=''):
        # Show the help text, then the optional message, and exit with code.
        emit(USAGE)
        if msg:
            emit(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error:
        # The exception instance is the message shown to the user.
        usage(1, sys.exc_info()[1])

    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1
    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                emit(guess)
            else:
                emit("I don't know anything about type", gtype)
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                emit('type:', guess, 'encoding:', encoding)
            else:
                emit("I don't know anything about type", gtype)