"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.

guess_extension(type, strict=True) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None
"""
| 24 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 25 | import os |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 26 | import posixpath |
Jeremy Hylton | 1afc169 | 2008-06-18 20:49:58 +0000 | [diff] [blame] | 27 | import urllib.parse |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 28 | |
# Public API.  Besides the helper functions, export the MimeTypes class and
# the tuning data that the module docstring documents as public.
__all__ = [
    "knownfiles", "inited", "MimeTypes",
    "guess_type", "guess_all_extensions", "guess_extension",
    "add_type", "init", "read_mime_types",
    "suffix_map", "encodings_map", "types_map", "common_types",
]
Skip Montanaro | 03d9014 | 2001-01-25 15:29:22 +0000 | [diff] [blame] | 33 | |
# mime.types-style files that init() parses by default, in this order.
# One path is listed twice on purpose-preserving grounds: the order in which
# files are read decides which mapping wins, so the list is left untouched.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
]

# Set to True by init() as soon as it starts running.
inited = False
# Shared MimeTypes instance used by the module-level helper functions;
# assigned by init() once the database is fully built.
_db = None
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 48 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 49 | |
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Instance attributes:

    encodings_map -- dict mapping suffixes to encoding names
    suffix_map    -- dict mapping suffixes to replacement suffixes
    types_map     -- pair of dicts (non-strict, strict) mapping an
                     extension to a MIME type
    types_map_inv -- pair of dicts (non-strict, strict) mapping a MIME
                     type to the list of extensions registered for it
    """

    def __init__(self, filenames=(), strict=True):
        """Build a datastore seeded from the module-level default tables.

        Each path in `filenames` is then loaded with read(); `strict`
        selects whether those entries go to the standard or the
        non-standard table.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {})  # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for ext, mime_type in types_map.items():
            self.add_type(mime_type, ext, True)
        for ext, mime_type in common_types.items():
            self.add_type(mime_type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional 'strict' argument when False adds a bunch of commonly
        found, but non-standard types.
        """
        scheme, url = urllib.parse.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            media_type = url[:semi] if semi >= 0 else url[:comma]
            if '=' in media_type or '/' not in media_type:
                media_type = 'text/plain'
            return media_type, None  # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') until stable.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        # Peel off an encoding suffix, leaving the type suffix in `ext`.
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        # The standard table is always consulted first; the non-standard
        # table is only a fallback for non-strict lookups.
        tables = [self.types_map[True]]
        if not strict:
            tables.append(self.types_map[False])
        lowered = ext.lower()
        for table in tables:
            if ext in table:
                return table[ext], encoding
            if lowered in table:
                return table[lowered], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type 'type' by guess_type().
        The list is empty when no extension is known, and it is always a
        fresh list that the caller may modify freely.

        Optional 'strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Copy the stored list: appending the non-standard extensions to it
        # in place would leak them into every later strict lookup.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type 'type' by
        guess_type().  If no extension can be guessed for 'type', None
        is returned.

        Optional 'strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # The context manager closes the file even if parsing raises.
        with open(filename) as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp` only needs a readline() method.  Each line has the form
        "type suffix [suffix ...]"; everything from the first word that
        starts with '#' onwards is ignored.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop a trailing comment, if any.
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(mime_type, '.' + suff, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 222 | |
def guess_type(url, strict=True):
    """Guess the type of a file based on its URL.

    Delegates to the shared module-level MimeTypes database, building it
    with init() on first use.

    The result is a tuple (type, encoding).  type is a 'type/subtype'
    string suitable for a MIME Content-type header, or None when the
    suffix is missing or unknown.  encoding is the name of the program
    used to encode the data (e.g. compress or gzip), or None when there
    is no encoding.  All mappings are table driven.  Encoding suffixes
    are matched case sensitively; type suffixes are tried case sensitively
    first and then case insensitively.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz" through the suffix_map dictionary.

    When the optional 'strict' argument is false, a set of commonly
    found but non-standard types is consulted as well.
    """
    if _db is None:
        # Lazily create the shared database.
        init()
    return _db.guess_type(url, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 244 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 245 | |
def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Delegates to the shared module-level MimeTypes database, building it
    with init() on first use.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type 'type' by
    guess_type().  If no extension can be guessed for 'type', the
    list is empty (None is never returned).

    Optional 'strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        # Lazily create the shared database.
        init()
    return _db.guess_all_extensions(type, strict)
Walter Dörwald | 5ccaf8f | 2002-09-06 16:15:58 +0000 | [diff] [blame] | 262 | |
def guess_extension(type, strict=True):
    """Guess the extension for a file based on its MIME type.

    Delegates to the shared module-level MimeTypes database, building it
    with init() on first use.

    The result is a single filename extension, leading dot ('.')
    included, or None when no extension can be guessed for 'type'.
    The extension is not guaranteed to have been associated with any
    particular data stream, but guess_type() would map it to the MIME
    type 'type'.

    When the optional 'strict' argument is false, commonly found but
    non-standard types are considered as well.
    """
    if _db is None:
        # Lazily create the shared database.
        init()
    return _db.guess_extension(type, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 278 | |
def add_type(type, ext, strict=True):
    """Add a mapping between a type and an extension.

    Delegates to the shared module-level MimeTypes database, building it
    with init() on first use.

    If the extension is already known, its type is replaced by the new
    one.  If the type is already known, the extension is added to the
    list of extensions recorded for it.

    A true 'strict' stores the mapping among the standard types; a
    false one stores it among the non-standard types.
    """
    if _db is None:
        # Lazily create the shared database.
        init()
    return _db.add_type(type, ext, strict)
Walter Dörwald | 5ccaf8f | 2002-09-06 16:15:58 +0000 | [diff] [blame] | 294 | |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 295 | |
def init(files=None):
    """(Re)build the shared database and the module-level tables.

    A fresh MimeTypes instance is created and every path in `files`
    (default: knownfiles) that is an existing regular file is parsed into
    it as standard types.  Afterwards the module globals encodings_map,
    suffix_map, types_map and common_types are rebound to that instance's
    tables and the instance becomes the database used by the module-level
    helper functions.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    inited = True    # so that MimeTypes.__init__() doesn't call us again
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for file in files:
        if os.path.isfile(file):
            # Close each file as soon as it has been parsed instead of
            # leaving the handle for the garbage collector.
            with open(file) as fp:
                db.readfp(fp)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    common_types = db.types_map[False]
    # Make the DB a global variable now that it is fully initialized
    _db = db
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 312 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 313 | |
def read_mime_types(file):
    """Parse one mime.types-format file given by pathname.

    Returns None if the file cannot be opened.  Otherwise returns the
    standard (strict) extension-to-type dictionary of a new MimeTypes
    instance after the file's entries have been added to it.
    """
    try:
        f = open(file)
    except IOError:
        return None
    # Ensure the file is closed even if parsing raises.
    with f:
        db = MimeTypes()
        db.readfp(f, True)
    return db.types_map[True]
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 322 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 323 | |
def _default_mime_types():
    """Install the built-in default tables as module globals.

    Rebinds suffix_map, encodings_map, types_map and common_types to
    freshly created dictionaries.
    """
    global suffix_map, encodings_map, types_map, common_types

    # Shorthand suffixes and the suffix pair each one stands for.
    suffix_map = {
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
    }

    # Suffixes that name an encoding rather than a content type.
    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
    }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.isi.edu/in-notes/iana/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # Standard types.  If you add to these, please keep them sorted!
    types_map = {
        '.a': 'application/octet-stream',
        '.ai': 'application/postscript',
        '.aif': 'audio/x-aiff',
        '.aifc': 'audio/x-aiff',
        '.aiff': 'audio/x-aiff',
        '.au': 'audio/basic',
        '.avi': 'video/x-msvideo',
        '.bat': 'text/plain',
        '.bcpio': 'application/x-bcpio',
        '.bin': 'application/octet-stream',
        '.bmp': 'image/x-ms-bmp',
        '.c': 'text/plain',
        # '.cdf' is also used for 'application/x-cdf'; only one value can
        # be stored per key, and this is the one that takes effect.
        '.cdf': 'application/x-netcdf',
        '.cpio': 'application/x-cpio',
        '.csh': 'application/x-csh',
        '.css': 'text/css',
        '.dll': 'application/octet-stream',
        '.doc': 'application/msword',
        '.dot': 'application/msword',
        '.dvi': 'application/x-dvi',
        '.eml': 'message/rfc822',
        '.eps': 'application/postscript',
        '.etx': 'text/x-setext',
        '.exe': 'application/octet-stream',
        '.gif': 'image/gif',
        '.gtar': 'application/x-gtar',
        '.h': 'text/plain',
        '.hdf': 'application/x-hdf',
        '.htm': 'text/html',
        '.html': 'text/html',
        '.ief': 'image/ief',
        '.jpe': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.jpg': 'image/jpeg',
        '.js': 'application/x-javascript',
        '.ksh': 'text/plain',
        '.latex': 'application/x-latex',
        '.m1v': 'video/mpeg',
        '.man': 'application/x-troff-man',
        '.me': 'application/x-troff-me',
        '.mht': 'message/rfc822',
        '.mhtml': 'message/rfc822',
        '.mif': 'application/x-mif',
        '.mov': 'video/quicktime',
        '.movie': 'video/x-sgi-movie',
        '.mp2': 'audio/mpeg',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
        '.mpa': 'video/mpeg',
        '.mpe': 'video/mpeg',
        '.mpeg': 'video/mpeg',
        '.mpg': 'video/mpeg',
        '.ms': 'application/x-troff-ms',
        '.nc': 'application/x-netcdf',
        '.nws': 'message/rfc822',
        '.o': 'application/octet-stream',
        '.obj': 'application/octet-stream',
        '.oda': 'application/oda',
        '.p12': 'application/x-pkcs12',
        '.p7c': 'application/pkcs7-mime',
        '.pbm': 'image/x-portable-bitmap',
        '.pdf': 'application/pdf',
        '.pfx': 'application/x-pkcs12',
        '.pgm': 'image/x-portable-graymap',
        '.pl': 'text/plain',
        '.png': 'image/png',
        '.pnm': 'image/x-portable-anymap',
        '.pot': 'application/vnd.ms-powerpoint',
        '.ppa': 'application/vnd.ms-powerpoint',
        '.ppm': 'image/x-portable-pixmap',
        '.pps': 'application/vnd.ms-powerpoint',
        '.ppt': 'application/vnd.ms-powerpoint',
        '.ps': 'application/postscript',
        '.pwz': 'application/vnd.ms-powerpoint',
        '.py': 'text/x-python',
        '.pyc': 'application/x-python-code',
        '.pyo': 'application/x-python-code',
        '.qt': 'video/quicktime',
        '.ra': 'audio/x-pn-realaudio',
        '.ram': 'application/x-pn-realaudio',
        '.ras': 'image/x-cmu-raster',
        '.rdf': 'application/xml',
        '.rgb': 'image/x-rgb',
        '.roff': 'application/x-troff',
        '.rtx': 'text/richtext',
        '.sgm': 'text/x-sgml',
        '.sgml': 'text/x-sgml',
        '.sh': 'application/x-sh',
        '.shar': 'application/x-shar',
        '.snd': 'audio/basic',
        '.so': 'application/octet-stream',
        '.src': 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc': 'application/x-sv4crc',
        '.swf': 'application/x-shockwave-flash',
        '.t': 'application/x-troff',
        '.tar': 'application/x-tar',
        '.tcl': 'application/x-tcl',
        '.tex': 'application/x-tex',
        '.texi': 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif': 'image/tiff',
        '.tiff': 'image/tiff',
        '.tr': 'application/x-troff',
        '.tsv': 'text/tab-separated-values',
        '.txt': 'text/plain',
        '.ustar': 'application/x-ustar',
        '.vcf': 'text/x-vcard',
        '.wav': 'audio/x-wav',
        '.wiz': 'application/msword',
        '.wsdl': 'application/xml',
        '.xbm': 'image/x-xbitmap',
        '.xlb': 'application/vnd.ms-excel',
        # '.xls' is also used for 'application/excel'; only one value can
        # be stored per key, and this is the one that takes effect.
        '.xls': 'application/vnd.ms-excel',
        '.xml': 'text/xml',
        '.xpdl': 'application/xml',
        '.xpm': 'image/x-xpixmap',
        '.xsl': 'application/xml',
        '.xwd': 'image/x-xwindowdump',
        '.zip': 'application/zip',
    }

    # Non-standard types that are nevertheless common in the wild.  They
    # are only matched when the API methods are called with a false
    # 'strict' argument.  Please keep these sorted too.
    common_types = {
        '.jpg': 'image/jpg',
        '.mid': 'audio/midi',
        '.midi': 'audio/midi',
        '.pct': 'image/pict',
        '.pic': 'image/pict',
        '.pict': 'image/pict',
        '.rtf': 'application/rtf',
        '.xul': 'text/xul',
    }


# Populate the default tables at import time.
_default_mime_types()
Barry Warsaw | 107771a | 2001-10-25 21:49:18 +0000 | [diff] [blame] | 492 | |
| 493 | |
if __name__ == '__main__':
    # Command-line front end: guess a type (or, with -e, an extension)
    # for every positional argument.
    import sys
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text plus an optional message, then exit."""
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hle', ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = True
    extension = False
    for flag, _value in opts:
        if flag in ('-h', '--help'):
            usage(0)
        elif flag in ('-l', '--lenient'):
            strict = False
        elif flag in ('-e', '--extension'):
            extension = True

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print("I don't know anything about type", gtype)
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print('type:', guess, 'encoding:', encoding)
            else:
                print("I don't know anything about type", gtype)