Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 1 | """Guess the MIME type of a file. |
| 2 | |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 3 | This module defines two useful functions: |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 4 | |
Georg Brandl | cdf8b34 | 2009-06-08 09:07:34 +0000 | [diff] [blame] | 5 | guess_type(url, strict=True) -- guess the MIME type and encoding of a URL. |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 6 | |
Georg Brandl | cdf8b34 | 2009-06-08 09:07:34 +0000 | [diff] [blame] | 7 | guess_extension(type, strict=True) -- guess the extension for a given MIME type. |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 8 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 9 | It also contains the following, for tuning the behavior: |
| 10 | |
| 11 | Data: |
| 12 | |
| 13 | knownfiles -- list of files to parse |
| 14 | inited -- flag set when init() has been called |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 15 | suffix_map -- dictionary mapping suffixes to suffixes |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 16 | encodings_map -- dictionary mapping suffixes to encodings |
| 17 | types_map -- dictionary mapping suffixes to types |
| 18 | |
| 19 | Functions: |
| 20 | |
Antoine Pitrou | b8108e2 | 2009-11-15 14:25:16 +0000 | [diff] [blame] | 21 | init([files]) -- parse a list of files, default knownfiles (on Windows, the |
| 22 | default values are taken from the registry) |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 23 | read_mime_types(file) -- parse one file, return a dictionary or None |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 24 | """ |
| 25 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 26 | import os |
Antoine Pitrou | b8108e2 | 2009-11-15 14:25:16 +0000 | [diff] [blame] | 27 | import sys |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 28 | import posixpath |
Jeremy Hylton | 1afc169 | 2008-06-18 20:49:58 +0000 | [diff] [blame] | 29 | import urllib.parse |
Antoine Pitrou | b8108e2 | 2009-11-15 14:25:16 +0000 | [diff] [blame] | 30 | try: |
| 31 | import winreg as _winreg |
| 32 | except ImportError: |
| 33 | _winreg = None |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 34 | |
# Names exported by ``from mimetypes import *``.
__all__ = [
    "guess_type",
    "guess_extension",
    "guess_all_extensions",
    "add_type",
    "read_mime_types",
    "init",
]
Skip Montanaro | 03d9014 | 2001-01-25 15:29:22 +0000 | [diff] [blame] | 39 | |
# Candidate mime.types files, in the order init() parses them.  Later
# files override earlier ones for any extension they both define.
knownfiles = [
    '/etc/mime.types',
    '/etc/httpd/mime.types',                 # Mac OS X
    '/etc/httpd/conf/mime.types',            # Apache
    '/etc/apache/mime.types',                # Apache 1
    '/etc/apache2/mime.types',               # Apache 2
    '/usr/local/etc/httpd/conf/mime.types',
    '/usr/local/lib/netscape/mime.types',
    # NOTE(review): same path as two entries above; kept so the list
    # contents and the parse order stay exactly as they were.
    '/usr/local/etc/httpd/conf/mime.types',  # Apache 1.2
    '/usr/local/etc/mime.types',             # Apache 1.3
]

# Set to True by init(); MimeTypes.__init__() checks it to avoid recursion.
inited = False
# The shared MimeTypes instance used by the module-level functions;
# assigned by init() once the database is fully populated.
_db = None
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 54 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 55 | |
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Instance attributes:

    encodings_map -- dict mapping suffixes to encoding names
    suffix_map    -- dict mapping suffixes to replacement suffixes
    types_map     -- pair of dicts (non-strict, strict) mapping
                     extensions to MIME types; index it with `strict`
    types_map_inv -- pair of dicts (non-strict, strict) mapping MIME
                     types to lists of extensions
    """

    def __init__(self, filenames=(), strict=True):
        """Create a datastore seeded from the module-level default tables.

        `filenames` is an iterable of mime.types-format files that are
        read after the defaults; `strict` selects whether their entries
        go into the standard or the non-standard table.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {})  # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for ext, mime_type in types_map.items():
            self.add_type(mime_type, ext, True)
        for ext, mime_type in common_types.items():
            self.add_type(mime_type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # `strict` doubles as the tuple index: False -> 0, True -> 1.
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional 'strict' argument when False adds a bunch of commonly
        found, but non-standard types.
        """
        # Split off a leading "scheme:" prefix.  A scheme is a non-empty
        # run of characters before the first ':' that contains no '/'.
        # Done inline rather than through urllib.parse.splittype(),
        # which is an undocumented, deprecated helper.
        head, colon, tail = url.partition(':')
        if colon and head and '/' not in head:
            scheme, url = head.lower(), tail
        else:
            scheme = None

        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            end = semi if semi >= 0 else comma
            mime_type = url[:end]
            if '=' in mime_type or '/' not in mime_type:
                mime_type = 'text/plain'
            return mime_type, None  # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') until
        # the trailing suffix is no longer an alias.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        # The standard table is always consulted; the non-standard one
        # only as a fallback when the caller asked for lenient matching.
        tables = [self.types_map[True]]
        if not strict:
            tables.append(self.types_map[False])
        for table in tables:
            if ext in table:
                return table[ext], encoding
            lowered = ext.lower()
            if lowered in table:
                return table[lowered], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type 'type' by guess_type().
        The list is empty when no extension is known for 'type'.

        Optional 'strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Work on a copy: appending the non-standard extensions to the
        # stored list would leak them into every later strict lookup,
        # and callers must not be able to mutate the database through
        # the returned list.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type 'type' by
        guess_type().  If no extension can be guessed for 'type', None
        is returned.

        Optional 'strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # The context manager closes the file even if parsing raises.
        with open(filename) as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp` is an open text file object; it is consumed with
        readline() until end of file and is not closed here.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            # Keep the words up to (not including) the first one that
            # starts a '#' comment.
            words = []
            for word in line.split():
                if word.startswith('#'):
                    break
                words.append(word)
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(mime_type, '.' + suff, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        Does nothing when the winreg module could not be imported.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            # Yield subkey names by index until EnumKey signals the
            # end of the key by raising.
            i = 0
            while True:
                try:
                    ctype = _winreg.EnumKey(mimedb, i)
                except EnvironmentError:
                    break
                else:
                    yield ctype
                i += 1

        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT,
                             r'MIME\Database\Content Type') as mimedb:
            for ctype in enum_types(mimedb):
                with _winreg.OpenKey(mimedb, ctype) as key:
                    try:
                        suffix, datatype = _winreg.QueryValueEx(key,
                                                                'Extension')
                    except EnvironmentError:
                        # This content type has no 'Extension' value.
                        continue
                    if datatype != _winreg.REG_SZ:
                        continue
                    self.add_type(ctype, suffix, strict)
| 265 | |
| 266 | |
def guess_type(url, strict=True):
    """Guess the MIME type and encoding of a file from its URL.

    Returns a (type, encoding) tuple.  `type` is a 'type/subtype'
    string suitable for a MIME Content-type header, or None when the
    suffix is missing or unknown.  `encoding` is the name of the
    program used to encode the data (e.g. compress or gzip), or None
    when there is no encoding.

    All lookups are table driven.  Encoding suffixes are matched case
    sensitively; type suffixes are tried case sensitively first and
    then case insensitively.  The suffixes .tgz, .taz and .tz (case
    sensitive!) are treated as ".tar.gz" by way of the suffix_map
    dictionary.

    When the optional `strict` argument is false, a set of commonly
    found but non-standard types is consulted as well.
    """
    database = _db
    if database is None:
        # First use: build the shared database, then pick it up.
        init()
        database = _db
    return database.guess_type(url, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 288 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 289 | |
def guess_all_extensions(type, strict=True):
    """Guess every filename extension for a given MIME type.

    Returns a list of extension strings, each including the leading
    dot ('.').  The extensions are not guaranteed to have been
    associated with any particular data stream, but guess_type() would
    map them to the MIME type `type`.  The list is empty when no
    extension is known for `type`.

    When the optional `strict` argument is false, a set of commonly
    found but non-standard types is consulted as well.
    """
    database = _db
    if database is None:
        # First use: build the shared database, then pick it up.
        init()
        database = _db
    return database.guess_all_extensions(type, strict)
Walter Dörwald | 5ccaf8f | 2002-09-06 16:15:58 +0000 | [diff] [blame] | 306 | |
def guess_extension(type, strict=True):
    """Guess a single filename extension for a given MIME type.

    Returns the extension as a string including the leading dot ('.'),
    or None when no extension can be guessed for `type`.  The extension
    is not guaranteed to have been associated with any particular data
    stream, but guess_type() would map it to the MIME type `type`.

    When the optional `strict` argument is false, a set of commonly
    found but non-standard types is consulted as well.
    """
    database = _db
    if database is None:
        # First use: build the shared database, then pick it up.
        init()
        database = _db
    return database.guess_extension(type, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 322 | |
def add_type(type, ext, strict=True):
    """Register a mapping between a MIME type and an extension.

    An extension that is already known is re-pointed at the new type.
    A type that is already known gains the extension in its list of
    known extensions.

    The mapping goes into the table of standard types when `strict`
    is true, otherwise into the table of non-standard types.
    """
    database = _db
    if database is None:
        # First use: build the shared database, then pick it up.
        init()
        database = _db
    return database.add_type(type, ext, strict)
Walter Dörwald | 5ccaf8f | 2002-09-06 16:15:58 +0000 | [diff] [blame] | 338 | |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 339 | |
def init(files=None):
    """(Re)build the module-level MIME database.

    `files` is a sequence of mime.types-format file names to parse;
    entries that are not existing regular files are skipped.  When it
    is None, the Windows registry is read first (if winreg is
    available) and then every file listed in `knownfiles`.

    On return the module globals encodings_map, suffix_map, types_map
    and common_types refer to the tables of the freshly built
    database, which also becomes the one used by the module-level
    guess_*/add_type functions.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    inited = True    # so that MimeTypes.__init__() doesn't call us again
    db = MimeTypes()
    if files is None:
        if _winreg:
            db.read_windows_registry()
        files = knownfiles
    for file in files:
        if os.path.isfile(file):
            # Close each file as soon as it has been parsed instead of
            # leaving the handle for the garbage collector.
            with open(file) as fp:
                db.readfp(fp)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    common_types = db.types_map[False]
    # Make the DB a global variable now that it is fully initialized
    _db = db
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 358 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 359 | |
def read_mime_types(file):
    """Parse one mime.types-format file and return its type table.

    `file` is the path of the file to read.  Returns a dictionary
    mapping extensions (with leading dot) to MIME types, made up of
    the default standard types plus the entries read from `file`, or
    None when the file cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    # Ensure the handle is closed whether or not parsing succeeds.
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 368 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 369 | |
def _default_mime_types():
    """Install the built-in default tables as module globals.

    Rebinds suffix_map, encodings_map, types_map and common_types to
    fresh dictionaries holding the defaults.
    """
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    # Shorthand suffixes and the compound suffix each one stands for.
    suffix_map = {
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
    }

    # Suffixes that denote an encoding wrapped around the real content.
    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
    }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.isi.edu/in-notes/iana/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    types_map = {
        '.a': 'application/octet-stream',
        '.ai': 'application/postscript',
        '.aif': 'audio/x-aiff',
        '.aifc': 'audio/x-aiff',
        '.aiff': 'audio/x-aiff',
        '.au': 'audio/basic',
        '.avi': 'video/x-msvideo',
        '.bat': 'text/plain',
        '.bcpio': 'application/x-bcpio',
        '.bin': 'application/octet-stream',
        '.bmp': 'image/x-ms-bmp',
        '.c': 'text/plain',
        # '.cdf' is also seen as 'application/x-cdf'; a dict can hold
        # only one value per key, so only the effective mapping is listed.
        '.cdf': 'application/x-netcdf',
        '.cpio': 'application/x-cpio',
        '.csh': 'application/x-csh',
        '.css': 'text/css',
        '.dll': 'application/octet-stream',
        '.doc': 'application/msword',
        '.dot': 'application/msword',
        '.dvi': 'application/x-dvi',
        '.eml': 'message/rfc822',
        '.eps': 'application/postscript',
        '.etx': 'text/x-setext',
        '.exe': 'application/octet-stream',
        '.gif': 'image/gif',
        '.gtar': 'application/x-gtar',
        '.h': 'text/plain',
        '.hdf': 'application/x-hdf',
        '.htm': 'text/html',
        '.html': 'text/html',
        '.ief': 'image/ief',
        '.jpe': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.jpg': 'image/jpeg',
        '.js': 'application/x-javascript',
        '.ksh': 'text/plain',
        '.latex': 'application/x-latex',
        '.m1v': 'video/mpeg',
        '.man': 'application/x-troff-man',
        '.me': 'application/x-troff-me',
        '.mht': 'message/rfc822',
        '.mhtml': 'message/rfc822',
        '.mif': 'application/x-mif',
        '.mov': 'video/quicktime',
        '.movie': 'video/x-sgi-movie',
        '.mp2': 'audio/mpeg',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
        '.mpa': 'video/mpeg',
        '.mpe': 'video/mpeg',
        '.mpeg': 'video/mpeg',
        '.mpg': 'video/mpeg',
        '.ms': 'application/x-troff-ms',
        '.nc': 'application/x-netcdf',
        '.nws': 'message/rfc822',
        '.o': 'application/octet-stream',
        '.obj': 'application/octet-stream',
        '.oda': 'application/oda',
        '.p12': 'application/x-pkcs12',
        '.p7c': 'application/pkcs7-mime',
        '.pbm': 'image/x-portable-bitmap',
        '.pdf': 'application/pdf',
        '.pfx': 'application/x-pkcs12',
        '.pgm': 'image/x-portable-graymap',
        '.pl': 'text/plain',
        '.png': 'image/png',
        '.pnm': 'image/x-portable-anymap',
        '.pot': 'application/vnd.ms-powerpoint',
        '.ppa': 'application/vnd.ms-powerpoint',
        '.ppm': 'image/x-portable-pixmap',
        '.pps': 'application/vnd.ms-powerpoint',
        '.ppt': 'application/vnd.ms-powerpoint',
        '.ps': 'application/postscript',
        '.pwz': 'application/vnd.ms-powerpoint',
        '.py': 'text/x-python',
        '.pyc': 'application/x-python-code',
        '.pyo': 'application/x-python-code',
        '.qt': 'video/quicktime',
        '.ra': 'audio/x-pn-realaudio',
        '.ram': 'application/x-pn-realaudio',
        '.ras': 'image/x-cmu-raster',
        '.rdf': 'application/xml',
        '.rgb': 'image/x-rgb',
        '.roff': 'application/x-troff',
        '.rtx': 'text/richtext',
        '.sgm': 'text/x-sgml',
        '.sgml': 'text/x-sgml',
        '.sh': 'application/x-sh',
        '.shar': 'application/x-shar',
        '.snd': 'audio/basic',
        '.so': 'application/octet-stream',
        '.src': 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc': 'application/x-sv4crc',
        '.swf': 'application/x-shockwave-flash',
        '.t': 'application/x-troff',
        '.tar': 'application/x-tar',
        '.tcl': 'application/x-tcl',
        '.tex': 'application/x-tex',
        '.texi': 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif': 'image/tiff',
        '.tiff': 'image/tiff',
        '.tr': 'application/x-troff',
        '.tsv': 'text/tab-separated-values',
        '.txt': 'text/plain',
        '.ustar': 'application/x-ustar',
        '.vcf': 'text/x-vcard',
        '.wav': 'audio/x-wav',
        '.wiz': 'application/msword',
        '.wsdl': 'application/xml',
        '.xbm': 'image/x-xbitmap',
        '.xlb': 'application/vnd.ms-excel',
        # '.xls' is also seen as 'application/excel'; a dict can hold
        # only one value per key, so only the effective mapping is listed.
        '.xls': 'application/vnd.ms-excel',
        '.xml': 'text/xml',
        '.xpdl': 'application/xml',
        '.xpm': 'image/x-xpixmap',
        '.xsl': 'application/xml',
        '.xwd': 'image/x-xwindowdump',
        '.zip': 'application/zip',
    }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=0 flag is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg': 'image/jpg',
        '.mid': 'audio/midi',
        '.midi': 'audio/midi',
        '.pct': 'image/pict',
        '.pic': 'image/pict',
        '.pict': 'image/pict',
        '.rtf': 'application/rtf',
        '.xul': 'text/xul',
    }


_default_mime_types()
Barry Warsaw | 107771a | 2001-10-25 21:49:18 +0000 | [diff] [blame] | 538 | |
| 539 | |
def _main():
    """Command-line driver: print a guess for every positional argument.

    By default each argument is passed to guess_type(); with
    -e/--extension it is passed to guess_extension() instead.
    -l/--lenient turns off strict matching.  Exits through sys.exit()
    after printing the usage text for -h/--help or on an option error.
    """
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        # Print the help text (plus an optional message) and exit.
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = True
    extension = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = False
        elif opt in ('-e', '--extension'):
            extension = True

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print(guess)
        else:
            guess, encoding = guess_type(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print('type:', guess, 'encoding:', encoding)


if __name__ == '__main__':
    _main()