Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 1 | """Guess the MIME type of a file. |
| 2 | |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 3 | This module defines two useful functions: |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 4 | |
Georg Brandl | cdf8b34 | 2009-06-08 09:07:34 +0000 | [diff] [blame] | 5 | guess_type(url, strict=True) -- guess the MIME type and encoding of a URL. |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 6 | |
Georg Brandl | cdf8b34 | 2009-06-08 09:07:34 +0000 | [diff] [blame] | 7 | guess_extension(type, strict=True) -- guess the extension for a given MIME type. |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 8 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 9 | It also contains the following, for tuning the behavior: |
| 10 | |
| 11 | Data: |
| 12 | |
| 13 | knownfiles -- list of files to parse |
| 14 | inited -- flag set when init() has been called |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 15 | suffix_map -- dictionary mapping suffixes to suffixes |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 16 | encodings_map -- dictionary mapping suffixes to encodings |
| 17 | types_map -- dictionary mapping suffixes to types |
| 18 | |
| 19 | Functions: |
| 20 | |
Antoine Pitrou | b8108e2 | 2009-11-15 14:25:16 +0000 | [diff] [blame] | 21 | init([files]) -- parse a list of files, default knownfiles (on Windows, the |
| 22 | default values are taken from the registry) |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 23 | read_mime_types(file) -- parse one file, return a dictionary or None |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 24 | """ |
| 25 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 26 | import os |
Antoine Pitrou | b8108e2 | 2009-11-15 14:25:16 +0000 | [diff] [blame] | 27 | import sys |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 28 | import posixpath |
Jeremy Hylton | 1afc169 | 2008-06-18 20:49:58 +0000 | [diff] [blame] | 29 | import urllib.parse |
Antoine Pitrou | b8108e2 | 2009-11-15 14:25:16 +0000 | [diff] [blame] | 30 | try: |
| 31 | import winreg as _winreg |
Brett Cannon | cd171c8 | 2013-07-04 17:43:24 -0400 | [diff] [blame] | 32 | except ImportError: |
Antoine Pitrou | b8108e2 | 2009-11-15 14:25:16 +0000 | [diff] [blame] | 33 | _winreg = None |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 34 | |
# Public names of this module (what ``import *`` exports).
__all__ = [
    "knownfiles", "inited", "MimeTypes",
    "guess_type", "guess_all_extensions", "guess_extension",
    "add_type", "init", "read_mime_types",
    "suffix_map", "encodings_map", "types_map", "common_types"
]
Skip Montanaro | 03d9014 | 2001-01-25 15:29:22 +0000 | [diff] [blame] | 41 | |
# mime.types-style files that init() parses by default (when they exist).
# Each path is listed once so that no file is parsed twice.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/mime.types",                # Apache 1.3
]

# Set to True by init(); checked by MimeTypes.__init__().
inited = False
# Module-wide MimeTypes instance; assigned by init() once fully populated.
_db = None
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 56 | |
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 57 | |
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.
    """

    def __init__(self, filenames=(), strict=True):
        """Create a datastore seeded from the module-level tables.

        The module-level encodings_map and suffix_map are copied, and
        every entry of types_map (strict) and common_types (non-strict)
        is registered.  Each file named in `filenames' is then read with
        read(), using the given `strict' flag.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {})  # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for ext, mime_type in types_map.items():
            self.add_type(mime_type, ext, True)
        for ext, mime_type in common_types.items():
            self.add_type(mime_type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # `strict' doubles as the tuple index: False -> 0, True -> 1.
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.parse.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                type = url[:semi]
            else:
                type = url[:comma]
            if '=' in type or '/' not in type:
                type = 'text/plain'
            return type, None  # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') until stable.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        # Standard types always win; the non-standard table is consulted
        # only when the caller asked for a lenient lookup.
        tables = [self.types_map[True]]
        if not strict:
            tables.append(self.types_map[False])
        for table in tables:
            if ext in table:
                return table[ext], encoding
            lowered = ext.lower()
            if lowered in table:
                return table[lowered], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a new list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().  The
        list is empty when no extension is known for `type'.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Copy: appending the non-strict extensions below must not leak
        # into the strict table, and callers must not be able to mutate
        # the datastore through the returned list.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        The file is opened as UTF-8 text and handed to readfp().

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        with open(filename, encoding='utf-8') as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs a readline() method.  Each line is split on
        whitespace; everything from the first word starting with '#' is
        dropped as a comment.  The first remaining word is the MIME type
        and the others are its suffixes (written without the dot).

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(type, '.' + suff, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        Does nothing when the winreg module is unavailable.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            # Yield sub-key names until EnumKey signals the end with an error.
            i = 0
            while True:
                try:
                    ctype = _winreg.EnumKey(mimedb, i)
                except OSError:
                    break
                else:
                    # Names containing a NUL character are skipped.
                    if '\0' not in ctype:
                        yield ctype
                i += 1

        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
            for subkeyname in enum_types(hkcr):
                # Only check file extensions; test before opening the key
                # so non-extension keys cost no registry access.
                if not subkeyname.startswith("."):
                    continue
                try:
                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
                        # raises OSError if no 'Content Type' value
                        mimetype, datatype = _winreg.QueryValueEx(
                            subkey, 'Content Type')
                        if datatype != _winreg.REG_SZ:
                            continue
                        self.add_type(mimetype, subkeyname, strict)
                except OSError:
                    continue
Antoine Pitrou | b8108e2 | 2009-11-15 14:25:16 +0000 | [diff] [blame] | 270 | |
def guess_type(url, strict=True):
    """Guess the MIME type and encoding of a file from its URL.

    Delegates to the module-wide database, initializing it first if
    necessary.

    Returns a tuple (type, encoding).  `type' is a 'type/subtype'
    string usable for a MIME Content-type header, or None when the
    suffix is missing or unknown.  `encoding' is the name of the
    program used to encode the file (e.g. compress or gzip), or None.
    All mappings are table driven.  Encoding suffixes are matched case
    sensitively; type suffixes are tried case sensitively first and
    then case insensitively.

    The suffixes .tgz, .taz and .tz (case sensitive!) are treated as
    ".tar.gz" via the suffix_map dictionary.

    When `strict' is false, commonly found but non-standard types are
    considered as well.
    """
    if _db is None:
        init()  # builds and publishes the module-level database
    database = _db
    return database.guess_type(url, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 292 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 293 | |
def guess_all_extensions(type, strict=True):
    """Guess every filename extension for a MIME type.

    Delegates to the module-wide database, initializing it first if
    necessary.

    Returns a list of extension strings, each including the leading
    dot ('.').  The extensions are not guaranteed to have been
    associated with any particular data stream, but each would be
    mapped to the MIME type `type' by guess_type().  The list is empty
    when nothing is known about `type'.

    When `strict' is false, commonly found but non-standard types are
    considered as well.
    """
    if _db is None:
        init()  # builds and publishes the module-level database
    database = _db
    return database.guess_all_extensions(type, strict)
Walter Dörwald | 5ccaf8f | 2002-09-06 16:15:58 +0000 | [diff] [blame] | 310 | |
def guess_extension(type, strict=True):
    """Guess one filename extension for a MIME type.

    Delegates to the module-wide database, initializing it first if
    necessary.

    Returns an extension string including the leading dot ('.'), or
    None when no extension can be guessed for `type'.  The extension
    is not guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().

    When `strict' is false, commonly found but non-standard types are
    considered as well.
    """
    if _db is None:
        init()  # builds and publishes the module-level database
    database = _db
    return database.guess_extension(type, strict)
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 326 | |
def add_type(type, ext, strict=True):
    """Register a mapping between a MIME type and an extension.

    Delegates to the module-wide database, initializing it first if
    necessary.

    An extension that is already known gets its type replaced by the
    new one.  A type that is already known gains the extension in its
    list of known extensions.

    With `strict' true the mapping goes into the table of standard
    types, otherwise into the table of non-standard types.
    """
    if _db is None:
        init()  # builds and publishes the module-level database
    database = _db
    return database.add_type(type, ext, strict)
Walter Dörwald | 5ccaf8f | 2002-09-06 16:15:58 +0000 | [diff] [blame] | 342 | |
Fred Drake | 5109ffd | 1998-05-18 16:27:20 +0000 | [diff] [blame] | 343 | |
def init(files=None):
    """Build the module-wide MIME database and publish its tables.

    A fresh MimeTypes instance is created and the entries of `files'
    that are existing regular files are read into it.  When `files' is
    None, the Windows registry is read first (if winreg is available)
    and knownfiles is used as the file list.  Afterwards the module
    globals encodings_map, suffix_map, types_map and common_types are
    rebound to the new instance's tables.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    # Raise the flag up front so that MimeTypes.__init__() doesn't call
    # us again.
    inited = True
    database = MimeTypes()
    if files is None:
        if _winreg:
            database.read_windows_registry()
        files = knownfiles
    # filter() is lazy, so each path is still checked right before it is read.
    for path in filter(os.path.isfile, files):
        database.read(path)
    encodings_map = database.encodings_map
    suffix_map = database.suffix_map
    # The instance keeps its tables as a (non-strict, strict) pair.
    common_types, types_map = database.types_map
    # Make the DB a global variable now that it is fully initialized
    _db = database
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 362 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 363 | |
def read_mime_types(file):
    """Parse one mime.types-format file and return its type mapping.

    `file' is a pathname.  Returns None when the file cannot be opened;
    otherwise returns the dictionary mapping suffixes to standard
    (strict) MIME types of a MimeTypes datastore into which the file
    has been read.

    The file is decoded as UTF-8, the same encoding MimeTypes.read()
    uses, so the result does not depend on the locale.
    """
    try:
        f = open(file, encoding='utf-8')
    except OSError:
        return None
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
Fred Drake | eeee4ec | 2001-08-03 21:01:44 +0000 | [diff] [blame] | 373 | |
Guido van Rossum | ac8a9f3 | 1997-09-30 19:05:50 +0000 | [diff] [blame] | 374 | |
def _default_mime_types():
    """Install the built-in default tables as module globals.

    Rebinds suffix_map, encodings_map, types_map and common_types to
    freshly built dictionaries.
    """
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    # Shorthand suffixes and the compound suffix each one stands for.
    suffix_map = {
        '.svgz': '.svg.gz',
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
    }

    # Suffixes that denote an encoding rather than a type.
    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
    }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    types_map = {
        '.a': 'application/octet-stream',
        '.ai': 'application/postscript',
        '.aif': 'audio/x-aiff',
        '.aifc': 'audio/x-aiff',
        '.aiff': 'audio/x-aiff',
        '.au': 'audio/basic',
        '.avi': 'video/x-msvideo',
        '.bat': 'text/plain',
        '.bcpio': 'application/x-bcpio',
        '.bin': 'application/octet-stream',
        '.bmp': 'image/x-ms-bmp',
        '.c': 'text/plain',
        # '.cdf' is also seen as 'application/x-cdf'; a dict key can hold
        # only one value, and this is the one that was always in effect.
        '.cdf': 'application/x-netcdf',
        '.cpio': 'application/x-cpio',
        '.csh': 'application/x-csh',
        '.css': 'text/css',
        '.csv': 'text/csv',
        '.dll': 'application/octet-stream',
        '.doc': 'application/msword',
        '.dot': 'application/msword',
        '.dvi': 'application/x-dvi',
        '.eml': 'message/rfc822',
        '.eps': 'application/postscript',
        '.etx': 'text/x-setext',
        '.exe': 'application/octet-stream',
        '.gif': 'image/gif',
        '.gtar': 'application/x-gtar',
        '.h': 'text/plain',
        '.hdf': 'application/x-hdf',
        '.htm': 'text/html',
        '.html': 'text/html',
        '.ico': 'image/vnd.microsoft.icon',
        '.ief': 'image/ief',
        '.jpe': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.jpg': 'image/jpeg',
        '.js': 'application/javascript',
        '.ksh': 'text/plain',
        '.latex': 'application/x-latex',
        '.m1v': 'video/mpeg',
        '.m3u': 'application/vnd.apple.mpegurl',
        '.m3u8': 'application/vnd.apple.mpegurl',
        '.man': 'application/x-troff-man',
        '.me': 'application/x-troff-me',
        '.mht': 'message/rfc822',
        '.mhtml': 'message/rfc822',
        '.mif': 'application/x-mif',
        '.mov': 'video/quicktime',
        '.movie': 'video/x-sgi-movie',
        '.mp2': 'audio/mpeg',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
        '.mpa': 'video/mpeg',
        '.mpe': 'video/mpeg',
        '.mpeg': 'video/mpeg',
        '.mpg': 'video/mpeg',
        '.ms': 'application/x-troff-ms',
        '.nc': 'application/x-netcdf',
        '.nws': 'message/rfc822',
        '.o': 'application/octet-stream',
        '.obj': 'application/octet-stream',
        '.oda': 'application/oda',
        '.p12': 'application/x-pkcs12',
        '.p7c': 'application/pkcs7-mime',
        '.pbm': 'image/x-portable-bitmap',
        '.pdf': 'application/pdf',
        '.pfx': 'application/x-pkcs12',
        '.pgm': 'image/x-portable-graymap',
        '.pl': 'text/plain',
        '.png': 'image/png',
        '.pnm': 'image/x-portable-anymap',
        '.pot': 'application/vnd.ms-powerpoint',
        '.ppa': 'application/vnd.ms-powerpoint',
        '.ppm': 'image/x-portable-pixmap',
        '.pps': 'application/vnd.ms-powerpoint',
        '.ppt': 'application/vnd.ms-powerpoint',
        '.ps': 'application/postscript',
        '.pwz': 'application/vnd.ms-powerpoint',
        '.py': 'text/x-python',
        '.pyc': 'application/x-python-code',
        '.pyo': 'application/x-python-code',
        '.qt': 'video/quicktime',
        '.ra': 'audio/x-pn-realaudio',
        '.ram': 'application/x-pn-realaudio',
        '.ras': 'image/x-cmu-raster',
        '.rdf': 'application/xml',
        '.rgb': 'image/x-rgb',
        '.roff': 'application/x-troff',
        '.rtx': 'text/richtext',
        '.sgm': 'text/x-sgml',
        '.sgml': 'text/x-sgml',
        '.sh': 'application/x-sh',
        '.shar': 'application/x-shar',
        '.snd': 'audio/basic',
        '.so': 'application/octet-stream',
        '.src': 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc': 'application/x-sv4crc',
        '.svg': 'image/svg+xml',
        '.swf': 'application/x-shockwave-flash',
        '.t': 'application/x-troff',
        '.tar': 'application/x-tar',
        '.tcl': 'application/x-tcl',
        '.tex': 'application/x-tex',
        '.texi': 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif': 'image/tiff',
        '.tiff': 'image/tiff',
        '.tr': 'application/x-troff',
        '.tsv': 'text/tab-separated-values',
        '.txt': 'text/plain',
        '.ustar': 'application/x-ustar',
        '.vcf': 'text/x-vcard',
        '.wav': 'audio/x-wav',
        '.webm': 'video/webm',
        '.wiz': 'application/msword',
        '.wsdl': 'application/xml',
        '.xbm': 'image/x-xbitmap',
        '.xlb': 'application/vnd.ms-excel',
        # '.xls' is also seen as 'application/excel'; a dict key can hold
        # only one value, and this is the one that was always in effect.
        '.xls': 'application/vnd.ms-excel',
        '.xml': 'text/xml',
        '.xpdl': 'application/xml',
        '.xpm': 'image/x-xpixmap',
        '.xsl': 'application/xml',
        '.xwd': 'image/x-xwindowdump',
        '.zip': 'application/zip',
    }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=False is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg': 'image/jpg',
        '.mid': 'audio/midi',
        '.midi': 'audio/midi',
        '.pct': 'image/pict',
        '.pic': 'image/pict',
        '.pict': 'image/pict',
        '.rtf': 'application/rtf',
        '.xul': 'text/xul',
    }
| 549 | |
| 550 | |
# Populate suffix_map, encodings_map, types_map and common_types at import
# time, so they exist before init() or MimeTypes() is first used.
_default_mime_types()
Barry Warsaw | 107771a | 2001-10-25 21:49:18 +0000 | [diff] [blame] | 552 | |
| 553 | |
if __name__ == '__main__':
    # Command-line front end: guess a type (or, with -e, an extension)
    # for every positional argument.
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
--help / -h -- print this message and exit
--lenient / -l -- additionally search of some common, but non-standard
types.
--extension / -e -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        # Print the help text, then the optional message, and exit with `code'.
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print("I don't know anything about type", gtype)
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print('type:', guess, 'encoding:', encoding)
            else:
                print("I don't know anything about type", gtype)