blob: 9a886803dc1e2a901b99eff00a92ce2b89e7a04d [file] [log] [blame]
Guido van Rossumac8a9f31997-09-30 19:05:50 +00001"""Guess the MIME type of a file.
2
Fred Drake5109ffd1998-05-18 16:27:20 +00003This module defines two useful functions:
Guido van Rossumac8a9f31997-09-30 19:05:50 +00004
Georg Brandlcdf8b342009-06-08 09:07:34 +00005guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
Guido van Rossumac8a9f31997-09-30 19:05:50 +00006
Georg Brandlcdf8b342009-06-08 09:07:34 +00007guess_extension(type, strict=True) -- guess the extension for a given MIME type.
Fred Drake5109ffd1998-05-18 16:27:20 +00008
Guido van Rossumac8a9f31997-09-30 19:05:50 +00009It also contains the following, for tuning the behavior:
10
11Data:
12
13knownfiles -- list of files to parse
14inited -- flag set when init() has been called
Fred Drakeeeee4ec2001-08-03 21:01:44 +000015suffix_map -- dictionary mapping suffixes to suffixes
Guido van Rossumac8a9f31997-09-30 19:05:50 +000016encodings_map -- dictionary mapping suffixes to encodings
17types_map -- dictionary mapping suffixes to types
18
19Functions:
20
Antoine Pitroub8108e22009-11-15 14:25:16 +000021init([files]) -- parse a list of files, default knownfiles (on Windows, the
22 default values are taken from the registry)
Guido van Rossumac8a9f31997-09-30 19:05:50 +000023read_mime_types(file) -- parse one file, return a dictionary or None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000024"""
25
Fred Drakeeeee4ec2001-08-03 21:01:44 +000026import os
Antoine Pitroub8108e22009-11-15 14:25:16 +000027import sys
Guido van Rossumac8a9f31997-09-30 19:05:50 +000028import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000029import urllib.parse
Antoine Pitroub8108e22009-11-15 14:25:16 +000030try:
31 import winreg as _winreg
Brett Cannoncd171c82013-07-04 17:43:24 -040032except ImportError:
Antoine Pitroub8108e22009-11-15 14:25:16 +000033 _winreg = None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000034
# Names exported by ``from mimetypes import *``.
__all__ = [
    "knownfiles",
    "inited",
    "MimeTypes",
    "guess_type",
    "guess_all_extensions",
    "guess_extension",
    "add_type",
    "init",
    "read_mime_types",
    "suffix_map",
    "encodings_map",
    "types_map",
    "common_types",
]
Skip Montanaro03d90142001-01-25 15:29:22 +000041
# mime.types-style files that init() parses when it is called without an
# explicit file list.  Entries that do not exist on disk are skipped there.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",  # Mac OS X
    "/etc/httpd/conf/mime.types",  # Apache
    "/etc/apache/mime.types",  # Apache 1
    "/etc/apache2/mime.types",  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",  # Apache 1.2
    "/usr/local/etc/mime.types",  # Apache 1.3
]

# Flag flipped to True by init(); MimeTypes.__init__() checks it.
inited = False
# Module-wide MimeTypes instance; stays None until init() has finished.
_db = None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000056
Fred Drakeeeee4ec2001-08-03 21:01:44 +000057
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.
    """

    def __init__(self, filenames=(), strict=True):
        """Build a datastore seeded from the module-level default tables.

        The module is initialised first (via init()) if that has not
        happened yet.  Every file named in `filenames' is then read with
        read(); `strict' says whether the types found in those files are
        recorded as standard (true) or non-standard (false) types.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        # Both tuples are indexed by the strict flag:
        # index False (0) holds non-standard types, index True (1) standard.
        self.types_map = ({}, {})
        self.types_map_inv = ({}, {})
        for ext, mime_type in types_map.items():
            self.add_type(mime_type, ext, True)
        for ext, mime_type in common_types.items():
            self.add_type(mime_type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        # Split off a leading "scheme:" prefix.  This mirrors what the
        # deprecated urllib.parse.splittype() helper did: the scheme is the
        # non-empty text before the first ':' provided it contains no '/',
        # and it is lower-cased.
        head, colon, rest = url.partition(':')
        if colon and head and '/' not in head:
            scheme, url = head.lower(), rest
        else:
            scheme = None
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                type = url[:semi]
            else:
                type = url[:comma]
            if '=' in type or '/' not in type:
                type = 'text/plain'
            return type, None  # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') until none apply.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        # Standard types first: exact suffix, then lower-cased suffix.
        types_map = self.types_map[True]
        if ext in types_map:
            return types_map[ext], encoding
        elif ext.lower() in types_map:
            return types_map[ext.lower()], encoding
        elif strict:
            return None, encoding
        # Fall back to the non-standard table.
        types_map = self.types_map[False]
        if ext in types_map:
            return types_map[ext], encoding
        elif ext.lower() in types_map:
            return types_map[ext.lower()], encoding
        else:
            return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().  The
        list is empty when no extension is known for `type'; it is a new
        list on every call, so callers may modify it freely.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Work on a copy: appending the non-strict extensions to the stored
        # list would leak them into every later strict lookup.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        The file is opened as UTF-8 text and handed to readfp().

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        with open(filename, encoding='utf-8') as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs a readline() method.  Each line holds a MIME type
        followed by whitespace-separated suffixes (without the leading
        dot); a word starting with '#' begins a comment that runs to the
        end of the line.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop the first comment word and everything after it.
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(type, '.' + suff, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        Does nothing when the winreg module could not be imported.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            """Yield the subkey names of `mimedb', skipping names with NUL."""
            i = 0
            while True:
                try:
                    ctype = _winreg.EnumKey(mimedb, i)
                except OSError:
                    # EnumKey fails once the index runs past the last subkey.
                    break
                else:
                    if '\0' not in ctype:
                        yield ctype
                i += 1

        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
            for subkeyname in enum_types(hkcr):
                # Only check file extensions; skip the rest without opening
                # the key at all.
                if not subkeyname.startswith("."):
                    continue
                try:
                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
                        # raises OSError if no 'Content Type' value
                        mimetype, datatype = _winreg.QueryValueEx(
                            subkey, 'Content Type')
                        if datatype != _winreg.REG_SZ:
                            continue
                        self.add_type(mimetype, subkeyname, strict)
                except OSError:
                    continue
Antoine Pitroub8108e22009-11-15 14:25:16 +0000270
def guess_type(url, strict=True):
    """Guess the MIME type and encoding of a file from its URL.

    The result is a (type, encoding) tuple.  `type' is a string of the
    form type/subtype, suitable for a MIME Content-type header, or None
    when it cannot be guessed (missing or unknown suffix).  `encoding'
    names the program used to encode the data (e.g. compress or gzip)
    or is None when there is no encoding.  All mappings are table
    driven.  Encoding suffixes are matched case sensitively; type
    suffixes are tried case sensitively first and then case
    insensitively.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all treated
    as ".tar.gz"; this too is table driven, via the suffix_map
    dictionary.

    When the optional `strict' argument is false, a number of commonly
    found but non-standard types are considered as well.
    """
    # Build the shared database lazily on first use.
    if _db is None:
        init()
    database = _db
    return database.guess_type(url, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000292
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000293
def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  If no extension can be guessed for `type', the
    list is empty.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # Build the shared database lazily on first use.
    if _db is None:
        init()
    return _db.guess_all_extensions(type, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000310
def guess_extension(type, strict=True):
    """Guess a single filename extension for a MIME type.

    The result is a string holding the extension, leading dot ('.')
    included, or None when no extension can be guessed for `type'.
    The extension is not guaranteed to have been associated with any
    particular data stream, but guess_type() would map it to the MIME
    type `type'.

    When the optional `strict' argument is false, a number of commonly
    found but non-standard types are considered as well.
    """
    # Build the shared database lazily on first use.
    if _db is None:
        init()
    database = _db
    return database.guess_extension(type, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000326
def add_type(type, ext, strict=True):
    """Register a mapping between a MIME type and an extension.

    If the extension is already known, its type is replaced by the new
    one.  If the type is already known, the extension is added to the
    list of extensions known for it.

    With a true `strict' the mapping goes into the list of standard
    types, otherwise into the list of non-standard types.
    """
    # Build the shared database lazily on first use.
    if _db is None:
        init()
    database = _db
    return database.add_type(type, ext, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000342
Fred Drake5109ffd1998-05-18 16:27:20 +0000343
def init(files=None):
    """Build the module-wide MIME database and publish its tables.

    A fresh MimeTypes instance is created.  When `files' is None, the
    Windows registry is read (if the winreg module is available) and
    the paths in `knownfiles' are used; otherwise only the given paths
    are used.  Paths that are not existing files are skipped.

    Afterwards the module globals suffix_map, encodings_map, types_map
    and common_types refer to the new instance's tables, and the
    instance itself becomes the database used by the module-level
    functions.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    # Raise the flag up front so MimeTypes.__init__() doesn't call us again.
    inited = True
    database = MimeTypes()
    if files is None:
        if _winreg:
            database.read_windows_registry()
        files = knownfiles
    for path in files:
        if not os.path.isfile(path):
            continue
        database.read(path)
    suffix_map = database.suffix_map
    encodings_map = database.encodings_map
    common_types = database.types_map[False]
    types_map = database.types_map[True]
    # Only expose the database globally once it is fully populated.
    _db = database
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000362
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000363
def read_mime_types(file):
    """Parse one mime.types-format file and return the resulting mapping.

    `file' is the path of the file to read.  It is opened as UTF-8
    text, matching MimeTypes.read().  The return value is a dictionary
    mapping extensions (with the leading dot) to MIME types, taken from
    a new MimeTypes instance after the file has been read into it, so
    it also contains the module's default standard types.  None is
    returned when the file cannot be opened.
    """
    try:
        f = open(file, encoding='utf-8')
    except OSError:
        return None
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000373
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000374
def _default_mime_types():
    """Install the built-in default tables as module globals.

    Sets suffix_map, encodings_map, types_map and common_types to
    freshly built dictionaries.
    """
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    # Shorthand suffixes and the compound suffix each one stands for.
    suffix_map = {
        '.svgz': '.svg.gz',
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
    }

    # Suffixes that denote an encoding rather than a content type.
    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
    }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    # Each suffix appears exactly once: a repeated key in a dict literal
    # silently discards the earlier value.
    types_map = {
        '.a': 'application/octet-stream',
        '.ai': 'application/postscript',
        '.aif': 'audio/x-aiff',
        '.aifc': 'audio/x-aiff',
        '.aiff': 'audio/x-aiff',
        '.au': 'audio/basic',
        '.avi': 'video/x-msvideo',
        '.bat': 'text/plain',
        '.bcpio': 'application/x-bcpio',
        '.bin': 'application/octet-stream',
        '.bmp': 'image/x-ms-bmp',
        '.c': 'text/plain',
        '.cdf': 'application/x-netcdf',
        '.cpio': 'application/x-cpio',
        '.csh': 'application/x-csh',
        '.css': 'text/css',
        '.csv': 'text/csv',
        '.dll': 'application/octet-stream',
        '.doc': 'application/msword',
        '.dot': 'application/msword',
        '.dvi': 'application/x-dvi',
        '.eml': 'message/rfc822',
        '.eps': 'application/postscript',
        '.etx': 'text/x-setext',
        '.exe': 'application/octet-stream',
        '.gif': 'image/gif',
        '.gtar': 'application/x-gtar',
        '.h': 'text/plain',
        '.hdf': 'application/x-hdf',
        '.htm': 'text/html',
        '.html': 'text/html',
        '.ico': 'image/vnd.microsoft.icon',
        '.ief': 'image/ief',
        '.jpe': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.jpg': 'image/jpeg',
        '.js': 'application/javascript',
        '.ksh': 'text/plain',
        '.latex': 'application/x-latex',
        '.m1v': 'video/mpeg',
        '.m3u': 'application/vnd.apple.mpegurl',
        '.m3u8': 'application/vnd.apple.mpegurl',
        '.man': 'application/x-troff-man',
        '.me': 'application/x-troff-me',
        '.mht': 'message/rfc822',
        '.mhtml': 'message/rfc822',
        '.mif': 'application/x-mif',
        '.mov': 'video/quicktime',
        '.movie': 'video/x-sgi-movie',
        '.mp2': 'audio/mpeg',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
        '.mpa': 'video/mpeg',
        '.mpe': 'video/mpeg',
        '.mpeg': 'video/mpeg',
        '.mpg': 'video/mpeg',
        '.ms': 'application/x-troff-ms',
        '.nc': 'application/x-netcdf',
        '.nws': 'message/rfc822',
        '.o': 'application/octet-stream',
        '.obj': 'application/octet-stream',
        '.oda': 'application/oda',
        '.p12': 'application/x-pkcs12',
        '.p7c': 'application/pkcs7-mime',
        '.pbm': 'image/x-portable-bitmap',
        '.pdf': 'application/pdf',
        '.pfx': 'application/x-pkcs12',
        '.pgm': 'image/x-portable-graymap',
        '.pl': 'text/plain',
        '.png': 'image/png',
        '.pnm': 'image/x-portable-anymap',
        '.pot': 'application/vnd.ms-powerpoint',
        '.ppa': 'application/vnd.ms-powerpoint',
        '.ppm': 'image/x-portable-pixmap',
        '.pps': 'application/vnd.ms-powerpoint',
        '.ppt': 'application/vnd.ms-powerpoint',
        '.ps': 'application/postscript',
        '.pwz': 'application/vnd.ms-powerpoint',
        '.py': 'text/x-python',
        '.pyc': 'application/x-python-code',
        '.pyo': 'application/x-python-code',
        '.qt': 'video/quicktime',
        '.ra': 'audio/x-pn-realaudio',
        '.ram': 'application/x-pn-realaudio',
        '.ras': 'image/x-cmu-raster',
        '.rdf': 'application/xml',
        '.rgb': 'image/x-rgb',
        '.roff': 'application/x-troff',
        '.rtx': 'text/richtext',
        '.sgm': 'text/x-sgml',
        '.sgml': 'text/x-sgml',
        '.sh': 'application/x-sh',
        '.shar': 'application/x-shar',
        '.snd': 'audio/basic',
        '.so': 'application/octet-stream',
        '.src': 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc': 'application/x-sv4crc',
        '.svg': 'image/svg+xml',
        '.swf': 'application/x-shockwave-flash',
        '.t': 'application/x-troff',
        '.tar': 'application/x-tar',
        '.tcl': 'application/x-tcl',
        '.tex': 'application/x-tex',
        '.texi': 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif': 'image/tiff',
        '.tiff': 'image/tiff',
        '.tr': 'application/x-troff',
        '.tsv': 'text/tab-separated-values',
        '.txt': 'text/plain',
        '.ustar': 'application/x-ustar',
        '.vcf': 'text/x-vcard',
        '.wav': 'audio/x-wav',
        '.webm': 'video/webm',
        '.wiz': 'application/msword',
        '.wsdl': 'application/xml',
        '.xbm': 'image/x-xbitmap',
        '.xlb': 'application/vnd.ms-excel',
        '.xls': 'application/vnd.ms-excel',
        '.xml': 'text/xml',
        '.xpdl': 'application/xml',
        '.xpm': 'image/x-xpixmap',
        '.xsl': 'application/xml',
        '.xwd': 'image/x-xwindowdump',
        '.zip': 'application/zip',
    }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=False is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg': 'image/jpg',
        '.mid': 'audio/midi',
        '.midi': 'audio/midi',
        '.pct': 'image/pict',
        '.pic': 'image/pict',
        '.pict': 'image/pict',
        '.rtf': 'application/rtf',
        '.xul': 'text/xul',
    }


_default_mime_types()
Barry Warsaw107771a2001-10-25 21:49:18 +0000552
553
if __name__ == '__main__':
    # Command-line front end: guess a type (or, with -e, an extension)
    # for every positional argument.
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text, then `msg' if given, and exit with `code'."""
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hle', ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    # Defaults: strict lookups, guessing types rather than extensions.
    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print("I don't know anything about type", gtype)
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print('type:', guess, 'encoding:', encoding)
            else:
                print("I don't know anything about type", gtype)