blob: 0be76ad4f754c8abab0328fb4016c37ef7207600 [file] [log] [blame]
Guido van Rossumac8a9f31997-09-30 19:05:50 +00001"""Guess the MIME type of a file.
2
Fred Drake5109ffd1998-05-18 16:27:20 +00003This module defines two useful functions:
Guido van Rossumac8a9f31997-09-30 19:05:50 +00004
Georg Brandlcdf8b342009-06-08 09:07:34 +00005guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
Guido van Rossumac8a9f31997-09-30 19:05:50 +00006
Georg Brandlcdf8b342009-06-08 09:07:34 +00007guess_extension(type, strict=True) -- guess the extension for a given MIME type.
Fred Drake5109ffd1998-05-18 16:27:20 +00008
Guido van Rossumac8a9f31997-09-30 19:05:50 +00009It also contains the following, for tuning the behavior:
10
11Data:
12
13knownfiles -- list of files to parse
14inited -- flag set when init() has been called
Fred Drakeeeee4ec2001-08-03 21:01:44 +000015suffix_map -- dictionary mapping suffixes to suffixes
Guido van Rossumac8a9f31997-09-30 19:05:50 +000016encodings_map -- dictionary mapping suffixes to encodings
17types_map -- dictionary mapping suffixes to types
18
19Functions:
20
Antoine Pitroub8108e22009-11-15 14:25:16 +000021init([files]) -- parse a list of files, default knownfiles (on Windows, the
22 default values are taken from the registry)
Guido van Rossumac8a9f31997-09-30 19:05:50 +000023read_mime_types(file) -- parse one file, return a dictionary or None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000024"""
25
Fred Drakeeeee4ec2001-08-03 21:01:44 +000026import os
Antoine Pitroub8108e22009-11-15 14:25:16 +000027import sys
Guido van Rossumac8a9f31997-09-30 19:05:50 +000028import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000029import urllib.parse
Antoine Pitroub8108e22009-11-15 14:25:16 +000030try:
31 import winreg as _winreg
Brett Cannoncd171c82013-07-04 17:43:24 -040032except ImportError:
Antoine Pitroub8108e22009-11-15 14:25:16 +000033 _winreg = None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000034
# Public API of the module.  Besides the helper functions this lists the
# MimeTypes class and the module-level data objects that the module
# docstring documents, so that ``from mimetypes import *`` and
# documentation tools see the complete public surface.
__all__ = [
    "knownfiles", "inited", "MimeTypes",
    "guess_type", "guess_all_extensions", "guess_extension",
    "add_type", "init", "read_mime_types",
    "suffix_map", "encodings_map", "types_map", "common_types",
]
Skip Montanaro03d90142001-01-25 15:29:22 +000039
# Candidate mime.types files that init() parses, in this order, when it is
# called without an explicit file list.  Files that do not exist are skipped.
# NOTE: "/usr/local/etc/httpd/conf/mime.types" appears twice.  The list is
# kept exactly as-is because init() reads files in list order and later
# files override earlier ones, so dropping either entry could change which
# mapping wins.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
]

# Set to True as soon as init() starts running.
inited = False
# Module-wide MimeTypes instance; stays None until init() has completed.
_db = None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000054
Fred Drakeeeee4ec2001-08-03 21:01:44 +000055
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Instance attributes:

    encodings_map -- dict mapping suffixes to encoding names
    suffix_map    -- dict mapping shorthand suffixes to full suffixes
    types_map     -- pair of dicts (non-strict, strict), suffix -> type
    types_map_inv -- pair of dicts (non-strict, strict), type -> [suffixes]
    """

    def __init__(self, filenames=(), strict=True):
        """Seed the store from the module-level tables, then read `filenames'.

        Each name in `filenames' is parsed with read(); `strict' selects
        whether the types found there are registered as standard or
        non-standard.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {})  # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for ext, mime in types_map.items():
            self.add_type(mime, ext, True)
        for ext, mime in common_types.items():
            self.add_type(mime, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # `strict' doubles as the index into the (non-strict, strict) pairs.
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        # Split off a leading "scheme:" the same way the deprecated
        # urllib.parse.splittype() did: the scheme is the non-empty text
        # before the first ':' provided it contains no '/'; it is
        # lower-cased and the remainder becomes the URL to inspect.
        prefix, colon, remainder = url.partition(':')
        if colon and prefix and '/' not in prefix:
            scheme, url = prefix.lower(), remainder
        else:
            scheme = None
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            mime = url[:semi] if semi >= 0 else url[:comma]
            if '=' in mime or '/' not in mime:
                mime = 'text/plain'
            return mime, None  # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. .tgz -> .tar.gz) until none is left.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        # The standard table is always consulted first; the non-standard
        # table is only a fallback when strict is false.
        if strict:
            tables = (self.types_map[True],)
        else:
            tables = (self.types_map[True], self.types_map[False])
        ext_lower = ext.lower()
        for table in tables:
            if ext in table:
                return table[ext], encoding
            if ext_lower in table:
                return table[ext_lower], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().
        An empty list is returned when no extension is known.

        The returned list is a fresh copy; changing it does not affect
        the datastore.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Copy: appending the non-standard extensions below must not leak
        # into the list stored in the standard inverse map.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        The file is opened as UTF-8 text.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        with open(filename, encoding='utf-8') as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs a readline() method returning text lines.  On each
        line, the first word is the MIME type and the remaining words are
        suffixes (without the dot); a word starting with '#' begins a
        comment that runs to the end of the line.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop everything from the first comment word onwards.
            for index, word in enumerate(words):
                if word.startswith('#'):
                    del words[index:]
                    break
            if not words:
                continue
            mime, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(mime, '.' + suff, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        Does nothing when the winreg module is unavailable.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            # Yield subkey names of `mimedb' until EnumKey reports the end
            # (OSError); names containing a NUL character are skipped.
            i = 0
            while True:
                try:
                    ctype = _winreg.EnumKey(mimedb, i)
                except OSError:
                    break
                else:
                    if '\0' not in ctype:
                        yield ctype
                i += 1

        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
            for subkeyname in enum_types(hkcr):
                # Only check file extensions; decided before opening the
                # key so unrelated keys are never opened.
                if not subkeyname.startswith("."):
                    continue
                try:
                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
                        # raises OSError if no 'Content Type' value
                        mimetype, datatype = _winreg.QueryValueEx(
                            subkey, 'Content Type')
                        if datatype != _winreg.REG_SZ:
                            continue
                        self.add_type(mimetype, subkeyname, strict)
                except OSError:
                    continue
Antoine Pitroub8108e22009-11-15 14:25:16 +0000268
def guess_type(url, strict=True):
    """Guess the MIME type and encoding of a file from its URL or name.

    Returns a tuple (type, encoding).  `type' is a string of the form
    type/subtype, suitable for a MIME Content-type header, or None when
    the suffix is missing or unknown.  `encoding' is the name of the
    program used to encode the file (e.g. compress or gzip), or None if
    the file is not encoded.  All lookups are table driven: encoding
    suffixes are matched case sensitively, type suffixes first case
    sensitively and then case insensitively.

    The suffixes .tgz, .taz and .tz (case sensitive!) are treated as
    ".tar.gz"; this is table driven as well, through suffix_map.

    When the optional `strict' argument is false, a number of commonly
    found but non-standard types are recognised too.

    The shared module-level database is created on first use.
    """
    if _db is None:
        # First call: build the shared database.
        init()
    return _db.guess_type(url, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000290
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000291
def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  If no extension can be guessed for `type', an empty
    list is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.

    The shared module-level database is created on first use.
    """
    if _db is None:
        init()
    return _db.guess_all_extensions(type, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000308
def guess_extension(type, strict=True):
    """Guess a filename extension for the MIME type `type'.

    Returns one extension as a string, leading dot ('.') included, or
    None when no extension is known for `type'.  The result is not tied
    to any particular data stream; it is merely an extension that
    guess_type() would map back to `type'.

    When the optional `strict' argument is false, a number of commonly
    found but non-standard types are considered too.

    The shared module-level database is created on first use.
    """
    if _db is None:
        # First call: build the shared database.
        init()
    return _db.guess_extension(type, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000324
def add_type(type, ext, strict=True):
    """Register a mapping between MIME type `type' and extension `ext'.

    An extension that is already known gets its type replaced by the
    new one.  A type that is already known gets the extension added to
    its list of known extensions.

    With a true `strict' the mapping goes into the table of standard
    types, otherwise into the table of non-standard types.

    The shared module-level database is created on first use.
    """
    if _db is None:
        # First call: build the shared database.
        init()
    return _db.add_type(type, ext, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000340
Fred Drake5109ffd1998-05-18 16:27:20 +0000341
def init(files=None):
    """(Re)build the module-level MIME database.

    A new MimeTypes instance is created and fed every existing regular
    file named in `files'.  When `files' is None the Windows registry is
    consulted first (if winreg is available) and the knownfiles list is
    used.  Afterwards the module globals encodings_map, suffix_map,
    types_map and common_types are rebound to the new instance's tables
    and the instance becomes the shared database used by the module-level
    helper functions.
    """
    global suffix_map, types_map, encodings_map, common_types, inited, _db
    inited = True    # so that MimeTypes.__init__() doesn't call us again
    db = MimeTypes()
    if files is None:
        if _winreg:
            db.read_windows_registry()
        files = knownfiles
    for path in files:
        if not os.path.isfile(path):
            continue
        db.read(path)
    encodings_map, suffix_map = db.encodings_map, db.suffix_map
    common_types, types_map = db.types_map
    # Make the DB a global variable now that it is fully initialized
    _db = db
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000360
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000361
def read_mime_types(file):
    """Parse one mime.types-format file and return its standard type map.

    `file' is the pathname of the file to read.  It is opened as UTF-8
    text, the same encoding MimeTypes.read() uses.  Returns None when the
    file cannot be opened; otherwise returns a dictionary mapping
    suffixes (with leading dot) to MIME types, taken from a fresh
    MimeTypes instance that has additionally parsed `file'.
    """
    try:
        f = open(file, encoding='utf-8')
    except OSError:
        return None
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000371
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000372
def _default_mime_types():
    """(Re)create the built-in module-level lookup tables.

    Rebinds the globals suffix_map, encodings_map, types_map and
    common_types to fresh dictionaries holding the default mappings.
    """
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    # Shorthand suffixes and the full suffix chain each one stands for.
    suffix_map = {
        '.svgz': '.svg.gz',
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
    }

    # Suffixes that denote an encoding rather than a content type.
    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
    }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    # Each suffix appears exactly once: in a dict literal a repeated key
    # silently keeps only the last value, so earlier alternatives
    # ('application/x-cdf' for .cdf, 'application/excel' for .xls) never
    # took effect and are not listed.
    types_map = {
        '.a': 'application/octet-stream',
        '.ai': 'application/postscript',
        '.aif': 'audio/x-aiff',
        '.aifc': 'audio/x-aiff',
        '.aiff': 'audio/x-aiff',
        '.au': 'audio/basic',
        '.avi': 'video/x-msvideo',
        '.bat': 'text/plain',
        '.bcpio': 'application/x-bcpio',
        '.bin': 'application/octet-stream',
        '.bmp': 'image/x-ms-bmp',
        '.c': 'text/plain',
        '.cdf': 'application/x-netcdf',
        '.cpio': 'application/x-cpio',
        '.csh': 'application/x-csh',
        '.css': 'text/css',
        '.csv': 'text/csv',
        '.dll': 'application/octet-stream',
        '.doc': 'application/msword',
        '.dot': 'application/msword',
        '.dvi': 'application/x-dvi',
        '.eml': 'message/rfc822',
        '.eps': 'application/postscript',
        '.etx': 'text/x-setext',
        '.exe': 'application/octet-stream',
        '.gif': 'image/gif',
        '.gtar': 'application/x-gtar',
        '.h': 'text/plain',
        '.hdf': 'application/x-hdf',
        '.htm': 'text/html',
        '.html': 'text/html',
        '.ico': 'image/vnd.microsoft.icon',
        '.ief': 'image/ief',
        '.jpe': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.jpg': 'image/jpeg',
        '.js': 'application/javascript',
        '.ksh': 'text/plain',
        '.latex': 'application/x-latex',
        '.m1v': 'video/mpeg',
        '.m3u': 'application/vnd.apple.mpegurl',
        '.m3u8': 'application/vnd.apple.mpegurl',
        '.man': 'application/x-troff-man',
        '.me': 'application/x-troff-me',
        '.mht': 'message/rfc822',
        '.mhtml': 'message/rfc822',
        '.mif': 'application/x-mif',
        '.mov': 'video/quicktime',
        '.movie': 'video/x-sgi-movie',
        '.mp2': 'audio/mpeg',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
        '.mpa': 'video/mpeg',
        '.mpe': 'video/mpeg',
        '.mpeg': 'video/mpeg',
        '.mpg': 'video/mpeg',
        '.ms': 'application/x-troff-ms',
        '.nc': 'application/x-netcdf',
        '.nws': 'message/rfc822',
        '.o': 'application/octet-stream',
        '.obj': 'application/octet-stream',
        '.oda': 'application/oda',
        '.p12': 'application/x-pkcs12',
        '.p7c': 'application/pkcs7-mime',
        '.pbm': 'image/x-portable-bitmap',
        '.pdf': 'application/pdf',
        '.pfx': 'application/x-pkcs12',
        '.pgm': 'image/x-portable-graymap',
        '.pl': 'text/plain',
        '.png': 'image/png',
        '.pnm': 'image/x-portable-anymap',
        '.pot': 'application/vnd.ms-powerpoint',
        '.ppa': 'application/vnd.ms-powerpoint',
        '.ppm': 'image/x-portable-pixmap',
        '.pps': 'application/vnd.ms-powerpoint',
        '.ppt': 'application/vnd.ms-powerpoint',
        '.ps': 'application/postscript',
        '.pwz': 'application/vnd.ms-powerpoint',
        '.py': 'text/x-python',
        '.pyc': 'application/x-python-code',
        '.pyo': 'application/x-python-code',
        '.qt': 'video/quicktime',
        '.ra': 'audio/x-pn-realaudio',
        '.ram': 'application/x-pn-realaudio',
        '.ras': 'image/x-cmu-raster',
        '.rdf': 'application/xml',
        '.rgb': 'image/x-rgb',
        '.roff': 'application/x-troff',
        '.rtx': 'text/richtext',
        '.sgm': 'text/x-sgml',
        '.sgml': 'text/x-sgml',
        '.sh': 'application/x-sh',
        '.shar': 'application/x-shar',
        '.snd': 'audio/basic',
        '.so': 'application/octet-stream',
        '.src': 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc': 'application/x-sv4crc',
        '.svg': 'image/svg+xml',
        '.swf': 'application/x-shockwave-flash',
        '.t': 'application/x-troff',
        '.tar': 'application/x-tar',
        '.tcl': 'application/x-tcl',
        '.tex': 'application/x-tex',
        '.texi': 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif': 'image/tiff',
        '.tiff': 'image/tiff',
        '.tr': 'application/x-troff',
        '.tsv': 'text/tab-separated-values',
        '.txt': 'text/plain',
        '.ustar': 'application/x-ustar',
        '.vcf': 'text/x-vcard',
        '.wav': 'audio/x-wav',
        '.webm': 'video/webm',
        '.wiz': 'application/msword',
        '.wsdl': 'application/xml',
        '.xbm': 'image/x-xbitmap',
        '.xlb': 'application/vnd.ms-excel',
        '.xls': 'application/vnd.ms-excel',
        '.xml': 'text/xml',
        '.xpdl': 'application/xml',
        '.xpm': 'image/x-xpixmap',
        '.xsl': 'application/xml',
        '.xwd': 'image/x-xwindowdump',
        '.zip': 'application/zip',
    }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if a false `strict' flag is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg': 'image/jpg',
        '.mid': 'audio/midi',
        '.midi': 'audio/midi',
        '.pct': 'image/pict',
        '.pic': 'image/pict',
        '.pict': 'image/pict',
        '.rtf': 'application/rtf',
        '.xul': 'text/xul',
    }


# Populate the module-level tables at import time.
_default_mime_types()
Barry Warsaw107771a2001-10-25 21:49:18 +0000550
551
if __name__ == '__main__':
    # Command-line front end: guess the type (or, with -e, the extension)
    # for every positional argument and print the result.
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        # Print the help text, then the optional message, and exit with `code'.
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hle', ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = True
    extension = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = False
        elif opt in ('-e', '--extension'):
            extension = True

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print("I don't know anything about type", gtype)
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print('type:', guess, 'encoding:', encoding)
            else:
                print("I don't know anything about type", gtype)