blob: 01a16fdf9aa1b35472a4e0eede54765efec0e5d7 [file] [log] [blame]
Guido van Rossumac8a9f31997-09-30 19:05:50 +00001"""Guess the MIME type of a file.
2
Fred Drake5109ffd1998-05-18 16:27:20 +00003This module defines two useful functions:
Guido van Rossumac8a9f31997-09-30 19:05:50 +00004
Georg Brandlcdf8b342009-06-08 09:07:34 +00005guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.
Guido van Rossumac8a9f31997-09-30 19:05:50 +00006
Georg Brandlcdf8b342009-06-08 09:07:34 +00007guess_extension(type, strict=True) -- guess the extension for a given MIME type.
Fred Drake5109ffd1998-05-18 16:27:20 +00008
Guido van Rossumac8a9f31997-09-30 19:05:50 +00009It also contains the following, for tuning the behavior:
10
11Data:
12
13knownfiles -- list of files to parse
14inited -- flag set when init() has been called
Fred Drakeeeee4ec2001-08-03 21:01:44 +000015suffix_map -- dictionary mapping suffixes to suffixes
Guido van Rossumac8a9f31997-09-30 19:05:50 +000016encodings_map -- dictionary mapping suffixes to encodings
17types_map -- dictionary mapping suffixes to types
18
19Functions:
20
Antoine Pitroub8108e22009-11-15 14:25:16 +000021init([files]) -- parse a list of files, default knownfiles (on Windows, the
22 default values are taken from the registry)
Guido van Rossumac8a9f31997-09-30 19:05:50 +000023read_mime_types(file) -- parse one file, return a dictionary or None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000024"""
25
Fred Drakeeeee4ec2001-08-03 21:01:44 +000026import os
Antoine Pitroub8108e22009-11-15 14:25:16 +000027import sys
Guido van Rossumac8a9f31997-09-30 19:05:50 +000028import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000029import urllib.parse
Antoine Pitroub8108e22009-11-15 14:25:16 +000030try:
31 import winreg as _winreg
Brett Cannoncd171c82013-07-04 17:43:24 -040032except ImportError:
Antoine Pitroub8108e22009-11-15 14:25:16 +000033 _winreg = None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000034
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +000035__all__ = [
Martin Panterf8f31212016-06-06 01:59:19 +000036 "knownfiles", "inited", "MimeTypes",
37 "guess_type", "guess_all_extensions", "guess_extension",
38 "add_type", "init", "read_mime_types",
39 "suffix_map", "encodings_map", "types_map", "common_types"
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +000040]
Skip Montanaro03d90142001-01-25 15:29:22 +000041
# Candidate locations of mime.types-format files.  init() walks this list in
# order and parses every entry that exists as a regular file.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    # NOTE: this path already appears two entries above; the repeat is kept
    # so that the order in which files are parsed stays exactly the same.
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
]

# Flag set once init() has been called.
inited = False
# Shared MimeTypes instance used by the module-level functions; it stays
# None until init() has finished building it.
_db = None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000056
Fred Drakeeeee4ec2001-08-03 21:01:44 +000057
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Instance attributes:

    encodings_map -- dictionary mapping suffixes to encodings
    suffix_map -- dictionary mapping suffixes to suffixes
    types_map -- pair of dictionaries (non-strict, strict) mapping
                 extensions to MIME types
    types_map_inv -- pair of dictionaries (non-strict, strict) mapping
                     MIME types to the list of their extensions
    """

    def __init__(self, filenames=(), strict=True):
        """Seed the datastore with the built-in tables, then read files.

        `filenames' is an iterable of mime.types-format files parsed
        after the defaults are loaded; `strict' selects the table
        (standard or non-standard) that the entries of those files are
        added to.
        """
        if not inited:
            init()
        self.encodings_map = _encodings_map_default.copy()
        self.suffix_map = _suffix_map_default.copy()
        self.types_map = ({}, {})  # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for ext, mime_type in _types_map_default.items():
            self.add_type(mime_type, ext, True)
        for ext, mime_type in _common_types_default.items():
            self.add_type(mime_type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        self.types_map[strict][ext] = type
        known_exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in known_exts:
            known_exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file which is either a URL or a path-like object.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        url = os.fspath(url)
        scheme, url = urllib.parse._splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            mime_type = url[:semi] if semi >= 0 else url[:comma]
            if '=' in mime_type or '/' not in mime_type:
                mime_type = 'text/plain'
            return mime_type, None  # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') until the
        # suffix is no longer an alias.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        # The standard table is always consulted first; the non-standard
        # one only when the caller asked for a lenient lookup.
        tables = [self.types_map[True]]
        if not strict:
            tables.append(self.types_map[False])
        for table in tables:
            if ext in table:
                return table[ext], encoding
            lowered = ext.lower()
            if lowered in table:
                return table[lowered], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().
        An empty list is returned when no extension is known.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Work on a copy: appending the non-standard extensions to the
        # stored list would leak them into later strict lookups, and would
        # hand the caller a reference to internal state.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        return extensions[0] if extensions else None

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        The file is opened as UTF-8 text.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        with open(filename, encoding='utf-8') as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs a readline() method.  Each line holds a MIME
        type followed by whitespace-separated suffixes (without the
        leading dot); a word starting with '#' begins a comment that
        runs to the end of the line.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            fields = line.split()
            # Drop the first comment word and everything after it.
            for index, field in enumerate(fields):
                if field.startswith('#'):
                    del fields[index:]
                    break
            if not fields:
                continue
            mime_type, suffixes = fields[0], fields[1:]
            for suffix in suffixes:
                self.add_type(mime_type, '.' + suffix, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        Does nothing when the winreg module is unavailable.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            # Yield subkey names until EnumKey runs past the last index.
            i = 0
            while True:
                try:
                    ctype = _winreg.EnumKey(mimedb, i)
                except OSError:
                    break
                else:
                    # Names containing an embedded NUL are skipped.
                    if '\0' not in ctype:
                        yield ctype
                i += 1

        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
            for subkeyname in enum_types(hkcr):
                # Only check file extensions; filtering here avoids
                # opening every non-extension key just to discard it.
                if not subkeyname.startswith("."):
                    continue
                try:
                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
                        # raises OSError if no 'Content Type' value
                        mimetype, datatype = _winreg.QueryValueEx(
                            subkey, 'Content Type')
                        if datatype != _winreg.REG_SZ:
                            continue
                        self.add_type(mimetype, subkeyname, strict)
                except OSError:
                    continue
Antoine Pitroub8108e22009-11-15 14:25:16 +0000271
def guess_type(url, strict=True):
    """Guess the MIME type and encoding of a file from its URL.

    The result is a tuple (type, encoding).  `type' is a string of the
    form type/subtype, usable for a MIME Content-type header, or None
    when it can't be guessed (no or unknown suffix).  `encoding' is the
    name of the program used to encode (e.g. compress or gzip), or None
    for no encoding.  The mappings are table driven.  Encoding suffixes
    are case sensitive; type suffixes are first tried case sensitive,
    then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    # The shared database is built lazily on first use.
    if _db is None:
        init()
    return _db.guess_type(url, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000293
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000294
def guess_all_extensions(type, strict=True):
    """Guess every extension for a file based on its MIME type.

    The lookup is delegated to the shared module-level database, whose
    result is returned unchanged: a list of strings giving the possible
    filename extensions, including the leading dot ('.').  The
    extensions are not guaranteed to have been associated with any
    particular data stream, but would be mapped to the MIME type
    `type' by guess_type().

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # The shared database is built lazily on first use.
    if _db is None:
        init()
    return _db.guess_all_extensions(type, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000311
def guess_extension(type, strict=True):
    """Guess a single extension for a file based on its MIME type.

    The result is a string giving a filename extension, including the
    leading dot ('.'), or None if no extension can be guessed for
    `type'.  The extension is not guaranteed to have been associated
    with any particular data stream, but would be mapped to the MIME
    type `type' by guess_type().

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # The shared database is built lazily on first use.
    if _db is None:
        init()
    return _db.guess_extension(type, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000327
def add_type(type, ext, strict=True):
    """Register a mapping between a type and an extension.

    The mapping is added to the shared module-level database.  An
    extension that is already known has its type replaced by the new
    one; a type that is already known gains the extension in its list
    of known extensions.

    If strict is true, information will be added to
    list of standard types, else to the list of non-standard
    types.
    """
    # The shared database is built lazily on first use.
    if _db is None:
        init()
    return _db.add_type(type, ext, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000343
Fred Drake5109ffd1998-05-18 16:27:20 +0000344
def init(files=None):
    """Build or extend the shared database and publish its tables.

    When `files' is None, or when no shared database exists yet, a new
    MimeTypes instance is created, the Windows registry is read if the
    winreg module is available, and the files in `knownfiles' (followed
    by `files', if given) are parsed.  Otherwise the given files are
    parsed into the existing shared database.  Entries that are not
    regular files are skipped.

    Afterwards the module globals suffix_map, encodings_map, types_map
    and common_types refer to the tables of that database.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    inited = True    # so that MimeTypes.__init__() doesn't call us again

    start_fresh = files is None or _db is None
    if start_fresh:
        db = MimeTypes()
        if _winreg:
            db.read_windows_registry()
        files = knownfiles if files is None else knownfiles + list(files)
    else:
        db = _db

    for file in files:
        if os.path.isfile(file):
            db.read(file)

    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    common_types = db.types_map[False]
    # Make the DB a global variable now that it is fully initialized
    _db = db
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000371
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000372
def read_mime_types(file):
    """Parse one mime.types-format file and return its type mapping.

    `file' is the pathname of the file to read.  It is decoded as
    UTF-8, the same encoding MimeTypes.read() uses, so the result does
    not depend on the locale's default encoding.

    Return the dictionary of standard (strict) types of a new
    MimeTypes instance after the file has been read into it, mapping
    extensions (with the leading dot) to MIME types.  Return None if
    the file cannot be opened.
    """
    try:
        f = open(file, encoding='utf-8')
    except OSError:
        return None
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000382
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000383
def _default_mime_types():
    """Install the built-in suffix, encoding and type tables.

    Each table is bound to two module globals: the public name
    (suffix_map, encodings_map, types_map, common_types) and a private
    `_..._default' name that refers to the same dictionary.
    """
    global suffix_map, _suffix_map_default
    global encodings_map, _encodings_map_default
    global types_map, _types_map_default
    global common_types, _common_types_default

    suffix_map = _suffix_map_default = {
        '.svgz': '.svg.gz',
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
        }

    encodings_map = _encodings_map_default = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
        }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted by mime type.
    # Make sure the entry with the preferred file extension for a particular mime type
    # appears before any others of the same mimetype.
    #
    # Every extension must appear only once: a repeated key in a dict
    # literal silently replaces the earlier value.  '.bmp' used to be listed
    # a second time as 'image/x-ms-bmp', which hid the 'image/bmp' entry.
    types_map = _types_map_default = {
        '.js'     : 'application/javascript',
        '.mjs'    : 'application/javascript',
        '.json'   : 'application/json',
        '.doc'    : 'application/msword',
        '.dot'    : 'application/msword',
        '.wiz'    : 'application/msword',
        '.bin'    : 'application/octet-stream',
        '.a'      : 'application/octet-stream',
        '.dll'    : 'application/octet-stream',
        '.exe'    : 'application/octet-stream',
        '.o'      : 'application/octet-stream',
        '.obj'    : 'application/octet-stream',
        '.so'     : 'application/octet-stream',
        '.oda'    : 'application/oda',
        '.pdf'    : 'application/pdf',
        '.p7c'    : 'application/pkcs7-mime',
        '.ps'     : 'application/postscript',
        '.ai'     : 'application/postscript',
        '.eps'    : 'application/postscript',
        '.m3u'    : 'application/vnd.apple.mpegurl',
        '.m3u8'   : 'application/vnd.apple.mpegurl',
        '.xls'    : 'application/vnd.ms-excel',
        '.xlb'    : 'application/vnd.ms-excel',
        '.ppt'    : 'application/vnd.ms-powerpoint',
        '.pot'    : 'application/vnd.ms-powerpoint',
        '.ppa'    : 'application/vnd.ms-powerpoint',
        '.pps'    : 'application/vnd.ms-powerpoint',
        '.pwz'    : 'application/vnd.ms-powerpoint',
        '.wasm'   : 'application/wasm',
        '.bcpio'  : 'application/x-bcpio',
        '.cpio'   : 'application/x-cpio',
        '.csh'    : 'application/x-csh',
        '.dvi'    : 'application/x-dvi',
        '.gtar'   : 'application/x-gtar',
        '.hdf'    : 'application/x-hdf',
        '.latex'  : 'application/x-latex',
        '.mif'    : 'application/x-mif',
        '.cdf'    : 'application/x-netcdf',
        '.nc'     : 'application/x-netcdf',
        '.p12'    : 'application/x-pkcs12',
        '.pfx'    : 'application/x-pkcs12',
        '.ram'    : 'application/x-pn-realaudio',
        '.pyc'    : 'application/x-python-code',
        '.pyo'    : 'application/x-python-code',
        '.sh'     : 'application/x-sh',
        '.shar'   : 'application/x-shar',
        '.swf'    : 'application/x-shockwave-flash',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc' : 'application/x-sv4crc',
        '.tar'    : 'application/x-tar',
        '.tcl'    : 'application/x-tcl',
        '.tex'    : 'application/x-tex',
        '.texi'   : 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.roff'   : 'application/x-troff',
        '.t'      : 'application/x-troff',
        '.tr'     : 'application/x-troff',
        '.man'    : 'application/x-troff-man',
        '.me'     : 'application/x-troff-me',
        '.ms'     : 'application/x-troff-ms',
        '.ustar'  : 'application/x-ustar',
        '.src'    : 'application/x-wais-source',
        '.xsl'    : 'application/xml',
        '.rdf'    : 'application/xml',
        '.wsdl'   : 'application/xml',
        '.xpdl'   : 'application/xml',
        '.zip'    : 'application/zip',
        '.au'     : 'audio/basic',
        '.snd'    : 'audio/basic',
        '.mp3'    : 'audio/mpeg',
        '.mp2'    : 'audio/mpeg',
        '.aif'    : 'audio/x-aiff',
        '.aifc'   : 'audio/x-aiff',
        '.aiff'   : 'audio/x-aiff',
        '.ra'     : 'audio/x-pn-realaudio',
        '.wav'    : 'audio/x-wav',
        '.bmp'    : 'image/bmp',
        '.gif'    : 'image/gif',
        '.ief'    : 'image/ief',
        '.jpg'    : 'image/jpeg',
        '.jpe'    : 'image/jpeg',
        '.jpeg'   : 'image/jpeg',
        '.png'    : 'image/png',
        '.svg'    : 'image/svg+xml',
        '.tiff'   : 'image/tiff',
        '.tif'    : 'image/tiff',
        '.ico'    : 'image/vnd.microsoft.icon',
        '.ras'    : 'image/x-cmu-raster',
        '.pnm'    : 'image/x-portable-anymap',
        '.pbm'    : 'image/x-portable-bitmap',
        '.pgm'    : 'image/x-portable-graymap',
        '.ppm'    : 'image/x-portable-pixmap',
        '.rgb'    : 'image/x-rgb',
        '.xbm'    : 'image/x-xbitmap',
        '.xpm'    : 'image/x-xpixmap',
        '.xwd'    : 'image/x-xwindowdump',
        '.eml'    : 'message/rfc822',
        '.mht'    : 'message/rfc822',
        '.mhtml'  : 'message/rfc822',
        '.nws'    : 'message/rfc822',
        '.css'    : 'text/css',
        '.csv'    : 'text/csv',
        '.html'   : 'text/html',
        '.htm'    : 'text/html',
        '.txt'    : 'text/plain',
        '.bat'    : 'text/plain',
        '.c'      : 'text/plain',
        '.h'      : 'text/plain',
        '.ksh'    : 'text/plain',
        '.pl'     : 'text/plain',
        '.rtx'    : 'text/richtext',
        '.tsv'    : 'text/tab-separated-values',
        '.py'     : 'text/x-python',
        '.etx'    : 'text/x-setext',
        '.sgm'    : 'text/x-sgml',
        '.sgml'   : 'text/x-sgml',
        '.vcf'    : 'text/x-vcard',
        '.xml'    : 'text/xml',
        '.mp4'    : 'video/mp4',
        '.mpeg'   : 'video/mpeg',
        '.m1v'    : 'video/mpeg',
        '.mpa'    : 'video/mpeg',
        '.mpe'    : 'video/mpeg',
        '.mpg'    : 'video/mpeg',
        '.mov'    : 'video/quicktime',
        '.qt'     : 'video/quicktime',
        '.webm'   : 'video/webm',
        '.avi'    : 'video/x-msvideo',
        '.movie'  : 'video/x-sgi-movie',
        }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=0 flag is given to the API methods.

    # Please sort these too
    common_types = _common_types_default = {
        '.rtf' : 'application/rtf',
        '.midi': 'audio/midi',
        '.mid' : 'audio/midi',
        '.jpg' : 'image/jpg',
        '.pict': 'image/pict',
        '.pct' : 'image/pict',
        '.pic' : 'image/pict',
        '.xul' : 'text/xul',
        }
560
561
562_default_mime_types()
Barry Warsaw107771a2001-10-25 21:49:18 +0000563
564
if __name__ == '__main__':
    # Command-line front end: guess the type (or, with -e, the extension)
    # for each argument and print the result.
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        # Print the help text, then the optional message, and exit with
        # the given status code.
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = True
    extension = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = False
        elif opt in ('-e', '--extension'):
            extension = True

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print(guess)
        else:
            guess, encoding = guess_type(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print('type:', guess, 'encoding:', encoding)