blob: aead6566a5838daceeafc3e8f207fac924c9a366 [file] [log] [blame]
"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.

guess_extension(type, strict=1) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles (on Windows, the
  default values are taken from the registry)
read_mime_types(file) -- parse one file, return a dictionary or None
"""
25
Fred Drakeeeee4ec2001-08-03 21:01:44 +000026import os
Antoine Pitroud5b34d42009-11-15 14:10:48 +000027import sys
Guido van Rossumac8a9f31997-09-30 19:05:50 +000028import posixpath
Guido van Rossum1c5fb1c1998-10-12 15:12:28 +000029import urllib
Antoine Pitroud5b34d42009-11-15 14:10:48 +000030try:
31 import _winreg
32except ImportError:
33 _winreg = None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000034
# Names exported by "from mimetypes import *".
__all__ = [
    "guess_type",
    "guess_extension",
    "guess_all_extensions",
    "add_type",
    "read_mime_types",
    "init",
]
Skip Montanaro03d90142001-01-25 15:29:22 +000039
# Candidate mime.types files; init() reads the ones that exist, in this order.
# NOTE(review): "/usr/local/etc/httpd/conf/mime.types" is listed twice.  It is
# kept that way on purpose here: files are applied in list order, so dropping
# either occurrence could change which mapping wins or the extension order.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Set to True by init(); checked by MimeTypes.__init__().
inited = False
# Shared MimeTypes instance used by the module-level functions; None until
# init() has finished building it.
_db = None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000054
Fred Drakeeeee4ec2001-08-03 21:01:44 +000055
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.
    """

    def __init__(self, filenames=(), strict=True):
        """Create a datastore seeded from the module-level default tables.

        `filenames' is a sequence of mime.types-format files that are read,
        in order, after the defaults have been loaded.  `strict' is passed
        on to read() and selects which table (standard or non-standard)
        the types found in those files are added to.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {})  # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for (ext, type) in types_map.items():
            self.add_type(type, ext, True)
        for (ext, type) in common_types.items():
            self.add_type(type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                type = url[:semi]
            else:
                type = url[:comma]
            if '=' in type or '/' not in type:
                type = 'text/plain'
            return type, None           # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') until stable.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            # Peel off the encoding suffix and look at the one beneath it.
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        types_map = self.types_map[True]
        if ext in types_map:
            return types_map[ext], encoding
        elif ext.lower() in types_map:
            return types_map[ext.lower()], encoding
        elif strict:
            return None, encoding
        types_map = self.types_map[False]
        if ext in types_map:
            return types_map[ext], encoding
        elif ext.lower() in types_map:
            return types_map[ext.lower()], encoding
        else:
            return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().  The
        list is empty when no extension is known for `type'.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Work on a copy: appending the non-standard extensions to the stored
        # list would leak them into later strict lookups, and callers must not
        # be able to modify the datastore through the returned list.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        # The with-block closes the file even when readfp() raises.
        with open(filename) as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs a readline() method.  Each line holds a type
        followed by zero or more suffixes (without the leading dot); a word
        starting with '#' begins a comment that runs to the end of the line.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop the first word that starts a comment and everything after.
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(type, '.' + suff, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        Does nothing when the _winreg module is unavailable.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            # Yield each subkey name of `mimedb', skipping names that cannot
            # be encoded with the default encoding.
            i = 0
            while True:
                try:
                    ctype = _winreg.EnumKey(mimedb, i)
                except EnvironmentError:
                    # EnumKey fails once the index runs past the last subkey.
                    break
                try:
                    ctype = ctype.encode(default_encoding)  # omit in 3.x!
                except UnicodeEncodeError:
                    pass
                else:
                    yield ctype
                i += 1

        default_encoding = sys.getdefaultencoding()
        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT,
                             r'MIME\Database\Content Type') as mimedb:
            for ctype in enum_types(mimedb):
                with _winreg.OpenKey(mimedb, ctype) as key:
                    try:
                        suffix, datatype = _winreg.QueryValueEx(key,
                                                                'Extension')
                    except EnvironmentError:
                        # No 'Extension' value under this content type.
                        continue
                    if datatype != _winreg.REG_SZ:
                        continue
                    try:
                        suffix = suffix.encode(default_encoding)  # omit in 3.x!
                    except UnicodeEncodeError:
                        continue
                    self.add_type(ctype, suffix, strict)
273
274
def guess_type(url, strict=True):
    """Guess the MIME type and encoding of a file from its URL.

    The result is a (type, encoding) tuple.  `type' is a string of the
    form type/subtype, suitable for a MIME Content-type header, or None
    when the suffix is missing or unknown.  `encoding' names the program
    used to encode the file (e.g. compress or gzip), or is None when the
    file is not encoded.

    All lookups are table driven.  Encoding suffixes are matched case
    sensitively; type suffixes are tried case sensitively first and then
    case insensitively.  The suffixes .tgz, .taz and .tz (case sensitive!)
    are treated as ".tar.gz" by way of the suffix_map dictionary.

    When the optional `strict' argument is false, a set of commonly
    found but non-standard types is consulted as well.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    shared_db = _db
    return shared_db.guess_type(url, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000296
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000297
def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  If no extension can be guessed for `type', the
    list is empty.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    return _db.guess_all_extensions(type, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000314
def guess_extension(type, strict=True):
    """Guess a filename extension for the given MIME type.

    The result is a single extension string that includes the leading
    dot ('.'), or None when no extension can be guessed for `type'.  The
    extension is not guaranteed to have been associated with any
    particular data stream, but guess_type() would map it back to the
    MIME type `type'.

    When the optional `strict' argument is false, a set of commonly
    found but non-standard types is consulted as well.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    shared_db = _db
    return shared_db.guess_extension(type, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000330
def add_type(type, ext, strict=True):
    """Register a mapping between a MIME type and an extension.

    If the extension is already known, the new type replaces the old
    one.  If the type is already known, the extension is added to its
    list of known extensions.

    A true `strict' adds the mapping to the standard types; a false
    `strict' adds it to the non-standard types.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    shared_db = _db
    return shared_db.add_type(type, ext, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000346
Fred Drake5109ffd1998-05-18 16:27:20 +0000347
def init(files=None):
    """(Re)build the shared database and the module-level tables.

    `files' is a sequence of mime.types-format file names; entries that
    are not existing regular files are skipped.  When `files' is None,
    the Windows registry is read first (if _winreg is available) and
    then the files listed in knownfiles are used.

    On return the globals encodings_map, suffix_map, types_map and
    common_types refer to the new database's tables, and inited is True.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    inited = True    # so that MimeTypes.__init__() doesn't call us again
    db = MimeTypes()
    if files is None:
        if _winreg:
            db.read_windows_registry()
        files = knownfiles
    for path in files:
        if os.path.isfile(path):
            # Close each file as soon as it is parsed instead of leaving
            # the handle open until it happens to be garbage collected.
            with open(path) as fp:
                db.readfp(fp)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    common_types = db.types_map[False]
    # Make the DB a global variable now that it is fully initialized
    _db = db
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000366
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000367
def read_mime_types(file):
    """Parse one mime.types-format file and return its mappings.

    `file' is the name of the file to read.  Returns the dictionary of
    standard (strict) extension-to-type mappings of a fresh MimeTypes
    instance after the file has been read into it, or None when the
    file cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    # Make sure the handle is closed, even if parsing raises.
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000376
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000377
def _default_mime_types():
    """Install the built-in default tables as module-level globals.

    Rebinds suffix_map, encodings_map, types_map and common_types to
    newly created dictionaries.  Takes no arguments and returns None.
    """
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    # Shorthand suffixes and the compound suffix each one stands for.
    suffix_map = {
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        }

    # Suffixes that denote an encoding rather than a content type.
    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        }

    # Before adding new types, make sure they are either registered with IANA,
    # at https://www.iana.org/assignments/media-types/
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    types_map = {
        '.a'      : 'application/octet-stream',
        '.ai'     : 'application/postscript',
        '.aif'    : 'audio/x-aiff',
        '.aifc'   : 'audio/x-aiff',
        '.aiff'   : 'audio/x-aiff',
        '.au'     : 'audio/basic',
        '.avi'    : 'video/x-msvideo',
        '.bat'    : 'text/plain',
        '.bcpio'  : 'application/x-bcpio',
        '.bin'    : 'application/octet-stream',
        '.bmp'    : 'image/x-ms-bmp',
        '.c'      : 'text/plain',
        # '.cdf' used to be listed twice ('application/x-cdf' first).  A dict
        # literal keeps only the last value for a repeated key, so only the
        # mapping that was actually in effect is listed.
        '.cdf'    : 'application/x-netcdf',
        '.cpio'   : 'application/x-cpio',
        '.csh'    : 'application/x-csh',
        '.css'    : 'text/css',
        '.dll'    : 'application/octet-stream',
        '.doc'    : 'application/msword',
        '.dot'    : 'application/msword',
        '.dvi'    : 'application/x-dvi',
        '.eml'    : 'message/rfc822',
        '.eps'    : 'application/postscript',
        '.etx'    : 'text/x-setext',
        '.exe'    : 'application/octet-stream',
        '.gif'    : 'image/gif',
        '.gtar'   : 'application/x-gtar',
        '.h'      : 'text/plain',
        '.hdf'    : 'application/x-hdf',
        '.htm'    : 'text/html',
        '.html'   : 'text/html',
        '.ief'    : 'image/ief',
        '.jpe'    : 'image/jpeg',
        '.jpeg'   : 'image/jpeg',
        '.jpg'    : 'image/jpeg',
        '.js'     : 'application/x-javascript',
        '.ksh'    : 'text/plain',
        '.latex'  : 'application/x-latex',
        '.m1v'    : 'video/mpeg',
        '.man'    : 'application/x-troff-man',
        '.me'     : 'application/x-troff-me',
        '.mht'    : 'message/rfc822',
        '.mhtml'  : 'message/rfc822',
        '.mif'    : 'application/x-mif',
        '.mov'    : 'video/quicktime',
        '.movie'  : 'video/x-sgi-movie',
        '.mp2'    : 'audio/mpeg',
        '.mp3'    : 'audio/mpeg',
        '.mp4'    : 'video/mp4',
        '.mpa'    : 'video/mpeg',
        '.mpe'    : 'video/mpeg',
        '.mpeg'   : 'video/mpeg',
        '.mpg'    : 'video/mpeg',
        '.ms'     : 'application/x-troff-ms',
        '.nc'     : 'application/x-netcdf',
        '.nws'    : 'message/rfc822',
        '.o'      : 'application/octet-stream',
        '.obj'    : 'application/octet-stream',
        '.oda'    : 'application/oda',
        '.p12'    : 'application/x-pkcs12',
        '.p7c'    : 'application/pkcs7-mime',
        '.pbm'    : 'image/x-portable-bitmap',
        '.pdf'    : 'application/pdf',
        '.pfx'    : 'application/x-pkcs12',
        '.pgm'    : 'image/x-portable-graymap',
        '.pl'     : 'text/plain',
        '.png'    : 'image/png',
        '.pnm'    : 'image/x-portable-anymap',
        '.pot'    : 'application/vnd.ms-powerpoint',
        '.ppa'    : 'application/vnd.ms-powerpoint',
        '.ppm'    : 'image/x-portable-pixmap',
        '.pps'    : 'application/vnd.ms-powerpoint',
        '.ppt'    : 'application/vnd.ms-powerpoint',
        '.ps'     : 'application/postscript',
        '.pwz'    : 'application/vnd.ms-powerpoint',
        '.py'     : 'text/x-python',
        '.pyc'    : 'application/x-python-code',
        '.pyo'    : 'application/x-python-code',
        '.qt'     : 'video/quicktime',
        '.ra'     : 'audio/x-pn-realaudio',
        '.ram'    : 'application/x-pn-realaudio',
        '.ras'    : 'image/x-cmu-raster',
        '.rdf'    : 'application/xml',
        '.rgb'    : 'image/x-rgb',
        '.roff'   : 'application/x-troff',
        '.rtx'    : 'text/richtext',
        '.sgm'    : 'text/x-sgml',
        '.sgml'   : 'text/x-sgml',
        '.sh'     : 'application/x-sh',
        '.shar'   : 'application/x-shar',
        '.snd'    : 'audio/basic',
        '.so'     : 'application/octet-stream',
        '.src'    : 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc' : 'application/x-sv4crc',
        '.swf'    : 'application/x-shockwave-flash',
        '.t'      : 'application/x-troff',
        '.tar'    : 'application/x-tar',
        '.tcl'    : 'application/x-tcl',
        '.tex'    : 'application/x-tex',
        '.texi'   : 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif'    : 'image/tiff',
        '.tiff'   : 'image/tiff',
        '.tr'     : 'application/x-troff',
        '.tsv'    : 'text/tab-separated-values',
        '.txt'    : 'text/plain',
        '.ustar'  : 'application/x-ustar',
        '.vcf'    : 'text/x-vcard',
        '.wav'    : 'audio/x-wav',
        '.wiz'    : 'application/msword',
        '.wsdl'   : 'application/xml',
        '.xbm'    : 'image/x-xbitmap',
        '.xlb'    : 'application/vnd.ms-excel',
        # '.xls' used to be listed twice ('application/excel' first); as with
        # '.cdf', only the mapping that was actually in effect is listed.
        '.xls'    : 'application/vnd.ms-excel',
        '.xml'    : 'text/xml',
        '.xpdl'   : 'application/xml',
        '.xpm'    : 'image/x-xpixmap',
        '.xsl'    : 'application/xml',
        '.xwd'    : 'image/x-xwindowdump',
        '.zip'    : 'application/zip',
        }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=0 flag is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg' : 'image/jpg',
        '.mid' : 'audio/midi',
        '.midi': 'audio/midi',
        '.pct' : 'image/pict',
        '.pic' : 'image/pict',
        '.pict': 'image/pict',
        '.rtf' : 'application/rtf',
        '.xul' : 'text/xul'
        }


# Populate the module-level tables at import time.
_default_mime_types()
Barry Warsaw107771a2001-10-25 21:49:18 +0000546
547
# Command-line interface: guess the type (or, with -e, the extension) for each
# argument and print the result.
if __name__ == '__main__':
    import sys
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        # Print the usage text, then the optional message, and exit with
        # status `code'.
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    # Single-argument print(...) and "except ... as" produce the same output
    # as the older spellings on Python 2.6+ and also parse on Python 3.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1
    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if not guess:
                print("I don't know anything about type %s" % gtype)
            else:
                print(guess)
        else:
            guess, encoding = guess_type(gtype, strict)
            if not guess:
                print("I don't know anything about type %s" % gtype)
            else:
                print('type: %s encoding: %s' % (guess, encoding))
592 else: print 'type:', guess, 'encoding:', encoding