blob: 874037e8710f3f9b69fb90470b81d01b53ac0268 [file] [log] [blame]
"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.

guess_extension(type, strict=True) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None
"""
24
Fred Drakeeeee4ec2001-08-03 21:01:44 +000025import os
Guido van Rossumac8a9f31997-09-30 19:05:50 +000026import posixpath
Jeremy Hylton1afc1692008-06-18 20:49:58 +000027import urllib.parse
Guido van Rossumac8a9f31997-09-30 19:05:50 +000028
# Names exported by ``from mimetypes import *``.
__all__ = [
    "guess_type",
    "guess_extension",
    "guess_all_extensions",
    "add_type",
    "read_mime_types",
    "init",
]
Skip Montanaro03d90142001-01-25 15:29:22 +000033
# mime.types-style files that init() parses by default, in order.  A file
# parsed later overrides the extension -> type mappings of earlier files.
# Each path is listed exactly once; the Apache 1.2 location is kept after the
# Netscape one so that it still takes precedence over it.
knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
]

# Set to True by init(); checked by MimeTypes.__init__() to decide whether
# the module-level tables still need to be initialised.
inited = False
# The module-wide MimeTypes instance used by the module-level helper
# functions; assigned by init() once it is fully populated.
_db = None
Guido van Rossumac8a9f31997-09-30 19:05:50 +000048
Fred Drakeeeee4ec2001-08-03 21:01:44 +000049
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Instance attributes:

    encodings_map -- dict mapping suffixes to encoding names
    suffix_map    -- dict mapping suffixes to replacement suffixes
    types_map     -- pair of dicts (non-strict, strict) mapping
                     extensions to MIME types; index it with a bool
    types_map_inv -- pair of dicts (non-strict, strict) mapping MIME
                     types to the list of extensions registered for them
    """

    def __init__(self, filenames=(), strict=True):
        """Create a datastore seeded from the module-level tables.

        Every file named in `filenames' is then read with read(), using
        `strict' to select which table its entries are added to.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {})  # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for ext, mime_type in types_map.items():
            self.add_type(mime_type, ext, True)
        for ext, mime_type in common_types.items():
            self.add_type(mime_type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    @staticmethod
    def _split_scheme(url):
        """Split 'scheme:rest' into (scheme.lower(), rest).

        Returns (None, url) when there is no colon, when the text before
        the first colon is empty, or when it contains a '/'.  This is the
        behaviour of the deprecated urllib.parse.splittype().
        """
        scheme, colon, rest = url.partition(':')
        if colon and scheme and '/' not in scheme:
            return scheme.lower(), rest
        return None, url

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = self._split_scheme(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            end = semi if semi >= 0 else comma
            mime_type = url[:end]
            if '=' in mime_type or '/' not in mime_type:
                mime_type = 'text/plain'
            return mime_type, None  # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') until the
        # remaining extension is no longer an alias.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        # The standard table is always consulted; the non-standard one only
        # when strict is false.  In each table the exact spelling is tried
        # before the lower-cased one.
        tables = [self.types_map[True]]
        if not strict:
            tables.append(self.types_map[False])
        lowered = ext.lower()
        for table in tables:
            for candidate in (ext, lowered):
                if candidate in table:
                    return table[candidate], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Copy the stored list: appending the non-standard extensions to it
        # directly would leak them into later strict lookups.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        The file is decoded as UTF-8 so that parsing does not depend on
        the current locale.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        with open(filename, encoding='utf-8') as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs to provide a readline() method.  Each line holds
        a MIME type followed by zero or more suffixes (without the
        leading dot); everything from the first word starting with '#'
        to the end of the line is ignored.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            for index, word in enumerate(words):
                if word.startswith('#'):
                    # The rest of the line is a comment.
                    del words[index:]
                    break
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suffix in suffixes:
                self.add_type(mime_type, '.' + suffix, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000221
def guess_type(url, strict=True):
    """Guess the type of a file based on its URL.

    Delegates to the module-wide MimeTypes database, calling init()
    first if that database has not been created yet.

    The result is a tuple (type, encoding).  `type' is None when it
    cannot be guessed (no or unknown suffix), otherwise a string of the
    form type/subtype usable for a MIME Content-type header.  `encoding'
    is None for no encoding, otherwise the name of the program used to
    encode (e.g. compress or gzip).  The mappings are table driven.
    Encoding suffixes are case sensitive; type suffixes are first tried
    case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    return _db.guess_type(url, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000243
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000244
def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Delegates to the module-wide MimeTypes database, calling init()
    first if that database has not been created yet.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  The result is whatever the database's
    guess_all_extensions() method returns.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    return _db.guess_all_extensions(type, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000261
def guess_extension(type, strict=True):
    """Guess the extension for a file based on its MIME type.

    Delegates to the module-wide MimeTypes database, calling init()
    first if that database has not been created yet.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    return _db.guess_extension(type, strict)
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000277
def add_type(type, ext, strict=True):
    """Add a mapping between a type and an extension.

    Delegates to the module-wide MimeTypes database, calling init()
    first if that database has not been created yet.

    When the extension is already known, the new
    type will replace the old one. When the type
    is already known the extension will be added
    to the list of known extensions.

    If strict is true, information will be added to
    list of standard types, else to the list of non-standard
    types.
    """
    if _db is None:
        # First use: build the shared database.
        init()
    return _db.add_type(type, ext, strict)
Walter Dörwald5ccaf8f2002-09-06 16:15:58 +0000293
Fred Drake5109ffd1998-05-18 16:27:20 +0000294
def init(files=None):
    """Build the module-wide database and publish its tables.

    A fresh MimeTypes instance is created and every path in `files'
    (default: knownfiles) that names an existing regular file is read
    into it.  The module globals encodings_map, suffix_map, types_map
    and common_types are then rebound to the new instance's tables.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    inited = True    # so that MimeTypes.__init__() doesn't call us again
    db = MimeTypes()
    candidates = knownfiles if files is None else files
    for path in candidates:
        if os.path.isfile(path):
            db.read(path)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    common_types = db.types_map[False]
    # Make the DB a global variable now that it is fully initialized
    _db = db
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000311
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000312
def read_mime_types(file):
    """Parse one mime.types-format file and return its mappings.

    `file' is the path of the file to read; it is decoded as UTF-8.

    Returns the strict extension -> MIME type dictionary of a fresh
    MimeTypes instance after the file has been read into it, so the
    built-in default types are included as well.  Returns None if the
    file cannot be opened.
    """
    try:
        f = open(file, encoding='utf-8')
    except OSError:
        return None
    # Close the file once it has been parsed, even if parsing fails.
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]
Fred Drakeeeee4ec2001-08-03 21:01:44 +0000321
Guido van Rossumac8a9f31997-09-30 19:05:50 +0000322
def _default_mime_types():
    """(Re)create the built-in default tables as module globals.

    Binds suffix_map, encodings_map, types_map and common_types to
    freshly built dictionaries.
    """
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    # Shorthand suffixes and the suffix sequence each one stands for.
    suffix_map = {
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
    }

    # Suffixes that denote an encoding (compression) rather than a type.
    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
    }

    # Before adding new types, make sure they are either registered with IANA,
    # at https://www.iana.org/assignments/media-types/
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    # Each extension appears exactly once: a repeated key in a dict literal
    # silently discards all but the last value.
    types_map = {
        '.a': 'application/octet-stream',
        '.ai': 'application/postscript',
        '.aif': 'audio/x-aiff',
        '.aifc': 'audio/x-aiff',
        '.aiff': 'audio/x-aiff',
        '.au': 'audio/basic',
        '.avi': 'video/x-msvideo',
        '.bat': 'text/plain',
        '.bcpio': 'application/x-bcpio',
        '.bin': 'application/octet-stream',
        '.bmp': 'image/x-ms-bmp',
        '.c': 'text/plain',
        # 'application/x-cdf' is also in use for .cdf; only one can be listed.
        '.cdf': 'application/x-netcdf',
        '.cpio': 'application/x-cpio',
        '.csh': 'application/x-csh',
        '.css': 'text/css',
        '.dll': 'application/octet-stream',
        '.doc': 'application/msword',
        '.dot': 'application/msword',
        '.dvi': 'application/x-dvi',
        '.eml': 'message/rfc822',
        '.eps': 'application/postscript',
        '.etx': 'text/x-setext',
        '.exe': 'application/octet-stream',
        '.gif': 'image/gif',
        '.gtar': 'application/x-gtar',
        '.h': 'text/plain',
        '.hdf': 'application/x-hdf',
        '.htm': 'text/html',
        '.html': 'text/html',
        '.ief': 'image/ief',
        '.jpe': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.jpg': 'image/jpeg',
        '.js': 'application/x-javascript',
        '.ksh': 'text/plain',
        '.latex': 'application/x-latex',
        '.m1v': 'video/mpeg',
        '.man': 'application/x-troff-man',
        '.me': 'application/x-troff-me',
        '.mht': 'message/rfc822',
        '.mhtml': 'message/rfc822',
        '.mif': 'application/x-mif',
        '.mov': 'video/quicktime',
        '.movie': 'video/x-sgi-movie',
        '.mp2': 'audio/mpeg',
        '.mp3': 'audio/mpeg',
        '.mp4': 'video/mp4',
        '.mpa': 'video/mpeg',
        '.mpe': 'video/mpeg',
        '.mpeg': 'video/mpeg',
        '.mpg': 'video/mpeg',
        '.ms': 'application/x-troff-ms',
        '.nc': 'application/x-netcdf',
        '.nws': 'message/rfc822',
        '.o': 'application/octet-stream',
        '.obj': 'application/octet-stream',
        '.oda': 'application/oda',
        '.p12': 'application/x-pkcs12',
        '.p7c': 'application/pkcs7-mime',
        '.pbm': 'image/x-portable-bitmap',
        '.pdf': 'application/pdf',
        '.pfx': 'application/x-pkcs12',
        '.pgm': 'image/x-portable-graymap',
        '.pl': 'text/plain',
        '.png': 'image/png',
        '.pnm': 'image/x-portable-anymap',
        '.pot': 'application/vnd.ms-powerpoint',
        '.ppa': 'application/vnd.ms-powerpoint',
        '.ppm': 'image/x-portable-pixmap',
        '.pps': 'application/vnd.ms-powerpoint',
        '.ppt': 'application/vnd.ms-powerpoint',
        '.ps': 'application/postscript',
        '.pwz': 'application/vnd.ms-powerpoint',
        '.py': 'text/x-python',
        '.pyc': 'application/x-python-code',
        '.pyo': 'application/x-python-code',
        '.qt': 'video/quicktime',
        '.ra': 'audio/x-pn-realaudio',
        '.ram': 'application/x-pn-realaudio',
        '.ras': 'image/x-cmu-raster',
        '.rdf': 'application/xml',
        '.rgb': 'image/x-rgb',
        '.roff': 'application/x-troff',
        '.rtx': 'text/richtext',
        '.sgm': 'text/x-sgml',
        '.sgml': 'text/x-sgml',
        '.sh': 'application/x-sh',
        '.shar': 'application/x-shar',
        '.snd': 'audio/basic',
        '.so': 'application/octet-stream',
        '.src': 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc': 'application/x-sv4crc',
        '.swf': 'application/x-shockwave-flash',
        '.t': 'application/x-troff',
        '.tar': 'application/x-tar',
        '.tcl': 'application/x-tcl',
        '.tex': 'application/x-tex',
        '.texi': 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif': 'image/tiff',
        '.tiff': 'image/tiff',
        '.tr': 'application/x-troff',
        '.tsv': 'text/tab-separated-values',
        '.txt': 'text/plain',
        '.ustar': 'application/x-ustar',
        '.vcf': 'text/x-vcard',
        '.wav': 'audio/x-wav',
        '.wiz': 'application/msword',
        '.wsdl': 'application/xml',
        '.xbm': 'image/x-xbitmap',
        '.xlb': 'application/vnd.ms-excel',
        # 'application/excel' is also in use for .xls; only one can be listed.
        '.xls': 'application/vnd.ms-excel',
        '.xml': 'text/xml',
        '.xpdl': 'application/xml',
        '.xpm': 'image/x-xpixmap',
        '.xsl': 'application/xml',
        '.xwd': 'image/x-xwindowdump',
        '.zip': 'application/zip',
    }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=0 flag is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg': 'image/jpg',
        '.mid': 'audio/midi',
        '.midi': 'audio/midi',
        '.pct': 'image/pict',
        '.pic': 'image/pict',
        '.pict': 'image/pict',
        '.rtf': 'application/rtf',
        '.xul': 'text/xul',
    }


# Populate the module-level tables at import time.
_default_mime_types()
Barry Warsaw107771a2001-10-25 21:49:18 +0000491
492
# Command-line interface: guess the MIME type of each argument, or with
# --extension guess a filename extension for each MIME type given.
if __name__ == '__main__':
    import getopt
    import sys

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text and optional message, then exit with `code'."""
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = True        # cleared by --lenient
    extension = False    # set by --extension
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = False
        elif opt in ('-e', '--extension'):
            extension = True

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print(guess)
        else:
            guess, encoding = guess_type(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print('type:', guess, 'encoding:', encoding)