blob: 9dc3645fd0d141b52c16ab8cb64c91640fe5540a [file] [log] [blame]
"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url) -- guess the MIME type and encoding of a URL.

guess_extension(type) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None

"""
25
26import string
27import posixpath
Guido van Rossumd54fb7a1998-10-17 18:09:27 +000028import urllib
Guido van Rossuma11cccc1997-10-06 20:19:59 +000029
# mime.types files that init() parses, in this order, when it is called
# without an explicit file list.  Entries read from a later file replace
# entries for the same suffix read from an earlier one.
#
# Each path is listed exactly once.  The Apache 1.2 location sits after the
# Netscape one so that its entries still take precedence over Netscape's.
knownfiles = [
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Flag: 0 until init() has run, 1 afterwards.
inited = 0
38
def guess_type(url):
    """Guess the MIME type and encoding of a file from its URL.

    Returns a tuple (type, encoding).  type is a string of the form
    type/subtype, usable for a MIME Content-type header, or None when
    the type can't be guessed (no suffix, or an unknown one).  encoding
    is the name of the program used to encode the file (e.g. compress
    or gzip), or None when there is no encoding.

    All mappings are table driven.  Suffix aliases from suffix_map
    (.tgz, .taz and .tz all stand for ".tar.gz") are expanded first;
    those and the encoding suffixes in encodings_map are matched case
    sensitively.  Type suffixes are looked up in types_map case
    sensitively first and then in lower case.

    """
    if not inited:
        init()
    scheme, url = urllib.splittype(url)
    if scheme == 'data':
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        # type/subtype defaults to "text/plain"
        comma = url.find(',')
        if comma < 0:
            # No comma at all: not a valid data URL.
            return None, None
        # The media type ends at the first ';' before the comma, if any.
        semi = url.find(';', 0, comma)
        if semi < 0:
            mediatype = url[:comma]
        else:
            mediatype = url[:semi]
        # A leading parameter (contains '=') or a value without '/' means
        # no type/subtype was given, so the default applies.
        if '=' in mediatype or '/' not in mediatype:
            mediatype = 'text/plain'
        return mediatype, None  # never compressed, so encoding is None
    base, ext = posixpath.splitext(url)
    # Expand aliases such as .tgz until the suffix is no longer an alias.
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    encoding = None
    if ext in encodings_map:
        # Strip the encoding suffix; the type comes from the one before it.
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    # Exact-case match wins; the lower-cased suffix is the fallback.
    for candidate in (ext, ext.lower()):
        if candidate in types_map:
            return types_map[candidate], encoding
    return None, encoding
Guido van Rossuma11cccc1997-10-06 20:19:59 +000091
def guess_extension(type):
    """Guess a filename extension for the MIME type `type'.

    The return value is an extension string including the leading dot
    ('.'), taken from types_map, or None when no entry of types_map
    has `type' (compared in lower case) as its value.  The extension is
    not guaranteed to have been associated with any particular data
    stream; it is merely one that types_map maps to `type'.  When
    several extensions qualify, the first one encountered while
    iterating over types_map is returned.
    """
    if not inited:
        init()
    wanted = type.lower()
    for ext in types_map.keys():
        if types_map[ext] == wanted:
            return ext
    return None
109
def init(files=None):
    """Merge MIME type tables read from files into types_map.

    files is a list of file names to parse; when it is omitted (or
    empty) the module-level list knownfiles is used instead.  Each
    file is handed to read_mime_types() and any entries it returns are
    copied into types_map, so a later file overrides an earlier one
    for the same suffix.  Files that yield nothing are skipped.

    Sets the global flag inited to 1 when done.
    """
    global inited
    if not files:
        files = knownfiles
    for name in files:
        parsed = read_mime_types(name)
        if parsed:
            types_map.update(parsed)
    inited = 1
118
def read_mime_types(file):
    """Parse one mime.types file into a suffix-to-type dictionary.

    file is the name of the file to read.  Each line is split on
    whitespace and has the form

        type suffix1 suffix2 ...   # optional comment

    Everything from the first word that starts with '#' to the end of
    the line is ignored, and lines with nothing left are skipped.  A
    line that names a type without any suffix contributes no entries.

    Returns a dictionary mapping each suffix, with a leading dot ('.')
    prepended, to its type, or None if the file cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    mapping = {}
    try:
        for line in f:
            # Keep the words up to (not including) the first comment word.
            words = []
            for word in line.split():
                if word[0] == '#':
                    break
                words.append(word)
            if not words:
                continue
            mime_type = words[0]
            for suff in words[1:]:
                mapping['.' + suff] = mime_type
    finally:
        # Close the file even if reading or parsing raises.
        f.close()
    return mapping
139
# Suffix aliases.  guess_type() replaces a suffix found here with the
# suffix sequence it maps to before doing any other lookup; keys are
# matched case sensitively.
suffix_map = {
    ".tgz": ".tar.gz",
    ".taz": ".tar.gz",
    ".tz": ".tar.gz",
    }
145
# Encoding suffixes.  guess_type() reports the mapped value as the
# encoding and then looks at the suffix before it for the type; keys are
# matched case sensitively.
encodings_map = {
    ".gz": "gzip",
    ".Z": "compress",
    }
150
# Built-in table mapping filename suffixes (with the leading dot) to MIME
# types.  init() adds to and overrides these entries with whatever it
# reads from mime.types files.
types_map = {
    ".a": "application/octet-stream",
    ".ai": "application/postscript",
    ".aif": "audio/x-aiff",
    ".aifc": "audio/x-aiff",
    ".aiff": "audio/x-aiff",
    ".au": "audio/basic",
    ".avi": "video/x-msvideo",
    ".bcpio": "application/x-bcpio",
    ".bin": "application/octet-stream",
    ".cdf": "application/x-netcdf",
    ".cpio": "application/x-cpio",
    ".csh": "application/x-csh",
    ".dll": "application/octet-stream",
    ".dvi": "application/x-dvi",
    ".exe": "application/octet-stream",
    ".eps": "application/postscript",
    ".etx": "text/x-setext",
    ".gif": "image/gif",
    ".gtar": "application/x-gtar",
    ".hdf": "application/x-hdf",
    ".htm": "text/html",
    ".html": "text/html",
    ".ief": "image/ief",
    ".jpe": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".jpg": "image/jpeg",
    ".js": "application/x-javascript",
    ".latex": "application/x-latex",
    ".man": "application/x-troff-man",
    ".me": "application/x-troff-me",
    ".mif": "application/x-mif",
    ".mov": "video/quicktime",
    ".movie": "video/x-sgi-movie",
    ".mpe": "video/mpeg",
    ".mpeg": "video/mpeg",
    ".mpg": "video/mpeg",
    ".ms": "application/x-troff-ms",
    ".nc": "application/x-netcdf",
    ".o": "application/octet-stream",
    ".obj": "application/octet-stream",
    ".oda": "application/oda",
    ".pbm": "image/x-portable-bitmap",
    ".pdf": "application/pdf",
    ".pgm": "image/x-portable-graymap",
    ".pnm": "image/x-portable-anymap",
    ".png": "image/png",
    ".ppm": "image/x-portable-pixmap",
    ".py": "text/x-python",
    ".pyc": "application/x-python-code",
    ".ps": "application/postscript",
    ".qt": "video/quicktime",
    ".ras": "image/x-cmu-raster",
    ".rgb": "image/x-rgb",
    ".rdf": "application/xml",
    ".roff": "application/x-troff",
    ".rtf": "application/rtf",
    ".rtx": "text/richtext",
    ".sgm": "text/x-sgml",
    ".sgml": "text/x-sgml",
    ".sh": "application/x-sh",
    ".shar": "application/x-shar",
    ".snd": "audio/basic",
    ".so": "application/octet-stream",
    ".src": "application/x-wais-source",
    ".sv4cpio": "application/x-sv4cpio",
    ".sv4crc": "application/x-sv4crc",
    ".t": "application/x-troff",
    ".tar": "application/x-tar",
    ".tcl": "application/x-tcl",
    ".tex": "application/x-tex",
    ".texi": "application/x-texinfo",
    ".texinfo": "application/x-texinfo",
    ".tif": "image/tiff",
    ".tiff": "image/tiff",
    ".tr": "application/x-troff",
    ".tsv": "text/tab-separated-values",
    ".txt": "text/plain",
    ".ustar": "application/x-ustar",
    ".wav": "audio/x-wav",
    ".xbm": "image/x-xbitmap",
    ".xml": "text/xml",
    ".xsl": "application/xml",
    ".xpm": "image/x-xpixmap",
    ".xwd": "image/x-xwindowdump",
    ".zip": "application/zip",
    }