# blob: cbc060b1f5871d5b2579527f7407baed9c7aee7e [file] [log] [blame]
"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url) -- guess the MIME type and encoding of a URL.

guess_extension(type) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None

"""
25
import posixpath
import string
import urllib
Guido van Rossumac8a9f31997-09-30 19:05:50 +000029
# Candidate mime.types files, parsed in order by init(); entries read from
# a later file override those from an earlier one.  Each path is listed
# once so that no file is opened and parsed twice.
knownfiles = [
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Set to 1 by init() once the known files have been merged into types_map.
inited = 0
38
def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    A "data:" URL is answered from the media type embedded in the URL
    (falling back to text/plain) and always reports encoding None; a
    data URL that contains no comma yields (None, None).

    """
    if not inited:
        init()
    # NOTE(review): urllib.splittype is the Python 2 spelling of this
    # helper -- confirm the target interpreter before porting.
    scheme, url = urllib.splittype(url)
    if scheme == 'data':
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        # type/subtype defaults to "text/plain"
        comma = url.find(',')
        if comma < 0:
            # bad data URL
            return None, None
        # Only look for a parameter separator inside the mediatype part.
        semi = url.find(';', 0, comma)
        if semi >= 0:
            mime_type = url[:semi]
        else:
            mime_type = url[:comma]
        # What precedes the first ';' or ',' may be a bare parameter
        # ("charset=...") or empty; neither is a usable type/subtype.
        if '=' in mime_type or '/' not in mime_type:
            mime_type = 'text/plain'
        return mime_type, None          # never compressed, so encoding is None

    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes (e.g. ".tgz" -> ".tar.gz") until stable.
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    # Peel off a single encoding suffix, exposing the type suffix under it.
    if ext in encodings_map:
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    if ext in types_map:
        return types_map[ext], encoding
    lowered = ext.lower()
    if lowered in types_map:
        return types_map[lowered], encoding
    return None, encoding
Guido van Rossumac8a9f31997-09-30 19:05:50 +000091
def guess_extension(type):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    The comparison is made against the lower-cased `type'.  When several
    extensions map to the same type, the first one met while iterating
    over types_map is returned.
    """
    if not inited:
        init()
    wanted = type.lower()
    for ext, known_type in types_map.items():
        if known_type == wanted:
            return ext
    return None
109
def init(files=None):
    """Merge the mappings found in mime.types files into types_map.

    `files' is a list of file names to parse; when it is omitted (or
    empty) the module-level knownfiles list is used instead.  Files are
    processed in order, so an entry from a later file replaces the same
    suffix from an earlier one or from the built-in table.  Files that
    cannot be opened, or that define nothing, are skipped.

    Sets the module-level flag `inited' to 1 when done.
    """
    global inited
    for filename in files or knownfiles:
        parsed = read_mime_types(filename)
        # None means the file could not be opened; {} means nothing to add.
        if parsed:
            types_map.update(parsed)
    inited = 1
118
def read_mime_types(file):
    """Parse one mime.types-style file into a suffix -> type dictionary.

    Each line is split on whitespace into "type suffix [suffix ...]".
    A word starting with '#' begins a comment that runs to the end of
    the line.  Blank lines, comment-only lines and lines that name a
    type without any suffix contribute nothing.

    Returns a dictionary mapping each suffix (with a leading '.') to
    its MIME type, or None if `file' cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    mapping = {}
    try:
        for line in f:
            words = []
            for word in line.split():
                if word[0] == '#':
                    # Rest of the line is a comment.
                    break
                words.append(word)
            if not words:
                continue
            mime_type = words[0]
            for suffix in words[1:]:
                mapping['.' + suffix] = mime_type
    finally:
        # Close the file even if reading raises part-way through.
        f.close()
    return mapping
139
# Shorthand suffixes that guess_type() expands before any other lookup.
# Keys are matched case-sensitively.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}
145
# Suffixes that denote an encoding applied on top of the real file type.
# Keys are matched case-sensitively ('.Z' is upper case on purpose).
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
    }
150
# Built-in suffix -> MIME type table.  init() merges entries read from the
# knownfiles on top of it; guess_type() looks suffixes up here, first as
# given and then lower-cased.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.js': 'application/x-javascript',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.ps': 'application/postscript',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.rdf': 'application/xml',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xml': 'text/xml',
    '.xsl': 'application/xml',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
    }