blob: b35d0ff84bd0610c031d266cd470699331ec2915 [file] [log] [blame]
"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url) -- guess the MIME type and encoding of a URL.

guess_extension(type) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None

"""
25
26import string
27import posixpath
28
# Files that init() parses by default.  init() applies them in list order
# and each file overwrites entries set by the ones before it, so later
# files take precedence.  Every path is listed exactly once: parsing the
# same file twice only repeats work without changing the merged result.
knownfiles = [
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

# Flag set to 1 by init(); checked so the files are parsed lazily on first use.
inited = 0
37
def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    """
    if not inited:
        init()
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes until the extension is no longer an alias;
    # the expansion is re-split so that ext ends up as the last component
    # (e.g. ".tgz" becomes ".tar.gz", leaving ext == ".gz").
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    # An encoding suffix is peeled off first; the type is then guessed
    # from whatever extension remains underneath it.
    if ext in encodings_map:
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    # Exact-case match wins; fall back to the lowercased extension.
    if ext in types_map:
        return types_map[ext], encoding
    lowered = ext.lower()
    if lowered in types_map:
        return types_map[lowered], encoding
    return None, encoding
Guido van Rossumac8a9f31997-09-30 19:05:50 +000070
def guess_extension(type):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    The lookup lowercases `type' before comparing.  When several
    extensions map to the same type, the one returned is simply the first
    match encountered while iterating over types_map.
    """
    # No `global` declaration needed: inited is only read here, never assigned.
    if not inited:
        init()
    type = type.lower()
    for ext, stype in types_map.items():
        if type == stype:
            return ext
    return None
88
def init(files=None):
    """Parse mime.types files and merge their entries into types_map.

    `files' is a sequence of file names to parse; when it is None or
    empty, the module-level list knownfiles is used instead.  Files are
    processed in order and each one overwrites entries already present in
    types_map, so later files take precedence.  Files that cannot be read
    or that contain no mappings are skipped.

    Sets the module-level flag `inited' to 1 when done.
    """
    global inited
    for file in files or knownfiles:
        s = read_mime_types(file)
        # None (unreadable file) and {} (no mappings) are both skipped.
        if s:
            types_map.update(s)
    inited = 1
97
def read_mime_types(file):
    """Parse one mime.types file and return its suffix-to-type mapping.

    `file' is the name of the file to read.  Each line has the form

        type/subtype  suffix1 suffix2 ...

    with whitespace-separated fields.  A field starting with '#' begins a
    comment that runs to the end of the line.  Blank lines, comment-only
    lines and lines that name a type without any suffix contribute
    nothing.

    Returns a dictionary mapping each suffix (with a leading '.' added)
    to its MIME type, or None if the file cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    mapping = {}
    # The with-block guarantees the file is closed even if reading fails.
    with f:
        for line in f:
            fields = []
            for word in line.split():
                # Only a word that *starts* with '#' opens a comment.
                if word[0] == '#':
                    break
                fields.append(word)
            if not fields:
                continue
            mime_type, suffixes = fields[0], fields[1:]
            for suff in suffixes:
                mapping['.' + suff] = mime_type
    return mapping
118
# Shorthand suffixes and the full suffix chain each one stands for.
# guess_type() substitutes the expansion before looking up encoding and type.
# Keys are matched case sensitively.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
    }
124
# Suffixes that denote an encoding wrapped around the real content, mapped
# to the name of the encoding program.  Keys are matched case sensitively
# by guess_type() (note the upper-case '.Z').
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
    }
129
# Built-in table mapping filename suffixes (lower case, with leading dot)
# to MIME types.  init() merges entries parsed from the mime.types files
# into this dictionary, overwriting built-in entries with the same suffix.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.ps': 'application/postscript',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xml': 'text/xml',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
    }