"""Guess the MIME type of a file.

This module defines one useful function:

guess_type(url) -- guess the MIME type and encoding of a URL.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None

"""
23
24import string
25import posixpath
26
# Default mime.types files that init() parses when it is called without an
# explicit list of files.  Files that cannot be opened are skipped.
knownfiles = [
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    ]

# Flag set to 1 by init(); guess_type() calls init() while this is false.
inited = 0
33
def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    The first call triggers init() so that the known mime.types files
    are merged into types_map before any lookup is made.

    """
    if not inited:
        init()
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes (e.g. ".tgz" -> ".tar.gz") until the
    # resulting extension is no longer an alias.
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    # Peel off one encoding suffix, if present, so that the remaining
    # extension describes the underlying content type.
    if ext in encodings_map:
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    # Exact-case match wins; fall back to a lower-cased lookup.
    if ext in types_map:
        return types_map[ext], encoding
    lowered = ext.lower()
    if lowered in types_map:
        return types_map[lowered], encoding
    return None, encoding
Guido van Rossumac8a9f31997-09-30 19:05:50 +000066
def init(files=None):
    """Load MIME type definitions from mime.types-style files.

    files is an optional list of file names to parse; when it is None
    or empty, the module-level knownfiles list is used instead.  Each
    file is parsed with read_mime_types(); files that cannot be opened
    or that yield no entries are skipped.  Parsed entries are merged
    into types_map, overriding existing entries for the same suffix.

    Sets the module-level flag inited to 1 once all files have been
    processed.

    """
    global inited
    for path in files or knownfiles:
        parsed = read_mime_types(path)
        if parsed:
            types_map.update(parsed)
    inited = 1
75
def read_mime_types(file):
    """Parse one mime.types-style file.

    file is the name of the file to read.  Each non-blank line has the
    form "type suffix [suffix ...]"; a word starting with '#' begins a
    comment that runs to the end of the line.  Lines that name a type
    but no suffixes contribute nothing.

    Return a dictionary mapping each suffix (with a leading '.') to
    its type, or None if the file cannot be opened.  When a suffix is
    listed more than once, the last occurrence wins.

    """
    try:
        f = open(file)
    except IOError:
        return None
    mapping = {}
    # The with-block guarantees the file is closed even if reading fails.
    with f:
        for line in f:
            words = line.split()
            # Drop everything from the first word that starts a comment.
            for i, word in enumerate(words):
                if word.startswith('#'):
                    del words[i:]
                    break
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                mapping['.' + suff] = mime_type
    return mapping
96
# Shorthand suffixes that guess_type() expands (case sensitively) into
# their long form before looking up the encoding and type.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}
102
# Suffixes naming the program used to encode a file.  guess_type() looks
# these up case sensitively, so '.Z' matches but '.z' does not.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
    }
107
# Built-in table mapping file suffixes to MIME types.  init() merges the
# entries parsed from mime.types files into this dictionary, so entries
# here act as defaults that those files can override.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.ps': 'application/postscript',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xml': 'text/xml',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
    }