"""Guess the MIME type of a file.

This module defines one useful function:

guess_type(url) -- guess the MIME type and encoding of a URL.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None

"""
23
import string
import posixpath
26
# Candidate mime.types files.  init() parses this list when it is called
# without an explicit list of files.
knownfiles = [
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    ]

# Flag: 0 until init() has run, 1 afterwards.  guess_type() checks it so
# that init() is called on first use.
inited = 0
33
def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    """
    if not inited:
        init()
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes (e.g. ".tgz" -> ".tar.gz") and re-split,
    # so that the encoding suffix ends up in ext.
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    # Strip at most one encoding suffix; what precedes it is the suffix
    # that determines the type.
    if ext in encodings_map:
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    # Exact-case match first, then fall back to the lowercased suffix.
    if ext in types_map:
        return types_map[ext], encoding
    lowered = ext.lower()
    if lowered in types_map:
        return types_map[lowered], encoding
    return None, encoding
66
def init(files=None):
    """Merge suffix-to-type mappings read from mime.types files.

    files is a list of file names to parse; when it is None or empty,
    the module-level knownfiles list is used instead.  Every entry that
    read_mime_types() returns for a file is stored in types_map, so a
    file later in the list overrides entries from earlier files and
    entries already in the table.  Files that read_mime_types() cannot
    open are skipped.  Sets the module-level inited flag to 1 when done.
    """
    global inited
    for filename in files or knownfiles:
        mapping = read_mime_types(filename)
        # None means the file could not be opened; an empty dictionary
        # has nothing to merge either.
        if mapping:
            types_map.update(mapping)
    inited = 1
75
def read_mime_types(file):
    """Parse one mime.types-style file.

    file is the name of the file to read.  Each line has the form

        type/subtype suffix1 suffix2 ...

    with fields separated by whitespace.  A field starting with '#'
    begins a comment that runs to the end of the line.  Blank lines,
    comment-only lines and lines that name a type without any suffix
    contribute nothing.

    Returns a dictionary mapping each suffix, with a leading '.' added,
    to its type, or None if the file cannot be opened.  When a suffix
    is listed more than once, the last occurrence wins.
    """
    try:
        f = open(file)
    except IOError:
        return None
    mapping = {}
    # The with statement closes the file even if reading raises.
    with f:
        for line in f:
            words = []
            for word in line.split():
                # Only a '#' at the start of a field begins a comment.
                if word.startswith('#'):
                    break
                words.append(word)
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                mapping['.' + suff] = mime_type
    return mapping
96
# Shorthand suffixes and the compound suffix each one stands for.
# guess_type() replaces a matching suffix with its expansion before it
# consults encodings_map and types_map.  Keys are looked up exactly as
# written (case sensitive).
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}
102
# Suffixes that identify how a file was encoded, mapped to the name
# guess_type() returns as the encoding.  Keys are looked up exactly as
# written (case sensitive), so '.Z' and '.z' are different suffixes.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
    }
107
# Built-in table mapping file suffixes to MIME types.  init() adds to
# and overrides these entries with whatever it reads from mime.types
# files.  guess_type() looks a suffix up as written first and then
# lowercased, so the keys here are lowercase.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.ps': 'application/postscript',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
    }