blob: b2cdd307f4afd69d1159da50b838675f74afa8a6 [file] [log] [blame]
"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, IOError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
29
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000030# The file structure is top-down except that the test program and its
31# subroutine come last.
32
33
def what(filename):
    """Guess the type of a sound file.

    Delegates to whathdr() and returns its result unchanged: a tuple
    describing the file, or None when no header test matches.
    """
    return whathdr(filename)
38
39
def whathdr(filename):
    """Recognize sound headers.

    Reads the first 512 bytes of the file and passes them, together with
    the open file object, to each function in the module-level ``tests``
    list in order.  The first true result is returned; None is returned
    when no test recognizes the header.

    IOError raised while opening or reading the file propagates to the
    caller.
    """
    # Sound files are binary data: open in binary mode so that no newline
    # translation or text-mode end-of-file handling alters the header.
    f = open(filename, 'rb')
    try:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
        return None
    finally:
        # Release the file handle on every exit path, including the
        # early return above and any exception raised by a test.
        f.close()
49
50
51#-----------------------------------#
52# Subroutines per sound header type #
53#-----------------------------------#
54
55tests = []
56
57def test_aifc(h, f):
58 import aifc
59 if h[:4] <> 'FORM':
60 return None
61 if h[8:12] == 'AIFC':
62 fmt = 'aifc'
63 elif h[8:12] == 'AIFF':
64 fmt = 'aiff'
65 else:
66 return None
67 f.seek(0)
68 try:
69 a = aifc.openfp(f, 'r')
70 except (EOFError, aifc.Error):
71 return None
72 return (fmt, a.getframerate(), a.getnchannels(), \
73 a.getnframes(), 8*a.getsampwidth())
74
75tests.append(test_aifc)
76
77
def test_au(h, f):
    """Recognize '.snd' (au) headers in either byte order.

    h is the header read from the start of the file; f (the open file)
    is not used.

    Returns ('au', rate, nchannels, nframes, sample_bits), or None if the
    magic does not match or the header is too short to hold its fields.
    sample_bits is 'U', 8 or 16 for encodings 1, 2 and 3, and '?' for any
    other encoding.  nframes is -1 when the frame size works out to zero.
    """
    magic = h[:4]
    if magic == '.snd':
        get_long = get_long_be
    elif magic in ('\0ds.', 'dns.'):
        # Byte-swapped magic: the header fields are little-endian.
        get_long = get_long_le
    else:
        return None
    if len(h) < 24:
        # Truncated header: the 32-bit fields below are not all present.
        return None
    data_size = get_long(h[8:12])
    encoding = get_long(h[12:16])
    rate = get_long(h[16:20])
    nchannels = get_long(h[20:24])
    sample_size = 1  # bytes per sample; default
    if encoding == 1:
        sample_bits = 'U'
    elif encoding == 2:
        sample_bits = 8
    elif encoding == 3:
        sample_bits = 16
        sample_size = 2
    else:
        sample_bits = '?'
    frame_size = sample_size * nchannels
    if frame_size:
        nframes = data_size // frame_size
    else:
        # A channel count of 0 would otherwise divide by zero; report the
        # frame count as unknown instead.
        nframes = -1
    return 'au', rate, nchannels, nframes, sample_bits
103
104tests.append(test_au)
105
106
def test_hcom(h, f):
    """Recognize HCOM headers.

    The header must contain 'FSSD' at offset 65 and 'HCOM' at offset 128.
    The sampling rate is 22050 divided by the big-endian 32-bit divisor
    stored at offset 128+16.

    Returns ('hcom', rate, 1, -1, 8), or None if the signatures do not
    match or the header ends before the divisor field.  rate is 0 when
    the stored divisor is 0.
    """
    if h[65:69] != 'FSSD' or h[128:132] != 'HCOM':
        return None
    if len(h) < 128 + 20:
        # Truncated header: the divisor field is missing.
        return None
    divisor = get_long_be(h[128+16:128+20])
    if divisor:
        rate = 22050 // divisor
    else:
        # Avoid dividing by zero; 0 means "rate unknown".
        rate = 0
    return 'hcom', rate, 1, -1, 8
112
113tests.append(test_hcom)
114
115
def test_voc(h, f):
    """Recognize Creative Voice File headers.

    After the 20-character signature, a little-endian 16-bit offset
    (sbseek) locates the first block.  If that block has type code 1 and
    lies within the header bytes that were read, the sampling rate is
    derived from the rate code stored four bytes into the block;
    otherwise the rate is reported as 0.

    Returns ('voc', rate, 1, -1, 8), or None if the signature does not
    match or the header ends before the offset field.
    """
    if h[:20] != 'Creative Voice File\032':
        return None
    if len(h) < 22:
        # Truncated header: the block offset is missing.
        return None
    sbseek = get_short_le(h[20:22])
    rate = 0
    # Both h[sbseek] and h[sbseek+4] must exist in the bytes actually
    # read; a short file or a bogus offset leaves the rate unknown.
    if 0 <= sbseek < 500 and sbseek + 4 < len(h) and h[sbseek] == '\1':
        ratecode = ord(h[sbseek+4])
        # ratecode is a single byte (0..255), so the divisor is never 0.
        rate = int(1000000.0 / (256 - ratecode))
    return 'voc', rate, 1, -1, 8
125
126tests.append(test_voc)
127
128
def test_wav(h, f):
    """Recognize RIFF/WAVE headers.

    Expected layout: 'RIFF' <len> 'WAVE' 'fmt ' <len>, followed by the
    format fields.  The channel count (offset 22), sampling rate
    (offset 24) and bits per sample (offset 34) are read as
    little-endian values.

    Returns ('wav', rate, nchannels, -1, sample_bits), or None if the
    signature does not match or the header is too short to hold those
    fields.
    """
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    if h[:4] != 'RIFF' or h[8:12] != 'WAVE' or h[12:16] != 'fmt ':
        return None
    if len(h) < 36:
        # Truncated header: the format fields are not all present.
        return None
    nchannels = get_short_le(h[22:24])
    rate = get_long_le(h[24:28])
    sample_bits = get_short_le(h[34:36])
    return 'wav', rate, nchannels, -1, sample_bits
138
139tests.append(test_wav)
140
141
142def test_8svx(h, f):
143 if h[:4] <> 'FORM' or h[8:12] <> '8SVX':
144 return None
145 # Should decode it to get #channels -- assume always 1
146 return '8svx', 0, 1, 0, 8
147
148tests.append(test_8svx)
149
150
def test_sndt(h, f):
    """Recognize headers that start with 'SOUND'.

    The sample count is the little-endian 32-bit value at offset 8 and
    the sampling rate the little-endian 16-bit value at offset 20.

    Returns ('sndt', rate, 1, nsamples, 8), or None if the signature does
    not match or the header is too short to hold both fields.
    """
    if h[:5] != 'SOUND':
        return None
    if len(h) < 22:
        # Truncated header: the rate field at offset 20 is missing.
        return None
    nsamples = get_long_le(h[8:12])
    rate = get_short_le(h[20:22])
    return 'sndt', rate, 1, nsamples, 8
156
157tests.append(test_sndt)
158
159
def test_sndr(h, f):
    """Recognize headers that start with two NUL characters and a rate.

    The little-endian 16-bit value at offset 2 is taken as the sampling
    rate; the header only counts as a match when that value lies between
    4000 and 25000 inclusive.

    Returns ('sndr', rate, 1, -1, 8) on a match, otherwise None.
    """
    if h[:2] != '\0\0':
        return None
    if len(h) < 4:
        # Truncated header: there is no rate field to validate.
        return None
    rate = get_short_le(h[2:4])
    if 4000 <= rate <= 25000:
        return 'sndr', rate, 1, -1, 8
    return None
165
166tests.append(test_sndr)
167
168
169#---------------------------------------------#
170# Subroutines to extract numbers from strings #
171#---------------------------------------------#
172
def get_long_be(s):
    """Decode s[0:4] as a big-endian 32-bit number (s[0] is the top byte)."""
    value = 0
    for i in (0, 1, 2, 3):
        value = (value << 8) | ord(s[i])
    return value
175
def get_long_le(s):
    """Decode s[0:4] as a little-endian 32-bit number (s[3] is the top byte)."""
    value = 0
    for i in (3, 2, 1, 0):
        value = (value << 8) | ord(s[i])
    return value
178
def get_short_be(s):
    """Decode s[0:2] as a big-endian 16-bit number (s[0] is the high byte)."""
    high = ord(s[0])
    low = ord(s[1])
    return (high << 8) | low
181
def get_short_le(s):
    """Decode s[0:2] as a little-endian 16-bit number (s[1] is the high byte)."""
    high = ord(s[1])
    low = ord(s[0])
    return (high << 8) | low
184
185
186#--------------------#
187# Small test program #
188#--------------------#
189
def test():
    """Command-line driver: run testall() on the arguments.

    A leading '-r' option is removed from sys.argv and turns on
    recursion.  With no remaining arguments the current directory is
    tested.  On KeyboardInterrupt a notice is written to stderr and the
    process exits with status 1.
    """
    import sys
    argv = sys.argv
    recursive = 0
    if len(argv) > 1 and argv[1] == '-r':
        # argv is sys.argv itself, so this strips the option in place.
        del argv[1:2]
        recursive = 1
    try:
        targets = argv[1:] or ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
204
205def testall(list, recursive, toplevel):
206 import sys
207 import os
208 for filename in list:
209 if os.path.isdir(filename):
210 print filename + '/:',
211 if recursive or toplevel:
212 print 'recursing down:'
213 import glob
214 names = glob.glob(os.path.join(filename, '*'))
215 testall(names, recursive, 0)
216 else:
217 print '*** directory (use -r) ***'
218 else:
219 print filename + ':',
220 sys.stdout.flush()
221 try:
222 print what(filename)
223 except IOError:
224 print '*** not found ***'
Guido van Rossumf4bf0441999-01-06 13:05:58 +0000225
226if __name__ == '__main__':
227 test()