blob: 61cd5b3710e502ccd9274afe80e3516180911bc0 [file] [log] [blame]
"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, IOError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
29
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000030# The file structure is top-down except that the test program and its
31# subroutine come last.
32
33
def what(filename):
    """Guess the type of a sound file.

    Delegates entirely to whathdr() and hands back whatever it returns:
    a description tuple, or None when the header is not recognized.
    """
    return whathdr(filename)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000038
39
def whathdr(filename):
    """Recognize sound headers.

    Reads the first 512 bytes of the file named by filename and offers
    them, together with the open file object, to each function in the
    module-level list `tests`, in list order.

    Returns the first true value produced by one of those functions, or
    None if none of them recognizes the header.  Raises IOError if the
    file cannot be opened.
    """
    # Binary mode: sound data is not text, and a text-mode read may
    # translate line endings or stop early on some platforms, which
    # would corrupt the header bytes the tests inspect.
    f = open(filename, 'rb')
    try:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
        return None
    finally:
        # Release the handle on every exit path, including exceptions
        # raised by one of the test functions.
        f.close()
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000049
50
51#-----------------------------------#
52# Subroutines per sound header type #
53#-----------------------------------#
54
# Registry of header-recognizer functions.  whathdr() calls each entry as
# tf(h, f) in list order and returns the first true result.
tests = []
56
57def test_aifc(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +000058 import aifc
59 if h[:4] != 'FORM':
60 return None
61 if h[8:12] == 'AIFC':
62 fmt = 'aifc'
63 elif h[8:12] == 'AIFF':
64 fmt = 'aiff'
65 else:
66 return None
67 f.seek(0)
68 try:
69 a = aifc.openfp(f, 'r')
70 except (EOFError, aifc.Error):
71 return None
72 return (fmt, a.getframerate(), a.getnchannels(), \
73 a.getnframes(), 8*a.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000074
75tests.append(test_aifc)
76
77
78def test_au(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +000079 if h[:4] == '.snd':
80 f = get_long_be
81 elif h[:4] in ('\0ds.', 'dns.'):
82 f = get_long_le
83 else:
84 return None
85 type = 'au'
86 hdr_size = f(h[4:8])
87 data_size = f(h[8:12])
88 encoding = f(h[12:16])
89 rate = f(h[16:20])
90 nchannels = f(h[20:24])
91 sample_size = 1 # default
92 if encoding == 1:
93 sample_bits = 'U'
94 elif encoding == 2:
95 sample_bits = 8
96 elif encoding == 3:
97 sample_bits = 16
98 sample_size = 2
99 else:
100 sample_bits = '?'
101 frame_size = sample_size * nchannels
102 return type, rate, nchannels, data_size/frame_size, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000103
104tests.append(test_au)
105
106
107def test_hcom(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000108 if h[65:69] != 'FSSD' or h[128:132] != 'HCOM':
109 return None
110 divisor = get_long_be(h[128+16:128+20])
111 return 'hcom', 22050/divisor, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000112
113tests.append(test_hcom)
114
115
116def test_voc(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000117 if h[:20] != 'Creative Voice File\032':
118 return None
119 sbseek = get_short_le(h[20:22])
120 rate = 0
121 if 0 <= sbseek < 500 and h[sbseek] == '\1':
122 ratecode = ord(h[sbseek+4])
123 rate = int(1000000.0 / (256 - ratecode))
124 return 'voc', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000125
126tests.append(test_voc)
127
128
129def test_wav(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000130 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
131 if h[:4] != 'RIFF' or h[8:12] != 'WAVE' or h[12:16] != 'fmt ':
132 return None
133 style = get_short_le(h[20:22])
134 nchannels = get_short_le(h[22:24])
135 rate = get_long_le(h[24:28])
136 sample_bits = get_short_le(h[34:36])
137 return 'wav', rate, nchannels, -1, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000138
139tests.append(test_wav)
140
141
142def test_8svx(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000143 if h[:4] != 'FORM' or h[8:12] != '8SVX':
144 return None
145 # Should decode it to get #channels -- assume always 1
146 return '8svx', 0, 1, 0, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000147
148tests.append(test_8svx)
149
150
151def test_sndt(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000152 if h[:5] == 'SOUND':
153 nsamples = get_long_le(h[8:12])
154 rate = get_short_le(h[20:22])
155 return 'sndt', rate, 1, nsamples, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000156
157tests.append(test_sndt)
158
159
160def test_sndr(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000161 if h[:2] == '\0\0':
162 rate = get_short_le(h[2:4])
163 if 4000 <= rate <= 25000:
164 return 'sndr', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000165
166tests.append(test_sndr)
167
168
169#---------------------------------------------#
170# Subroutines to extract numbers from strings #
171#---------------------------------------------#
172
def get_long_be(s):
    """Decode the first four characters of s as a big-endian 32-bit number.

    s[0] supplies the most significant byte.  Characters beyond the
    fourth are ignored; a shorter s raises IndexError.
    """
    value = 0
    for index in (0, 1, 2, 3):
        value = (value << 8) | ord(s[index])
    return value
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000175
def get_long_le(s):
    """Decode the first four characters of s as a little-endian 32-bit number.

    s[0] supplies the least significant byte.  Characters beyond the
    fourth are ignored; a shorter s raises IndexError.
    """
    value = 0
    # Walk from the most significant byte (s[3]) down to s[0].
    for index in (3, 2, 1, 0):
        value = (value << 8) | ord(s[index])
    return value
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000178
def get_short_be(s):
    """Decode the first two characters of s as a big-endian 16-bit number.

    s[0] is the high byte and s[1] the low byte.  Characters beyond the
    second are ignored; a shorter s raises IndexError.
    """
    high = ord(s[0])
    low = ord(s[1])
    return (high << 8) | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000181
def get_short_le(s):
    """Decode the first two characters of s as a little-endian 16-bit number.

    s[0] is the low byte and s[1] the high byte.  Characters beyond the
    second are ignored; a shorter s raises IndexError.
    """
    high = ord(s[1])
    low = ord(s[0])
    return (high << 8) | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000184
185
186#--------------------#
187# Small test program #
188#--------------------#
189
def test():
    """Small main program: run testall() over the command-line arguments.

    A leading '-r' option is removed from sys.argv and turns on
    recursion.  With no remaining arguments the current directory '.'
    is examined.  On KeyboardInterrupt a notice is written to stderr
    and the process exits with status 1.
    """
    import sys
    recursive = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-r':
        # Drop the option in place so only file names remain.
        sys.argv.pop(1)
        recursive = 1
    try:
        targets = sys.argv[1:]
        if not targets:
            targets = ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000204
205def testall(list, recursive, toplevel):
Tim Peters495ad3c2001-01-15 01:36:40 +0000206 import sys
207 import os
208 for filename in list:
209 if os.path.isdir(filename):
210 print filename + '/:',
211 if recursive or toplevel:
212 print 'recursing down:'
213 import glob
214 names = glob.glob(os.path.join(filename, '*'))
215 testall(names, recursive, 0)
216 else:
217 print '*** directory (use -r) ***'
218 else:
219 print filename + ':',
220 sys.stdout.flush()
221 try:
222 print what(filename)
223 except IOError:
224 print '*** not found ***'
Guido van Rossumf4bf0441999-01-06 13:05:58 +0000225
# Run the small test program when this module is executed as a script.
if __name__ == '__main__':
    test()