blob: df2ccf17ba4b99510329549bf147ac68f65e4fd4 [file] [log] [blame]
"""Routines to help recognize sound files.
2
3Function whathdr() recognizes various types of sound file headers.
4It understands almost all headers that SOX can decode.
5
6The return tuple contains the following items, in this order:
7- file type (as SOX understands it)
8- sampling rate (0 if unknown or hard to decode)
9- number of channels (0 if unknown or hard to decode)
10- number of frames in the file (-1 if unknown or hard to decode)
11- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
12
13If the file doesn't have a recognizable type, it returns None.
14If the file can't be opened, IOError is raised.
15
16To compute the total time, divide the number of frames by the
17sampling rate (a frame contains a sample for each channel).
18
19Function what() calls whathdr(). (It used to also use some
20heuristics for raw data, but this doesn't work very well.)
21
22Finally, the function test() is a simple main program that calls
23what() for all files mentioned on the argument list. For directory
24arguments it calls what() for all files in that directory. Default
25argument is "." (testing all files in the current directory). The
26option -r tells it to recurse down directories found inside
27explicitly given directories.
28"""
29
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000030# The file structure is top-down except that the test program and its
31# subroutine come last.
32
Skip Montanaro0de65802001-02-15 22:15:14 +000033__all__ = ["what","whathdr"]
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000034
def what(filename):
    """Guess the type of the sound file named *filename*.

    This simply delegates to whathdr() and hands back its result
    unchanged: a 5-item description tuple, or None when no header
    probe recognizes the file.
    """
    return whathdr(filename)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000039
40
def whathdr(filename):
    """Recognize the sound header of the file named *filename*.

    The file is opened in binary mode and its first 512 bytes are read.
    Every probe registered in the module-level ``tests`` list is then
    called, in order, as ``probe(header, fileobj)``; the first true
    result is returned.  None is returned when no probe matches.

    Errors from open() (IOError) propagate to the caller.  The file is
    always closed before returning, including when a probe raises.
    """
    f = open(filename, 'rb')
    try:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
        return None
    finally:
        # Previously the handle was left for the garbage collector.
        f.close()
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000050
51
52#-----------------------------------#
53# Subroutines per sound header type #
54#-----------------------------------#
55
56tests = []
57
58def test_aifc(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +000059 import aifc
60 if h[:4] != 'FORM':
61 return None
62 if h[8:12] == 'AIFC':
63 fmt = 'aifc'
64 elif h[8:12] == 'AIFF':
65 fmt = 'aiff'
66 else:
67 return None
68 f.seek(0)
69 try:
70 a = aifc.openfp(f, 'r')
71 except (EOFError, aifc.Error):
72 return None
73 return (fmt, a.getframerate(), a.getnchannels(), \
74 a.getnframes(), 8*a.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000075
76tests.append(test_aifc)
77
78
79def test_au(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +000080 if h[:4] == '.snd':
81 f = get_long_be
82 elif h[:4] in ('\0ds.', 'dns.'):
83 f = get_long_le
84 else:
85 return None
86 type = 'au'
87 hdr_size = f(h[4:8])
88 data_size = f(h[8:12])
89 encoding = f(h[12:16])
90 rate = f(h[16:20])
91 nchannels = f(h[20:24])
92 sample_size = 1 # default
93 if encoding == 1:
94 sample_bits = 'U'
95 elif encoding == 2:
96 sample_bits = 8
97 elif encoding == 3:
98 sample_bits = 16
99 sample_size = 2
100 else:
101 sample_bits = '?'
102 frame_size = sample_size * nchannels
103 return type, rate, nchannels, data_size/frame_size, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000104
105tests.append(test_au)
106
107
108def test_hcom(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000109 if h[65:69] != 'FSSD' or h[128:132] != 'HCOM':
110 return None
111 divisor = get_long_be(h[128+16:128+20])
112 return 'hcom', 22050/divisor, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000113
114tests.append(test_hcom)
115
116
117def test_voc(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000118 if h[:20] != 'Creative Voice File\032':
119 return None
120 sbseek = get_short_le(h[20:22])
121 rate = 0
122 if 0 <= sbseek < 500 and h[sbseek] == '\1':
123 ratecode = ord(h[sbseek+4])
124 rate = int(1000000.0 / (256 - ratecode))
125 return 'voc', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000126
127tests.append(test_voc)
128
129
130def test_wav(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000131 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
132 if h[:4] != 'RIFF' or h[8:12] != 'WAVE' or h[12:16] != 'fmt ':
133 return None
134 style = get_short_le(h[20:22])
135 nchannels = get_short_le(h[22:24])
136 rate = get_long_le(h[24:28])
137 sample_bits = get_short_le(h[34:36])
138 return 'wav', rate, nchannels, -1, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000139
140tests.append(test_wav)
141
142
143def test_8svx(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000144 if h[:4] != 'FORM' or h[8:12] != '8SVX':
145 return None
146 # Should decode it to get #channels -- assume always 1
147 return '8svx', 0, 1, 0, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000148
149tests.append(test_8svx)
150
151
152def test_sndt(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000153 if h[:5] == 'SOUND':
154 nsamples = get_long_le(h[8:12])
155 rate = get_short_le(h[20:22])
156 return 'sndt', rate, 1, nsamples, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000157
158tests.append(test_sndt)
159
160
161def test_sndr(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000162 if h[:2] == '\0\0':
163 rate = get_short_le(h[2:4])
164 if 4000 <= rate <= 25000:
165 return 'sndr', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000166
167tests.append(test_sndr)
168
169
170#---------------------------------------------#
171# Subroutines to extract numbers from strings #
172#---------------------------------------------#
173
def get_long_be(s):
    """Return the first four characters of *s* as a big-endian 32-bit number."""
    b0 = ord(s[0])
    b1 = ord(s[1])
    b2 = ord(s[2])
    b3 = ord(s[3])
    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000176
def get_long_le(s):
    """Return the first four characters of *s* as a little-endian 32-bit number."""
    b3 = ord(s[3])
    b2 = ord(s[2])
    b1 = ord(s[1])
    b0 = ord(s[0])
    return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000179
def get_short_be(s):
    """Return the first two characters of *s* as a big-endian 16-bit number."""
    high = ord(s[0])
    low = ord(s[1])
    return (high << 8) | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000182
def get_short_le(s):
    """Return the first two characters of *s* as a little-endian 16-bit number."""
    high = ord(s[1])
    low = ord(s[0])
    return (high << 8) | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000185
186
187#--------------------#
188# Small test program #
189#--------------------#
190
def test():
    """Command-line driver: report the sound type of each argument.

    A leading '-r' argument is removed from sys.argv and switches on
    recursion into sub-directories.  The remaining arguments (or '.'
    when there are none) are passed to testall() as top-level names.
    On KeyboardInterrupt a notice is written to stderr and the process
    exits with status 1.
    """
    import sys
    argv = sys.argv  # same list object: the deletion below is visible to callers
    recursive = 0
    if len(argv) > 1 and argv[1] == '-r':
        del argv[1:2]
        recursive = 1
    try:
        names = argv[1:] or ['.']
        testall(names, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000205
206def testall(list, recursive, toplevel):
Tim Peters495ad3c2001-01-15 01:36:40 +0000207 import sys
208 import os
209 for filename in list:
210 if os.path.isdir(filename):
211 print filename + '/:',
212 if recursive or toplevel:
213 print 'recursing down:'
214 import glob
215 names = glob.glob(os.path.join(filename, '*'))
216 testall(names, recursive, 0)
217 else:
218 print '*** directory (use -r) ***'
219 else:
220 print filename + ':',
221 sys.stdout.flush()
222 try:
223 print what(filename)
224 except IOError:
225 print '*** not found ***'
Guido van Rossumf4bf0441999-01-06 13:05:58 +0000226
227if __name__ == '__main__':
Tim Peters495ad3c2001-01-15 01:36:40 +0000228 test()