blob: aa9b3b9eeb8b8e1be230d0ffd15c00c98137d1cf [file] [log] [blame]
"""Routines to help recognize sound files.
2
3Function whathdr() recognizes various types of sound file headers.
4It understands almost all headers that SOX can decode.
5
6The return tuple contains the following items, in this order:
7- file type (as SOX understands it)
8- sampling rate (0 if unknown or hard to decode)
9- number of channels (0 if unknown or hard to decode)
10- number of frames in the file (-1 if unknown or hard to decode)
11- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
12
13If the file doesn't have a recognizable type, it returns None.
14If the file can't be opened, IOError is raised.
15
16To compute the total time, divide the number of frames by the
17sampling rate (a frame contains a sample for each channel).
18
19Function what() calls whathdr(). (It used to also use some
20heuristics for raw data, but this doesn't work very well.)
21
22Finally, the function test() is a simple main program that calls
23what() for all files mentioned on the argument list. For directory
24arguments it calls what() for all files in that directory. Default
25argument is "." (testing all files in the current directory). The
26option -r tells it to recurse down directories found inside
27explicitly given directories.
28"""
29
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000030# The file structure is top-down except that the test program and its
31# subroutine come last.
32
Barry Warsaw72937f32007-08-12 14:37:20 +000033__all__ = ['what', 'whathdr']
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000034
def what(filename):
    """Guess the type of the sound file *filename*.

    This is a thin front end: the answer is whatever whathdr() reports
    for the file's header (a description tuple, or None).
    """
    return whathdr(filename)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000039
40
def whathdr(filename):
    """Identify a sound file from its header.

    The first 512 bytes of *filename* are read and handed, together with
    the still-open file object, to each recognizer registered in the
    module-level ``tests`` list, in registration order.  The first truthy
    result is returned; None is returned when no recognizer matches.
    Errors raised while opening the file propagate to the caller.
    """
    with open(filename, 'rb') as f:
        h = f.read(512)
        # Lazy generator: recognizers after the first match are never run.
        outcomes = (recognize(h, f) for recognize in tests)
        return next((found for found in outcomes if found), None)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000050
51
52#-----------------------------------#
53# Subroutines per sound header type #
54#-----------------------------------#
55
56tests = []
57
58def test_aifc(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +000059 import aifc
Barry Warsaw72937f32007-08-12 14:37:20 +000060 if h[:4] != b'FORM':
Tim Peters495ad3c2001-01-15 01:36:40 +000061 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000062 if h[8:12] == b'AIFC':
Tim Peters495ad3c2001-01-15 01:36:40 +000063 fmt = 'aifc'
Barry Warsaw72937f32007-08-12 14:37:20 +000064 elif h[8:12] == b'AIFF':
65 fmt = b'aiff'
Tim Peters495ad3c2001-01-15 01:36:40 +000066 else:
67 return None
68 f.seek(0)
69 try:
70 a = aifc.openfp(f, 'r')
71 except (EOFError, aifc.Error):
72 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000073 return (fmt, a.getframerate(), a.getnchannels(),
74 a.getnframes(), 8 * a.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000075
76tests.append(test_aifc)
77
78
79def test_au(h, f):
Barry Warsaw72937f32007-08-12 14:37:20 +000080 if h[:4] == b'.snd':
81 func = get_long_be
82 elif h[:4] in (b'\0ds.', b'dns.'):
83 func = get_long_le
Tim Peters495ad3c2001-01-15 01:36:40 +000084 else:
85 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000086 filetype = 'au'
87 hdr_size = func(h[4:8])
88 data_size = func(h[8:12])
89 encoding = func(h[12:16])
90 rate = func(h[16:20])
91 nchannels = func(h[20:24])
Tim Peters495ad3c2001-01-15 01:36:40 +000092 sample_size = 1 # default
93 if encoding == 1:
94 sample_bits = 'U'
95 elif encoding == 2:
96 sample_bits = 8
97 elif encoding == 3:
98 sample_bits = 16
99 sample_size = 2
100 else:
101 sample_bits = '?'
102 frame_size = sample_size * nchannels
Barry Warsaw72937f32007-08-12 14:37:20 +0000103 return filetype, rate, nchannels, data_size / frame_size, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000104
105tests.append(test_au)
106
107
108def test_hcom(h, f):
Barry Warsaw72937f32007-08-12 14:37:20 +0000109 if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
Tim Peters495ad3c2001-01-15 01:36:40 +0000110 return None
111 divisor = get_long_be(h[128+16:128+20])
Barry Warsaw72937f32007-08-12 14:37:20 +0000112 return 'hcom', 22050 / divisor, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000113
114tests.append(test_hcom)
115
116
117def test_voc(h, f):
Barry Warsaw72937f32007-08-12 14:37:20 +0000118 if h[:20] != b'Creative Voice File\032':
Tim Peters495ad3c2001-01-15 01:36:40 +0000119 return None
120 sbseek = get_short_le(h[20:22])
121 rate = 0
Barry Warsaw72937f32007-08-12 14:37:20 +0000122 if 0 <= sbseek < 500 and h[sbseek] == b'\1':
Tim Peters495ad3c2001-01-15 01:36:40 +0000123 ratecode = ord(h[sbseek+4])
124 rate = int(1000000.0 / (256 - ratecode))
125 return 'voc', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000126
127tests.append(test_voc)
128
129
130def test_wav(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000131 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
Barry Warsaw72937f32007-08-12 14:37:20 +0000132 if h[:4] != b'RIFF' or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
Tim Peters495ad3c2001-01-15 01:36:40 +0000133 return None
134 style = get_short_le(h[20:22])
135 nchannels = get_short_le(h[22:24])
136 rate = get_long_le(h[24:28])
137 sample_bits = get_short_le(h[34:36])
138 return 'wav', rate, nchannels, -1, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000139
140tests.append(test_wav)
141
142
143def test_8svx(h, f):
Barry Warsaw72937f32007-08-12 14:37:20 +0000144 if h[:4] != b'FORM' or h[8:12] != b'8SVX':
Tim Peters495ad3c2001-01-15 01:36:40 +0000145 return None
146 # Should decode it to get #channels -- assume always 1
147 return '8svx', 0, 1, 0, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000148
149tests.append(test_8svx)
150
151
152def test_sndt(h, f):
Barry Warsaw72937f32007-08-12 14:37:20 +0000153 if h[:5] == b'SOUND':
Tim Peters495ad3c2001-01-15 01:36:40 +0000154 nsamples = get_long_le(h[8:12])
155 rate = get_short_le(h[20:22])
156 return 'sndt', rate, 1, nsamples, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000157
158tests.append(test_sndt)
159
160
161def test_sndr(h, f):
Barry Warsaw72937f32007-08-12 14:37:20 +0000162 if h[:2] == b'\0\0':
Tim Peters495ad3c2001-01-15 01:36:40 +0000163 rate = get_short_le(h[2:4])
164 if 4000 <= rate <= 25000:
165 return 'sndr', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000166
167tests.append(test_sndr)
168
169
Barry Warsaw72937f32007-08-12 14:37:20 +0000170#-------------------------------------------#
171# Subroutines to extract numbers from bytes #
172#-------------------------------------------#
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000173
def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant byte first."""
    first, second, third, fourth = b[0], b[1], b[2], b[3]
    # Fold left to right: each step shifts the accumulated value up a byte.
    return ((first << 8 | second) << 8 | third) << 8 | fourth
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000176
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant byte first."""
    top, upper, lower, bottom = b[3], b[2], b[1], b[0]
    # Fold from the most significant byte (b[3]) down to b[0].
    return ((top << 8 | upper) << 8 | lower) << 8 | bottom
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000179
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, high byte first."""
    high, low = b[0], b[1]
    return high << 8 | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000182
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, low byte first."""
    high, low = b[1], b[0]
    return high << 8 | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000185
186
187#--------------------#
188# Small test program #
189#--------------------#
190
def test():
    """Command-line driver: report the sound type of every argument.

    A leading '-r' option is stripped from sys.argv and turns on
    recursion into nested directories.  With no remaining arguments the
    current directory ('.') is examined.  Ctrl-C prints a notice on
    stderr and exits with status 1.
    """
    import sys
    recursive = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-r':
        sys.argv.pop(1)
        recursive = 1
    try:
        targets = sys.argv[1:] or ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000205
206def testall(list, recursive, toplevel):
Tim Peters495ad3c2001-01-15 01:36:40 +0000207 import sys
208 import os
209 for filename in list:
210 if os.path.isdir(filename):
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000211 print(filename + '/:', end=' ')
Tim Peters495ad3c2001-01-15 01:36:40 +0000212 if recursive or toplevel:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000213 print('recursing down:')
Tim Peters495ad3c2001-01-15 01:36:40 +0000214 import glob
215 names = glob.glob(os.path.join(filename, '*'))
216 testall(names, recursive, 0)
217 else:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000218 print('*** directory (use -r) ***')
Tim Peters495ad3c2001-01-15 01:36:40 +0000219 else:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000220 print(filename + ':', end=' ')
Tim Peters495ad3c2001-01-15 01:36:40 +0000221 sys.stdout.flush()
222 try:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000223 print(what(filename))
Tim Peters495ad3c2001-01-15 01:36:40 +0000224 except IOError:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000225 print('*** not found ***')
Guido van Rossumf4bf0441999-01-06 13:05:58 +0000226
227if __name__ == '__main__':
Tim Peters495ad3c2001-01-15 01:36:40 +0000228 test()