blob: 9f5dcc90d439947e31a2697d054280a9cb1c066b [file] [log] [blame]
"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, IOError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
29
# The file structure is top-down except that the test program and its
# subroutine come last.
32
# Public API of this module; everything else is an implementation detail.
__all__ = ['what', 'whathdr']
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000034
def what(filename):
    """Guess the type of a sound file.

    Delegates to whathdr() and returns its result unchanged: a 5-tuple
    describing the file, or None when no header test recognizes it.
    Errors raised while opening or reading the file propagate.
    """
    return whathdr(filename)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000039
40
def whathdr(filename):
    """Recognize sound headers.

    Read the first 512 bytes of *filename* (opened in binary mode) and
    offer them, together with the open file object, to each function in
    the module-level ``tests`` list in order.

    Returns the first truthy result produced by a test function, or None
    when no test recognizes the header.  Exceptions from open()/read()
    are not caught here.
    """
    with open(filename, 'rb') as f:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
        return None
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000050
51
#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#
55
# Ordered registry of header-test functions; whathdr() tries them in turn.
tests = []
57
58def test_aifc(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +000059 import aifc
Victor Stinnere9ebde42010-07-13 23:04:56 +000060 if not h.startswith(b'FORM'):
Tim Peters495ad3c2001-01-15 01:36:40 +000061 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000062 if h[8:12] == b'AIFC':
Tim Peters495ad3c2001-01-15 01:36:40 +000063 fmt = 'aifc'
Barry Warsaw72937f32007-08-12 14:37:20 +000064 elif h[8:12] == b'AIFF':
Victor Stinnere9ebde42010-07-13 23:04:56 +000065 fmt = 'aiff'
Tim Peters495ad3c2001-01-15 01:36:40 +000066 else:
67 return None
68 f.seek(0)
69 try:
Guido van Rossumc9341282007-08-13 17:50:00 +000070 a = aifc.open(f, 'r')
Tim Peters495ad3c2001-01-15 01:36:40 +000071 except (EOFError, aifc.Error):
72 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000073 return (fmt, a.getframerate(), a.getnchannels(),
74 a.getnframes(), 8 * a.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000075
tests.append(test_aifc)
77
78
79def test_au(h, f):
Guido van Rossumc9341282007-08-13 17:50:00 +000080 if h.startswith(b'.snd'):
Barry Warsaw72937f32007-08-12 14:37:20 +000081 func = get_long_be
82 elif h[:4] in (b'\0ds.', b'dns.'):
83 func = get_long_le
Tim Peters495ad3c2001-01-15 01:36:40 +000084 else:
85 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000086 filetype = 'au'
87 hdr_size = func(h[4:8])
88 data_size = func(h[8:12])
89 encoding = func(h[12:16])
90 rate = func(h[16:20])
91 nchannels = func(h[20:24])
Tim Peters495ad3c2001-01-15 01:36:40 +000092 sample_size = 1 # default
93 if encoding == 1:
94 sample_bits = 'U'
95 elif encoding == 2:
96 sample_bits = 8
97 elif encoding == 3:
98 sample_bits = 16
99 sample_size = 2
100 else:
101 sample_bits = '?'
102 frame_size = sample_size * nchannels
Guido van Rossumc9341282007-08-13 17:50:00 +0000103 if frame_size:
104 nframe = data_size / frame_size
105 else:
106 nframe = -1
107 return filetype, rate, nchannels, nframe, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000108
tests.append(test_au)
110
111
112def test_hcom(h, f):
Barry Warsaw72937f32007-08-12 14:37:20 +0000113 if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
Tim Peters495ad3c2001-01-15 01:36:40 +0000114 return None
Guido van Rossumc9341282007-08-13 17:50:00 +0000115 divisor = get_long_be(h[144:148])
116 if divisor:
117 rate = 22050 / divisor
118 else:
119 rate = 0
120 return 'hcom', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000121
tests.append(test_hcom)
123
124
125def test_voc(h, f):
Victor Stinnere9ebde42010-07-13 23:04:56 +0000126 if not h.startswith(b'Creative Voice File\032'):
Tim Peters495ad3c2001-01-15 01:36:40 +0000127 return None
128 sbseek = get_short_le(h[20:22])
129 rate = 0
Guido van Rossumc9341282007-08-13 17:50:00 +0000130 if 0 <= sbseek < 500 and h[sbseek] == 1:
131 ratecode = 256 - h[sbseek+4]
132 if ratecode:
133 rate = int(1000000.0 / ratecode)
Tim Peters495ad3c2001-01-15 01:36:40 +0000134 return 'voc', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000135
tests.append(test_voc)
137
138
139def test_wav(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +0000140 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
Guido van Rossumc9341282007-08-13 17:50:00 +0000141 if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
Tim Peters495ad3c2001-01-15 01:36:40 +0000142 return None
143 style = get_short_le(h[20:22])
144 nchannels = get_short_le(h[22:24])
145 rate = get_long_le(h[24:28])
146 sample_bits = get_short_le(h[34:36])
147 return 'wav', rate, nchannels, -1, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000148
tests.append(test_wav)
150
151
152def test_8svx(h, f):
Victor Stinnere9ebde42010-07-13 23:04:56 +0000153 if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
Tim Peters495ad3c2001-01-15 01:36:40 +0000154 return None
155 # Should decode it to get #channels -- assume always 1
156 return '8svx', 0, 1, 0, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000157
tests.append(test_8svx)
159
160
161def test_sndt(h, f):
Guido van Rossumc9341282007-08-13 17:50:00 +0000162 if h.startswith(b'SOUND'):
Tim Peters495ad3c2001-01-15 01:36:40 +0000163 nsamples = get_long_le(h[8:12])
164 rate = get_short_le(h[20:22])
165 return 'sndt', rate, 1, nsamples, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000166
tests.append(test_sndt)
168
169
170def test_sndr(h, f):
Guido van Rossumc9341282007-08-13 17:50:00 +0000171 if h.startswith(b'\0\0'):
Tim Peters495ad3c2001-01-15 01:36:40 +0000172 rate = get_short_le(h[2:4])
173 if 4000 <= rate <= 25000:
174 return 'sndr', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000175
tests.append(test_sndr)
177
178
#-------------------------------------------#
# Subroutines to extract numbers from bytes #
#-------------------------------------------#
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000182
def get_long_be(b):
    """Return the first four items of *b* as a big-endian 32-bit integer.

    *b* must be indexable and yield integers (e.g. a bytes object) with
    at least four items; shorter input raises IndexError.
    """
    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000185
def get_long_le(b):
    """Return the first four items of *b* as a little-endian 32-bit integer.

    *b* must be indexable and yield integers (e.g. a bytes object) with
    at least four items; shorter input raises IndexError.
    """
    return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000188
def get_short_be(b):
    """Return the first two items of *b* as a big-endian 16-bit integer.

    *b* must be indexable and yield integers (e.g. a bytes object) with
    at least two items; shorter input raises IndexError.
    """
    return (b[0] << 8) | b[1]
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000191
def get_short_le(b):
    """Return the first two items of *b* as a little-endian 16-bit integer.

    *b* must be indexable and yield integers (e.g. a bytes object) with
    at least two items; shorter input raises IndexError.
    """
    return (b[1] << 8) | b[0]
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000194
195
#--------------------#
# Small test program #
#--------------------#
199
def test():
    """Command-line driver: report the sound type of each argument.

    Arguments are taken from sys.argv.  A leading '-r' enables recursion
    into sub-directories; with no file arguments the current directory
    ('.') is examined.  On KeyboardInterrupt a notice is written to
    stderr and the process exits with status 1.
    """
    import sys
    # Work on a copy so the process-wide sys.argv is left untouched.
    args = sys.argv[1:]
    recursive = 0
    if args and args[0] == '-r':
        del args[0]
        recursive = 1
    try:
        if args:
            testall(args, recursive, 1)
        else:
            testall(['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000214
def testall(list, recursive, toplevel):
    """Print the result of what() for every name in *list*.

    list      -- iterable of file or directory names
    recursive -- true to descend into directories found below the
                 explicitly given ones
    toplevel  -- true for the names given explicitly; directories at
                 this level are always expanded

    Directories are expanded with glob (pattern '*') and processed by a
    recursive call with toplevel false.  For other names the result of
    what() is printed; IOError from what() is reported as
    '*** not found ***'.
    """
    import sys
    import os
    import glob
    for filename in list:
        if os.path.isdir(filename):
            print(filename + '/:', end=' ')
            if recursive or toplevel:
                print('recursing down:')
                names = glob.glob(os.path.join(filename, '*'))
                testall(names, recursive, 0)
            else:
                print('*** directory (use -r) ***')
        else:
            print(filename + ':', end=' ')
            # Show the file name before what() starts reading the file.
            sys.stdout.flush()
            try:
                print(what(filename))
            except IOError:
                print('*** not found ***')
Guido van Rossumf4bf0441999-01-06 13:05:58 +0000235
# Run the small test program when executed as a script.
if __name__ == '__main__':
    test()