blob: e5901ec58338aaae1ed33d56573b4ca0ecc0cfa1 [file] [log] [blame]
"""Routines to help recognize sound files.
2
3Function whathdr() recognizes various types of sound file headers.
4It understands almost all headers that SOX can decode.
5
6The return tuple contains the following items, in this order:
7- file type (as SOX understands it)
8- sampling rate (0 if unknown or hard to decode)
9- number of channels (0 if unknown or hard to decode)
10- number of frames in the file (-1 if unknown or hard to decode)
11- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
12
13If the file doesn't have a recognizable type, it returns None.
Andrew Svetlovf7a17b42012-12-25 16:47:37 +020014If the file can't be opened, OSError is raised.
Guido van Rossumf4bf0441999-01-06 13:05:58 +000015
16To compute the total time, divide the number of frames by the
17sampling rate (a frame contains a sample for each channel).
18
19Function what() calls whathdr(). (It used to also use some
20heuristics for raw data, but this doesn't work very well.)
21
22Finally, the function test() is a simple main program that calls
23what() for all files mentioned on the argument list. For directory
24arguments it calls what() for all files in that directory. Default
25argument is "." (testing all files in the current directory). The
26option -r tells it to recurse down directories found inside
27explicitly given directories.
28"""
29
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000030# The file structure is top-down except that the test program and its
31# subroutine come last.
32
Barry Warsaw72937f32007-08-12 14:37:20 +000033__all__ = ['what', 'whathdr']
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000034
R David Murray4487dd02014-10-09 16:59:30 -040035from collections import namedtuple
36
# Record type returned by what() and whathdr(); the fields appear in the
# order listed in the module docstring.
SndHeaders = namedtuple(
    'SndHeaders',
    ['filetype', 'framerate', 'nchannels', 'nframes', 'sampwidth'])
39
def what(filename):
    """Guess the type of a sound file.

    This is a thin wrapper: it returns whatever whathdr(filename) returns.
    """
    return whathdr(filename)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000044
45
def whathdr(filename):
    """Recognize sound headers.

    Reads the first 512 bytes of *filename* and offers them, together with
    the open file object, to every recognizer registered in ``tests``, in
    registration order.  The first truthy result is wrapped in a SndHeaders
    tuple and returned; if no recognizer matches, None is returned.
    """
    with open(filename, 'rb') as stream:
        header = stream.read(512)
        for recognizer in tests:
            fields = recognizer(header, stream)
            if not fields:
                continue
            return SndHeaders(*fields)
    return None
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000055
56
57#-----------------------------------#
58# Subroutines per sound header type #
59#-----------------------------------#
60
61tests = []
62
63def test_aifc(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +000064 import aifc
Victor Stinnere9ebde42010-07-13 23:04:56 +000065 if not h.startswith(b'FORM'):
Tim Peters495ad3c2001-01-15 01:36:40 +000066 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000067 if h[8:12] == b'AIFC':
Tim Peters495ad3c2001-01-15 01:36:40 +000068 fmt = 'aifc'
Barry Warsaw72937f32007-08-12 14:37:20 +000069 elif h[8:12] == b'AIFF':
Victor Stinnere9ebde42010-07-13 23:04:56 +000070 fmt = 'aiff'
Tim Peters495ad3c2001-01-15 01:36:40 +000071 else:
72 return None
73 f.seek(0)
74 try:
Guido van Rossumc9341282007-08-13 17:50:00 +000075 a = aifc.open(f, 'r')
Tim Peters495ad3c2001-01-15 01:36:40 +000076 except (EOFError, aifc.Error):
77 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000078 return (fmt, a.getframerate(), a.getnchannels(),
79 a.getnframes(), 8 * a.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000080
81tests.append(test_aifc)
82
83
84def test_au(h, f):
Guido van Rossumc9341282007-08-13 17:50:00 +000085 if h.startswith(b'.snd'):
Barry Warsaw72937f32007-08-12 14:37:20 +000086 func = get_long_be
87 elif h[:4] in (b'\0ds.', b'dns.'):
88 func = get_long_le
Tim Peters495ad3c2001-01-15 01:36:40 +000089 else:
90 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000091 filetype = 'au'
92 hdr_size = func(h[4:8])
93 data_size = func(h[8:12])
94 encoding = func(h[12:16])
95 rate = func(h[16:20])
96 nchannels = func(h[20:24])
Tim Peters495ad3c2001-01-15 01:36:40 +000097 sample_size = 1 # default
98 if encoding == 1:
99 sample_bits = 'U'
100 elif encoding == 2:
101 sample_bits = 8
102 elif encoding == 3:
103 sample_bits = 16
104 sample_size = 2
105 else:
106 sample_bits = '?'
107 frame_size = sample_size * nchannels
Guido van Rossumc9341282007-08-13 17:50:00 +0000108 if frame_size:
109 nframe = data_size / frame_size
110 else:
111 nframe = -1
112 return filetype, rate, nchannels, nframe, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000113
114tests.append(test_au)
115
116
117def test_hcom(h, f):
Barry Warsaw72937f32007-08-12 14:37:20 +0000118 if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
Tim Peters495ad3c2001-01-15 01:36:40 +0000119 return None
Guido van Rossumc9341282007-08-13 17:50:00 +0000120 divisor = get_long_be(h[144:148])
121 if divisor:
122 rate = 22050 / divisor
123 else:
124 rate = 0
125 return 'hcom', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000126
127tests.append(test_hcom)
128
129
130def test_voc(h, f):
Victor Stinnere9ebde42010-07-13 23:04:56 +0000131 if not h.startswith(b'Creative Voice File\032'):
Tim Peters495ad3c2001-01-15 01:36:40 +0000132 return None
133 sbseek = get_short_le(h[20:22])
134 rate = 0
Guido van Rossumc9341282007-08-13 17:50:00 +0000135 if 0 <= sbseek < 500 and h[sbseek] == 1:
136 ratecode = 256 - h[sbseek+4]
137 if ratecode:
138 rate = int(1000000.0 / ratecode)
Tim Peters495ad3c2001-01-15 01:36:40 +0000139 return 'voc', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000140
141tests.append(test_voc)
142
143
144def test_wav(h, f):
R David Murray1a48b9d2013-03-18 17:42:42 -0400145 import wave
Tim Peters495ad3c2001-01-15 01:36:40 +0000146 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
Guido van Rossumc9341282007-08-13 17:50:00 +0000147 if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
Tim Peters495ad3c2001-01-15 01:36:40 +0000148 return None
R David Murray1a48b9d2013-03-18 17:42:42 -0400149 f.seek(0)
150 try:
151 w = wave.openfp(f, 'r')
152 except (EOFError, wave.Error):
153 return None
154 return ('wav', w.getframerate(), w.getnchannels(),
155 w.getnframes(), 8*w.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000156
157tests.append(test_wav)
158
159
160def test_8svx(h, f):
Victor Stinnere9ebde42010-07-13 23:04:56 +0000161 if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
Tim Peters495ad3c2001-01-15 01:36:40 +0000162 return None
163 # Should decode it to get #channels -- assume always 1
164 return '8svx', 0, 1, 0, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000165
166tests.append(test_8svx)
167
168
169def test_sndt(h, f):
Guido van Rossumc9341282007-08-13 17:50:00 +0000170 if h.startswith(b'SOUND'):
Tim Peters495ad3c2001-01-15 01:36:40 +0000171 nsamples = get_long_le(h[8:12])
172 rate = get_short_le(h[20:22])
173 return 'sndt', rate, 1, nsamples, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000174
175tests.append(test_sndt)
176
177
178def test_sndr(h, f):
Guido van Rossumc9341282007-08-13 17:50:00 +0000179 if h.startswith(b'\0\0'):
Tim Peters495ad3c2001-01-15 01:36:40 +0000180 rate = get_short_le(h[2:4])
181 if 4000 <= rate <= 25000:
182 return 'sndr', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000183
184tests.append(test_sndr)
185
186
Barry Warsaw72937f32007-08-12 14:37:20 +0000187#-------------------------------------------#
188# Subroutines to extract numbers from bytes #
189#-------------------------------------------#
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000190
def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant byte first."""
    return ((((b[0] << 8) | b[1]) << 8 | b[2]) << 8) | b[3]
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000193
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant byte first."""
    return ((((b[3] << 8) | b[2]) << 8 | b[1]) << 8) | b[0]
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000196
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, high byte first."""
    high = b[0] << 8
    return high | b[1]
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000199
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, low byte first."""
    high = b[1] << 8
    return high | b[0]
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000202
203
204#--------------------#
205# Small test program #
206#--------------------#
207
def test():
    """Command-line driver: report the sound type of each argument.

    A leading '-r' option is removed from sys.argv and enables recursion
    into nested directories.  With no remaining arguments the current
    directory is examined.  Ctrl-C prints a notice on stderr and exits
    with status 1.
    """
    import sys
    recursive = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-r':
        sys.argv.pop(1)
        recursive = 1
    try:
        targets = sys.argv[1:]
        if not targets:
            targets = ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000222
223def testall(list, recursive, toplevel):
Tim Peters495ad3c2001-01-15 01:36:40 +0000224 import sys
225 import os
226 for filename in list:
227 if os.path.isdir(filename):
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000228 print(filename + '/:', end=' ')
Tim Peters495ad3c2001-01-15 01:36:40 +0000229 if recursive or toplevel:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000230 print('recursing down:')
Tim Peters495ad3c2001-01-15 01:36:40 +0000231 import glob
232 names = glob.glob(os.path.join(filename, '*'))
233 testall(names, recursive, 0)
234 else:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000235 print('*** directory (use -r) ***')
Tim Peters495ad3c2001-01-15 01:36:40 +0000236 else:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000237 print(filename + ':', end=' ')
Tim Peters495ad3c2001-01-15 01:36:40 +0000238 sys.stdout.flush()
239 try:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000240 print(what(filename))
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200241 except OSError:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000242 print('*** not found ***')
Guido van Rossumf4bf0441999-01-06 13:05:58 +0000243
244if __name__ == '__main__':
Tim Peters495ad3c2001-01-15 01:36:40 +0000245 test()