blob: 240e5072f8fdf383279c98905ecbe7816f3dee0e [file] [log] [blame]
"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, OSError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
29
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000030# The file structure is top-down except that the test program and its
31# subroutine come last.
32
Barry Warsaw72937f32007-08-12 14:37:20 +000033__all__ = ['what', 'whathdr']
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000034
def what(filename):
    """Guess the type of a sound file.

    Thin wrapper that hands *filename* to whathdr() and returns its
    result unchanged (a 5-item tuple, or None if nothing matched).
    """
    return whathdr(filename)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000039
40
def whathdr(filename):
    """Recognize sound headers.

    Opens *filename* in binary mode, reads its first 512 bytes and offers
    them (together with the open file object) to each function in the
    module-level ``tests`` list, in order.  The first truthy result is
    returned; None is returned when no test recognizes the header.
    """
    with open(filename, 'rb') as stream:
        header = stream.read(512)
        # Lazy generator: probing stops as soon as one test succeeds.
        outcomes = (probe(header, stream) for probe in tests)
        return next((found for found in outcomes if found), None)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000050
51
52#-----------------------------------#
53# Subroutines per sound header type #
54#-----------------------------------#
55
56tests = []
57
58def test_aifc(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +000059 import aifc
Victor Stinnere9ebde42010-07-13 23:04:56 +000060 if not h.startswith(b'FORM'):
Tim Peters495ad3c2001-01-15 01:36:40 +000061 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000062 if h[8:12] == b'AIFC':
Tim Peters495ad3c2001-01-15 01:36:40 +000063 fmt = 'aifc'
Barry Warsaw72937f32007-08-12 14:37:20 +000064 elif h[8:12] == b'AIFF':
Victor Stinnere9ebde42010-07-13 23:04:56 +000065 fmt = 'aiff'
Tim Peters495ad3c2001-01-15 01:36:40 +000066 else:
67 return None
68 f.seek(0)
69 try:
Guido van Rossumc9341282007-08-13 17:50:00 +000070 a = aifc.open(f, 'r')
Tim Peters495ad3c2001-01-15 01:36:40 +000071 except (EOFError, aifc.Error):
72 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000073 return (fmt, a.getframerate(), a.getnchannels(),
74 a.getnframes(), 8 * a.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000075
76tests.append(test_aifc)
77
78
79def test_au(h, f):
Guido van Rossumc9341282007-08-13 17:50:00 +000080 if h.startswith(b'.snd'):
Barry Warsaw72937f32007-08-12 14:37:20 +000081 func = get_long_be
82 elif h[:4] in (b'\0ds.', b'dns.'):
83 func = get_long_le
Tim Peters495ad3c2001-01-15 01:36:40 +000084 else:
85 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000086 filetype = 'au'
87 hdr_size = func(h[4:8])
88 data_size = func(h[8:12])
89 encoding = func(h[12:16])
90 rate = func(h[16:20])
91 nchannels = func(h[20:24])
Tim Peters495ad3c2001-01-15 01:36:40 +000092 sample_size = 1 # default
93 if encoding == 1:
94 sample_bits = 'U'
95 elif encoding == 2:
96 sample_bits = 8
97 elif encoding == 3:
98 sample_bits = 16
99 sample_size = 2
100 else:
101 sample_bits = '?'
102 frame_size = sample_size * nchannels
Guido van Rossumc9341282007-08-13 17:50:00 +0000103 if frame_size:
104 nframe = data_size / frame_size
105 else:
106 nframe = -1
107 return filetype, rate, nchannels, nframe, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000108
109tests.append(test_au)
110
111
112def test_hcom(h, f):
Barry Warsaw72937f32007-08-12 14:37:20 +0000113 if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
Tim Peters495ad3c2001-01-15 01:36:40 +0000114 return None
Guido van Rossumc9341282007-08-13 17:50:00 +0000115 divisor = get_long_be(h[144:148])
116 if divisor:
117 rate = 22050 / divisor
118 else:
119 rate = 0
120 return 'hcom', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000121
122tests.append(test_hcom)
123
124
125def test_voc(h, f):
Victor Stinnere9ebde42010-07-13 23:04:56 +0000126 if not h.startswith(b'Creative Voice File\032'):
Tim Peters495ad3c2001-01-15 01:36:40 +0000127 return None
128 sbseek = get_short_le(h[20:22])
129 rate = 0
Guido van Rossumc9341282007-08-13 17:50:00 +0000130 if 0 <= sbseek < 500 and h[sbseek] == 1:
131 ratecode = 256 - h[sbseek+4]
132 if ratecode:
133 rate = int(1000000.0 / ratecode)
Tim Peters495ad3c2001-01-15 01:36:40 +0000134 return 'voc', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000135
136tests.append(test_voc)
137
138
139def test_wav(h, f):
R David Murray1a48b9d2013-03-18 17:42:42 -0400140 import wave
Tim Peters495ad3c2001-01-15 01:36:40 +0000141 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
Guido van Rossumc9341282007-08-13 17:50:00 +0000142 if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
Tim Peters495ad3c2001-01-15 01:36:40 +0000143 return None
R David Murray1a48b9d2013-03-18 17:42:42 -0400144 f.seek(0)
145 try:
146 w = wave.openfp(f, 'r')
147 except (EOFError, wave.Error):
148 return None
149 return ('wav', w.getframerate(), w.getnchannels(),
150 w.getnframes(), 8*w.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000151
152tests.append(test_wav)
153
154
155def test_8svx(h, f):
Victor Stinnere9ebde42010-07-13 23:04:56 +0000156 if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
Tim Peters495ad3c2001-01-15 01:36:40 +0000157 return None
158 # Should decode it to get #channels -- assume always 1
159 return '8svx', 0, 1, 0, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000160
161tests.append(test_8svx)
162
163
164def test_sndt(h, f):
Guido van Rossumc9341282007-08-13 17:50:00 +0000165 if h.startswith(b'SOUND'):
Tim Peters495ad3c2001-01-15 01:36:40 +0000166 nsamples = get_long_le(h[8:12])
167 rate = get_short_le(h[20:22])
168 return 'sndt', rate, 1, nsamples, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000169
170tests.append(test_sndt)
171
172
173def test_sndr(h, f):
Guido van Rossumc9341282007-08-13 17:50:00 +0000174 if h.startswith(b'\0\0'):
Tim Peters495ad3c2001-01-15 01:36:40 +0000175 rate = get_short_le(h[2:4])
176 if 4000 <= rate <= 25000:
177 return 'sndr', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000178
179tests.append(test_sndr)
180
181
Barry Warsaw72937f32007-08-12 14:37:20 +0000182#-------------------------------------------#
183# Subroutines to extract numbers from bytes #
184#-------------------------------------------#
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000185
def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant item first."""
    value = 0
    for octet in (b[0], b[1], b[2], b[3]):
        value = (value << 8) | octet
    return value
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000188
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant item first."""
    value = 0
    for octet in (b[3], b[2], b[1], b[0]):
        value = (value << 8) | octet
    return value
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000191
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, most significant item first."""
    high, low = b[0], b[1]
    return (high << 8) | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000194
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, least significant item first."""
    high, low = b[1], b[0]
    return (high << 8) | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000197
198
199#--------------------#
200# Small test program #
201#--------------------#
202
def test():
    """Command-line driver: run testall() over the arguments in sys.argv.

    A leading '-r' argument is removed from sys.argv and turns on
    recursion.  With no remaining arguments the current directory ('.')
    is examined.  On KeyboardInterrupt a notice is written to stderr and
    the process exits with status 1.
    """
    import sys

    argv = sys.argv  # same list object, so the deletion below mutates sys.argv
    recursive = 0
    if argv[1:2] == ['-r']:
        del argv[1:2]
        recursive = 1
    try:
        targets = argv[1:] or ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000217
218def testall(list, recursive, toplevel):
Tim Peters495ad3c2001-01-15 01:36:40 +0000219 import sys
220 import os
221 for filename in list:
222 if os.path.isdir(filename):
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000223 print(filename + '/:', end=' ')
Tim Peters495ad3c2001-01-15 01:36:40 +0000224 if recursive or toplevel:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000225 print('recursing down:')
Tim Peters495ad3c2001-01-15 01:36:40 +0000226 import glob
227 names = glob.glob(os.path.join(filename, '*'))
228 testall(names, recursive, 0)
229 else:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000230 print('*** directory (use -r) ***')
Tim Peters495ad3c2001-01-15 01:36:40 +0000231 else:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000232 print(filename + ':', end=' ')
Tim Peters495ad3c2001-01-15 01:36:40 +0000233 sys.stdout.flush()
234 try:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000235 print(what(filename))
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200236 except OSError:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000237 print('*** not found ***')
Guido van Rossumf4bf0441999-01-06 13:05:58 +0000238
239if __name__ == '__main__':
Tim Peters495ad3c2001-01-15 01:36:40 +0000240 test()