blob: 594353136f5c374effa7aa6db6f4f75e0e99eb7a [file] [log] [blame]
"""Routines to help recognize sound files.
2
3Function whathdr() recognizes various types of sound file headers.
4It understands almost all headers that SOX can decode.
5
6The return tuple contains the following items, in this order:
7- file type (as SOX understands it)
8- sampling rate (0 if unknown or hard to decode)
9- number of channels (0 if unknown or hard to decode)
10- number of frames in the file (-1 if unknown or hard to decode)
11- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
12
13If the file doesn't have a recognizable type, it returns None.
Andrew Svetlovf7a17b42012-12-25 16:47:37 +020014If the file can't be opened, OSError is raised.
Guido van Rossumf4bf0441999-01-06 13:05:58 +000015
16To compute the total time, divide the number of frames by the
17sampling rate (a frame contains a sample for each channel).
18
19Function what() calls whathdr(). (It used to also use some
20heuristics for raw data, but this doesn't work very well.)
21
22Finally, the function test() is a simple main program that calls
23what() for all files mentioned on the argument list. For directory
24arguments it calls what() for all files in that directory. Default
25argument is "." (testing all files in the current directory). The
26option -r tells it to recurse down directories found inside
27explicitly given directories.
28"""
29
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000030# The file structure is top-down except that the test program and its
31# subroutine come last.
32
Barry Warsaw72937f32007-08-12 14:37:20 +000033__all__ = ['what', 'whathdr']
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000034
R David Murray4487dd02014-10-09 16:59:30 -040035from collections import namedtuple
36
# Result record returned by what()/whathdr(): one field per header property.
SndHeaders = namedtuple(
    'SndHeaders',
    ['filetype', 'framerate', 'nchannels', 'nframes', 'sampwidth'],
)

# Attach a human-readable description to every field of the record.
for _field, _doc in (
    ('filetype', """The value for type indicates the data type
and will be one of the strings 'aifc', 'aiff', 'au','hcom',
'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'."""),
    ('framerate', """The sampling_rate will be either the actual
value or 0 if unknown or difficult to decode."""),
    ('nchannels', """The number of channels or 0 if it cannot be
determined or if the value is difficult to decode."""),
    ('nframes', """The value for frames will be either the number
of frames or -1."""),
    ('sampwidth', """Either the sample size in bits or
'A' for A-LAW or 'U' for u-LAW."""),
):
    getattr(SndHeaders, _field).__doc__ = _doc
del _field, _doc
51
def what(filename):
    """Guess the type of a sound file.

    Delegates to whathdr() and returns its result unchanged: a SndHeaders
    tuple when a header is recognized, otherwise None.
    """
    return whathdr(filename)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000056
57
def whathdr(filename):
    """Recognize sound headers.

    Opens *filename* in binary mode, reads its first 512 bytes and offers
    them (together with the open file) to each recognizer registered in
    ``tests``, in registration order.  The first truthy result is wrapped
    in a SndHeaders tuple and returned; None is returned when no
    recognizer matches.
    """
    with open(filename, 'rb') as stream:
        header = stream.read(512)
        for probe in tests:
            fields = probe(header, stream)
            if not fields:
                continue
            return SndHeaders(*fields)
    return None
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000067
68
69#-----------------------------------#
70# Subroutines per sound header type #
71#-----------------------------------#
72
73tests = []
74
75def test_aifc(h, f):
Tim Peters495ad3c2001-01-15 01:36:40 +000076 import aifc
Victor Stinnere9ebde42010-07-13 23:04:56 +000077 if not h.startswith(b'FORM'):
Tim Peters495ad3c2001-01-15 01:36:40 +000078 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000079 if h[8:12] == b'AIFC':
Tim Peters495ad3c2001-01-15 01:36:40 +000080 fmt = 'aifc'
Barry Warsaw72937f32007-08-12 14:37:20 +000081 elif h[8:12] == b'AIFF':
Victor Stinnere9ebde42010-07-13 23:04:56 +000082 fmt = 'aiff'
Tim Peters495ad3c2001-01-15 01:36:40 +000083 else:
84 return None
85 f.seek(0)
86 try:
Guido van Rossumc9341282007-08-13 17:50:00 +000087 a = aifc.open(f, 'r')
Tim Peters495ad3c2001-01-15 01:36:40 +000088 except (EOFError, aifc.Error):
89 return None
Barry Warsaw72937f32007-08-12 14:37:20 +000090 return (fmt, a.getframerate(), a.getnchannels(),
91 a.getnframes(), 8 * a.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +000092
93tests.append(test_aifc)
94
95
def test_au(h, f):
    """Recognize Sun/NeXT .au files (big- or little-endian headers).

    h -- the leading bytes of the file
    f -- the open binary file (unused)

    Returns ('au', rate, nchannels, nframes, sample_bits) or None when the
    magic number does not match or the fixed header is truncated.
    sample_bits is 'U' for u-law, 8 or 16 for linear PCM, and '?' for any
    other encoding.  nframes is -1 when it cannot be computed.
    """
    if h.startswith(b'.snd'):
        func = get_long_be
    elif h[:4] in (b'\0ds.', b'dns.'):
        func = get_long_le
    else:
        return None
    if len(h) < 24:
        # Truncated header: the fixed fields below would be incomplete.
        return None
    filetype = 'au'
    data_size = func(h[8:12])
    encoding = func(h[12:16])
    rate = func(h[16:20])
    nchannels = func(h[20:24])
    sample_size = 1 # default
    if encoding == 1:
        sample_bits = 'U'
    elif encoding == 2:
        sample_bits = 8
    elif encoding == 3:
        sample_bits = 16
        sample_size = 2
    else:
        sample_bits = '?'
    frame_size = sample_size * nchannels
    # 0xFFFFFFFF in the data-size field means "size unknown".
    if frame_size and data_size != 0xFFFFFFFF:
        # Floor division keeps the frame count an integer.
        nframe = data_size // frame_size
    else:
        nframe = -1
    return filetype, rate, nchannels, nframe, sample_bits
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000125
126tests.append(test_au)
127
128
def test_hcom(h, f):
    """Recognize Macintosh HCOM files.

    h -- the leading bytes of the file
    f -- the open binary file (unused)

    Returns ('hcom', rate, 1, -1, 8) or None when the 'FSSD'/'HCOM'
    markers are absent.  rate is 0 when the divisor field is zero or
    missing from a truncated header.
    """
    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
        return None
    rate = 0
    if len(h) >= 148:
        divisor = get_long_be(h[144:148])
        if divisor:
            # Floor division keeps the sampling rate an integer.
            rate = 22050 // divisor
    return 'hcom', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000138
139tests.append(test_hcom)
140
141
def test_voc(h, f):
    """Recognize Creative Voice (.voc) files.

    h -- the leading bytes of the file
    f -- the open binary file (unused)

    Returns ('voc', rate, 1, -1, 8) or None when the signature is absent.
    rate stays 0 unless the block at the offset stored in bytes 20-21 has
    type 1 and lies entirely within h.
    """
    if not h.startswith(b'Creative Voice File\032'):
        return None
    rate = 0
    if len(h) >= 22:
        sbseek = get_short_le(h[20:22])
        # The rate byte sits 4 bytes into the block; make sure it was read
        # before indexing so a short file cannot raise IndexError.
        if 0 <= sbseek < 500 and sbseek + 4 < len(h) and h[sbseek] == 1:
            ratecode = 256 - h[sbseek+4]
            if ratecode:
                rate = int(1000000.0 / ratecode)
    return 'voc', rate, 1, -1, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000152
153tests.append(test_voc)
154
155
156def test_wav(h, f):
R David Murray1a48b9d2013-03-18 17:42:42 -0400157 import wave
Tim Peters495ad3c2001-01-15 01:36:40 +0000158 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
Guido van Rossumc9341282007-08-13 17:50:00 +0000159 if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
Tim Peters495ad3c2001-01-15 01:36:40 +0000160 return None
R David Murray1a48b9d2013-03-18 17:42:42 -0400161 f.seek(0)
162 try:
Brian Curtin9f914a02017-11-10 11:38:25 -0500163 w = wave.open(f, 'r')
R David Murray1a48b9d2013-03-18 17:42:42 -0400164 except (EOFError, wave.Error):
165 return None
166 return ('wav', w.getframerate(), w.getnchannels(),
167 w.getnframes(), 8*w.getsampwidth())
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000168
169tests.append(test_wav)
170
171
172def test_8svx(h, f):
Victor Stinnere9ebde42010-07-13 23:04:56 +0000173 if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
Tim Peters495ad3c2001-01-15 01:36:40 +0000174 return None
175 # Should decode it to get #channels -- assume always 1
176 return '8svx', 0, 1, 0, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000177
178tests.append(test_8svx)
179
180
def test_sndt(h, f):
    """Recognize 'SOUND'-tagged (sndt) files.

    h -- the leading bytes of the file
    f -- the open binary file (unused)

    Returns ('sndt', rate, 1, nsamples, 8) or None when the tag is absent
    or the header is too short to hold the sample-count and rate fields.
    """
    # The rate field ends at byte 22; a shorter header cannot be decoded.
    if not h.startswith(b'SOUND') or len(h) < 22:
        return None
    nsamples = get_long_le(h[8:12])
    rate = get_short_le(h[20:22])
    return 'sndt', rate, 1, nsamples, 8
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000186
187tests.append(test_sndt)
188
189
def test_sndr(h, f):
    """Recognize sndr files (two zero bytes followed by a 16-bit rate).

    h -- the leading bytes of the file
    f -- the open binary file (unused)

    Returns ('sndr', rate, 1, -1, 8) when the little-endian rate in bytes
    2-3 lies in 4000..25000, otherwise None.
    """
    # Need all four bytes; a 2- or 3-byte file must not raise IndexError.
    if not h.startswith(b'\0\0') or len(h) < 4:
        return None
    rate = get_short_le(h[2:4])
    if 4000 <= rate <= 25000:
        return 'sndr', rate, 1, -1, 8
    return None
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000195
196tests.append(test_sndr)
197
198
Barry Warsaw72937f32007-08-12 14:37:20 +0000199#-------------------------------------------#
200# Subroutines to extract numbers from bytes #
201#-------------------------------------------#
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000202
def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant byte first."""
    value = 0
    for octet in (b[0], b[1], b[2], b[3]):
        value = (value << 8) | octet
    return value
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000205
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant byte first."""
    value = 0
    for octet in (b[3], b[2], b[1], b[0]):
        value = (value << 8) | octet
    return value
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000208
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, most significant byte first."""
    high, low = b[0], b[1]
    return (high << 8) | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000211
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, least significant byte first."""
    high = b[1]
    low = b[0]
    return (high << 8) | low
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000214
215
216#--------------------#
217# Small test program #
218#--------------------#
219
def test():
    """Command-line driver: report the sound type of each named file.

    A leading '-r' argument is removed from sys.argv and enables recursion
    into sub-directories.  With no remaining arguments the current
    directory is examined.  Ctrl-C prints a notice to stderr and exits
    with status 1.
    """
    import sys
    argv = sys.argv
    recursive = 0
    if len(argv) > 1 and argv[1] == '-r':
        argv.pop(1)
        recursive = 1
    try:
        targets = argv[1:]
        if not targets:
            targets = ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
233 sys.exit(1)
Guido van Rossumb90bdeb1994-01-14 16:46:14 +0000234
def testall(list, recursive, toplevel):
    """Print the what() result for every path in *list*.

    Directories are descended into when *recursive* or *toplevel* is true
    (nested calls pass toplevel=0); otherwise a hint to use -r is printed.
    Files that cannot be opened are reported as not found.
    """
    import sys
    import os
    for filename in list:
        if not os.path.isdir(filename):
            print(filename + ':', end=' ')
            # Flush so the name is visible before a possibly slow probe.
            sys.stdout.flush()
            try:
                print(what(filename))
            except OSError:
                print('*** not found ***')
            continue
        print(filename + '/:', end=' ')
        if not (recursive or toplevel):
            print('*** directory (use -r) ***')
            continue
        print('recursing down:')
        import glob
        entries = glob.glob(os.path.join(filename, '*'))
        testall(entries, recursive, 0)
Guido van Rossumf4bf0441999-01-06 13:05:58 +0000255
256if __name__ == '__main__':
Tim Peters495ad3c2001-01-15 01:36:40 +0000257 test()