blob: 96595c6974468213e0a93414af95f4981bb609c5 [file] [log] [blame]
"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, OSError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""
29
30# The file structure is top-down except that the test program and its
31# subroutine come last.
32
33__all__ = ['what', 'whathdr']
34
35from collections import namedtuple
36
# Result record returned by whathdr(): one named field per header property.
SndHeaders = namedtuple(
    'SndHeaders',
    ['filetype', 'framerate', 'nchannels', 'nframes', 'sampwidth'],
)

# Per-field documentation, attached to the generated field descriptors so
# that help(SndHeaders) describes each item.
SndHeaders.filetype.__doc__ = (
    "The value for type indicates the data type\n"
    "and will be one of the strings 'aifc', 'aiff', 'au','hcom',\n"
    "'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'."
)
SndHeaders.framerate.__doc__ = (
    "The sampling_rate will be either the actual\n"
    "value or 0 if unknown or difficult to decode."
)
SndHeaders.nchannels.__doc__ = (
    "The number of channels or 0 if it cannot be\n"
    "determined or if the value is difficult to decode."
)
SndHeaders.nframes.__doc__ = (
    "The value for frames will be either the number\n"
    "of frames or -1."
)
SndHeaders.sampwidth.__doc__ = (
    "Either the sample size in bits or\n"
    "'A' for A-LAW or 'U' for u-LAW."
)
51
def what(filename):
    """Guess the type of a sound file.

    Delegates entirely to whathdr() and returns its result unchanged.
    """
    return whathdr(filename)
56
57
def whathdr(filename):
    """Recognize sound headers.

    Opens *filename* in binary mode, reads up to the first 512 bytes and
    offers them (together with the open file object) to each function in
    the module-level ``tests`` list, in order.  The first truthy result is
    wrapped in a SndHeaders tuple and returned; if no function recognizes
    the data the result is None.
    """
    with open(filename, 'rb') as stream:
        header = stream.read(512)
        for detect in tests:
            fields = detect(header, stream)
            if not fields:
                continue
            return SndHeaders(*fields)
    return None
67
68
69#-----------------------------------#
70# Subroutines per sound header type #
71#-----------------------------------#
72
# Ordered registry of header-detection functions.  whathdr() calls each
# entry as tf(h, f) -- h being the first bytes of the file, f the open
# file object -- in list order and returns the first truthy result, so the
# order in which detectors are appended below is significant.
tests = []
74
75def test_aifc(h, f):
76 import aifc
77 if not h.startswith(b'FORM'):
78 return None
79 if h[8:12] == b'AIFC':
80 fmt = 'aifc'
81 elif h[8:12] == b'AIFF':
82 fmt = 'aiff'
83 else:
84 return None
85 f.seek(0)
86 try:
87 a = aifc.open(f, 'r')
88 except (EOFError, aifc.Error):
89 return None
90 return (fmt, a.getframerate(), a.getnchannels(),
91 a.getnframes(), 8 * a.getsampwidth())
92
93tests.append(test_aifc)
94
95
96def test_au(h, f):
97 if h.startswith(b'.snd'):
98 func = get_long_be
99 elif h[:4] in (b'\0ds.', b'dns.'):
100 func = get_long_le
101 else:
102 return None
103 filetype = 'au'
104 hdr_size = func(h[4:8])
105 data_size = func(h[8:12])
106 encoding = func(h[12:16])
107 rate = func(h[16:20])
108 nchannels = func(h[20:24])
109 sample_size = 1 # default
110 if encoding == 1:
111 sample_bits = 'U'
112 elif encoding == 2:
113 sample_bits = 8
114 elif encoding == 3:
115 sample_bits = 16
116 sample_size = 2
117 else:
118 sample_bits = '?'
119 frame_size = sample_size * nchannels
120 if frame_size:
121 nframe = data_size / frame_size
122 else:
123 nframe = -1
124 return filetype, rate, nchannels, nframe, sample_bits
125
126tests.append(test_au)
127
128
129def test_hcom(h, f):
130 if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
131 return None
132 divisor = get_long_be(h[144:148])
133 if divisor:
134 rate = 22050 / divisor
135 else:
136 rate = 0
137 return 'hcom', rate, 1, -1, 8
138
139tests.append(test_hcom)
140
141
142def test_voc(h, f):
143 if not h.startswith(b'Creative Voice File\032'):
144 return None
145 sbseek = get_short_le(h[20:22])
146 rate = 0
147 if 0 <= sbseek < 500 and h[sbseek] == 1:
148 ratecode = 256 - h[sbseek+4]
149 if ratecode:
150 rate = int(1000000.0 / ratecode)
151 return 'voc', rate, 1, -1, 8
152
153tests.append(test_voc)
154
155
156def test_wav(h, f):
157 import wave
158 # 'RIFF' <len> 'WAVE' 'fmt ' <len>
159 if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
160 return None
161 f.seek(0)
162 try:
163 w = wave.open(f, 'r')
164 except (EOFError, wave.Error):
165 return None
166 return ('wav', w.getframerate(), w.getnchannels(),
167 w.getnframes(), 8*w.getsampwidth())
168
169tests.append(test_wav)
170
171
172def test_8svx(h, f):
173 if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
174 return None
175 # Should decode it to get #channels -- assume always 1
176 return '8svx', 0, 1, 0, 8
177
178tests.append(test_8svx)
179
180
181def test_sndt(h, f):
182 if h.startswith(b'SOUND'):
183 nsamples = get_long_le(h[8:12])
184 rate = get_short_le(h[20:22])
185 return 'sndt', rate, 1, nsamples, 8
186
187tests.append(test_sndt)
188
189
190def test_sndr(h, f):
191 if h.startswith(b'\0\0'):
192 rate = get_short_le(h[2:4])
193 if 4000 <= rate <= 25000:
194 return 'sndr', rate, 1, -1, 8
195
196tests.append(test_sndr)
197
198
199#-------------------------------------------#
200# Subroutines to extract numbers from bytes #
201#-------------------------------------------#
202
def get_long_be(b):
    """Combine b[0]..b[3] into one integer, most significant item first."""
    value = 0
    for index in (0, 1, 2, 3):
        value = (value << 8) | b[index]
    return value
205
def get_long_le(b):
    """Combine b[0]..b[3] into one integer, least significant item first."""
    value = 0
    for index in (3, 2, 1, 0):
        value = (value << 8) | b[index]
    return value
208
def get_short_be(b):
    """Combine b[0] and b[1] into one integer, high-order item first."""
    high, low = b[0], b[1]
    return (high << 8) | low
211
def get_short_le(b):
    """Combine b[0] and b[1] into one integer, low-order item first."""
    high = b[1]
    low = b[0]
    return (high << 8) | low
214
215
216#--------------------#
217# Small test program #
218#--------------------#
219
def test():
    """Command-line driver: report the sound type of each argument.

    A leading '-r' option is removed from sys.argv and enables recursion
    into nested directories.  The remaining arguments (or '.' when there
    are none) are handed to testall().  On KeyboardInterrupt a notice is
    written to stderr and the process exits with status 1.
    """
    import sys
    recursive = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-r':
        sys.argv.pop(1)
        recursive = 1
    try:
        targets = sys.argv[1:] or ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
234
235def testall(list, recursive, toplevel):
236 import sys
237 import os
238 for filename in list:
239 if os.path.isdir(filename):
240 print(filename + '/:', end=' ')
241 if recursive or toplevel:
242 print('recursing down:')
243 import glob
244 names = glob.glob(os.path.join(glob.escape(filename), '*'))
245 testall(names, recursive, 0)
246 else:
247 print('*** directory (use -r) ***')
248 else:
249 print(filename + ':', end=' ')
250 sys.stdout.flush()
251 try:
252 print(what(filename))
253 except OSError:
254 print('*** not found ***')
255
256if __name__ == '__main__':
257 test()