blob: 4828db4cf98df6ffa93d856b4fbb8e46a62789f8 [file] [log] [blame]
Bill Coxca02d872010-11-02 15:10:52 -04001/* Sonic library
2 Copyright 2010
3 Bill Cox
4 This file is part of the Sonic Library.
5
6This program is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2 of the License, or
9(at your option) any later version.
10
11This program is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with this program; if not, write to the Free Software
18Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */
19
20/*
21The Sonic Library implements Pitch Based Resampling, which is a new algorithm
22invented by Bill Cox for the specific purpose of speeding up speech by high
23factors at high quality. It generates smooth speech at speed up factors as high
24as 6X, possibly more. It is also capable of slowing down speech, and generates
25high quality results regardless of the speed up or slow down factor. For
26speeding up speech by 2X or more, the following equation is used:
27
28 newSamples = period/(speed - 1.0)
29 scale = 1.0/newSamples;
30
31where period is the current pitch period, determined using AMDF or any other
32pitch estimator, and speed is the speedup factor. If the current position in
33the input stream is pointed to by "samples", and the current output stream
34position is pointed to by "out", then newSamples number of samples can be
35generated with:
36
37 out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;
38
39where t = 0 to newSamples - 1.
40
41For speed factors < 2X, an algorithm similar to PICOLA is used. The above
42algorithm is first used to double the speed of one pitch period. Then, enough
43input is directly copied from the input to the output to achieve the desired
44speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived:
45
46 speed = (2*period + length)/(period + length)
47 speed*length + speed*period = 2*period + length
48 length(speed - 1) = 2*period - speed*period
49 length = period*(2 - speed)/(speed - 1)
50
51For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
52the output twice, and length of input is copied from the input to the output
53until the output desired speed is reached. The length of data copied is:
54
55 length = period*(speed - 0.5)/(1 - speed)
56
57For slow down factors below 0.5, no data is copied, and an algorithm
58similar to high speed factors is used.
59*/
60
61/* This specifies the range of voice pitches we try to match:
   SONIC_MIN_PITCH is the lower bound and SONIC_MAX_PITCH the upper bound.
   NOTE(review): the units are presumably Hz -- confirm in the implementation. */
Bill Cox0cd49c82010-11-03 10:46:22 -040062#define SONIC_MIN_PITCH 60
63#define SONIC_MAX_PITCH 300
64
65/* These are used to down-sample some inputs to improve speed. AMDF is the
   pitch estimator named in the algorithm description at the top of this file.
   NOTE(review): SONIC_AMDF_FREQ is presumably the down-sampled rate in Hz and
   SONIC_AMDF_RANGE a fractional tolerance; neither use is visible here --
   confirm in the implementation. */
66#define SONIC_AMDF_FREQ 8000
67#define SONIC_AMDF_RANGE 0.05
Bill Coxca02d872010-11-02 15:10:52 -040068
/* Stream handle taken by every function declared below. The struct is only
   forward-declared here, so its fields are not visible through this header. */
69struct sonicStreamStruct;
70typedef struct sonicStreamStruct *sonicStream;
71
72/* Create a sonic stream. Return NULL only if we are out of memory and cannot
73 allocate the stream.
   speed is the speedup factor from the algorithm description at the top of
   this file: values above 1.0 speed the audio up, values below 1.0 slow it
   down.
   NOTE(review): sampleRate is presumably in samples per second -- confirm.
   sonicDestroyStream() is the matching call that destroys the stream. */
74sonicStream sonicCreateStream(double speed, int sampleRate);
75/* Destroy the sonic stream. stream is a handle obtained from
   sonicCreateStream().
   NOTE(review): whether a NULL stream is accepted is not shown here. */
76void sonicDestroyStream(sonicStream stream);
Bill Cox0c4c0602010-11-08 11:46:30 -050077/* Use this to write floating point data to be sped up or slowed down into the
   stream. samples holds the data to write and numSamples is its length in
   samples.
   NOTE(review): the expected value range of the floats is not stated here --
   confirm in the implementation.
78 Return 0 if memory realloc failed, otherwise 1 */
79int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples);
80/* Use this to write 16-bit data to be sped up or slowed down into the stream.
   samples holds the data to write and numSamples is its length in samples.
81 Return 0 if memory realloc failed, otherwise 1 */
82int sonicWriteShortToStream(sonicStream stream, short *samples, int numSamples);
83/* Use this to read floating point data out of the stream. Sometimes no data
84 will be available, and zero is returned, which is not an error condition.
   samples is the caller's destination buffer.
   NOTE(review): presumably at most maxSamples samples are stored and the
   return value is the number actually read -- confirm in the implementation. */
85int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples);
86/* Use this to read 16-bit data out of the stream. Sometimes no data will
87 be available, and zero is returned, which is not an error condition.
   samples is the caller's destination buffer.
   NOTE(review): presumably at most maxSamples samples are stored and the
   return value is the number actually read -- confirm in the implementation. */
88int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples);
Bill Coxca02d872010-11-02 15:10:52 -040089/* Force the sonic stream to generate output using whatever data it currently
90 has. Zeros will be appended to the input data if there is not enough data
91 in the stream's input buffer. Use this, followed by a final read from the
92 stream before destroying the stream.
   NOTE(review): the meaning of the int return value is not documented here;
   the write functions return 0 on realloc failure and 1 otherwise -- confirm
   whether this function follows the same convention. */
93int sonicFlushStream(sonicStream stream);
94/* Return the number of samples in the output buffer.
   NOTE(review): presumably this is the amount a read call could return right
   now -- confirm in the implementation. */
Bill Cox3a7abf92010-11-06 15:18:49 -040095int sonicSamplesAvailable(sonicStream stream);
Bill Coxaf9a6242010-11-08 09:32:27 -050096/* Get the speed of the stream.
   NOTE(review): presumably the speed value given to sonicCreateStream() --
   confirm in the implementation. */
97double sonicGetSpeed(sonicStream stream);
98/* Get the sample rate of the stream.
   NOTE(review): presumably the sampleRate value given to sonicCreateStream()
   -- confirm in the implementation. */
99int sonicGetSampleRate(sonicStream stream);