blob: 9b44e680930f269ec0bdfff4196acbcc6c150251 [file] [log] [blame]
Bill Coxca02d872010-11-02 15:10:52 -04001/* Sonic library
2 Copyright 2010
3 Bill Cox
4 This file is part of the Sonic Library.
5
Bill Cox60eeb062015-02-27 10:17:45 -08006 This file is licensed under the Apache 2.0 license.
7*/
Bill Coxca02d872010-11-02 15:10:52 -04008
9/*
Bill Cox3276bb02011-01-11 07:39:26 -050010The Sonic Library implements a new algorithm invented by Bill Cox for the
11specific purpose of speeding up speech by high factors at high quality. It
12generates smooth speech at speed up factors as high as 6X, possibly more. It is
13also capable of slowing down speech, and generates high quality results
14regardless of the speed up or slow down factor. For speeding up speech by 2X or
15more, the following equation is used:
Bill Coxca02d872010-11-02 15:10:52 -040016
17 newSamples = period/(speed - 1.0)
18 scale = 1.0/newSamples;
19
20where period is the current pitch period, determined using AMDF or any other
21pitch estimator, and speed is the speedup factor. If the current position in
22the input stream is pointed to by "samples", and the current output stream
23position is pointed to by "out", then newSamples number of samples can be
24generated with:
25
26 out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;
27
28where t = 0 to newSamples - 1.
29
Bill Coxf37d5972011-08-14 15:52:10 -040030For speed factors < 2X, the PICOLA algorithm is used. The above
Bill Coxca02d872010-11-02 15:10:52 -040031algorithm is first used to double the speed of one pitch period. Then, enough
32input is directly copied from the input to the output to achieve the desired
33speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived:
34
35 speed = (2*period + length)/(period + length)
36 speed*length + speed*period = 2*period + length
37 length(speed - 1) = 2*period - speed*period
38 length = period*(2 - speed)/(speed - 1)
39
40For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
41the output twice, and "length" samples are then copied directly from the input
42to the output until the desired output speed is reached. The length of data copied is:
43
44 length = period*(speed - 0.5)/(1 - speed)
45
Bill Coxf37d5972011-08-14 15:52:10 -040046For speed factors below 0.5 (slowing down by more than 2X), no data is copied,
Bill Coxca02d872010-11-02 15:10:52 -040047and an algorithm similar to the one used for high speed factors is used.
48*/
49
Bill Coxc17208e2010-11-26 11:09:15 -050050#ifdef __cplusplus
51extern "C" {
52#endif
53
Bill Cox4e234d72010-12-17 05:44:02 -050054/* Uncomment this to use sine-wave based overlap-add, which in theory can improve
55 sound quality slightly, at the expense of lots of floating point math. */
56/* #define SONIC_USE_SIN */
57
Bill Cox6a1bbb12010-11-19 11:14:28 -050058/* This specifies the range of voice pitches we try to match.
59 Note that if we go lower than 65, we could overflow in findPitchInRange */
60#define SONIC_MIN_PITCH 65
61#define SONIC_MAX_PITCH 400
Bill Cox0cd49c82010-11-03 10:46:22 -040062
63/* These are used to down-sample some inputs to improve speed */
Bill Coxd76d2222010-11-24 11:42:29 -050064#define SONIC_AMDF_FREQ 4000
Bill Coxca02d872010-11-02 15:10:52 -040065
66struct sonicStreamStruct;
67typedef struct sonicStreamStruct *sonicStream;
68
Bill Cox1a299bb2010-11-19 15:07:17 -050069/* For all of the following functions, numChannels is multiplied by numSamples
70 to determine the actual number of values read or returned. */
71
Bill Coxca02d872010-11-02 15:10:52 -040072/* Create a sonic stream. Return NULL only if we are out of memory and cannot
Bill Cox1a299bb2010-11-19 15:07:17 -050073 allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */
Bill Coxd544fdb2010-11-23 14:13:46 -050074sonicStream sonicCreateStream(int sampleRate, int numChannels);
Bill Coxca02d872010-11-02 15:10:52 -040075/* Destroy the sonic stream. */
76void sonicDestroyStream(sonicStream stream);
Bill Cox0c4c0602010-11-08 11:46:30 -050077/* Use this to write floating point data to be sped up or slowed down into the stream.
Bill Cox1a299bb2010-11-19 15:07:17 -050078 Values must be between -1 and 1. Return 0 if memory realloc failed, otherwise 1. */
Bill Cox0c4c0602010-11-08 11:46:30 -050079int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples);
80/* Use this to write 16-bit data to be sped up or slowed down into the stream.
81 Return 0 if memory realloc failed, otherwise 1 */
82int sonicWriteShortToStream(sonicStream stream, short *samples, int numSamples);
Bill Cox8a23d2f2010-11-16 18:49:36 -050083/* Use this to write 8-bit unsigned data to be sped up or slowed down into the stream.
84 Return 0 if memory realloc failed, otherwise 1 */
85int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char *samples, int numSamples);
Bill Cox0c4c0602010-11-08 11:46:30 -050086/* Use this to read floating point data out of the stream. Sometimes no data
87 will be available, and zero is returned, which is not an error condition. */
88int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples);
89/* Use this to read 16-bit data out of the stream. Sometimes no data will
90 be available, and zero is returned, which is not an error condition. */
91int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples);
Bill Cox8a23d2f2010-11-16 18:49:36 -050092/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data will
93 be available, and zero is returned, which is not an error condition. */
94int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char *samples, int maxSamples);
Bill Coxca02d872010-11-02 15:10:52 -040095/* Force the sonic stream to generate output using whatever data it currently
Bill Cox4bbbbcc2010-11-09 05:32:38 -050096 has. No extra delay will be added to the output, but flushing in the middle of
97 words could introduce distortion. */
Bill Coxca02d872010-11-02 15:10:52 -040098int sonicFlushStream(sonicStream stream);
99/* Return the number of samples in the output buffer */
Bill Cox3a7abf92010-11-06 15:18:49 -0400100int sonicSamplesAvailable(sonicStream stream);
Bill Coxaf9a6242010-11-08 09:32:27 -0500101/* Get the speed of the stream. */
Bill Cox6a1bbb12010-11-19 11:14:28 -0500102float sonicGetSpeed(sonicStream stream);
Bill Coxd544fdb2010-11-23 14:13:46 -0500103/* Set the speed of the stream. */
104void sonicSetSpeed(sonicStream stream, float speed);
105/* Get the pitch of the stream. */
106float sonicGetPitch(sonicStream stream);
107/* Set the pitch of the stream. */
108void sonicSetPitch(sonicStream stream, float pitch);
Bill Cox3276bb02011-01-11 07:39:26 -0500109/* Get the rate of the stream. */
110float sonicGetRate(sonicStream stream);
111/* Set the rate of the stream. */
112void sonicSetRate(sonicStream stream, float rate);
Bill Coxd544fdb2010-11-23 14:13:46 -0500113/* Get the scaling factor of the stream. */
114float sonicGetVolume(sonicStream stream);
115/* Set the scaling factor of the stream. */
116void sonicSetVolume(sonicStream stream, float volume);
Bill Cox3276bb02011-01-11 07:39:26 -0500117/* Get the chord pitch setting. */
118int sonicGetChordPitch(sonicStream stream);
119/* Set chord pitch mode on or off. Default is off. See the documentation
120 page for a description of this feature. */
121void sonicSetChordPitch(sonicStream stream, int useChordPitch);
Bill Coxc978c392010-12-17 05:04:06 -0500122/* Get the quality setting. */
123int sonicGetQuality(sonicStream stream);
124/* Set the "quality". Default 0 is virtually as good as 1, but very much faster. */
125void sonicSetQuality(sonicStream stream, int quality);
Bill Coxaf9a6242010-11-08 09:32:27 -0500126/* Get the sample rate of the stream. */
127int sonicGetSampleRate(sonicStream stream);
Bill Cox69a864b2011-03-04 02:51:43 -0500128/* Set the sample rate of the stream. This will drop any samples that have not been read. */
129void sonicSetSampleRate(sonicStream stream, int sampleRate);
Bill Cox527b4e82010-11-24 17:42:58 -0500130/* Get the number of channels. */
131int sonicGetNumChannels(sonicStream stream);
Bill Cox69a864b2011-03-04 02:51:43 -0500132/* Set the number of channels. This will drop any samples that have not been read. */
133void sonicSetNumChannels(sonicStream stream, int numChannels);
Bill Cox036d7322010-11-09 09:29:24 -0500134/* This is a non-stream oriented interface to just change the speed of a sound
135 sample. It works in-place on the sample array, so there must be at least
136 speed*numSamples available space in the array. Returns the new number of samples. */
Bill Coxd544fdb2010-11-23 14:13:46 -0500137int sonicChangeFloatSpeed(float *samples, int numSamples, float speed, float pitch,
Bill Coxd0380df2011-01-11 08:59:52 -0500138 float rate, float volume, int useChordPitch, int sampleRate, int numChannels);
Bill Cox036d7322010-11-09 09:29:24 -0500139/* This is a non-stream oriented interface to just change the speed of a sound
140 sample. It works in-place on the sample array, so there must be at least
141 speed*numSamples available space in the array. Returns the new number of samples. */
Bill Coxd544fdb2010-11-23 14:13:46 -0500142int sonicChangeShortSpeed(short *samples, int numSamples, float speed, float pitch,
Bill Coxd0380df2011-01-11 08:59:52 -0500143 float rate, float volume, int useChordPitch, int sampleRate, int numChannels);
Bill Coxc17208e2010-11-26 11:09:15 -0500144
145#ifdef __cplusplus
146}
147#endif