Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 1 | /* Sonic library |
| 2 | Copyright 2010 |
| 3 | Bill Cox |
| 4 | This file is part of the Sonic Library. |
| 5 | |
| 6 | This program is free software; you can redistribute it and/or modify |
| 7 | it under the terms of the GNU General Public License as published by |
| 8 | the Free Software Foundation; either version 2 of the License, or |
| 9 | (at your option) any later version. |
| 10 | |
| 11 | This program is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | GNU General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU General Public License |
| 17 | along with this program; if not, write to the Free Software |
| 18 | Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ |
| 19 | |
| 20 | /* |
| 21 | The Sonic Library implements Pitch Based Resampling, which is a new algorithm |
| 22 | invented by Bill Cox for the specific purpose of speeding up speech by high |
| 23 | factors at high quality. It generates smooth speech at speed up factors as high |
| 24 | as 6X, possibly more. It is also capable of slowing down speech, and generates |
| 25 | high quality results regardless of the speed up or slow down factor. For |
| 26 | speeding up speech by 2X or more, the following equation is used: |
| 27 | |
| 28 | newSamples = period/(speed - 1.0) |
| 29 | scale = 1.0/newSamples; |
| 30 | |
| 31 | where period is the current pitch period, determined using AMDF or any other |
| 32 | pitch estimator, and speed is the speedup factor. If the current position in |
| 33 | the input stream is pointed to by "samples", and the current output stream |
| 34 | position is pointed to by "out", then newSamples number of samples can be |
| 35 | generated with: |
| 36 | |
| 37 | out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples; |
| 38 | |
| 39 | where t = 0 to newSamples - 1. |
| 40 | |
| 41 | For speed factors < 2X, an algorithm similar to PICOLA is used. The above |
| 42 | algorithm is first used to double the speed of one pitch period. Then, enough |
| 43 | input is directly copied from the input to the output to achieve the desired |
| 44 | speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived: |
| 45 | |
| 46 | speed = (2*period + length)/(period + length) |
| 47 | speed*length + speed*period = 2*period + length |
| 48 | length(speed - 1) = 2*period - speed*period |
| 49 | length = period*(2 - speed)/(speed - 1) |
| 50 | |
| 51 | For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into |
| 52 | the output twice, and length of input is copied from the input to the output |
| 53 | until the output desired speed is reached. The length of data copied is: |
| 54 | |
| 55 | length = period*(2*speed - 1)/(1 - speed) |
| 56 | |
| 57 | For slow down factors below 0.5, no data is copied, and an algorithm |
| 58 | similar to high speed factors is used. |
| 59 | */ |
| 60 | |
| 61 | /* This specifies the range of voice pitches we try to match.
   SONIC_MIN_PITCH is the lower bound and SONIC_MAX_PITCH the upper bound.
   NOTE(review): the values are presumably in Hz -- confirm against the
   implementation. */ |
Bill Cox | 0cd49c8 | 2010-11-03 10:46:22 -0400 | [diff] [blame] | 62 | #define SONIC_MIN_PITCH 60 |
| 63 | #define SONIC_MAX_PITCH 300 |
| 64 | |
| 65 | /* These are used to down-sample some inputs to improve speed.
   NOTE(review): SONIC_AMDF_FREQ looks like a target sample rate in Hz for the
   AMDF pitch estimation mentioned at the top of this file, and
   SONIC_AMDF_RANGE like a fractional range -- confirm both against the
   implementation. */ |
| 66 | #define SONIC_AMDF_FREQ 8000 |
| 67 | #define SONIC_AMDF_RANGE 0.05 |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 68 | |
/* Stream handle. Only a forward declaration of the struct appears in this
   part of the file, so callers hold a sonicStream pointer and pass it to the
   functions declared below. */
| 69 | struct sonicStreamStruct; |
| 70 | typedef struct sonicStreamStruct *sonicStream; |
| 71 | |
| 72 | /* Create a sonic stream. Return NULL only if we are out of memory and cannot |
| 73 | allocate the stream.
   speed is the speed-up factor described at the top of this file: values
   above 1.0 speed speech up, values below 1.0 slow it down.
   NOTE(review): sampleRate is presumably in Hz -- confirm. */ |
| 74 | sonicStream sonicCreateStream(double speed, int sampleRate); |
| 75 | /* Destroy the sonic stream. To keep any pending output, call
   sonicFlushStream and do a final read before destroying the stream. */ |
| 76 | void sonicDestroyStream(sonicStream stream); |
Bill Cox | 0c4c060 | 2010-11-08 11:46:30 -0500 | [diff] [blame^] | 77 | /* Use this to write floating point data to be sped up or slowed down into |
| 78 | the stream. Return 0 if memory realloc failed, otherwise 1.
   NOTE(review): numSamples is presumably the number of values in samples --
   confirm. */ |
| 79 | int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples); |
| 80 | /* Use this to write 16-bit data to be sped up or slowed down into the stream. |
| 81 | Return 0 if memory realloc failed, otherwise 1.
   NOTE(review): numSamples is presumably the number of values in samples --
   confirm. */ |
| 82 | int sonicWriteShortToStream(sonicStream stream, short *samples, int numSamples); |
| 83 | /* Use this to read floating point data out of the stream. Sometimes no data |
| 84 | will be available, and zero is returned, which is not an error condition.
   NOTE(review): the return value is presumably the number of samples stored
   in samples, at most maxSamples -- confirm. */ |
| 85 | int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples); |
| 86 | /* Use this to read 16-bit data out of the stream. Sometimes no data will |
| 87 | be available, and zero is returned, which is not an error condition.
   NOTE(review): the return value is presumably the number of samples stored
   in samples, at most maxSamples -- confirm. */ |
| 88 | int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples); |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 89 | /* Force the sonic stream to generate output using whatever data it currently |
| 90 | has. Zeros will be appended to the input data if there is not enough data |
| 91 | in the stream's input buffer. Use this, followed by a final read from the |
| 92 | stream before destroying the stream.
   NOTE(review): the meaning of the int return value is not documented here;
   presumably 0 on allocation failure and 1 otherwise, like the write
   functions -- confirm. */ |
| 93 | int sonicFlushStream(sonicStream stream); |
| 94 | /* Return the number of samples in the output buffer, i.e. samples that are
   ready to be read from the stream. */ |
Bill Cox | 3a7abf9 | 2010-11-06 15:18:49 -0400 | [diff] [blame] | 95 | int sonicSamplesAvailable(sonicStream stream); |
Bill Cox | af9a624 | 2010-11-08 09:32:27 -0500 | [diff] [blame] | 96 | /* Get the speed of the stream.
   NOTE(review): presumably the speed factor given to sonicCreateStream --
   confirm. */ |
| 97 | double sonicGetSpeed(sonicStream stream); |
| 98 | /* Get the sample rate of the stream.
   NOTE(review): presumably the sampleRate given to sonicCreateStream --
   confirm. */ |
| 99 | int sonicGetSampleRate(sonicStream stream); |