Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 1 | /* Sonic library |
| 2 | Copyright 2010 |
| 3 | Bill Cox |
| 4 | This file is part of the Sonic Library. |
| 5 | |
Bill Cox | 60eeb06 | 2015-02-27 10:17:45 -0800 | [diff] [blame] | 6 | This file is licensed under the Apache 2.0 license. |
| 7 | */ |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 8 | |
| 9 | /* |
Bill Cox | 3276bb0 | 2011-01-11 07:39:26 -0500 | [diff] [blame] | 10 | The Sonic Library implements a new algorithm invented by Bill Cox for the |
| 11 | specific purpose of speeding up speech by high factors at high quality. It |
| 12 | generates smooth speech at speed up factors as high as 6X, possibly more. It is |
| 13 | also capable of slowing down speech, and generates high quality results |
| 14 | regardless of the speed up or slow down factor. For speeding up speech by 2X or |
| 15 | more, the following equation is used: |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 16 | |
| 17 | newSamples = period/(speed - 1.0) |
| 18 | scale = 1.0/newSamples; |
| 19 | |
| 20 | where period is the current pitch period, determined using AMDF or any other |
| 21 | pitch estimator, and speed is the speedup factor. If the current position in |
| 22 | the input stream is pointed to by "samples", and the current output stream |
| 23 | position is pointed to by "out", then newSamples number of samples can be |
| 24 | generated with: |
| 25 | |
| 26 | out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples; |
| 27 | |
| 28 | where t = 0 to newSamples - 1. |
| 29 | |
Bill Cox | f37d597 | 2011-08-14 15:52:10 -0400 | [diff] [blame] | 30 | For speed factors < 2X, the PICOLA algorithm is used. The above |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 31 | algorithm is first used to double the speed of one pitch period. Then, enough |
| 32 | input is directly copied from the input to the output to achieve the desired |
| 33 | speed up factor, where 1.0 < speed < 2.0. The amount of data copied is derived: |
| 34 | |
| 35 | speed = (2*period + length)/(period + length) |
| 36 | speed*length + speed*period = 2*period + length |
| 37 | length*(speed - 1) = 2*period - speed*period |
| 38 | length = period*(2 - speed)/(speed - 1) |
| 39 | |
| 40 | For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into |
| 41 | the output twice, and "length" samples are then copied from the input to the output |
| 42 | so that the desired output speed is reached. From speed = (period + length)/(2*period + length): |
| 43 | |
| 44 | length = period*(2*speed - 1)/(1 - speed) |
| 45 | |
Bill Cox | f37d597 | 2011-08-14 15:52:10 -0400 | [diff] [blame] | 46 | For slow down factors below 0.5, no data is copied, and an algorithm |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 47 | similar to high speed factors is used. |
| 48 | */ |
| 49 | |
Bill Cox | c17208e | 2010-11-26 11:09:15 -0500 | [diff] [blame] | 50 | #ifdef __cplusplus |
| 51 | extern "C" { |
| 52 | #endif |
| 53 | |
Bill Cox | 4e234d7 | 2010-12-17 05:44:02 -0500 | [diff] [blame] | 54 | /* Uncomment this to use sine-wave based overlap-add, which in theory can improve |
| 55 | sound quality slightly, at the expense of lots of floating point math. */ |
| 56 | /* #define SONIC_USE_SIN */ |
| 57 | |
Bill Cox | 6a1bbb1 | 2010-11-19 11:14:28 -0500 | [diff] [blame] | 58 | /* This specifies the range of voice pitches we try to match (presumably in Hz; |
| 59 | TODO confirm). Note that if we go lower than 65, we could overflow in findPitchInRange. */ |
| 60 | #define SONIC_MIN_PITCH 65 |
| 61 | #define SONIC_MAX_PITCH 400 |
Bill Cox | 0cd49c8 | 2010-11-03 10:46:22 -0400 | [diff] [blame] | 62 | |
| 63 | /* This is used to down-sample some inputs to improve speed (presumably the target rate, in Hz, for AMDF pitch estimation; TODO confirm). */ |
Bill Cox | d76d222 | 2010-11-24 11:42:29 -0500 | [diff] [blame] | 64 | #define SONIC_AMDF_FREQ 4000 |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 65 | |
| 66 | struct sonicStreamStruct; |
| 67 | typedef struct sonicStreamStruct *sonicStream; /* Opaque handle: the struct is only forward-declared in this header. */ |
| 68 | |
Bill Cox | 1a299bb | 2010-11-19 15:07:17 -0500 | [diff] [blame] | 69 | /* For all of the following functions, sample counts are per channel: numSamples is |
| 70 | multiplied by numChannels to determine the actual number of values read or returned. */ |
| 71 | |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 72 | /* Create a sonic stream. Return NULL only if we are out of memory and cannot |
Bill Cox | 1a299bb | 2010-11-19 15:07:17 -0500 | [diff] [blame] | 73 | allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */ |
Bill Cox | d544fdb | 2010-11-23 14:13:46 -0500 | [diff] [blame] | 74 | sonicStream sonicCreateStream(int sampleRate, int numChannels); |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 75 | /* Destroy the sonic stream. */ |
| 76 | void sonicDestroyStream(sonicStream stream); |
Bill Cox | 0c4c060 | 2010-11-08 11:46:30 -0500 | [diff] [blame] | 77 | /* Use this to write floating point data to be sped up or slowed down into the stream. |
Bill Cox | 1a299bb | 2010-11-19 15:07:17 -0500 | [diff] [blame] | 78 | Values must be between -1 and 1. Return 0 if memory realloc failed, otherwise 1. */ |
Bill Cox | 0c4c060 | 2010-11-08 11:46:30 -0500 | [diff] [blame] | 79 | int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples); |
| 80 | /* Use this to write 16-bit data to be sped up or slowed down into the stream. |
| 81 | Return 0 if memory realloc failed, otherwise 1. */ |
| 82 | int sonicWriteShortToStream(sonicStream stream, short *samples, int numSamples); |
Bill Cox | 8a23d2f | 2010-11-16 18:49:36 -0500 | [diff] [blame] | 83 | /* Use this to write 8-bit unsigned data to be sped up or slowed down into the stream. |
| 84 | Return 0 if memory realloc failed, otherwise 1. */ |
| 85 | int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char *samples, int numSamples); |
Bill Cox | 0c4c060 | 2010-11-08 11:46:30 -0500 | [diff] [blame] | 86 | /* Use this to read floating point data out of the stream. Sometimes no data |
| 87 | will be available, and zero is returned, which is not an error condition. */ |
| 88 | int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples); |
| 89 | /* Use this to read 16-bit data out of the stream. Sometimes no data will |
| 90 | be available, and zero is returned, which is not an error condition. */ |
| 91 | int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples); |
Bill Cox | 8a23d2f | 2010-11-16 18:49:36 -0500 | [diff] [blame] | 92 | /* Use this to read 8-bit unsigned data out of the stream. Sometimes no data will |
| 93 | be available, and zero is returned, which is not an error condition. */ |
| 94 | int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char *samples, int maxSamples); |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 95 | /* Force the sonic stream to generate output using whatever data it currently |
Bill Cox | 4bbbbcc | 2010-11-09 05:32:38 -0500 | [diff] [blame] | 96 | has. No extra delay will be added to the output, but flushing in the middle of |
| 97 | words could introduce distortion. NOTE(review): the int return value is not documented here; presumably 0 on allocation failure and 1 otherwise, like the write functions - confirm. */ |
Bill Cox | ca02d87 | 2010-11-02 15:10:52 -0400 | [diff] [blame] | 98 | int sonicFlushStream(sonicStream stream); |
| 99 | /* Return the number of samples in the output buffer. */ |
Bill Cox | 3a7abf9 | 2010-11-06 15:18:49 -0400 | [diff] [blame] | 100 | int sonicSamplesAvailable(sonicStream stream); |
Bill Cox | af9a624 | 2010-11-08 09:32:27 -0500 | [diff] [blame] | 101 | /* Get the speed (speed-up factor) of the stream. */ |
Bill Cox | 6a1bbb1 | 2010-11-19 11:14:28 -0500 | [diff] [blame] | 102 | float sonicGetSpeed(sonicStream stream); |
Bill Cox | d544fdb | 2010-11-23 14:13:46 -0500 | [diff] [blame] | 103 | /* Set the speed (speed-up factor) of the stream; per the algorithm notes at the top of this file, values above 1.0 speed up and values below 1.0 slow down. */ |
| 104 | void sonicSetSpeed(sonicStream stream, float speed); |
| 105 | /* Get the pitch of the stream. */ |
| 106 | float sonicGetPitch(sonicStream stream); |
| 107 | /* Set the pitch of the stream. */ |
| 108 | void sonicSetPitch(sonicStream stream, float pitch); |
Bill Cox | 3276bb0 | 2011-01-11 07:39:26 -0500 | [diff] [blame] | 109 | /* Get the rate of the stream. */ |
| 110 | float sonicGetRate(sonicStream stream); |
| 111 | /* Set the rate of the stream. */ |
| 112 | void sonicSetRate(sonicStream stream, float rate); |
Bill Cox | d544fdb | 2010-11-23 14:13:46 -0500 | [diff] [blame] | 113 | /* Get the volume scaling factor of the stream. */ |
| 114 | float sonicGetVolume(sonicStream stream); |
| 115 | /* Set the volume scaling factor of the stream. */ |
| 116 | void sonicSetVolume(sonicStream stream, float volume); |
Bill Cox | 3276bb0 | 2011-01-11 07:39:26 -0500 | [diff] [blame] | 117 | /* Get the chord pitch setting. */ |
| 118 | int sonicGetChordPitch(sonicStream stream); |
| 119 | /* Set chord pitch mode on or off. Default is off. See the documentation |
| 120 | page for a description of this feature. */ |
| 121 | void sonicSetChordPitch(sonicStream stream, int useChordPitch); |
Bill Cox | c978c39 | 2010-12-17 05:04:06 -0500 | [diff] [blame] | 122 | /* Get the quality setting. */ |
| 123 | int sonicGetQuality(sonicStream stream); |
| 124 | /* Set the "quality" setting. The default, 0, is virtually as good as 1, but very much faster. */ |
| 125 | void sonicSetQuality(sonicStream stream, int quality); |
Bill Cox | af9a624 | 2010-11-08 09:32:27 -0500 | [diff] [blame] | 126 | /* Get the sample rate of the stream. */ |
| 127 | int sonicGetSampleRate(sonicStream stream); |
Bill Cox | 69a864b | 2011-03-04 02:51:43 -0500 | [diff] [blame] | 128 | /* Set the sample rate of the stream. This will drop any samples that have not yet been read. */ |
| 129 | void sonicSetSampleRate(sonicStream stream, int sampleRate); |
Bill Cox | 527b4e8 | 2010-11-24 17:42:58 -0500 | [diff] [blame] | 130 | /* Get the number of channels. */ |
| 131 | int sonicGetNumChannels(sonicStream stream); |
Bill Cox | 69a864b | 2011-03-04 02:51:43 -0500 | [diff] [blame] | 132 | /* Set the number of channels. This will drop any samples that have not yet been read. */ |
| 133 | void sonicSetNumChannels(sonicStream stream, int numChannels); |
Bill Cox | 036d732 | 2010-11-09 09:29:24 -0500 | [diff] [blame] | 134 | /* This is a non-stream oriented interface to just change the speed of a floating point sound |
| 135 | sample. It works in-place on the sample array, so there must be at least |
| 136 | speed*numSamples available space in the array. Returns the new number of samples. NOTE(review): slowing down (speed < 1) yields more samples than were supplied, so the required capacity looks closer to numSamples/speed - confirm against the implementation. */ |
Bill Cox | d544fdb | 2010-11-23 14:13:46 -0500 | [diff] [blame] | 137 | int sonicChangeFloatSpeed(float *samples, int numSamples, float speed, float pitch, |
Bill Cox | d0380df | 2011-01-11 08:59:52 -0500 | [diff] [blame] | 138 | float rate, float volume, int useChordPitch, int sampleRate, int numChannels); |
Bill Cox | 036d732 | 2010-11-09 09:29:24 -0500 | [diff] [blame] | 139 | /* This is a non-stream oriented interface to just change the speed of a 16-bit sound |
| 140 | sample. It works in-place on the sample array, so there must be at least |
| 141 | speed*numSamples available space in the array. Returns the new number of samples. NOTE(review): slowing down (speed < 1) yields more samples than were supplied, so the required capacity looks closer to numSamples/speed - confirm against the implementation. */ |
Bill Cox | d544fdb | 2010-11-23 14:13:46 -0500 | [diff] [blame] | 142 | int sonicChangeShortSpeed(short *samples, int numSamples, float speed, float pitch, |
Bill Cox | d0380df | 2011-01-11 08:59:52 -0500 | [diff] [blame] | 143 | float rate, float volume, int useChordPitch, int sampleRate, int numChannels); |
Bill Cox | c17208e | 2010-11-26 11:09:15 -0500 | [diff] [blame] | 144 | |
| 145 | #ifdef __cplusplus |
| 146 | } |
| 147 | #endif |