/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MERGE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MERGE_H_

#include <assert.h>

#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"

namespace webrtc {

// Forward declarations.
class Expand;
class SyncBuffer;

// This class handles the transition from expansion to normal operation.
// When a packet is not available for decoding at the time it is needed, the
// expand operation is called to generate extrapolation data. If the missing
// packet arrives, i.e., it was merely delayed, it can be decoded and appended
// directly to the end of the expanded data (thanks to how the Expand class
// operates). However, if a later packet arrives instead, the loss is final,
// and the new data must be stitched together with the end of the expanded
// data. This stitching is what the Merge class does.
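//
// Example usage (a hypothetical sketch; the object names and values are
// illustrative only, and error handling is omitted):
//
//   Merge merge(8000, 1, &expand, &sync_buffer);
//   int16_t mute_factor = 16384;  // Q14; 16384 corresponds to 1.0.
//   AudioMultiVector<int16_t> output(1);
//   int result = merge.Process(decoded, decoded_length, &mute_factor,
//                              &output);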
class Merge {
 public:
  Merge(int fs_hz, size_t num_channels, Expand* expand, SyncBuffer* sync_buffer)
      : fs_hz_(fs_hz),
        fs_mult_(fs_hz_ / 8000),
        num_channels_(num_channels),
        timestamps_per_call_(fs_hz_ / 100),
        expand_(expand),
        sync_buffer_(sync_buffer),
        expanded_(num_channels_) {
    assert(num_channels_ > 0);
  }

  // The main method to produce the audio data. The decoded data is supplied in
  // |input|, having |input_length| samples in total for all channels
  // (interleaved). The result is written to |output|. The number of channels
  // allocated in |output| defines the number of channels that will be used
  // when de-interleaving |input|. The values in |external_mute_factor_array|
  // (Q14) will be used to scale the audio, and are updated in the process. The
  // array must have |num_channels_| elements.
  int Process(int16_t* input, int input_length,
              int16_t* external_mute_factor_array,
              AudioMultiVector<int16_t>* output);
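
  // A note on the Q14 mute factors (an illustration, not part of the
  // interface): 16384 represents 1.0, so applying a factor to a sample is
  //   scaled = (sample * mute_factor) >> 14;
  // e.g., a factor of 8192 (0.5 in Q14) halves the amplitude.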

 private:
  static const int kMaxSampleRate = 48000;
  static const int kExpandDownsampLength = 100;
  static const int kInputDownsampLength = 40;
  static const int kMaxCorrelationLength = 60;

  // Calls |expand_| to get more expansion data to merge with. The data is
  // written to |expanded_|. Returns the length of the expanded data, while
  // |expand_period| is set to the number of samples in one expansion period
  // (typically one pitch period). |old_length| is set to the number of
  // samples that were taken from |sync_buffer_|.
  int GetExpandedSignal(int* old_length, int* expand_period);

  // Analyzes |input| and |expanded_signal| to find maximum values, which are
  // written to |input_max| and |expanded_max|, respectively. Returns a muting
  // factor (Q14) to be used on the new data.
  int16_t SignalScaling(const int16_t* input, int input_length,
                        const int16_t* expanded_signal,
                        int16_t* expanded_max, int16_t* input_max) const;

  // Downsamples |input| (|input_length| samples) and |expanded_signal| to
  // 4 kHz sample rate. The downsampled signals are written to
  // |input_downsampled_| and |expanded_downsampled_|, respectively.
  void Downsample(const int16_t* input, int input_length,
                  const int16_t* expanded_signal, int expanded_length);
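  // Conceptually, downsampling to 4 kHz keeps every (fs_hz_ / 4000)-th sample
  // of a low-pass-filtered signal (a simplified sketch; |filtered_input| is
  // hypothetical, and the anti-aliasing filter is omitted):
  //   for (int n = 0; n < kInputDownsampLength; ++n)
  //     input_downsampled_[n] = filtered_input[n * (fs_hz_ / 4000)];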

  // Calculates cross-correlation between |input_downsampled_| and
  // |expanded_downsampled_|, and finds the correlation maximum. The maximizing
  // lag is returned.
  int16_t CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
                                 int start_position, int input_length,
                                 int expand_period) const;
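  // For reference, the plain (unscaled) cross-correlation at lag k over N
  // samples is
  //   corr[k] = sum_{n=0}^{N-1} input_downsampled_[n] *
  //             expanded_downsampled_[n + k];
  // (the |expanded_max| and |input_max| arguments suggest the terms are
  // rescaled to avoid fixed-point overflow; that is omitted here).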

  const int fs_hz_;
  const int fs_mult_;  // fs_hz_ / 8000.
  const size_t num_channels_;
  const int timestamps_per_call_;
  Expand* expand_;
  SyncBuffer* sync_buffer_;
  int16_t expanded_downsampled_[kExpandDownsampLength];
  int16_t input_downsampled_[kInputDownsampLength];
  AudioMultiVector<int16_t> expanded_;

  DISALLOW_COPY_AND_ASSIGN(Merge);
};

}  // namespace webrtc
#endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MERGE_H_