SoundTouch音频处理库源码分析及算法提取(6)

来源：互联网 发布：linux文件重命名命令 编辑：程序博客网 时间：2024/05/21 17:51

TDStretch类的实现

SoundTouch类成员函数putSamples(const SAMPLETYPE *samples, uint nSamples)的实现如下。根据上篇的分析，rate是一个比率，大于1表示速度加快，小于1表示速度减慢。下面先看播放速度减慢（rate <= 1.0f）这种情况。

……

#ifndef PREVENT_CLICK_AT_RATE_CROSSOVER

else if (rate <= 1.0f)

{

// transpose the rate down, output the transposed sound to tempo changer buffer

assert(output == pTDStretch);

pRateTransposer->putSamples(samples, nSamples);

pTDStretch->moveSamples(*pRateTransposer);

}

else

#endif

{

// evaluate the tempo changer, then transpose the rate up,

assert(output == pRateTransposer);

pTDStretch->putSamples(samples, nSamples);

pRateTransposer->moveSamples(*pTDStretch);

}

……

先通过pRateTransposer->putSamples(samples, nSamples);对声音进行了重采样，采用的是线性插值法，然后调用pTDStretch->moveSamples(*pRateTransposer);。其中pTDStretch是TDStretch类的实例。TDStretch类定义如下：

/// Class that does the time-stretch (tempo change) effect for the processed
/// sound.
///
/// Derives from FIFOProcessor and owns separate input and output sample
/// buffers; the constructor passes '&outputBuffer' to the FIFOProcessor base.
class TDStretch : public FIFOProcessor
{
protected:
/// Number of channels, 1 = mono, 2 = stereo (the constructor sets 2).
int channels;
/// Number of samples needed in 'inputBuffer' to process another batch;
/// recomputed by setTempo().
int sampleReq;
/// Current tempo ratio as passed to setTempo(); the constructor sets 1.0f.
float tempo;
/// Buffer of 'overlapLength * 2' samples, (re)allocated by
/// acceptNewOverlapLength().
SAMPLETYPE *pMidBuffer;
/// 16-byte aligned pointer into the 'pRefMidBufferUnaligned' allocation.
SAMPLETYPE *pRefMidBuffer;
/// Raw allocation that backs 'pRefMidBuffer'.
SAMPLETYPE *pRefMidBufferUnaligned;
/// Overlap length in samples; calculateOverlapLength() derives it from the
/// overlap time (at least 16, rounded down to a multiple of 8).
int overlapLength;
/// Seek range in samples: (sampleRate * seekWindowMs) / 1000.
int seekLength;
/// Processing sequence length in samples: (sampleRate * sequenceMs) / 1000,
/// but never less than 2 * overlapLength.
int seekWindowLength;
// NOTE(review): 'overlapDividerBits' and 'slopingDivider' are not used by the
// floating point code visible here; presumably they belong to the integer
// sample implementation - confirm.
int overlapDividerBits;
int slopingDivider;
/// Ideal input skip per sequence: tempo * (seekWindowLength - overlapLength).
float nominalSkip;
/// Set to 0 by the constructor.
/// NOTE(review): presumably the fractional remainder of the skip - confirm.
float skipFract;
/// Output sample buffer, exposed through getOutput().
FIFOSampleBuffer outputBuffer;
/// Input sample buffer, exposed through getInput().
FIFOSampleBuffer inputBuffer;
/// Quick seek flag, see enableQuickSeek() / isQuickSeekEnabled().
BOOL bQuickSeek;
// int outDebt;
// BOOL bMidBufferDirty;
/// Sample rate in Hz, see setParameters().
int sampleRate;
/// Processing sequence length in milliseconds.
int sequenceMs;
/// Seek window length in milliseconds.
int seekWindowMs;
/// Overlap length in milliseconds.
int overlapMs;
/// TRUE when calcSeqParameters() derives 'sequenceMs' from the tempo.
BOOL bAutoSeqSetting;
/// TRUE when calcSeqParameters() derives 'seekWindowMs' from the tempo.
BOOL bAutoSeekSetting;

/// Sets a new overlap length and reallocates the mid buffers when it grows.
void acceptNewOverlapLength(int newOverlapLength);

/// Clears cross correlation routine state if necessary (e.g. in MMX routines).
virtual void clearCrossCorrState();

/// Converts the overlap time (milliseconds) to samples and applies it.
void calculateOverlapLength(int overlapMs);

/// Cross correlation of 'mixingPos' against 'compare' over the overlap range.
virtual LONG_SAMPLETYPE calcCrossCorrStereo(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
virtual LONG_SAMPLETYPE calcCrossCorrMono(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;

/// Search for the offset within the seek range that gives the best overlap
/// with the mid buffer.
virtual int seekBestOverlapPositionStereo(const SAMPLETYPE *refPos);
virtual int seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos);
virtual int seekBestOverlapPositionMono(const SAMPLETYPE *refPos);
virtual int seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos);
// NOTE(review): presumably dispatches to one of the variants above according
// to 'channels' and 'bQuickSeek' - confirm against the implementation.
int seekBestOverlapPosition(const SAMPLETYPE *refPos);

/// Cross-fade 'input' with the mid buffer contents into 'output'.
virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const;
virtual void overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const;

void clearMidBuffer();

/// Overlaps the mid buffer with 'input' at sample position 'ovlPos', calling
/// overlapStereo() or overlapMono() depending on 'channels'.
void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const;

void precalcCorrReferenceMono();
void precalcCorrReferenceStereo();

/// Calculates processing sequence length according to tempo setting.
void calcSeqParameters();

/// Changes the tempo of the given sound samples.
/// NOTE(review): the earlier comment here said this returns the amount of
/// samples written to the "output" buffer and mentioned a
/// 'set_returnBuffer_size' function, but the function is declared void and no
/// such function is visible; presumably results go to 'outputBuffer' - confirm.
void processSamples();

public:
TDStretch();
virtual ~TDStretch();

/// Operator 'new' is overloaded so that it automatically creates a suitable instance
/// depending on if we've a MMX/SSE/etc-capable CPU available or not.
static void *operator new(size_t s);

/// Use this function instead of "new" operator to create a new instance of this class.
/// This function automatically chooses a correct feature set depending on if the CPU
/// supports MMX/SSE/etc extensions.
static TDStretch *newInstance();

/// Returns the output buffer object
FIFOSamplePipe *getOutput() { return &outputBuffer; };

/// Returns the input buffer object
FIFOSamplePipe *getInput() { return &inputBuffer; };

/// Sets new target tempo. The constructor uses 1.0f as the normal tempo;
/// smaller values represent slower tempo, larger faster tempo.
void setTempo(float newTempo);

/// NOTE(review): the earlier comment here ("Returns nonzero if there aren't
/// any samples available for outputting") cannot apply to a void function;
/// presumably this clears the sample buffers - confirm against the
/// implementation.
virtual void clear();

/// Clears the input buffer
void clearInput();

/// Sets the number of channels, 1 = mono, 2 = stereo
void setChannels(int numChannels);

/// Enables/disables the quick position seeking algorithm. Zero to disable,
/// nonzero to enable
void enableQuickSeek(BOOL enable);

/// Returns nonzero if the quick seeking algorithm is enabled.
BOOL isQuickSeekEnabled() const;

/// Sets routine control parameters. These control are certain time constants
/// defining how the sound is stretched to the desired duration.
/// 'sampleRate' = sample rate of the sound
/// 'sequenceMS' = one processing sequence length in milliseconds
/// 'seekwindowMS' = seeking window length for scanning the best overlapping
/// position
/// 'overlapMS' = overlapping length
///
/// Negative values (the defaults) keep the previously stored setting; for
/// 'sequenceMS' and 'seekwindowMS' the value 0 selects the automatic,
/// tempo-dependent setting.
void setParameters(int sampleRate, ///< Samplerate of sound being processed (Hz)
int sequenceMS = -1, ///< Single processing sequence length (ms)
int seekwindowMS = -1, ///< Offset seeking window length (ms)
int overlapMS = -1 ///< Sequence overlapping length (ms)
);

/// Get routine control parameters, see setParameters() function.
/// Any of the parameters to this function can be NULL, in such case corresponding parameter
/// value isn't returned.
void getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const;

/// Adds 'numsamples' pcs of samples from the 'samples' memory position into
/// the input of the object.
virtual void putSamples(
const SAMPLETYPE *samples, ///< Input sample data
uint numSamples ///< Number of samples in 'samples' so that one sample
///< contains both channels if stereo
);
};

TDStretch类和基类的派生关系

FIFOSamplePipe-> FIFOProcessor->TDStretch

我们先看看他的构造函数

/// Creates a stretcher with the default configuration: stereo, quick seek
/// disabled, automatic sequence / seek-window selection, 44100 Hz sample rate
/// and tempo 1.0.
TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
{
    // No overlap buffers exist yet; the setParameters() call below reaches
    // acceptNewOverlapLength() through calculateOverlapLength().
    pMidBuffer = NULL;
    pRefMidBufferUnaligned = NULL;
    overlapLength = 0;

    // Default processing mode.
    channels = 2;
    bQuickSeek = FALSE;
    bAutoSeqSetting = TRUE;
    bAutoSeekSetting = TRUE;

    // Neutral tempo state, needed before the parameters are calculated.
    skipFract = 0;
    tempo = 1.0f;

    setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
    setTempo(1.0f);

    clear();
}

一些参数的初始化。

先看看在源代码TDStretch.cpp中实现的类成员函数setParameters()

// Sets routine control parameters. These control are certain time constants

// defining how the sound is stretched to the desired duration.

// 'sampleRate' = sample rate of the sound

// 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)

// 'seekwindowMS' = seeking window length for scanning the best overlapping

// position (default = 28 ms)

// 'overlapMS' = overlapping length (default = 12 ms)

void TDStretch::setParameters(int aSampleRate, int aSequenceMS,

int aSeekWindowMS, int aOverlapMS)

{

// accept only positive parameter values - if zero or negative, use old values instead

if (aSampleRate > 0) this->sampleRate = aSampleRate;

if (aOverlapMS > 0) this->overlapMs = aOverlapMS;

if (aSequenceMS > 0)

{

this->sequenceMs = aSequenceMS;

bAutoSeqSetting = FALSE;

}

else if (aSequenceMS == 0)

{

// if zero, use automatic setting

bAutoSeqSetting = TRUE;

}

if (aSeekWindowMS > 0)

{

this->seekWindowMs = aSeekWindowMS;

bAutoSeekSetting = FALSE;

}

else if (aSeekWindowMS == 0)

{

// if zero, use automatic setting

bAutoSeekSetting = TRUE;

}

calcSeqParameters();

calculateOverlapLength(overlapMs);

// set tempo to recalculate 'sampleReq'

setTempo(tempo);

}

其中主要参数的计算通过以下三个类成员函数来完成：

calcSeqParameters();

calculateOverlapLength(overlapMs);// set tempo to calculate 'sampleReq'

setTempo(tempo);

通过代码中类成员函数的实现，我们可以知道calcSeqParameters()用来计算seekWindowLength和seekLength，二者都是通过一个简单的换算公式把毫秒换算成Sample的个数：seekWindowLength = (sampleRate * sequenceMs) / 1000;seekLength = (sampleRate * seekWindowMs) / 1000;。

/// Calculates processing sequence length according to tempo setting

void TDStretch::calcSeqParameters()

{

// Adjust tempo param according to tempo, so that variating processing sequence length is used

// at varius tempo settings, between the given low...top limits

#define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%)

#define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)

// sequence-ms setting values at above low & top tempo

#define AUTOSEQ_AT_MIN 125.0

#define AUTOSEQ_AT_MAX 50.0

#define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))

#define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))

// seek-window-ms setting values at above low & top tempo

#define AUTOSEEK_AT_MIN 25.0

#define AUTOSEEK_AT_MAX 15.0

#define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))

#define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))

#define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))

double seq, seek;

if (bAutoSeqSetting)

{

seq = AUTOSEQ_C + AUTOSEQ_K * tempo;

seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);

sequenceMs = (int)(seq + 0.5);

}

if (bAutoSeekSetting)

{

seek = AUTOSEEK_C + AUTOSEEK_K * tempo;

seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);

seekWindowMs = (int)(seek + 0.5);

}

// Update seek window lengths

seekWindowLength = (sampleRate * sequenceMs) / 1000;

if (seekWindowLength < 2 * overlapLength)

{

seekWindowLength = 2 * overlapLength;

}

seekLength = (sampleRate * seekWindowMs) / 1000;

}

类成员函数calculateOverlapLength()计算重叠的长度，

/// Converts the overlap period 'overlapInMsec' (milliseconds) into a sample
/// count and installs it through acceptNewOverlapLength().
void TDStretch::calculateOverlapLength(int overlapInMsec)
{
    assert(overlapInMsec >= 0);

    int overlapSamples = (sampleRate * overlapInMsec) / 1000;

    // never use fewer than 16 samples
    if (overlapSamples < 16)
    {
        overlapSamples = 16;
    }

    // round down to a multiple of 8
    overlapSamples = (overlapSamples / 8) * 8;

    acceptNewOverlapLength(overlapSamples);
}

类成员函数acceptNewOverlapLength()分配重叠部分需要占用的内存空间.

/// Set new overlap length parameter & reallocate the mid buffers if necessary.
///
/// 'newOverlapLength' is the new overlap length in samples (must not be
/// negative). The buffers are reallocated only when the overlap grows; a
/// shorter overlap keeps using the existing, larger allocations.
void TDStretch::acceptNewOverlapLength(int newOverlapLength)
{
    assert(newOverlapLength >= 0);

    const int prevOvl = overlapLength;
    overlapLength = newOverlapLength;

    if (overlapLength > prevOvl)
    {
        delete[] pMidBuffer;
        delete[] pRefMidBufferUnaligned;

        // room for two values per overlap position (stereo)
        pMidBuffer = new SAMPLETYPE[overlapLength * 2];
        clearMidBuffer();

        // 16 extra bytes so that the start can be moved up to a 16 byte boundary
        pRefMidBufferUnaligned = new SAMPLETYPE[2 * overlapLength + 16 / sizeof(SAMPLETYPE)];

        // ensure that 'pRefMidBuffer' is aligned to 16 byte boundary for efficiency.
        // The address is handled as size_t rather than ulong: 'unsigned long' is
        // narrower than a pointer on some 64-bit platforms and would truncate it.
        pRefMidBuffer = (SAMPLETYPE *)((((size_t)pRefMidBufferUnaligned) + 15) & ~(size_t)15);
    }
}

类成员函数setTempo()重新设置了音频的伸缩.

// Applies a new target tempo: smaller values represent slower tempo, larger
// values faster tempo. The sequence parameters, the nominal skip length and
// the input sample requirement are all recalculated.
void TDStretch::setTempo(float newTempo)
{
    tempo = newTempo;

    // The sequence duration may depend on the tempo, so refresh it first.
    calcSeqParameters();

    // Ideal (fractional) amount of input to advance per processed sequence.
    nominalSkip = tempo * (seekWindowLength - overlapLength);
    const int roundedSkip = (int)(nominalSkip + 0.5f);

    // Input needed for one more batch: the longer of "skip + overlap" and one
    // whole sequence, plus the complete seek range.
    const int skipWithOverlap = roundedSkip + overlapLength;
    sampleReq = max(skipWithOverlap, seekWindowLength) + seekLength;
}

先记下Stretch用到的参数,现在我们来看看这些参数的实际物理意义。

音频的伸缩一般采用Sola的算法来实现。如下图所示：

算法大致如下：

从原始声音数据的开头处取出一定大小的数据，假如取7个sample，放在一个新的Buffer，如上图所示，然后在原始数据再往后面的数据中取9个sample，与前面的7个sample做一个叠加，叠加的范围我们假设为2，那么(7-2)/9 ≈ 0.556，这就意味着声音的持续时间和原来相比减少了约44.4%，同时注意到一点，时间的间隔(采样频率)并没有改变，也就是说声音的频率(音调)没有发生改变。至于为什么要叠加一部分，就是为了抑制这种由不连续的抽取声音信号造成的数据丢失所引发的噪音或者声音过渡不自然。这个图对照上面TDStretch三个类成员函数，就理解了那些函数初始化的定义。同时变调不变调的处理过程更为清晰。就和SoundTouch类成员函数putSamples条件判断中的一致，无非就是先伸缩后重采样，或者先重采样再伸缩的问题。

Sola的具体流程，在TDStretch类成员函数processSamples中表达得十分清晰：先拷贝一个序列到开头，接着找到最佳的叠加位置。最佳位置是通过计算并比较归一化互相关系数得到的，主要实现是由类成员函数seekBestOverlapPosition(const SAMPLETYPE *refPos)判断是单声道还是双声道，再分别调用不同的TDStretch::seekBestOverlapPositionXXXX(const SAMPLETYPE *refPos)；有浮点和定点两个版本，这里同样以单声道浮点版本为例：

/// Seeks the best overlap position for mono sound.
///
/// Tests every offset in [0, seekLength) of 'refPos' against the sloped mid
/// buffer and returns the offset with the highest weighted correlation.
/// Returns 0 when 'seekLength' is not positive.
int TDStretch::seekBestOverlapPositionMono(const SAMPLETYPE *refPos)
{
    // Slopes the amplitude of the 'midBuffer' samples
    precalcCorrReferenceMono();

    // Start below every finite score. FLT_MIN is the smallest *positive*
    // float, so using it as the seed meant that an offset could never win
    // when all weighted correlations were zero or negative.
    double bestCorr = -FLT_MAX;
    int bestOffs = 0;

    // Scans for the best correlation value by testing each possible position
    // over the permitted range.
    for (int tempOffset = 0; tempOffset < seekLength; tempOffset++)
    {
        const SAMPLETYPE *compare = refPos + tempOffset;

        // Calculates correlation value for the mixing position corresponding
        // to 'tempOffset'
        double corr = (double)calcCrossCorrMono(pRefMidBuffer, compare);

        // heuristic rule to slightly favour values close to mid of the range:
        // 'tmp' runs from -1 at the first offset towards +1 at the last one
        const double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
        corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));

        // Checks for the highest correlation value
        if (corr > bestCorr)
        {
            bestCorr = corr;
            bestOffs = tempOffset;
        }
    }

    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
    clearCrossCorrState();

    return bestOffs;
}

类成员函数seekBestOverlapPositionMono调用了类成员函数calcCrossCorrMono()

/// Correlation of 'mixingPos' against 'compare' over the overlap range,
/// divided by the square root of the energy of 'mixingPos'.
/// Index 0 is not part of the sums: accumulation starts at index 1.
double TDStretch::calcCrossCorrMono(const float *mixingPos, const float *compare) const
{
    double dotSum = 0;
    double energy = 0;

    for (int k = 1; k < overlapLength; ++k)
    {
        const float ref = mixingPos[k];
        dotSum += ref * compare[k];
        energy += ref * ref;
    }

    // a (near) silent reference would otherwise cause a division by zero
    const double divisor = (energy < 1e-9) ? 1.0 : energy;

    return dotSum / sqrt(divisor);
}

根据互相关系数的计算公式corr = x(n)*h(-n);*为卷积。和我们的形式有点不一样。下次再慢慢分析。最后把后面一个序列拷贝到叠加的位置，叠加部分的幅值通过TDStretch类成员函数overlap来计算，具体代码如下，通过判断声道调用一个单声道或者双声道的类成员函数来处理。以单声道为例，主要考虑到比较好理解。其实双声道也差不多。就是注意处理数据循环的增量，和在循环处理中每次多一个右声道或者左声道的数据处理。

// Overlaps samples in 'midBuffer' with the samples in 'pInput' at position
// of 'ovlPos', writing the result to 'pOutput'. 'ovlPos' is an offset
// counted in samples; the work is handed to the stereo or the mono routine
// according to 'channels'.
inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
{
    if (channels == 2)
    {
        // stereo sound: the pointer advances two values per sample position
        overlapStereo(pOutput, pInput + 2 * ovlPos);
    }
    else
    {
        // mono sound.
        overlapMono(pOutput, pInput + ovlPos);
    }
}

类成员函数overlapMono的具体实现如下：

// Overlaps samples in 'midBuffer' with the samples in 'pInput' and writes
// 'overlapLength' mixed samples to 'pOutput'.
//
// The mix is a linear cross-fade: at index i the input sample is weighted by
// i and the mid buffer sample by (overlapLength - i), and the sum is divided
// by overlapLength.
void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
{
    for (int i = 0; i < overlapLength; i++)
    {
        // weight of the mid buffer sample; the two weights always add up
        // to overlapLength
        const int itemp = overlapLength - i;

        pOutput[i] = (pInput[i] * i + pMidBuffer[i] * itemp) / overlapLength; // >> overlapDividerBits;
    }
}

pMidBuffer与pInput重叠，重叠长度为 overlapLength。

留意到核心的算法仅仅是一行代码：pOutput[i] = (pInput[i] * i + pMidBuffer[i] * itemp ) / overlapLength;。设a = i，b = itemp，k = overlapLength，x = pInput[i]，y = pMidBuffer[i]，z = pOutput[i]，就可以把这行代码用下面两行伪代码替代，其中x、y分别作为系统的两个输入，z作为输出。

a + b = k;

ax + by = kz;

很眼熟，但是一时半刻又说不上来是什么。暂时记下吧。以后再搞明白这个算法叫什么。（注：由 a + b = k 可得 z = (a/k)·x + (b/k)·y，即对x、y按权重a/k、b/k做加权平均，也就是线性交叉淡化（cross-fade）。）