mirror of
https://github.com/WinampDesktop/winamp.git
synced 2025-06-17 12:35:45 -04:00
Initial community commit
This commit is contained in:
642
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h
vendored
Normal file
642
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h
vendored
Normal file
@ -0,0 +1,642 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file CDSPBlockConvolver.h
|
||||
*
|
||||
* @brief Single-block overlap-save convolution processor class.
|
||||
*
|
||||
* This file includes single-block overlap-save convolution processor class.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED
|
||||
#define R8B_CDSPBLOCKCONVOLVER_INCLUDED
|
||||
|
||||
#include "CDSPFIRFilter.h"
|
||||
#include "CDSPProcessor.h"
|
||||
|
||||
namespace r8b {
|
||||
|
||||
/**
|
||||
* @brief Single-block overlap-save convolution processing class.
|
||||
*
|
||||
* Class that implements single-block overlap-save convolution processing. The
|
||||
* length of a single FFT block used depends on the length of the filter
|
||||
* kernel.
|
||||
*
|
||||
* The rationale behind "single-block" processing is that increasing the FFT
|
||||
* block length by 2 is more efficient than performing convolution at the same
|
||||
* FFT block length but using two blocks.
|
||||
*
|
||||
* This class also implements a built-in resampling by any whole-number
|
||||
* factor, which simplifies the overall resampling objects topology.
|
||||
*/
|
||||
|
||||
class CDSPBlockConvolver : public CDSPProcessor
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor initializes internal variables and constants of *this
|
||||
* object.
|
||||
*
|
||||
* @param aFilter Pre-calculated filter data. Reference to this object is
|
||||
* inhertied by *this object, and the object will be released when *this
|
||||
* object is destroyed. If upsampling is used, filter's gain should be
|
||||
* equal to the upsampling factor.
|
||||
* @param aUpFactor The upsampling factor, positive value. E.g. value of 2
|
||||
* means 2x upsampling should be performed over the input data.
|
||||
* @param aDownFactor The downsampling factor, positive value. E.g. value
|
||||
* of 2 means 2x downsampling should be performed over the output data.
|
||||
* @param PrevLatency Latency, in samples (any value >=0), which was left
|
||||
* in the output signal by a previous process. This value is usually
|
||||
* non-zero if the minimum-phase filters are in use. This value is always
|
||||
* zero if the linear-phase filters are in use.
|
||||
* @param aDoConsumeLatency "True" if the output latency should be
|
||||
* consumed. Does not apply to the fractional part of the latency (if such
|
||||
* part is available).
|
||||
*/
|
||||
|
||||
CDSPBlockConvolver( CDSPFIRFilter& aFilter, const int aUpFactor,
|
||||
const int aDownFactor, const double PrevLatency = 0.0,
|
||||
const bool aDoConsumeLatency = true )
|
||||
: Filter( &aFilter )
|
||||
, UpFactor( aUpFactor )
|
||||
, DownFactor( aDownFactor )
|
||||
, DoConsumeLatency( aDoConsumeLatency )
|
||||
, BlockLen2( 2 << Filter -> getBlockLenBits() )
|
||||
{
|
||||
R8BASSERT( UpFactor > 0 );
|
||||
R8BASSERT( DownFactor > 0 );
|
||||
R8BASSERT( PrevLatency >= 0.0 );
|
||||
|
||||
int fftinBits;
|
||||
UpShift = getBitOccupancy( UpFactor ) - 1;
|
||||
|
||||
if(( 1 << UpShift ) == UpFactor )
|
||||
{
|
||||
fftinBits = Filter -> getBlockLenBits() + 1 - UpShift;
|
||||
PrevInputLen = ( Filter -> getKernelLen() - 1 + UpFactor - 1 ) /
|
||||
UpFactor;
|
||||
|
||||
InputLen = BlockLen2 - PrevInputLen * UpFactor;
|
||||
}
|
||||
else
|
||||
{
|
||||
UpShift = -1;
|
||||
fftinBits = Filter -> getBlockLenBits() + 1;
|
||||
PrevInputLen = Filter -> getKernelLen() - 1;
|
||||
InputLen = BlockLen2 - PrevInputLen;
|
||||
}
|
||||
|
||||
OutOffset = ( Filter -> isZeroPhase() ? Filter -> getLatency() : 0 );
|
||||
LatencyFrac = Filter -> getLatencyFrac() + PrevLatency * UpFactor;
|
||||
Latency = (int) LatencyFrac;
|
||||
const int InLatency = Latency + Filter -> getLatency() - OutOffset;
|
||||
LatencyFrac -= Latency;
|
||||
LatencyFrac /= DownFactor;
|
||||
|
||||
Latency += InputLen + Filter -> getLatency();
|
||||
|
||||
int fftoutBits;
|
||||
InputDelay = 0;
|
||||
DownSkipInit = 0;
|
||||
DownShift = getBitOccupancy( DownFactor ) - 1;
|
||||
|
||||
if(( 1 << DownShift ) == DownFactor )
|
||||
{
|
||||
fftoutBits = Filter -> getBlockLenBits() + 1 - DownShift;
|
||||
|
||||
if( DownFactor > 1 )
|
||||
{
|
||||
if( UpShift > 0 )
|
||||
{
|
||||
// This case never happens in practice due to mutual
|
||||
// exclusion of "power of 2" DownFactor and UpFactor
|
||||
// values.
|
||||
|
||||
R8BASSERT( UpShift == 0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Make sure InputLen is divisible by DownFactor.
|
||||
|
||||
const int ilc = InputLen & ( DownFactor - 1 );
|
||||
PrevInputLen += ilc;
|
||||
InputLen -= ilc;
|
||||
Latency -= ilc;
|
||||
|
||||
// Correct InputDelay for input and filter's latency.
|
||||
|
||||
const int lc = InLatency & ( DownFactor - 1 );
|
||||
|
||||
if( lc > 0 )
|
||||
{
|
||||
InputDelay = DownFactor - lc;
|
||||
}
|
||||
|
||||
if( !DoConsumeLatency )
|
||||
{
|
||||
Latency /= DownFactor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fftoutBits = Filter -> getBlockLenBits() + 1;
|
||||
DownShift = -1;
|
||||
|
||||
if( !DoConsumeLatency && DownFactor > 1 )
|
||||
{
|
||||
DownSkipInit = Latency % DownFactor;
|
||||
Latency /= DownFactor;
|
||||
}
|
||||
}
|
||||
|
||||
R8BASSERT( Latency >= 0 );
|
||||
|
||||
fftin = new CDSPRealFFTKeeper( fftinBits );
|
||||
|
||||
if( fftoutBits == fftinBits )
|
||||
{
|
||||
fftout = fftin;
|
||||
}
|
||||
else
|
||||
{
|
||||
ffto2 = new CDSPRealFFTKeeper( fftoutBits );
|
||||
fftout = ffto2;
|
||||
}
|
||||
|
||||
WorkBlocks.alloc( BlockLen2 * 2 + PrevInputLen );
|
||||
CurInput = &WorkBlocks[ 0 ];
|
||||
CurOutput = &WorkBlocks[ BlockLen2 ]; // CurInput and
|
||||
// CurOutput are address-aligned.
|
||||
PrevInput = &WorkBlocks[ BlockLen2 * 2 ];
|
||||
|
||||
clear();
|
||||
|
||||
R8BCONSOLE( "CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i "
|
||||
"fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen,
|
||||
UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(),
|
||||
getLatency() );
|
||||
}
|
||||
|
||||
virtual ~CDSPBlockConvolver()
|
||||
{
|
||||
Filter -> unref();
|
||||
}
|
||||
|
||||
virtual int getLatency() const
|
||||
{
|
||||
return( DoConsumeLatency ? 0 : Latency );
|
||||
}
|
||||
|
||||
virtual double getLatencyFrac() const
|
||||
{
|
||||
return( LatencyFrac );
|
||||
}
|
||||
|
||||
virtual int getMaxOutLen( const int MaxInLen ) const
|
||||
{
|
||||
R8BASSERT( MaxInLen >= 0 );
|
||||
|
||||
return(( MaxInLen * UpFactor + DownFactor - 1 ) / DownFactor );
|
||||
}
|
||||
|
||||
virtual void clear()
|
||||
{
|
||||
memset( &PrevInput[ 0 ], 0, PrevInputLen * sizeof( PrevInput[ 0 ]));
|
||||
|
||||
if( DoConsumeLatency )
|
||||
{
|
||||
LatencyLeft = Latency;
|
||||
}
|
||||
else
|
||||
{
|
||||
LatencyLeft = 0;
|
||||
|
||||
if( DownShift > 0 )
|
||||
{
|
||||
memset( &CurOutput[ 0 ], 0, ( BlockLen2 >> DownShift ) *
|
||||
sizeof( CurOutput[ 0 ]));
|
||||
}
|
||||
else
|
||||
{
|
||||
memset( &CurOutput[ BlockLen2 - OutOffset ], 0, OutOffset *
|
||||
sizeof( CurOutput[ 0 ]));
|
||||
|
||||
memset( &CurOutput[ 0 ], 0, ( InputLen - OutOffset ) *
|
||||
sizeof( CurOutput[ 0 ]));
|
||||
}
|
||||
}
|
||||
|
||||
memset( CurInput, 0, InputDelay * sizeof( CurInput[ 0 ]));
|
||||
|
||||
InDataLeft = InputLen - InputDelay;
|
||||
UpSkip = 0;
|
||||
DownSkip = DownSkipInit;
|
||||
}
|
||||
|
||||
virtual int process( double* ip, int l0, double*& op0 )
|
||||
{
|
||||
R8BASSERT( l0 >= 0 );
|
||||
R8BASSERT( UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0 );
|
||||
|
||||
double* op = op0;
|
||||
int l = l0 * UpFactor;
|
||||
l0 = 0;
|
||||
|
||||
while( l > 0 )
|
||||
{
|
||||
const int Offs = InputLen - InDataLeft;
|
||||
|
||||
if( l < InDataLeft )
|
||||
{
|
||||
InDataLeft -= l;
|
||||
|
||||
if( UpShift >= 0 )
|
||||
{
|
||||
memcpy( &CurInput[ Offs >> UpShift ], ip,
|
||||
( l >> UpShift ) * sizeof( CurInput[ 0 ]));
|
||||
}
|
||||
else
|
||||
{
|
||||
copyUpsample( ip, &CurInput[ Offs ], l );
|
||||
}
|
||||
|
||||
copyToOutput( Offs - OutOffset, op, l, l0 );
|
||||
break;
|
||||
}
|
||||
|
||||
const int b = InDataLeft;
|
||||
l -= b;
|
||||
InDataLeft = InputLen;
|
||||
int ilu;
|
||||
|
||||
if( UpShift >= 0 )
|
||||
{
|
||||
const int bu = b >> UpShift;
|
||||
memcpy( &CurInput[ Offs >> UpShift ], ip,
|
||||
bu * sizeof( CurInput[ 0 ]));
|
||||
|
||||
ip += bu;
|
||||
ilu = InputLen >> UpShift;
|
||||
}
|
||||
else
|
||||
{
|
||||
copyUpsample( ip, &CurInput[ Offs ], b );
|
||||
ilu = InputLen;
|
||||
}
|
||||
|
||||
const size_t pil = PrevInputLen * sizeof( CurInput[ 0 ]);
|
||||
memcpy( &CurInput[ ilu ], PrevInput, pil );
|
||||
memcpy( PrevInput, &CurInput[ ilu - PrevInputLen ], pil );
|
||||
|
||||
(*fftin) -> forward( CurInput );
|
||||
|
||||
if( UpShift > 0 )
|
||||
{
|
||||
#if R8B_FLOATFFT
|
||||
mirrorInputSpectrum( (float*) CurInput );
|
||||
#else // R8B_FLOATFFT
|
||||
mirrorInputSpectrum( CurInput );
|
||||
#endif // R8B_FLOATFFT
|
||||
}
|
||||
|
||||
if( Filter -> isZeroPhase() )
|
||||
{
|
||||
(*fftout) -> multiplyBlocksZP( Filter -> getKernelBlock(),
|
||||
CurInput );
|
||||
}
|
||||
else
|
||||
{
|
||||
(*fftout) -> multiplyBlocks( Filter -> getKernelBlock(),
|
||||
CurInput );
|
||||
}
|
||||
|
||||
if( DownShift > 0 )
|
||||
{
|
||||
const int z = BlockLen2 >> DownShift;
|
||||
|
||||
#if R8B_FLOATFFT
|
||||
float* const kb = (float*) Filter -> getKernelBlock();
|
||||
float* const p = (float*) CurInput;
|
||||
#else // R8B_FLOATFFT
|
||||
const double* const kb = Filter -> getKernelBlock();
|
||||
double* const p = CurInput;
|
||||
#endif // R8B_FLOATFFT
|
||||
|
||||
p[ 1 ] = kb[ z ] * p[ z ] - kb[ z + 1 ] * p[ z + 1 ];
|
||||
}
|
||||
|
||||
(*fftout) -> inverse( CurInput );
|
||||
|
||||
copyToOutput( Offs - OutOffset, op, b, l0 );
|
||||
|
||||
double* const tmp = CurInput;
|
||||
CurInput = CurOutput;
|
||||
CurOutput = tmp;
|
||||
}
|
||||
|
||||
return( l0 );
|
||||
}
|
||||
|
||||
private:
|
||||
CDSPFIRFilter* Filter; ///< Filter in use.
|
||||
///<
|
||||
CPtrKeeper< CDSPRealFFTKeeper* > fftin; ///< FFT object 1, used to produce
|
||||
///< the input spectrum (can embed the "power of 2" upsampling).
|
||||
///<
|
||||
CPtrKeeper< CDSPRealFFTKeeper* > ffto2; ///< FFT object 2 (can be NULL).
|
||||
///<
|
||||
CDSPRealFFTKeeper* fftout; ///< FFT object used to produce the output
|
||||
///< signal (can embed the "power of 2" downsampling), may point to
|
||||
///< either "fftin" or "ffto2".
|
||||
///<
|
||||
int UpFactor; ///< Upsampling factor.
|
||||
///<
|
||||
int DownFactor; ///< Downsampling factor.
|
||||
///<
|
||||
bool DoConsumeLatency; ///< "True" if the output latency should be
|
||||
///< consumed. Does not apply to the fractional part of the latency
|
||||
///< (if such part is available).
|
||||
///<
|
||||
int BlockLen2; ///< Equals block length * 2.
|
||||
///<
|
||||
int OutOffset; ///< Output offset, depends on filter's introduced latency.
|
||||
///<
|
||||
int PrevInputLen; ///< The length of previous input data saved, used for
|
||||
///< overlap.
|
||||
///<
|
||||
int InputLen; ///< The number of input samples that should be accumulated
|
||||
///< before the input block is processed.
|
||||
///<
|
||||
int Latency; ///< Processing latency, in samples.
|
||||
///<
|
||||
double LatencyFrac; ///< Fractional latency, in samples, that is left in
|
||||
///< the output signal.
|
||||
///<
|
||||
int UpShift; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is
|
||||
///< not a "power of 2" value. Equals 0 if UpFactor equals 1.
|
||||
///<
|
||||
int DownShift; ///< "Power of 2" downsampling shift. Equals -1 if
|
||||
///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals
|
||||
///< 1.
|
||||
///<
|
||||
int InputDelay; ///< Additional input delay, in samples. Used to make the
|
||||
///< output delay divisible by DownShift. Used only if UpShift <= 0
|
||||
///< and DownShift > 0.
|
||||
///<
|
||||
CFixedBuffer< double > WorkBlocks; ///< Previous input data, input and
|
||||
///< output data blocks, overall capacity = BlockLen2 * 2 +
|
||||
///< PrevInputLen. Used in the flip-flop manner.
|
||||
///<
|
||||
double* PrevInput; ///< Previous input data buffer, capacity = BlockLen.
|
||||
///<
|
||||
double* CurInput; ///< Input data buffer, capacity = BlockLen2.
|
||||
///<
|
||||
double* CurOutput; ///< Output data buffer, capacity = BlockLen2.
|
||||
///<
|
||||
int InDataLeft; ///< Samples left before processing input and output FFT
|
||||
///< blocks. Initialized to InputLen on clear.
|
||||
///<
|
||||
int LatencyLeft; ///< Latency in samples left to skip.
|
||||
///<
|
||||
int UpSkip; ///< The current upsampling sample skip (value in the range
|
||||
///< 0 to UpFactor - 1).
|
||||
///<
|
||||
int DownSkip; ///< The current downsampling sample skip (value in the
|
||||
///< range 0 to DownFactor - 1). Not used if DownShift > 0.
|
||||
///<
|
||||
int DownSkipInit; ///< The initial DownSkip value after clear().
|
||||
///<
|
||||
|
||||
/**
|
||||
* Function copies samples from the input buffer to the output buffer
|
||||
* while inserting zeros inbetween them to perform the whole-numbered
|
||||
* upsampling.
|
||||
*
|
||||
* @param[in,out] ip0 Input buffer. Will be advanced on function's return.
|
||||
* @param[out] op Output buffer.
|
||||
* @param l0 The number of samples to fill in the output buffer, including
|
||||
* both input samples and interpolation (zero) samples.
|
||||
*/
|
||||
|
||||
void copyUpsample( double*& ip0, double* op, int l0 )
|
||||
{
|
||||
int b = min( UpSkip, l0 );
|
||||
|
||||
if( b != 0 )
|
||||
{
|
||||
UpSkip -= b;
|
||||
l0 -= b;
|
||||
|
||||
*op = 0.0;
|
||||
op++;
|
||||
|
||||
while( --b != 0 )
|
||||
{
|
||||
*op = 0.0;
|
||||
op++;
|
||||
}
|
||||
}
|
||||
|
||||
double* ip = ip0;
|
||||
const int upf = UpFactor;
|
||||
int l = l0 / upf;
|
||||
int lz = l0 - l * upf;
|
||||
|
||||
if( upf == 3 )
|
||||
{
|
||||
while( l != 0 )
|
||||
{
|
||||
op[ 0 ] = *ip;
|
||||
op[ 1 ] = 0.0;
|
||||
op[ 2 ] = 0.0;
|
||||
ip++;
|
||||
op += upf;
|
||||
l--;
|
||||
}
|
||||
}
|
||||
else
|
||||
if( upf == 5 )
|
||||
{
|
||||
while( l != 0 )
|
||||
{
|
||||
op[ 0 ] = *ip;
|
||||
op[ 1 ] = 0.0;
|
||||
op[ 2 ] = 0.0;
|
||||
op[ 3 ] = 0.0;
|
||||
op[ 4 ] = 0.0;
|
||||
ip++;
|
||||
op += upf;
|
||||
l--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const size_t zc = ( upf - 1 ) * sizeof( op[ 0 ]);
|
||||
|
||||
while( l != 0 )
|
||||
{
|
||||
*op = *ip;
|
||||
ip++;
|
||||
|
||||
memset( op + 1, 0, zc );
|
||||
op += upf;
|
||||
l--;
|
||||
}
|
||||
}
|
||||
|
||||
if( lz != 0 )
|
||||
{
|
||||
*op = *ip;
|
||||
ip++;
|
||||
op++;
|
||||
|
||||
UpSkip = upf - lz;
|
||||
|
||||
while( --lz != 0 )
|
||||
{
|
||||
*op = 0.0;
|
||||
op++;
|
||||
}
|
||||
}
|
||||
|
||||
ip0 = ip;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function copies sample data from the CurOutput buffer to the specified
|
||||
* output buffer and advances its position. If necessary, this function
|
||||
* "consumes" latency and performs downsampling.
|
||||
*
|
||||
* @param Offs CurOutput buffer offset, can be negative.
|
||||
* @param[out] op0 Output buffer pointer, will be advanced.
|
||||
* @param b The number of output samples available, including those which
|
||||
* are discarded during whole-number downsampling.
|
||||
* @param l0 The overall output sample count, will be increased.
|
||||
*/
|
||||
|
||||
void copyToOutput( int Offs, double*& op0, int b, int& l0 )
|
||||
{
|
||||
if( Offs < 0 )
|
||||
{
|
||||
if( Offs + b <= 0 )
|
||||
{
|
||||
Offs += BlockLen2;
|
||||
}
|
||||
else
|
||||
{
|
||||
copyToOutput( Offs + BlockLen2, op0, -Offs, l0 );
|
||||
b += Offs;
|
||||
Offs = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if( LatencyLeft != 0 )
|
||||
{
|
||||
if( LatencyLeft >= b )
|
||||
{
|
||||
LatencyLeft -= b;
|
||||
return;
|
||||
}
|
||||
|
||||
Offs += LatencyLeft;
|
||||
b -= LatencyLeft;
|
||||
LatencyLeft = 0;
|
||||
}
|
||||
|
||||
const int df = DownFactor;
|
||||
|
||||
if( DownShift > 0 )
|
||||
{
|
||||
int Skip = Offs & ( df - 1 );
|
||||
|
||||
if( Skip > 0 )
|
||||
{
|
||||
Skip = df - Skip;
|
||||
b -= Skip;
|
||||
Offs += Skip;
|
||||
}
|
||||
|
||||
if( b > 0 )
|
||||
{
|
||||
b = ( b + df - 1 ) >> DownShift;
|
||||
memcpy( op0, &CurOutput[ Offs >> DownShift ],
|
||||
b * sizeof( op0[ 0 ]));
|
||||
|
||||
op0 += b;
|
||||
l0 += b;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( df > 1 )
|
||||
{
|
||||
const double* ip = &CurOutput[ Offs + DownSkip ];
|
||||
int l = ( b + df - 1 - DownSkip ) / df;
|
||||
DownSkip += l * df - b;
|
||||
|
||||
double* op = op0;
|
||||
l0 += l;
|
||||
op0 += l;
|
||||
|
||||
while( l > 0 )
|
||||
{
|
||||
*op = *ip;
|
||||
ip += df;
|
||||
op++;
|
||||
l--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( op0, &CurOutput[ Offs ], b * sizeof( op0[ 0 ]));
|
||||
op0 += b;
|
||||
l0 += b;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function performs input spectrum mirroring which is used to perform a
|
||||
* fast "power of 2" upsampling. Such mirroring is equivalent to insertion
|
||||
* of zeros into the input signal.
|
||||
*
|
||||
* @param p Spectrum data block to mirror.
|
||||
* @tparam T Buffer's element type.
|
||||
*/
|
||||
|
||||
template< typename T >
|
||||
void mirrorInputSpectrum( T* const p )
|
||||
{
|
||||
const int bl1 = BlockLen2 >> UpShift;
|
||||
const int bl2 = bl1 + bl1;
|
||||
int i;
|
||||
|
||||
for( i = bl1 + 2; i < bl2; i += 2 )
|
||||
{
|
||||
p[ i ] = p[ bl2 - i ];
|
||||
p[ i + 1 ] = -p[ bl2 - i + 1 ];
|
||||
}
|
||||
|
||||
p[ bl1 ] = p[ 1 ];
|
||||
p[ bl1 + 1 ] = 0.0;
|
||||
p[ 1 ] = p[ 0 ];
|
||||
|
||||
for( i = 1; i < UpShift; i++ )
|
||||
{
|
||||
const int z = bl1 << i;
|
||||
memcpy( &p[ z ], p, z * sizeof( p[ 0 ]));
|
||||
p[ z + 1 ] = 0.0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace r8b
|
||||
|
||||
#endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED
|
719
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h
vendored
Normal file
719
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h
vendored
Normal file
@ -0,0 +1,719 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file CDSPFIRFilter.h
|
||||
*
|
||||
* @brief FIR filter generator and filter cache classes.
|
||||
*
|
||||
* This file includes low-pass FIR filter generator and filter cache.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8B_CDSPFIRFILTER_INCLUDED
|
||||
#define R8B_CDSPFIRFILTER_INCLUDED
|
||||
|
||||
#include "CDSPSincFilterGen.h"
|
||||
#include "CDSPRealFFT.h"
|
||||
|
||||
namespace r8b {
|
||||
|
||||
/**
|
||||
* Enumeration of filter's phase responses.
|
||||
*/
|
||||
|
||||
enum EDSPFilterPhaseResponse
|
||||
{
|
||||
fprLinearPhase = 0, ///< Linear-phase response. Features a linear-phase
|
||||
///< high-latency response, with the latency expressed as integer
|
||||
///< value.
|
||||
///<
|
||||
fprMinPhase ///< Minimum-phase response. Features a minimal latency
|
||||
///< response, but the response's phase is non-linear. The latency is
|
||||
///< usually expressed as non-integer value, and usually is small, but
|
||||
///< is never equal to zero. The minimum-phase filter is transformed
|
||||
///< from the linear-phase filter. The transformation has precision
|
||||
///< limits which may skew both the -3 dB point and attenuation of the
|
||||
///< filter being transformed: as it was measured, the skew happens
|
||||
///< purely at random, and in most cases it is within tolerable range.
|
||||
///< In a small (1%) random subset of cases the skew is bigger and
|
||||
///< cannot be predicted. Minimum-phase transform requires 64-bit
|
||||
///< floating point FFT precision, results with 32-bit float FFT are
|
||||
///< far from optimal.
|
||||
///<
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Calculation and storage class for FIR filters.
|
||||
*
|
||||
* Class that implements calculation and storing of a FIR filter (currently
|
||||
* contains low-pass filter calculation routine designed for sample rate
|
||||
* conversion). Objects of this class cannot be created directly, but can be
|
||||
* obtained via the CDSPFilterCache::getLPFilter() static function.
|
||||
*/
|
||||
|
||||
class CDSPFIRFilter : public R8B_BASECLASS
|
||||
{
|
||||
R8BNOCTOR( CDSPFIRFilter );
|
||||
|
||||
friend class CDSPFIRFilterCache;
|
||||
|
||||
public:
|
||||
~CDSPFIRFilter()
|
||||
{
|
||||
R8BASSERT( RefCount == 0 );
|
||||
|
||||
delete Next;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The minimal allowed low-pass filter's transition band, in
|
||||
* percent.
|
||||
*/
|
||||
|
||||
static double getLPMinTransBand()
|
||||
{
|
||||
return( 0.5 );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The maximal allowed low-pass filter's transition band, in
|
||||
* percent.
|
||||
*/
|
||||
|
||||
static double getLPMaxTransBand()
|
||||
{
|
||||
return( 45.0 );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The minimal allowed low-pass filter's stop-band attenuation, in
|
||||
* decibel.
|
||||
*/
|
||||
|
||||
static double getLPMinAtten()
|
||||
{
|
||||
return( 49.0 );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The maximal allowed low-pass filter's stop-band attenuation, in
|
||||
* decibel.
|
||||
*/
|
||||
|
||||
static double getLPMaxAtten()
|
||||
{
|
||||
return( 218.0 );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return "True" if kernel block of *this filter has zero-phase response.
|
||||
*/
|
||||
|
||||
bool isZeroPhase() const
|
||||
{
|
||||
return( IsZeroPhase );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Filter's latency, in samples (integer part).
|
||||
*/
|
||||
|
||||
int getLatency() const
|
||||
{
|
||||
return( Latency );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Filter's latency, in samples (fractional part). Always zero for
|
||||
* linear-phase filters.
|
||||
*/
|
||||
|
||||
double getLatencyFrac() const
|
||||
{
|
||||
return( LatencyFrac );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Filter kernel length, in samples. Not to be confused with the
|
||||
* block length.
|
||||
*/
|
||||
|
||||
int getKernelLen() const
|
||||
{
|
||||
return( KernelLen );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Filter's block length, espressed as Nth power of 2. The actual
|
||||
* length is twice as large due to zero-padding.
|
||||
*/
|
||||
|
||||
int getBlockLenBits() const
|
||||
{
|
||||
return( BlockLenBits );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Filter's kernel block, in complex-numbered form obtained via
|
||||
* the CDSPRealFFT::forward() function call, zero-padded, gain-adjusted
|
||||
* with the CDSPRealFFT::getInvMulConst() * ReqGain constant, immediately
|
||||
* suitable for convolution. Kernel block may have "zero-phase" response,
|
||||
* depending on the isZeroPhase() function's result.
|
||||
*/
|
||||
|
||||
const double* getKernelBlock() const
|
||||
{
|
||||
return( KernelBlock );
|
||||
}
|
||||
|
||||
/**
|
||||
* This function should be called when the filter obtained via the
|
||||
* filter cache is no longer needed.
|
||||
*/
|
||||
|
||||
void unref();
|
||||
|
||||
private:
|
||||
double ReqNormFreq; ///< Required normalized frequency, 0 to 1 inclusive.
|
||||
///<
|
||||
double ReqTransBand; ///< Required transition band in percent, as passed
|
||||
///< by the user.
|
||||
///<
|
||||
double ReqAtten; ///< Required stop-band attenuation in decibel, as passed
|
||||
///< by the user (positive value).
|
||||
///<
|
||||
EDSPFilterPhaseResponse ReqPhase; ///< Required filter's phase response.
|
||||
///<
|
||||
double ReqGain; ///< Required overall filter's gain.
|
||||
///<
|
||||
CDSPFIRFilter* Next; ///< Next FIR filter in cache's list.
|
||||
///<
|
||||
int RefCount; ///< The number of references made to *this FIR filter.
|
||||
///<
|
||||
bool IsZeroPhase; ///< "True" if kernel block of *this filter has
|
||||
///< zero-phase response.
|
||||
///<
|
||||
int Latency; ///< Filter's latency in samples (integer part).
|
||||
///<
|
||||
double LatencyFrac; ///< Filter's latency in samples (fractional part).
|
||||
///<
|
||||
int KernelLen; ///< Filter kernel length, in samples.
|
||||
///<
|
||||
int BlockLenBits; ///< Block length used to store *this FIR filter,
|
||||
///< expressed as Nth power of 2. This value is used directly by the
|
||||
///< convolver.
|
||||
///<
|
||||
CFixedBuffer< double > KernelBlock; ///< FIR filter buffer, capacity
|
||||
///< equals to 1 << ( BlockLenBits + 1 ). Second part of the buffer
|
||||
///< contains zero-padding to allow alias-free convolution.
|
||||
///< Address-aligned.
|
||||
///<
|
||||
|
||||
CDSPFIRFilter()
|
||||
: RefCount( 1 )
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Function builds filter kernel based on the "Req" parameters.
|
||||
*
|
||||
* @param ExtAttenCorrs External attentuation correction table, for
|
||||
* internal use.
|
||||
*/
|
||||
|
||||
void buildLPFilter( const double* const ExtAttenCorrs )
|
||||
{
|
||||
const double tb = ReqTransBand * 0.01;
|
||||
double pwr;
|
||||
double fo1;
|
||||
double hl;
|
||||
double atten = -ReqAtten;
|
||||
|
||||
if( tb >= 0.25 )
|
||||
{
|
||||
if( ReqAtten >= 117.0 )
|
||||
{
|
||||
atten -= 1.60;
|
||||
}
|
||||
else
|
||||
if( ReqAtten >= 60.0 )
|
||||
{
|
||||
atten -= 1.91;
|
||||
}
|
||||
else
|
||||
{
|
||||
atten -= 2.25;
|
||||
}
|
||||
}
|
||||
else
|
||||
if( tb >= 0.10 )
|
||||
{
|
||||
if( ReqAtten >= 117.0 )
|
||||
{
|
||||
atten -= 0.69;
|
||||
}
|
||||
else
|
||||
if( ReqAtten >= 60.0 )
|
||||
{
|
||||
atten -= 0.73;
|
||||
}
|
||||
else
|
||||
{
|
||||
atten -= 1.13;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( ReqAtten >= 117.0 )
|
||||
{
|
||||
atten -= 0.21;
|
||||
}
|
||||
else
|
||||
if( ReqAtten >= 60.0 )
|
||||
{
|
||||
atten -= 0.25;
|
||||
}
|
||||
else
|
||||
{
|
||||
atten -= 0.36;
|
||||
}
|
||||
}
|
||||
|
||||
static const int AttenCorrCount = 264;
|
||||
static const double AttenCorrMin = 49.0;
|
||||
static const double AttenCorrDiff = 176.25;
|
||||
int AttenCorr = (int) floor(( -atten - AttenCorrMin ) *
|
||||
AttenCorrCount / AttenCorrDiff + 0.5 );
|
||||
|
||||
AttenCorr = min( AttenCorrCount, max( 0, AttenCorr ));
|
||||
|
||||
if( ExtAttenCorrs != NULL )
|
||||
{
|
||||
atten -= ExtAttenCorrs[ AttenCorr ];
|
||||
}
|
||||
else
|
||||
if( tb >= 0.25 )
|
||||
{
|
||||
static const double AttenCorrScale = 101.0;
|
||||
static const signed char AttenCorrs[] = {
|
||||
-127, -127, -125, -125, -122, -119, -115, -110, -104, -97,
|
||||
-91, -82, -75, -24, -16, -6, 4, 14, 24, 29, 30, 32, 37, 44,
|
||||
51, 57, 63, 67, 65, 50, 53, 56, 58, 60, 63, 64, 66, 68, 74,
|
||||
77, 78, 78, 78, 79, 79, 60, 60, 60, 61, 59, 52, 47, 41, 36,
|
||||
30, 24, 17, 9, 0, -8, -10, -11, -14, -13, -18, -25, -31, -38,
|
||||
-44, -50, -57, -63, -68, -74, -81, -89, -96, -101, -104, -107,
|
||||
-109, -110, -86, -84, -85, -82, -80, -77, -73, -67, -62, -55,
|
||||
-48, -42, -35, -30, -20, -11, -2, 5, 6, 6, 7, 11, 16, 21, 26,
|
||||
34, 41, 46, 49, 52, 55, 56, 48, 49, 51, 51, 52, 52, 52, 52,
|
||||
52, 51, 51, 50, 47, 47, 50, 48, 46, 42, 38, 35, 31, 27, 24,
|
||||
20, 16, 12, 11, 12, 10, 8, 4, -1, -6, -11, -16, -19, -17, -21,
|
||||
-24, -27, -32, -34, -37, -38, -40, -41, -40, -40, -42, -41,
|
||||
-44, -45, -43, -41, -34, -31, -28, -24, -21, -18, -14, -10,
|
||||
-5, -1, 2, 5, 8, 7, 4, 3, 2, 2, 4, 6, 8, 9, 9, 10, 10, 10, 10,
|
||||
9, 8, 9, 11, 14, 13, 12, 11, 10, 8, 7, 6, 5, 3, 2, 2, -1, -1,
|
||||
-3, -3, -4, -4, -5, -4, -6, -7, -9, -5, -1, -1, 0, 1, 0, -2,
|
||||
-3, -4, -5, -5, -8, -13, -13, -13, -12, -13, -12, -11, -11,
|
||||
-9, -8, -7, -5, -3, -1, 2, 4, 6, 9, 10, 11, 14, 18, 21, 24,
|
||||
27, 30, 34, 37, 37, 39, 40 };
|
||||
|
||||
atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
|
||||
}
|
||||
else
|
||||
if( tb >= 0.10 )
|
||||
{
|
||||
static const double AttenCorrScale = 210.0;
|
||||
static const signed char AttenCorrs[] = {
|
||||
-113, -118, -122, -125, -126, -97, -95, -92, -92, -89, -82,
|
||||
-75, -69, -48, -42, -36, -30, -22, -14, -5, -2, 1, 6, 13, 22,
|
||||
28, 35, 41, 48, 55, 56, 56, 61, 65, 71, 77, 81, 83, 85, 85,
|
||||
74, 74, 73, 72, 71, 70, 68, 64, 59, 56, 49, 52, 46, 42, 36,
|
||||
32, 26, 20, 13, 7, -2, -6, -10, -15, -20, -27, -33, -38, -44,
|
||||
-43, -48, -53, -57, -63, -69, -73, -75, -79, -81, -74, -76,
|
||||
-77, -77, -78, -81, -80, -80, -78, -76, -65, -62, -59, -56,
|
||||
-51, -48, -44, -38, -33, -25, -19, -13, -5, -1, 2, 7, 13, 17,
|
||||
21, 25, 30, 35, 40, 45, 50, 53, 56, 57, 55, 58, 59, 62, 64,
|
||||
67, 67, 68, 68, 62, 61, 61, 59, 59, 57, 57, 55, 52, 48, 42,
|
||||
38, 35, 31, 26, 20, 15, 13, 10, 7, 3, -2, -8, -13, -17, -23,
|
||||
-28, -34, -37, -40, -41, -45, -48, -50, -53, -57, -59, -62,
|
||||
-63, -63, -57, -57, -56, -56, -54, -54, -53, -49, -48, -41,
|
||||
-38, -33, -31, -26, -23, -18, -12, -9, -7, -7, -3, 0, 5, 9,
|
||||
14, 16, 20, 22, 21, 23, 25, 27, 28, 29, 34, 33, 35, 33, 31,
|
||||
30, 29, 29, 26, 26, 25, 24, 20, 19, 15, 10, 8, 4, 1, -2, -6,
|
||||
-10, -16, -19, -23, -26, -27, -30, -34, -39, -43, -47, -51,
|
||||
-52, -54, -56, -58, -59, -62, -63, -66, -65, -65, -64, -59,
|
||||
-57, -54, -52, -48, -44, -42, -37, -32, -22, -17, -10, -3, 5,
|
||||
13, 22, 30, 40, 50, 60, 72 };
|
||||
|
||||
atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
|
||||
}
|
||||
else
|
||||
{
|
||||
static const double AttenCorrScale = 196.0;
|
||||
static const signed char AttenCorrs[] = {
|
||||
-15, -17, -20, -20, -20, -21, -20, -16, -17, -18, -17, -13,
|
||||
-12, -11, -9, -7, -5, -4, -1, 1, 3, 4, 5, 6, 7, 9, 9, 10, 10,
|
||||
10, 11, 11, 11, 12, 12, 12, 10, 11, 10, 10, 8, 10, 11, 10, 11,
|
||||
11, 13, 14, 15, 19, 27, 26, 23, 18, 14, 8, 4, -2, -6, -12,
|
||||
-17, -23, -28, -33, -37, -42, -46, -49, -53, -57, -60, -61,
|
||||
-64, -65, -67, -66, -66, -66, -65, -64, -61, -59, -56, -52,
|
||||
-48, -42, -38, -31, -27, -19, -13, -7, -1, 8, 14, 22, 29, 37,
|
||||
45, 52, 59, 66, 73, 80, 86, 91, 96, 100, 104, 108, 111, 114,
|
||||
115, 117, 118, 120, 120, 118, 117, 114, 113, 111, 107, 103,
|
||||
99, 95, 89, 84, 78, 72, 66, 60, 52, 44, 37, 30, 21, 14, 6, -3,
|
||||
-11, -18, -26, -34, -43, -51, -58, -65, -73, -78, -85, -90,
|
||||
-97, -102, -107, -113, -115, -118, -121, -125, -125, -126,
|
||||
-126, -126, -125, -124, -121, -119, -115, -111, -109, -101,
|
||||
-102, -95, -88, -81, -73, -67, -63, -54, -47, -40, -33, -26,
|
||||
-18, -11, -5, 2, 8, 14, 19, 25, 31, 36, 37, 43, 47, 49, 51,
|
||||
52, 57, 57, 56, 57, 58, 58, 58, 57, 56, 52, 52, 50, 48, 44,
|
||||
41, 39, 37, 33, 31, 26, 24, 21, 18, 14, 11, 8, 4, 2, -2, -5,
|
||||
-7, -9, -11, -13, -15, -16, -18, -19, -20, -23, -24, -24, -25,
|
||||
-27, -26, -27, -29, -30, -31, -32, -35, -36, -39, -40, -44,
|
||||
-46, -51, -54, -59, -63, -69, -76, -83, -91, -98 };
|
||||
|
||||
atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
|
||||
}
|
||||
|
||||
pwr = 7.43932822146293e-8 * sqr( atten ) + 0.000102747434588003 *
|
||||
cos( 0.00785021930010397 * atten ) * cos( 0.633854318781239 +
|
||||
0.103208573657699 * atten ) - 0.00798132247867036 -
|
||||
0.000903555213543865 * atten - 0.0969365532127236 * exp(
|
||||
0.0779275237937911 * atten ) - 1.37304948662012e-5 * atten * cos(
|
||||
0.00785021930010397 * atten );
|
||||
|
||||
if( pwr <= 0.067665322581 )
|
||||
{
|
||||
if( tb >= 0.25 )
|
||||
{
|
||||
hl = 2.6778150875894 / tb + 300.547590563091 * atan( atan(
|
||||
2.68959772209918 * pwr )) / ( 5.5099277187035 * tb - tb *
|
||||
tanh( cos( asinh( atten ))));
|
||||
|
||||
fo1 = 0.987205355829873 * tb + 1.00011788929851 * atan2(
|
||||
-0.321432067051302 - 6.19131357321578 * sqrt( pwr ),
|
||||
hl + -1.14861472207245 / ( hl - 14.1821147585957 ) + pow(
|
||||
0.9521145021664, pow( atan2( 1.12018764830637, tb ),
|
||||
2.10988901686912 * hl - 20.9691278378345 )));
|
||||
}
|
||||
else
|
||||
if( tb >= 0.10 )
|
||||
{
|
||||
hl = ( 1.56688617018066 + 142.064321294568 * pwr +
|
||||
0.00419441117131136 * cos( 243.633511747297 * pwr ) -
|
||||
0.022953443903576 * atten - 0.026629568860284 * cos(
|
||||
127.715550622571 * pwr )) / tb;
|
||||
|
||||
fo1 = 0.982299356642411 * tb + 0.999441744774215 * asinh((
|
||||
-0.361783054039583 - 5.80540593623676 * sqrt( pwr )) /
|
||||
hl );
|
||||
}
|
||||
else
|
||||
{
|
||||
hl = ( 2.45739657014937 + 269.183679500541 * pwr * cos(
|
||||
5.73225668178813 + atan2( cosh( 0.988861169868941 -
|
||||
17.2201556280744 * pwr ), 1.08340138240431 * pwr ))) / tb;
|
||||
|
||||
fo1 = 2.291956939 * tb + 0.01942450693 * sqr( tb ) * hl -
|
||||
4.67538973161837 * pwr * tb - 1.668433124 * tb *
|
||||
pow( pwr, pwr );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( tb >= 0.25 )
|
||||
{
|
||||
hl = ( 1.50258368698213 + 158.556968859477 * asinh( pwr ) *
|
||||
tanh( 57.9466246871383 * tanh( pwr )) -
|
||||
0.0105440479814834 * atten ) / tb;
|
||||
|
||||
fo1 = 0.994024401639321 * tb + ( -0.236282717577215 -
|
||||
6.8724924545387 * sqrt( sin( pwr ))) / hl;
|
||||
}
|
||||
else
|
||||
if( tb >= 0.10 )
|
||||
{
|
||||
hl = ( 1.50277377248945 + 158.222625721046 * asinh( pwr ) *
|
||||
tanh( 1.02875299001715 + 42.072277322604 * pwr ) -
|
||||
0.0108380943845632 * atten ) / tb;
|
||||
|
||||
fo1 = 0.992539376734551 * tb + ( -0.251747813037178 -
|
||||
6.74159892452584 * sqrt( tanh( tanh( tan( pwr ))))) / hl;
|
||||
}
|
||||
else
|
||||
{
|
||||
hl = ( 1.15990238966306 * pwr - 5.02124037125213 * sqr(
|
||||
pwr ) - 0.158676856669827 * atten * cos( 1.1609073390614 *
|
||||
pwr - 6.33932586197475 * pwr * sqr( pwr ))) / tb;
|
||||
|
||||
fo1 = 0.867344453126885 * tb + 0.052693817907757 * tb * log(
|
||||
pwr ) + 0.0895511178735932 * tb * atan( 59.7538527741309 *
|
||||
pwr ) - 0.0745653568081453 * pwr * tb;
|
||||
}
|
||||
}
|
||||
|
||||
double WinParams[ 2 ];
|
||||
WinParams[ 0 ] = 125.0;
|
||||
WinParams[ 1 ] = pwr;
|
||||
|
||||
CDSPSincFilterGen sinc;
|
||||
sinc.Len2 = 0.25 * hl / ReqNormFreq;
|
||||
sinc.Freq1 = 0.0;
|
||||
sinc.Freq2 = R8B_PI * ( 1.0 - fo1 ) * ReqNormFreq;
|
||||
sinc.initBand( CDSPSincFilterGen :: wftKaiser, WinParams, true );
|
||||
|
||||
KernelLen = sinc.KernelLen;
|
||||
BlockLenBits = getBitOccupancy( KernelLen - 1 ) + R8B_EXTFFT;
|
||||
const int BlockLen = 1 << BlockLenBits;
|
||||
|
||||
KernelBlock.alloc( BlockLen * 2 );
|
||||
sinc.generateBand( &KernelBlock[ 0 ],
|
||||
&CDSPSincFilterGen :: calcWindowKaiser );
|
||||
|
||||
if( ReqPhase == fprLinearPhase )
|
||||
{
|
||||
IsZeroPhase = true;
|
||||
Latency = sinc.fl2;
|
||||
LatencyFrac = 0.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
IsZeroPhase = false;
|
||||
double DCGroupDelay;
|
||||
|
||||
calcMinPhaseTransform( &KernelBlock[ 0 ], KernelLen, 16, false,
|
||||
&DCGroupDelay );
|
||||
|
||||
Latency = (int) DCGroupDelay;
|
||||
LatencyFrac = DCGroupDelay - Latency;
|
||||
}
|
||||
|
||||
CDSPRealFFTKeeper ffto( BlockLenBits + 1 );
|
||||
|
||||
if( IsZeroPhase )
|
||||
{
|
||||
// Calculate DC gain.
|
||||
|
||||
double s = 0.0;
|
||||
int i;
|
||||
|
||||
for( i = 0; i < KernelLen; i++ )
|
||||
{
|
||||
s += KernelBlock[ i ];
|
||||
}
|
||||
|
||||
s = ffto -> getInvMulConst() * ReqGain / s;
|
||||
|
||||
// Time-shift the filter so that zero-phase response is produced.
|
||||
// Simultaneously multiply by "s".
|
||||
|
||||
for( i = 0; i <= sinc.fl2; i++ )
|
||||
{
|
||||
KernelBlock[ i ] = KernelBlock[ sinc.fl2 + i ] * s;
|
||||
}
|
||||
|
||||
for( i = 1; i <= sinc.fl2; i++ )
|
||||
{
|
||||
KernelBlock[ BlockLen * 2 - i ] = KernelBlock[ i ];
|
||||
}
|
||||
|
||||
memset( &KernelBlock[ sinc.fl2 + 1 ], 0,
|
||||
( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
|
||||
|
||||
ffto -> forward( KernelBlock );
|
||||
ffto -> convertToZP( KernelBlock );
|
||||
}
|
||||
else
|
||||
{
|
||||
normalizeFIRFilter( &KernelBlock[ 0 ], KernelLen,
|
||||
ffto -> getInvMulConst() * ReqGain );
|
||||
|
||||
memset( &KernelBlock[ KernelLen ], 0,
|
||||
( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
|
||||
|
||||
ffto -> forward( KernelBlock );
|
||||
}
|
||||
|
||||
R8BCONSOLE( "CDSPFIRFilter: flt_len=%i latency=%i nfreq=%.4f "
|
||||
"tb=%.1f att=%.1f gain=%.3f\n", KernelLen, Latency,
|
||||
ReqNormFreq, ReqTransBand, ReqAtten, ReqGain );
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief FIR filter cache class.
|
||||
*
|
||||
* Class that implements cache for calculated FIR filters. The required FIR
|
||||
* filter should be obtained via the getLPFilter() static function.
|
||||
*/
|
||||
|
||||
class CDSPFIRFilterCache : public R8B_BASECLASS
|
||||
{
|
||||
R8BNOCTOR( CDSPFIRFilterCache );
|
||||
|
||||
friend class CDSPFIRFilter;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @return The number of filters present in the cache now. This value can
|
||||
* be monitored for debugging "forgotten" filters.
|
||||
*/
|
||||
|
||||
static int getObjCount()
|
||||
{
|
||||
R8BSYNC( StateSync );
|
||||
|
||||
return( ObjCount );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function calculates or returns reference to a previously calculated
|
||||
* (cached) low-pass FIR filter. Note that the real transition band and
|
||||
* attenuation achieved by the filter varies with the magnitude of the
|
||||
* required attenuation, and are never 100% exact.
|
||||
*
|
||||
* @param ReqNormFreq Required normalized frequency, in the range 0 to 1,
|
||||
* inclusive. This is the point after which the stop-band spans.
|
||||
* @param ReqTransBand Required transition band, in percent of the
|
||||
* 0 to ReqNormFreq spectral bandwidth, in the range
|
||||
* CDSPFIRFilter::getLPMinTransBand() to
|
||||
* CDSPFIRFilter::getLPMaxTransBand(), inclusive. The transition band
|
||||
* specifies the part of the spectrum between the -3 dB and ReqNormFreq
|
||||
* points. The real resulting -3 dB point varies in the range from -3.00
|
||||
* to -3.05 dB, but is generally very close to -3 dB.
|
||||
* @param ReqAtten Required stop-band attenuation in decibel, in the range
|
||||
* CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
|
||||
* inclusive. Note that the actual stop-band attenuation of the resulting
|
||||
* filter may be 0.40-4.46 dB higher.
|
||||
* @param ReqPhase Required filter's phase response.
|
||||
* @param ReqGain Required overall filter's gain (1.0 for unity gain).
|
||||
* @param AttenCorrs Attentuation correction table, to pass to the filter
|
||||
* generation function. For internal use.
|
||||
* @return A reference to a new or a previously calculated low-pass FIR
|
||||
* filter object with the required characteristics. A reference count is
|
||||
* incremented in the returned filter object which should be released
|
||||
* after use via the CDSPFIRFilter::unref() function.
|
||||
*/
|
||||
|
||||
static CDSPFIRFilter& getLPFilter( const double ReqNormFreq,
|
||||
const double ReqTransBand, const double ReqAtten,
|
||||
const EDSPFilterPhaseResponse ReqPhase, const double ReqGain,
|
||||
const double* const AttenCorrs = NULL )
|
||||
{
|
||||
R8BASSERT( ReqNormFreq > 0.0 && ReqNormFreq <= 1.0 );
|
||||
R8BASSERT( ReqTransBand >= CDSPFIRFilter :: getLPMinTransBand() );
|
||||
R8BASSERT( ReqTransBand <= CDSPFIRFilter :: getLPMaxTransBand() );
|
||||
R8BASSERT( ReqAtten >= CDSPFIRFilter :: getLPMinAtten() );
|
||||
R8BASSERT( ReqAtten <= CDSPFIRFilter :: getLPMaxAtten() );
|
||||
R8BASSERT( ReqGain > 0.0 );
|
||||
|
||||
R8BSYNC( StateSync );
|
||||
|
||||
CDSPFIRFilter* PrevObj = NULL;
|
||||
CDSPFIRFilter* CurObj = Objects;
|
||||
|
||||
while( CurObj != NULL )
|
||||
{
|
||||
if( CurObj -> ReqNormFreq == ReqNormFreq &&
|
||||
CurObj -> ReqTransBand == ReqTransBand &&
|
||||
CurObj -> ReqAtten == ReqAtten &&
|
||||
CurObj -> ReqPhase == ReqPhase &&
|
||||
CurObj -> ReqGain == ReqGain )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if( CurObj -> Next == NULL && ObjCount >= R8B_FILTER_CACHE_MAX )
|
||||
{
|
||||
if( CurObj -> RefCount == 0 )
|
||||
{
|
||||
// Delete the last filter which is not used.
|
||||
|
||||
PrevObj -> Next = NULL;
|
||||
delete CurObj;
|
||||
ObjCount--;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Move the last filter to the top of the list since it
|
||||
// seems to be in use for a long time.
|
||||
|
||||
PrevObj -> Next = NULL;
|
||||
CurObj -> Next = Objects.unkeep();
|
||||
Objects = CurObj;
|
||||
}
|
||||
|
||||
CurObj = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
PrevObj = CurObj;
|
||||
CurObj = CurObj -> Next;
|
||||
}
|
||||
|
||||
if( CurObj != NULL )
|
||||
{
|
||||
CurObj -> RefCount++;
|
||||
|
||||
if( PrevObj == NULL )
|
||||
{
|
||||
return( *CurObj );
|
||||
}
|
||||
|
||||
// Remove the filter from the list temporarily.
|
||||
|
||||
PrevObj -> Next = CurObj -> Next;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Create a new filter object (with RefCount == 1) and build the
|
||||
// filter kernel.
|
||||
|
||||
CurObj = new CDSPFIRFilter();
|
||||
CurObj -> ReqNormFreq = ReqNormFreq;
|
||||
CurObj -> ReqTransBand = ReqTransBand;
|
||||
CurObj -> ReqAtten = ReqAtten;
|
||||
CurObj -> ReqPhase = ReqPhase;
|
||||
CurObj -> ReqGain = ReqGain;
|
||||
ObjCount++;
|
||||
|
||||
CurObj -> buildLPFilter( AttenCorrs );
|
||||
}
|
||||
|
||||
// Insert the filter at the start of the list.
|
||||
|
||||
CurObj -> Next = Objects.unkeep();
|
||||
Objects = CurObj;
|
||||
|
||||
return( *CurObj );
|
||||
}
|
||||
|
||||
private:
|
||||
static CSyncObject StateSync; ///< Cache state synchronizer.
|
||||
///<
|
||||
static CPtrKeeper< CDSPFIRFilter* > Objects; ///< The chain of cached
|
||||
///< objects.
|
||||
///<
|
||||
static int ObjCount; ///< The number of objects currently preset in the
|
||||
///< cache.
|
||||
///<
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CDSPFIRFilter PUBLIC
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
inline void CDSPFIRFilter :: unref()
|
||||
{
|
||||
R8BSYNC( CDSPFIRFilterCache :: StateSync );
|
||||
|
||||
RefCount--;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
} // namespace r8b
|
||||
|
||||
#endif // R8B_CDSPFIRFILTER_INCLUDED
|
1180
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h
vendored
Normal file
1180
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
401
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h
vendored
Normal file
401
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h
vendored
Normal file
@ -0,0 +1,401 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file CDSPHBDownsampler.h
|
||||
*
|
||||
* @brief Half-band downsampling convolver class.
|
||||
*
|
||||
* This file includes half-band downsampling convolver class.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8B_CDSPHBDOWNSAMPLER_INCLUDED
|
||||
#define R8B_CDSPHBDOWNSAMPLER_INCLUDED
|
||||
|
||||
#include "CDSPHBUpsampler.h"
|
||||
|
||||
namespace r8b {
|
||||
|
||||
/**
|
||||
* @brief Half-band downsampler class.
|
||||
*
|
||||
* Class implements brute-force half-band 2X downsampling that uses small
|
||||
* sparse symmetric FIR filters. The output has 2.0 gain.
|
||||
*/
|
||||
|
||||
class CDSPHBDownsampler : public CDSPProcessor
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor initalizes the half-band downsampler.
|
||||
*
|
||||
* @param ReqAtten Required half-band filter attentuation.
|
||||
* @param SteepIndex Steepness index - 0=steepest. Corresponds to general
|
||||
* downsampling ratio, e.g. at 4x downsampling 0 is used, at 8x
|
||||
* downsampling 1 is used, etc.
|
||||
* @param IsThird "True" if 1/3 resampling is performed.
|
||||
* @param PrevLatency Latency, in samples (any value >=0), which was left
|
||||
* in the output signal by a previous process. Whole-number latency will
|
||||
* be consumed by *this object while remaining fractional latency can be
|
||||
* obtained via the getLatencyFrac() function.
|
||||
*/
|
||||
|
||||
CDSPHBDownsampler( const double ReqAtten, const int SteepIndex,
|
||||
const bool IsThird, const double PrevLatency )
|
||||
{
|
||||
static const CConvolveFn FltConvFn[ 14 ] = {
|
||||
&CDSPHBDownsampler :: convolve1, &CDSPHBDownsampler :: convolve2,
|
||||
&CDSPHBDownsampler :: convolve3, &CDSPHBDownsampler :: convolve4,
|
||||
&CDSPHBDownsampler :: convolve5, &CDSPHBDownsampler :: convolve6,
|
||||
&CDSPHBDownsampler :: convolve7, &CDSPHBDownsampler :: convolve8,
|
||||
&CDSPHBDownsampler :: convolve9, &CDSPHBDownsampler :: convolve10,
|
||||
&CDSPHBDownsampler :: convolve11, &CDSPHBDownsampler :: convolve12,
|
||||
&CDSPHBDownsampler :: convolve13,
|
||||
&CDSPHBDownsampler :: convolve14 };
|
||||
|
||||
int fltt;
|
||||
double att;
|
||||
|
||||
if( IsThird )
|
||||
{
|
||||
CDSPHBUpsampler :: getHBFilterThird( ReqAtten, SteepIndex, fltp,
|
||||
fltt, att );
|
||||
}
|
||||
else
|
||||
{
|
||||
CDSPHBUpsampler :: getHBFilter( ReqAtten, SteepIndex, fltp, fltt,
|
||||
att );
|
||||
}
|
||||
|
||||
convfn = FltConvFn[ fltt - 1 ];
|
||||
fll = fltt * 2 - 1;
|
||||
fl2 = fll;
|
||||
flo = fll + fl2;
|
||||
flb = BufLen - fll;
|
||||
BufRP = Buf + fll;
|
||||
|
||||
LatencyFrac = PrevLatency * 0.5;
|
||||
Latency = (int) LatencyFrac;
|
||||
LatencyFrac -= Latency;
|
||||
|
||||
R8BASSERT( Latency >= 0 );
|
||||
|
||||
R8BCONSOLE( "CDSPHBDownsampler: taps=%i third=%i att=%.1f io=1/2\n",
|
||||
fltt, (int) IsThird, att );
|
||||
|
||||
clear();
|
||||
}
|
||||
|
||||
virtual int getLatency() const
|
||||
{
|
||||
return( 0 );
|
||||
}
|
||||
|
||||
virtual double getLatencyFrac() const
|
||||
{
|
||||
return( LatencyFrac );
|
||||
}
|
||||
|
||||
virtual int getMaxOutLen( const int MaxInLen ) const
|
||||
{
|
||||
R8BASSERT( MaxInLen >= 0 );
|
||||
|
||||
return(( MaxInLen + 1 ) >> 1 );
|
||||
}
|
||||
|
||||
virtual void clear()
|
||||
{
|
||||
LatencyLeft = Latency;
|
||||
BufLeft = 0;
|
||||
WritePos = 0;
|
||||
ReadPos = flb; // Set "read" position to account for filter's latency.
|
||||
|
||||
memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
|
||||
}
|
||||
|
||||
virtual int process( double* ip, int l, double*& op0 )
|
||||
{
|
||||
R8BASSERT( l >= 0 );
|
||||
|
||||
double* op = op0;
|
||||
|
||||
while( l > 0 )
|
||||
{
|
||||
// Add new input samples to both halves of the ring buffer.
|
||||
|
||||
const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
|
||||
|
||||
double* const wp1 = Buf + WritePos;
|
||||
memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
|
||||
|
||||
if( WritePos < flo )
|
||||
{
|
||||
const int c = min( b, flo - WritePos );
|
||||
memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
|
||||
}
|
||||
|
||||
ip += b;
|
||||
WritePos = ( WritePos + b ) & BufLenMask;
|
||||
l -= b;
|
||||
BufLeft += b;
|
||||
|
||||
// Produce output.
|
||||
|
||||
if( BufLeft > fl2 )
|
||||
{
|
||||
const int c = ( BufLeft - fl2 + 1 ) >> 1;
|
||||
|
||||
double* const opend = op + c;
|
||||
( *convfn )( op, opend, fltp, BufRP, ReadPos );
|
||||
|
||||
op = opend;
|
||||
const int c2 = c + c;
|
||||
ReadPos = ( ReadPos + c2 ) & BufLenMask;
|
||||
BufLeft -= c2;
|
||||
}
|
||||
}
|
||||
|
||||
int ol = (int) ( op - op0 );
|
||||
|
||||
if( LatencyLeft != 0 )
|
||||
{
|
||||
if( LatencyLeft >= ol )
|
||||
{
|
||||
LatencyLeft -= ol;
|
||||
return( 0 );
|
||||
}
|
||||
|
||||
ol -= LatencyLeft;
|
||||
op0 += LatencyLeft;
|
||||
LatencyLeft = 0;
|
||||
}
|
||||
|
||||
return( ol );
|
||||
}
|
||||
|
||||
private:
|
||||
static const int BufLenBits = 10; ///< The length of the ring buffer,
|
||||
///< expressed as Nth power of 2. This value can be reduced if it is
|
||||
///< known that only short input buffers will be passed to the
|
||||
///< interpolator. The minimum value of this parameter is 5, and
|
||||
///< 1 << BufLenBits should be at least 3 times larger than the
|
||||
///< FilterLen.
|
||||
///<
|
||||
static const int BufLen = 1 << BufLenBits; ///< The length of the ring
|
||||
///< buffer. The actual length is twice as long to allow "beyond max
|
||||
///< position" positioning.
|
||||
///<
|
||||
static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
|
||||
///< position wrapping.
|
||||
///<
|
||||
double Buf[ BufLen + 54 ]; ///< The ring buffer, including overrun
|
||||
///< protection for the largest filter.
|
||||
///<
|
||||
const double* fltp; ///< Half-band filter taps.
|
||||
///<
|
||||
int fll; ///< Input latency.
|
||||
///<
|
||||
int fl2; ///< Right-side filter length.
|
||||
///<
|
||||
int flo; ///< Overrun length.
|
||||
///<
|
||||
int flb; ///< Initial read position and maximal buffer write length.
|
||||
///<
|
||||
const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
|
||||
///<
|
||||
int Latency; ///< Initial latency that should be removed from the output.
|
||||
///<
|
||||
double LatencyFrac; ///< Fractional latency left on the output.
|
||||
///<
|
||||
int BufLeft; ///< The number of samples left in the buffer to process.
|
||||
///< When this value is below FilterLenD2Plus1, the interpolation
|
||||
///< cycle ends.
|
||||
///<
|
||||
int WritePos; ///< The current buffer write position. Incremented together
|
||||
///< with the BufLeft variable.
|
||||
///<
|
||||
int ReadPos; ///< The current buffer read position.
|
||||
///<
|
||||
int LatencyLeft; ///< Latency left to remove.
|
||||
///<
|
||||
typedef void( *CConvolveFn )( double* op, double* const opend,
|
||||
const double* const flt, const double* const rp0, int rpos ); ///<
|
||||
///< Convolution function type.
|
||||
///<
|
||||
CConvolveFn convfn; ///< Convolution function in use.
|
||||
///<
|
||||
|
||||
#define R8BHBC1( fn ) \
|
||||
static void fn( double* op, double* const opend, const double* const flt, \
|
||||
const double* const rp0, int rpos ) \
|
||||
{ \
|
||||
while( op != opend ) \
|
||||
{ \
|
||||
const double* const rp = rp0 + rpos; \
|
||||
*op = rp[ 0 ] +
|
||||
|
||||
#define R8BHBC2 \
|
||||
rpos = ( rpos + 2 ) & BufLenMask; \
|
||||
op++; \
|
||||
} \
|
||||
}
|
||||
|
||||
R8BHBC1( convolve1 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve2 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve3 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve4 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve5 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve6 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
|
||||
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve7 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
|
||||
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
|
||||
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve8 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
|
||||
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
|
||||
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
|
||||
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve9 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
|
||||
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
|
||||
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
|
||||
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
|
||||
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve10 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
|
||||
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
|
||||
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
|
||||
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
|
||||
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
|
||||
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve11 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
|
||||
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
|
||||
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
|
||||
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
|
||||
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
|
||||
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
|
||||
flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve12 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
|
||||
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
|
||||
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
|
||||
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
|
||||
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
|
||||
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
|
||||
flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
|
||||
flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve13 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
|
||||
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
|
||||
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
|
||||
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
|
||||
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
|
||||
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
|
||||
flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
|
||||
flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
|
||||
flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve14 )
|
||||
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
|
||||
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
|
||||
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
|
||||
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
|
||||
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
|
||||
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
|
||||
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
|
||||
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
|
||||
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
|
||||
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
|
||||
flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
|
||||
flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
|
||||
flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]) +
|
||||
flt[ 13 ] * ( rp[ 27 ] + rp[ -27 ]);
|
||||
R8BHBC2
|
||||
|
||||
#undef R8BHBC1
|
||||
#undef R8BHBC2
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
} // namespace r8b
|
||||
|
||||
#endif // R8B_CDSPHBDOWNSAMPLER_INCLUDED
|
804
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h
vendored
Normal file
804
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h
vendored
Normal file
@ -0,0 +1,804 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file CDSPHBUpsampler.h
|
||||
*
|
||||
* @brief Half-band upsampling class.
|
||||
*
|
||||
* This file includes half-band upsampling class.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8B_CDSPHBUPSAMPLER_INCLUDED
|
||||
#define R8B_CDSPHBUPSAMPLER_INCLUDED
|
||||
|
||||
#include "CDSPProcessor.h"
|
||||
|
||||
namespace r8b {
|
||||
|
||||
/**
|
||||
* @brief Half-band upsampling class.
|
||||
*
|
||||
* Class implements brute-force half-band 2X upsampling that uses small
|
||||
* sparse symmetric FIR filters. It is very efficient and should be used at
|
||||
* latter upsampling steps after initial steep 2X upsampling.
|
||||
*/
|
||||
|
||||
class CDSPHBUpsampler : public CDSPProcessor
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Function that provides filter data for various steepness indices and
|
||||
* attenuations.
|
||||
*
|
||||
* @param ReqAtten Required half-band filter attentuation.
|
||||
* @param SteepIndex Steepness index - 0=steepest. Corresponds to general
|
||||
* upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
|
||||
* etc.
|
||||
*/
|
||||
|
||||
static void getHBFilter( const double ReqAtten, const int SteepIndex,
|
||||
const double*& flt, int& fltt, double& att )
|
||||
{
|
||||
static const double HBKernel_4A[ 4 ] = { // -54.5176 dB, 4
|
||||
6.1729335650971517e-001, -1.5963945620743250e-001,
|
||||
5.5073370934086312e-002, -1.4603578989932850e-002,};
|
||||
static const double HBKernel_5A[ 5 ] = { // -66.3075 dB, 4
|
||||
6.2068807424902472e-001, -1.6827573634467302e-001,
|
||||
6.5263016720721170e-002, -2.2483331611592005e-002,
|
||||
5.2917326684281110e-003,};
|
||||
static const double HBKernel_6A[ 6 ] = { // -89.5271 dB, 4
|
||||
6.2187202340480707e-001, -1.7132842113816371e-001,
|
||||
6.9019169178765674e-002, -2.5799728312695277e-002,
|
||||
7.4880112525741666e-003, -1.2844465869952567e-003,};
|
||||
static const double HBKernel_7A[ 7 ] = { // -105.2842 dB, 4
|
||||
6.2354494135775851e-001, -1.7571220703702045e-001,
|
||||
7.4529843603968457e-002, -3.0701736822442153e-002,
|
||||
1.0716755639039573e-002, -2.7833422930759735e-003,
|
||||
4.1118797093875510e-004,};
|
||||
static const double HBKernel_8A[ 8 ] = { // -121.0063 dB, 4
|
||||
6.2488363107953926e-001, -1.7924942606514119e-001,
|
||||
7.9068155655640557e-002, -3.4907523415495731e-002,
|
||||
1.3710256799907897e-002, -4.3991142586987933e-003,
|
||||
1.0259190163889602e-003, -1.3278941979339359e-004,};
|
||||
static const double HBKernel_9A[ 9 ] = { // -136.6982 dB, 4
|
||||
6.2597763804021977e-001, -1.8216414325139055e-001,
|
||||
8.2879104876726728e-002, -3.8563442248249404e-002,
|
||||
1.6471530499739394e-002, -6.0489108881335227e-003,
|
||||
1.7805283804140392e-003, -3.7533200112729561e-004,
|
||||
4.3172840558735476e-005,};
|
||||
static const double HBKernel_10A[ 10 ] = { // -152.3572 dB, 4
|
||||
6.2688767582974092e-001, -1.8460766807559420e-001,
|
||||
8.6128943000481864e-002, -4.1774474147006607e-002,
|
||||
1.9014801985747346e-002, -7.6870397465866507e-003,
|
||||
2.6264590175341853e-003, -7.1106660285478562e-004,
|
||||
1.3645852036179345e-004, -1.4113888783332969e-005,};
|
||||
static const double HBKernel_11A[ 11 ] = { // -183.7962 dB, 4
|
||||
6.2667167706948146e-001, -1.8407153342635879e-001,
|
||||
8.5529995610836046e-002, -4.1346831462361310e-002,
|
||||
1.8844831691322637e-002, -7.7125170365394992e-003,
|
||||
2.7268674860562087e-003, -7.9745028501057233e-004,
|
||||
1.8116344606360795e-004, -2.8569149754241848e-005,
|
||||
2.3667022010173616e-006,};
|
||||
static const double HBKernel_12A[ 12 ] = { // -199.4768 dB, 4
|
||||
6.2747849730367999e-001, -1.8623616784506747e-001,
|
||||
8.8409755898467945e-002, -4.4207468821462342e-002,
|
||||
2.1149175945115381e-002, -9.2551508371115209e-003,
|
||||
3.5871562170822330e-003, -1.1923167653750219e-003,
|
||||
3.2627812189920129e-004, -6.9106902511490413e-005,
|
||||
1.0122897863125124e-005, -7.7531878906846174e-007,};
|
||||
static const double HBKernel_13A[ 13 ] = { // -215.1364 dB, 4
|
||||
6.2816416252367324e-001, -1.8809076955230414e-001,
|
||||
9.0918539867353029e-002, -4.6765502683599310e-002,
|
||||
2.3287520498995663e-002, -1.0760627245014184e-002,
|
||||
4.4853922948425683e-003, -1.6438775426910800e-003,
|
||||
5.1441312354764978e-004, -1.3211725685765050e-004,
|
||||
2.6191319837779187e-005, -3.5802430606313093e-006,
|
||||
2.5491278270628601e-007,};
|
||||
static const double HBKernel_14A[ 14 ] = { // -230.7526 dB, 4
|
||||
6.2875473120929948e-001, -1.8969941936903847e-001,
|
||||
9.3126094480960403e-002, -4.9067251179869126e-002,
|
||||
2.5273008851199916e-002, -1.2218646153393291e-002,
|
||||
5.4048942085580280e-003, -2.1409919546078581e-003,
|
||||
7.4250292812927973e-004, -2.1924542206832172e-004,
|
||||
5.3015808983125091e-005, -9.8743034923598196e-006,
|
||||
1.2650391141650221e-006, -8.4146674637474946e-008,};
|
||||
static const int FltCountA = 11;
|
||||
static const int FlttBaseA = 4;
|
||||
static const double FltAttensA[ FltCountA ] = {
|
||||
54.5176, 66.3075, 89.5271, 105.2842, 121.0063, 136.6982, 152.3572, 183.7962, 199.4768, 215.1364, 230.7526, };
|
||||
static const double* const FltPtrsA[ FltCountA ] = {
|
||||
HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, HBKernel_10A, HBKernel_11A, HBKernel_12A, HBKernel_13A, HBKernel_14A, };
|
||||
static const double HBKernel_2B[ 2 ] = { // -56.6007 dB, 8
|
||||
5.7361525854329076e-001, -7.5092074924827903e-002,};
|
||||
static const double HBKernel_3B[ 3 ] = { // -83.0295 dB, 8
|
||||
5.9277038608066912e-001, -1.0851340190268854e-001,
|
||||
1.5813570475513079e-002,};
|
||||
static const double HBKernel_4B[ 4 ] = { // -123.4724 dB, 8
|
||||
6.0140277542879617e-001, -1.2564483854574138e-001,
|
||||
2.7446500598038322e-002, -3.2051079559057435e-003,};
|
||||
static const double HBKernel_5B[ 5 ] = { // -152.4411 dB, 8
|
||||
6.0818642429088932e-001, -1.3981140187175697e-001,
|
||||
3.8489164054503623e-002, -7.6218861797853104e-003,
|
||||
7.5772358130952392e-004,};
|
||||
static const double HBKernel_6B[ 6 ] = { // -181.2501 dB, 8
|
||||
6.1278392271464355e-001, -1.5000053762513338e-001,
|
||||
4.7575323511364960e-002, -1.2320702802243476e-002,
|
||||
2.1462442592348487e-003, -1.8425092381892940e-004,};
|
||||
static const double HBKernel_7B[ 7 ] = { // -209.9472 dB, 8
|
||||
6.1610372263478952e-001, -1.5767891882524138e-001,
|
||||
5.5089691170294691e-002, -1.6895755656366061e-002,
|
||||
3.9416643438213977e-003, -6.0603623791604668e-004,
|
||||
4.5632602433393365e-005,};
|
||||
static const double HBKernel_8B[ 8 ] = { // -238.5616 dB, 8
|
||||
6.1861282914465976e-001, -1.6367179451225150e-001,
|
||||
6.1369861342939716e-002, -2.1184466539006987e-002,
|
||||
5.9623357510842061e-003, -1.2483098507454090e-003,
|
||||
1.7099297537964702e-004, -1.1448313239478885e-005,};
|
||||
static const int FltCountB = 7;
|
||||
static const int FlttBaseB = 2;
|
||||
static const double FltAttensB[ FltCountB ] = {
|
||||
56.6007, 83.0295, 123.4724, 152.4411, 181.2501, 209.9472, 238.5616, };
|
||||
static const double* const FltPtrsB[ FltCountB ] = {
|
||||
HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, HBKernel_7B, HBKernel_8B, };
|
||||
static const double HBKernel_2C[ 2 ] = { // -89.0473 dB, 16
|
||||
5.6430278013478008e-001, -6.4338068855763375e-002,};
|
||||
static const double HBKernel_3C[ 3 ] = { // -130.8951 dB, 16
|
||||
5.8706402915551448e-001, -9.9362380958670449e-002,
|
||||
1.2298637065869358e-002,};
|
||||
static const double HBKernel_4C[ 4 ] = { // -172.3192 dB, 16
|
||||
5.9896586134984675e-001, -1.2111680603434927e-001,
|
||||
2.4763118076458895e-002, -2.6121758132212989e-003,};
|
||||
static const double HBKernel_5C[ 5 ] = { // -213.4984 dB, 16
|
||||
6.0626808285230716e-001, -1.3588224032740795e-001,
|
||||
3.5544305238309003e-002, -6.5127022377289654e-003,
|
||||
5.8255449565950768e-004,};
|
||||
static const double HBKernel_6C[ 6 ] = { // -254.5186 dB, 16
|
||||
6.1120171263351242e-001, -1.4654486853757870e-001,
|
||||
4.4582959299131253e-002, -1.0840543858123995e-002,
|
||||
1.7343706485509962e-003, -1.3363018567985596e-004,};
|
||||
static const int FltCountC = 5;
|
||||
static const int FlttBaseC = 2;
|
||||
static const double FltAttensC[ FltCountC ] = {
|
||||
89.0473, 130.8951, 172.3192, 213.4984, 254.5186, };
|
||||
static const double* const FltPtrsC[ FltCountC ] = {
|
||||
HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, HBKernel_6C, };
|
||||
static const double HBKernel_1D[ 1 ] = { // -54.4754 dB, 32
|
||||
5.0188900022775451e-001,};
|
||||
static const double HBKernel_2D[ 2 ] = { // -113.2139 dB, 32
|
||||
5.6295152180538044e-001, -6.2953706070191726e-002,};
|
||||
static const double HBKernel_3D[ 3 ] = { // -167.1447 dB, 32
|
||||
5.8621968728755036e-001, -9.8080551656524531e-002,
|
||||
1.1860868761997080e-002,};
|
||||
static const double HBKernel_4D[ 4 ] = { // -220.6519 dB, 32
|
||||
5.9835028657163591e-001, -1.1999986086623511e-001,
|
||||
2.4132530854004228e-002, -2.4829565686819706e-003,};
|
||||
static const int FltCountD = 4;
|
||||
static const int FlttBaseD = 1;
|
||||
static const double FltAttensD[ FltCountD ] = {
|
||||
54.4754, 113.2139, 167.1447, 220.6519, };
|
||||
static const double* const FltPtrsD[ FltCountD ] = {
|
||||
HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
|
||||
static const double HBKernel_1E[ 1 ] = { // -66.5391 dB, 64
|
||||
5.0047102586416625e-001,};
|
||||
static const double HBKernel_2E[ 2 ] = { // -137.3173 dB, 64
|
||||
5.6261293163933568e-001, -6.2613067826620017e-002,};
|
||||
static const double HBKernel_3E[ 3 ] = { // -203.2997 dB, 64
|
||||
5.8600808139396787e-001, -9.7762185880067784e-002,
|
||||
1.1754104554493029e-002,};
|
||||
static const double HBKernel_4E[ 4 ] = { // -268.8550 dB, 64
|
||||
5.9819599352772002e-001, -1.1972157555011861e-001,
|
||||
2.3977305567947922e-002, -2.4517235455853992e-003,};
|
||||
static const int FltCountE = 4;
|
||||
static const int FlttBaseE = 1;
|
||||
static const double FltAttensE[ FltCountE ] = {
|
||||
66.5391, 137.3173, 203.2997, 268.8550, };
|
||||
static const double* const FltPtrsE[ FltCountE ] = {
|
||||
HBKernel_1E, HBKernel_2E, HBKernel_3E, HBKernel_4E, };
|
||||
static const double HBKernel_1F[ 1 ] = { // -82.4633 dB, 128
|
||||
5.0007530666642896e-001,};
|
||||
static const double HBKernel_2F[ 2 ] = { // -161.4049 dB, 128
|
||||
5.6252823610146030e-001, -6.2528244608044792e-002,};
|
||||
static const double HBKernel_3F[ 3 ] = { // -239.4313 dB, 128
|
||||
5.8595514744674237e-001, -9.7682725156791952e-002,
|
||||
1.1727577711117231e-002,};
|
||||
static const int FltCountF = 3;
|
||||
static const int FlttBaseF = 1;
|
||||
static const double FltAttensF[ FltCountF ] = {
|
||||
82.4633, 161.4049, 239.4313, };
|
||||
static const double* const FltPtrsF[ FltCountF ] = {
|
||||
HBKernel_1F, HBKernel_2F, HBKernel_3F, };
|
||||
static const double HBKernel_1G[ 1 ] = { // -94.5052 dB, 256
|
||||
5.0001882524896712e-001,};
|
||||
static const double HBKernel_2G[ 2 ] = { // -185.4886 dB, 256
|
||||
5.6250705922479682e-001, -6.2507059756378394e-002,};
|
||||
static const double HBKernel_3G[ 3 ] = { // -275.5501 dB, 256
|
||||
5.8594191201187384e-001, -9.7662868266991207e-002,
|
||||
1.1720956255134043e-002,};
|
||||
static const int FltCountG = 3;
|
||||
static const int FlttBaseG = 1;
|
||||
static const double FltAttensG[ FltCountG ] = {
|
||||
94.5052, 185.4886, 275.5501, };
|
||||
static const double* const FltPtrsG[ FltCountG ] = {
|
||||
HBKernel_1G, HBKernel_2G, HBKernel_3G, };
|
||||
|
||||
int k = 0;
|
||||
|
||||
if( SteepIndex <= 0 )
|
||||
{
|
||||
while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsA[ k ];
|
||||
fltt = FlttBaseA + k;
|
||||
att = FltAttensA[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 1 )
|
||||
{
|
||||
while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsB[ k ];
|
||||
fltt = FlttBaseB + k;
|
||||
att = FltAttensB[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 2 )
|
||||
{
|
||||
while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsC[ k ];
|
||||
fltt = FlttBaseC + k;
|
||||
att = FltAttensC[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 3 )
|
||||
{
|
||||
while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsD[ k ];
|
||||
fltt = FlttBaseD + k;
|
||||
att = FltAttensD[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 4 )
|
||||
{
|
||||
while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsE[ k ];
|
||||
fltt = FlttBaseE + k;
|
||||
att = FltAttensE[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 5 )
|
||||
{
|
||||
while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsF[ k ];
|
||||
fltt = FlttBaseF + k;
|
||||
att = FltAttensF[ k ];
|
||||
}
|
||||
else
|
||||
{
|
||||
while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsG[ k ];
|
||||
fltt = FlttBaseG + k;
|
||||
att = FltAttensG[ k ];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that provides filter data for various steepness indices and
|
||||
* attenuations. For 1/3 resamplings.
|
||||
*
|
||||
* @param ReqAtten Required half-band filter attentuation.
|
||||
* @param SteepIndex Steepness index - 0=steepest. Corresponds to general
|
||||
* upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
|
||||
* etc.
|
||||
*/
|
||||
|
||||
static void getHBFilterThird( const double ReqAtten, const int SteepIndex,
|
||||
const double*& flt, int& fltt, double& att )
|
||||
{
|
||||
static const double HBKernel_3A[ 3 ] = { // -66.3726 dB, 6
|
||||
5.9811355069551475e-001, -1.1793396656733847e-001,
|
||||
2.0300557211946322e-002,};
|
||||
static const double HBKernel_4A[ 4 ] = { // -90.2546 dB, 6
|
||||
6.0645499250612578e-001, -1.3555496505481171e-001,
|
||||
3.4022804962365975e-002, -4.9535418595798757e-003,};
|
||||
static const double HBKernel_5A[ 5 ] = { // -126.5507 dB, 6
|
||||
6.1014115058940210e-001, -1.4393081816629907e-001,
|
||||
4.1760642892852244e-002, -8.9692183234056175e-003,
|
||||
9.9871340618342070e-004,};
|
||||
static const double HBKernel_6A[ 6 ] = { // -150.1839 dB, 6
|
||||
6.1439563420546972e-001, -1.5360187826905250e-001,
|
||||
5.0840891345687034e-002, -1.4053648740561121e-002,
|
||||
2.6771286587305727e-003, -2.5815816044823123e-004,};
|
||||
static const double HBKernel_7A[ 7 ] = { // -173.7068 dB, 6
|
||||
6.1747493476329918e-001, -1.6087373733313212e-001,
|
||||
5.8263075641409430e-002, -1.8872408173431318e-002,
|
||||
4.7421376543513687e-003, -8.0196529612267474e-004,
|
||||
6.7964807393798996e-005,};
|
||||
static const double HBKernel_8A[ 8 ] = { // -197.1454 dB, 6
|
||||
6.1980610947775050e-001, -1.6654070578314714e-001,
|
||||
6.4416567441730327e-002, -2.3307744348719822e-002,
|
||||
6.9909157372312443e-003, -1.5871946293364403e-003,
|
||||
2.4017727382382763e-004, -1.8125308241541697e-005,};
|
||||
static const double HBKernel_9A[ 9 ] = { // -220.5199 dB, 6
|
||||
6.2163188951899306e-001, -1.7108115323810941e-001,
|
||||
6.9588370095600260e-002, -2.7339625080613838e-002,
|
||||
9.2954469183791771e-003, -2.5537179959555429e-003,
|
||||
5.2572290897951021e-004, -7.1813356135154921e-005,
|
||||
4.8802382808892154e-006,};
|
||||
static const int FltCountA = 7;
|
||||
static const int FlttBaseA = 3;
|
||||
static const double FltAttensA[ FltCountA ] = {
|
||||
66.3726, 90.2546, 126.5507, 150.1839, 173.7068, 197.1454, 220.5199, };
|
||||
static const double* const FltPtrsA[ FltCountA ] = {
|
||||
HBKernel_3A, HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, };
|
||||
static const double HBKernel_2B[ 2 ] = { // -71.0965 dB, 12
|
||||
5.6748544264806311e-001, -6.7764090509431732e-002,};
|
||||
static const double HBKernel_3B[ 3 ] = { // -115.7707 dB, 12
|
||||
5.8793612182667199e-001, -1.0070583248877293e-001,
|
||||
1.2771337947163834e-002,};
|
||||
static const double HBKernel_4B[ 4 ] = { // -152.1535 dB, 12
|
||||
5.9960155600862808e-001, -1.2228154335199336e-001,
|
||||
2.5433718917694709e-002, -2.7537562530837154e-003,};
|
||||
static const double HBKernel_5B[ 5 ] = { // -188.2914 dB, 12
|
||||
6.0676859170554343e-001, -1.3689667009876413e-001,
|
||||
3.6288512631926818e-002, -6.7838855305035351e-003,
|
||||
6.2345167677087547e-004,};
|
||||
static const double HBKernel_6B[ 6 ] = { // -224.2705 dB, 12
|
||||
6.1161456341904397e-001, -1.4743901958274458e-001,
|
||||
4.5344160157313275e-002, -1.1207371780924531e-002,
|
||||
1.8328497112594935e-003, -1.4518193006359589e-004,};
|
||||
static const int FltCountB = 5;
|
||||
static const int FlttBaseB = 2;
|
||||
static const double FltAttensB[ FltCountB ] = {
|
||||
71.0965, 115.7707, 152.1535, 188.2914, 224.2705, };
|
||||
static const double* const FltPtrsB[ FltCountB ] = {
|
||||
HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, };
|
||||
static const double HBKernel_1C[ 1 ] = { // -49.4544 dB, 24
|
||||
5.0336730531430562e-001,};
|
||||
static const double HBKernel_2C[ 2 ] = { // -103.1970 dB, 24
|
||||
5.6330232648142819e-001, -6.3309247177420452e-002,};
|
||||
static const double HBKernel_3C[ 3 ] = { // -152.1195 dB, 24
|
||||
5.8643891113580415e-001, -9.8411593011583087e-002,
|
||||
1.1972706651483846e-002,};
|
||||
static const double HBKernel_4C[ 4 ] = { // -200.6182 dB, 24
|
||||
5.9851012363917222e-001, -1.2028885239978220e-001,
|
||||
2.4294521083140615e-002, -2.5157924156609776e-003,};
|
||||
static const double HBKernel_5C[ 5 ] = { // -248.8730 dB, 24
|
||||
6.0590922882030196e-001, -1.3515953438018685e-001,
|
||||
3.5020857107815606e-002, -6.3256196990467053e-003,
|
||||
5.5506815147598793e-004,};
|
||||
static const int FltCountC = 5;
|
||||
static const int FlttBaseC = 1;
|
||||
static const double FltAttensC[ FltCountC ] = {
|
||||
49.4544, 103.1970, 152.1195, 200.6182, 248.8730, };
|
||||
static const double* const FltPtrsC[ FltCountC ] = {
|
||||
HBKernel_1C, HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, };
|
||||
static const double HBKernel_1D[ 1 ] = { // -61.5357 dB, 48
|
||||
5.0083794231068057e-001,};
|
||||
static const double HBKernel_2D[ 2 ] = { // -127.3167 dB, 48
|
||||
5.6270074379958690e-001, -6.2701174487726344e-002,};
|
||||
static const double HBKernel_3D[ 3 ] = { // -188.2990 dB, 48
|
||||
5.8606296210323228e-001, -9.7844644765123029e-002,
|
||||
1.1781683046528768e-002,};
|
||||
static const double HBKernel_4D[ 4 ] = { // -248.8580 dB, 48
|
||||
5.9823601243162516e-001, -1.1979368994739022e-001,
|
||||
2.4017458606412575e-002, -2.4597810910081913e-003,};
|
||||
static const int FltCountD = 4;
|
||||
static const int FlttBaseD = 1;
|
||||
static const double FltAttensD[ FltCountD ] = {
|
||||
61.5357, 127.3167, 188.2990, 248.8580, };
|
||||
static const double* const FltPtrsD[ FltCountD ] = {
|
||||
HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
|
||||
static const double HBKernel_1E[ 1 ] = { // -77.4651 dB, 96
|
||||
5.0013388897382527e-001,};
|
||||
static const double HBKernel_2E[ 2 ] = { // -151.4084 dB, 96
|
||||
5.6255019604317880e-001, -6.2550222932381064e-002,};
|
||||
static const double HBKernel_3E[ 3 ] = { // -224.4365 dB, 96
|
||||
5.8596887234201078e-001, -9.7703321113080305e-002,
|
||||
1.1734448777069783e-002,};
|
||||
static const int FltCountE = 3;
|
||||
static const int FlttBaseE = 1;
|
||||
static const double FltAttensE[ FltCountE ] = {
|
||||
77.4651, 151.4084, 224.4365, };
|
||||
static const double* const FltPtrsE[ FltCountE ] = {
|
||||
HBKernel_1E, HBKernel_2E, HBKernel_3E, };
|
||||
static const double HBKernel_1F[ 1 ] = { // -89.5075 dB, 192
|
||||
5.0003346776264190e-001,};
|
||||
static const double HBKernel_2F[ 2 ] = { // -175.4932 dB, 192
|
||||
5.6251254964097952e-001, -6.2512551321105267e-002,};
|
||||
static const double HBKernel_3F[ 3 ] = { // -260.5645 dB, 192
|
||||
5.8594534336747051e-001, -9.7668015838639821e-002,
|
||||
1.1722672471262996e-002,};
|
||||
static const int FltCountF = 3;
|
||||
static const int FlttBaseF = 1;
|
||||
static const double FltAttensF[ FltCountF ] = {
|
||||
89.5075, 175.4932, 260.5645, };
|
||||
static const double* const FltPtrsF[ FltCountF ] = {
|
||||
HBKernel_1F, HBKernel_2F, HBKernel_3F, };
|
||||
static const double HBKernel_1G[ 1 ] = { // -101.5490 dB, 384
|
||||
5.0000836666064941e-001,};
|
||||
static const double HBKernel_2G[ 2 ] = { // -199.5761 dB, 384
|
||||
5.6250313744943459e-001, -6.2503137554435345e-002,};
|
||||
static const double HBKernel_3G[ 3 ] = { // -296.5185 dB, 384
|
||||
5.8593945786963764e-001, -9.7659186853499613e-002,
|
||||
1.1719728983863425e-002,};
|
||||
static const int FltCountG = 3;
|
||||
static const int FlttBaseG = 1;
|
||||
static const double FltAttensG[ FltCountG ] = {
|
||||
101.5490, 199.5761, 296.5185, };
|
||||
static const double* const FltPtrsG[ FltCountG ] = {
|
||||
HBKernel_1G, HBKernel_2G, HBKernel_3G, };
|
||||
|
||||
int k = 0;
|
||||
|
||||
if( SteepIndex <= 0 )
|
||||
{
|
||||
while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsA[ k ];
|
||||
fltt = FlttBaseA + k;
|
||||
att = FltAttensA[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 1 )
|
||||
{
|
||||
while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsB[ k ];
|
||||
fltt = FlttBaseB + k;
|
||||
att = FltAttensB[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 2 )
|
||||
{
|
||||
while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsC[ k ];
|
||||
fltt = FlttBaseC + k;
|
||||
att = FltAttensC[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 3 )
|
||||
{
|
||||
while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsD[ k ];
|
||||
fltt = FlttBaseD + k;
|
||||
att = FltAttensD[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 4 )
|
||||
{
|
||||
while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsE[ k ];
|
||||
fltt = FlttBaseE + k;
|
||||
att = FltAttensE[ k ];
|
||||
}
|
||||
else
|
||||
if( SteepIndex == 5 )
|
||||
{
|
||||
while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsF[ k ];
|
||||
fltt = FlttBaseF + k;
|
||||
att = FltAttensF[ k ];
|
||||
}
|
||||
else
|
||||
{
|
||||
while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
|
||||
{
|
||||
k++;
|
||||
}
|
||||
|
||||
flt = FltPtrsG[ k ];
|
||||
fltt = FlttBaseG + k;
|
||||
att = FltAttensG[ k ];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor initalizes the half-band upsampler.
|
||||
*
|
||||
* @param ReqAtten Required half-band filter attentuation.
|
||||
* @param SteepIndex Steepness index - 0=steepest. Corresponds to general
|
||||
* upsampling ratio, e.g. at 4x upsampling 0 is used, at 8x upsampling 1
|
||||
* is used, etc.
|
||||
* @param IsThird "True" if 1/3 of frequency response resampling is
|
||||
* performed.
|
||||
* @param PrevLatency Latency, in samples (any value >=0), which was left
|
||||
* in the output signal by a previous process. Whole-number latency will
|
||||
* be consumed by *this object while remaining fractional latency can be
|
||||
* obtained via the getLatencyFrac() function.
|
||||
* @param aDoConsumeLatency "True" if the output latency should be
|
||||
* consumed. Does not apply to the fractional part of the latency (if such
|
||||
* part is available).
|
||||
*/
|
||||
|
||||
CDSPHBUpsampler( const double ReqAtten, const int SteepIndex,
|
||||
const bool IsThird, const double PrevLatency,
|
||||
const bool aDoConsumeLatency = true )
|
||||
: DoConsumeLatency( aDoConsumeLatency )
|
||||
{
|
||||
static const CConvolveFn FltConvFn[ 14 ] = {
|
||||
&CDSPHBUpsampler :: convolve1, &CDSPHBUpsampler :: convolve2,
|
||||
&CDSPHBUpsampler :: convolve3, &CDSPHBUpsampler :: convolve4,
|
||||
&CDSPHBUpsampler :: convolve5, &CDSPHBUpsampler :: convolve6,
|
||||
&CDSPHBUpsampler :: convolve7, &CDSPHBUpsampler :: convolve8,
|
||||
&CDSPHBUpsampler :: convolve9, &CDSPHBUpsampler :: convolve10,
|
||||
&CDSPHBUpsampler :: convolve11, &CDSPHBUpsampler :: convolve12,
|
||||
&CDSPHBUpsampler :: convolve13, &CDSPHBUpsampler :: convolve14 };
|
||||
|
||||
const double* fltp0;
|
||||
int fltt;
|
||||
double att;
|
||||
|
||||
if( IsThird )
|
||||
{
|
||||
getHBFilterThird( ReqAtten, SteepIndex, fltp0, fltt, att );
|
||||
}
|
||||
else
|
||||
{
|
||||
getHBFilter( ReqAtten, SteepIndex, fltp0, fltt, att );
|
||||
}
|
||||
|
||||
// Copy obtained filter to address-aligned buffer.
|
||||
|
||||
fltp = alignptr( FltBuf, 16 );
|
||||
memcpy( fltp, fltp0, fltt * sizeof( fltp[ 0 ]));
|
||||
|
||||
convfn = FltConvFn[ fltt - 1 ];
|
||||
fll = fltt - 1;
|
||||
fl2 = fltt;
|
||||
flo = fll + fl2;
|
||||
BufRP = Buf + fll;
|
||||
|
||||
LatencyFrac = PrevLatency * 2.0;
|
||||
Latency = (int) LatencyFrac;
|
||||
LatencyFrac -= Latency;
|
||||
|
||||
R8BASSERT( Latency >= 0 );
|
||||
|
||||
if( DoConsumeLatency )
|
||||
{
|
||||
flb = BufLen - fll;
|
||||
}
|
||||
else
|
||||
{
|
||||
Latency += fl2 + fl2;
|
||||
flb = BufLen - flo;
|
||||
}
|
||||
|
||||
R8BCONSOLE( "CDSPHBUpsampler: sti=%i third=%i taps=%i att=%.1f "
|
||||
"io=2/1\n", SteepIndex, (int) IsThird, fltt, att );
|
||||
|
||||
clear();
|
||||
}
|
||||
|
||||
virtual int getLatency() const
|
||||
{
|
||||
return( DoConsumeLatency ? 0 : Latency );
|
||||
}
|
||||
|
||||
virtual double getLatencyFrac() const
|
||||
{
|
||||
return( LatencyFrac );
|
||||
}
|
||||
|
||||
virtual int getMaxOutLen( const int MaxInLen ) const
|
||||
{
|
||||
R8BASSERT( MaxInLen >= 0 );
|
||||
|
||||
return( MaxInLen << 1 );
|
||||
}
|
||||
|
||||
virtual void clear()
|
||||
{
|
||||
if( DoConsumeLatency )
|
||||
{
|
||||
LatencyLeft = Latency;
|
||||
BufLeft = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
LatencyLeft = 0;
|
||||
BufLeft = fl2;
|
||||
}
|
||||
|
||||
WritePos = 0;
|
||||
ReadPos = flb; // Set "read" position to account for filter's latency.
|
||||
|
||||
memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
|
||||
}
|
||||
|
||||
virtual int process( double* ip, int l, double*& op0 )
|
||||
{
|
||||
R8BASSERT( l >= 0 );
|
||||
|
||||
double* op = op0;
|
||||
|
||||
while( l > 0 )
|
||||
{
|
||||
// Add new input samples to both halves of the ring buffer.
|
||||
|
||||
const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
|
||||
|
||||
double* const wp1 = Buf + WritePos;
|
||||
memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
|
||||
|
||||
if( WritePos < flo )
|
||||
{
|
||||
const int c = min( b, flo - WritePos );
|
||||
memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
|
||||
}
|
||||
|
||||
ip += b;
|
||||
WritePos = ( WritePos + b ) & BufLenMask;
|
||||
l -= b;
|
||||
BufLeft += b;
|
||||
|
||||
// Produce output.
|
||||
|
||||
if( BufLeft > fl2 )
|
||||
{
|
||||
const int c = BufLeft - fl2;
|
||||
|
||||
double* const opend = op + ( c + c );
|
||||
( *convfn )( op, opend, fltp, BufRP, ReadPos );
|
||||
|
||||
op = opend;
|
||||
ReadPos = ( ReadPos + c ) & BufLenMask;
|
||||
BufLeft -= c;
|
||||
}
|
||||
}
|
||||
|
||||
int ol = (int) ( op - op0 );
|
||||
|
||||
if( LatencyLeft != 0 )
|
||||
{
|
||||
if( LatencyLeft >= ol )
|
||||
{
|
||||
LatencyLeft -= ol;
|
||||
return( 0 );
|
||||
}
|
||||
|
||||
ol -= LatencyLeft;
|
||||
op0 += LatencyLeft;
|
||||
LatencyLeft = 0;
|
||||
}
|
||||
|
||||
return( ol );
|
||||
}
|
||||
|
||||
private:
|
||||
static const int BufLenBits = 9; ///< The length of the ring buffer,
|
||||
///< expressed as Nth power of 2. This value can be reduced if it is
|
||||
///< known that only short input buffers will be passed to the
|
||||
///< interpolator. The minimum value of this parameter is 5, and
|
||||
///< 1 << BufLenBits should be at least 3 times larger than the
|
||||
///< FilterLen.
|
||||
///<
|
||||
static const int BufLen = 1 << BufLenBits; ///< The length of the ring
|
||||
///< buffer. The actual length is twice as long to allow "beyond max
|
||||
///< position" positioning.
|
||||
///<
|
||||
static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
|
||||
///< position wrapping.
|
||||
///<
|
||||
double Buf[ BufLen + 27 ]; ///< The ring buffer, including overrun
|
||||
///< protection for the largest filter.
|
||||
///<
|
||||
double FltBuf[ 14 + 2 ]; ///< Holder for half-band filter taps, used with
|
||||
///< 16-byte address-aligning, for SIMD use.
|
||||
///<
|
||||
double* fltp; ///< Half-band filter taps, points to FltBuf.
|
||||
///<
|
||||
int fll; ///< Input latency.
|
||||
///<
|
||||
int fl2; ///< Right-side filter length.
|
||||
///<
|
||||
int flo; ///< Overrrun length.
|
||||
///<
|
||||
int flb; ///< Initial read position and maximal buffer write length.
|
||||
///<
|
||||
const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
|
||||
///<
|
||||
bool DoConsumeLatency; ///< "True" if the output latency should be
|
||||
///< consumed. Does not apply to the fractional part of the latency
|
||||
///< (if such part is available).
|
||||
///<
|
||||
int Latency; ///< Initial latency that should be removed from the output.
|
||||
///<
|
||||
double LatencyFrac; ///< Fractional latency left on the output.
|
||||
///<
|
||||
int BufLeft; ///< The number of samples left in the buffer to process.
|
||||
///< When this value is below FilterLenD2Plus1, the interpolation
|
||||
///< cycle ends.
|
||||
///<
|
||||
int WritePos; ///< The current buffer write position. Incremented together
|
||||
///< with the BufLeft variable.
|
||||
///<
|
||||
int ReadPos; ///< The current buffer read position.
|
||||
///<
|
||||
int LatencyLeft; ///< Latency left to remove.
|
||||
///<
|
||||
typedef void( *CConvolveFn )( double* op, double* const opend,
|
||||
const double* const flt, const double* const rp0, int rpos ); ///<
|
||||
///< Convolution function type.
|
||||
///<
|
||||
CConvolveFn convfn; ///< Convolution function in use.
|
||||
///<
|
||||
|
||||
#define R8BHBC1( fn ) \
|
||||
static void fn( double* op, double* const opend, const double* const flt, \
|
||||
const double* const rp0, int rpos ) \
|
||||
{ \
|
||||
while( op != opend ) \
|
||||
{ \
|
||||
const double* const rp = rp0 + rpos; \
|
||||
op[ 0 ] = rp[ 0 ];
|
||||
|
||||
#define R8BHBC2 \
|
||||
rpos = ( rpos + 1 ) & BufLenMask; \
|
||||
op += 2; \
|
||||
} \
|
||||
}
|
||||
|
||||
#include "CDSPHBUpsampler.inc"
|
||||
|
||||
#undef R8BHBC1
|
||||
#undef R8BHBC2
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
} // namespace r8b
|
||||
|
||||
#endif // R8B_CDSPHBUPSAMPLER_INCLUDED
|
711
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc
vendored
Normal file
711
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc
vendored
Normal file
@ -0,0 +1,711 @@
|
||||
// Auto-generated by `genhbc`, do not edit!
|
||||
|
||||
#if defined( R8B_SSE2 )
|
||||
|
||||
R8BHBC1( convolve1 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve2 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve3 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
op[ 1 ] += flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve4 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve5 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
op[ 1 ] += flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve6 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve7 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
op[ 1 ] += flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve8 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve9 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
op[ 1 ] += flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve10 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve11 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
op[ 1 ] += flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve12 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve13 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
op[ 1 ] += flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve14 )
|
||||
__m128d v1, v2, m1, s1;
|
||||
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = m1;
|
||||
__m128d v3, v4, m3, s3;
|
||||
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = m3;
|
||||
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
|
||||
m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
|
||||
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
|
||||
s3 = _mm_add_pd( s3, m3 );
|
||||
v2 = _mm_loadu_pd( rp - 13 ); v1 = _mm_loadu_pd( rp + 13 );
|
||||
m1 = _mm_mul_pd( _mm_load_pd( flt + 12 ),
|
||||
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
|
||||
s1 = _mm_add_pd( s1, m1 );
|
||||
s1 = _mm_add_pd( s1, s3 );
|
||||
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
|
||||
R8BHBC2
|
||||
|
||||
#elif defined( R8B_NEON )
|
||||
|
||||
R8BHBC1( convolve1 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve2 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
op[ 1 ] = vaddvq_f64( s1 );
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve3 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve4 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 );
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve5 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve6 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 );
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve7 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve8 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 );
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve9 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve10 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 );
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve11 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve12 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 );
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve13 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve14 )
|
||||
float64x2_t v1, v2, s1;
|
||||
s1 = vdupq_n_f64( 0.0 );
|
||||
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
float64x2_t v3, v4, s3;
|
||||
s3 = vdupq_n_f64( 0.0 );
|
||||
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
|
||||
s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
|
||||
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
|
||||
v2 = vld1q_f64( rp - 13 ); v1 = vld1q_f64( rp + 13 );
|
||||
s1 = vmlaq_f64( s1, vld1q_f64( flt + 12 ),
|
||||
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
|
||||
s1 = vaddq_f64( s1, s3 );
|
||||
op[ 1 ] = vaddvq_f64( s1 );
|
||||
R8BHBC2
|
||||
|
||||
#else // SIMD
|
||||
|
||||
R8BHBC1( convolve1 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve2 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve3 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve4 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve5 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve6 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
|
||||
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve7 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
|
||||
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
|
||||
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve8 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
|
||||
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
|
||||
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
|
||||
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve9 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
|
||||
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
|
||||
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
|
||||
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
|
||||
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve10 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
|
||||
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
|
||||
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
|
||||
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
|
||||
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
|
||||
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve11 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
|
||||
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
|
||||
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
|
||||
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
|
||||
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
|
||||
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
|
||||
+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve12 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
|
||||
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
|
||||
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
|
||||
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
|
||||
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
|
||||
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
|
||||
+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
|
||||
+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve13 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
|
||||
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
|
||||
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
|
||||
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
|
||||
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
|
||||
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
|
||||
+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
|
||||
+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
|
||||
+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
|
||||
R8BHBC2
|
||||
|
||||
R8BHBC1( convolve14 )
|
||||
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
|
||||
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
|
||||
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
|
||||
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
|
||||
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
|
||||
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
|
||||
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
|
||||
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
|
||||
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
|
||||
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
|
||||
+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
|
||||
+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
|
||||
+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ])
|
||||
+ flt[ 13 ] * ( rp[ 14 ] + rp[ -13 ]);
|
||||
R8BHBC2
|
||||
|
||||
#endif // SIMD
|
103
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h
vendored
Normal file
103
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h
vendored
Normal file
@ -0,0 +1,103 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file CDSPProcessor.h
|
||||
*
|
||||
* @brief The base virtual class for DSP processing algorithms.
|
||||
*
|
||||
* This file includes the base virtual class for DSP processing algorithm
|
||||
* classes like FIR filtering and interpolation.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8B_CDSPPROCESSOR_INCLUDED
|
||||
#define R8B_CDSPPROCESSOR_INCLUDED
|
||||
|
||||
#include "r8bbase.h"
|
||||
|
||||
namespace r8b {
|
||||
|
||||
/**
|
||||
* @brief The base virtual class for DSP processing algorithms.
|
||||
*
|
||||
* This class can be used as a base class for various DSP processing
|
||||
* algorithms (processors). DSP processors that are derived from this class
|
||||
* can be seamlessly integrated into various DSP processing graphs.
|
||||
*/
|
||||
|
||||
class CDSPProcessor : public R8B_BASECLASS
|
||||
{
|
||||
R8BNOCTOR( CDSPProcessor );
|
||||
|
||||
public:
|
||||
CDSPProcessor()
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~CDSPProcessor()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The latency, in samples, which is present in the output signal.
|
||||
* This value is usually zero if the DSP processor "consumes" the latency
|
||||
* automatically.
|
||||
*/
|
||||
|
||||
virtual int getLatency() const = 0;
|
||||
|
||||
/**
|
||||
* @return Fractional latency, in samples, which is present in the output
|
||||
* signal. This value is usually zero if a linear-phase filtering is used.
|
||||
* With minimum-phase filters in use, this value can be non-zero even if
|
||||
* the getLatency() function returns zero.
|
||||
*/
|
||||
|
||||
virtual double getLatencyFrac() const = 0;
|
||||
|
||||
/**
|
||||
* @param MaxInLen The number of samples planned to process at once, at
|
||||
* most.
|
||||
* @return The maximal length of the output buffer required when
|
||||
* processing the "MaxInLen" number of input samples.
|
||||
*/
|
||||
|
||||
virtual int getMaxOutLen( const int MaxInLen ) const = 0;
|
||||
|
||||
/**
|
||||
* Function clears (resets) the state of *this object and returns it to
|
||||
* the state after construction. All input data accumulated in the
|
||||
* internal buffer so far will be discarded.
|
||||
*/
|
||||
|
||||
virtual void clear() = 0;
|
||||
|
||||
/**
|
||||
* Function performs DSP processing.
|
||||
*
|
||||
* @param ip Input data pointer.
|
||||
* @param l0 How many samples to process.
|
||||
* @param[out] op0 Output data pointer. The capacity of this buffer should
|
||||
* be equal to the value returned by the getMaxOutLen() function for the
|
||||
* given "l0". This buffer can be equal to "ip" only if the
|
||||
* getMaxOutLen( l0 ) function returned a value lesser than "l0". This
|
||||
* pointer can be incremented on function's return if latency compensation
|
||||
* was performed by the processor. Note that on function's return, this
|
||||
* pointer may point to some internal buffers, including the "ip" buffer,
|
||||
* ignoring the originally passed value.
|
||||
* @return The number of output samples written to the "op0" buffer and
|
||||
* available after processing. This value can be smaller or larger in
|
||||
* comparison to the original "l0" value due to processing and filter's
|
||||
* latency compensation that took place, and due to resampling if it was
|
||||
* performed.
|
||||
*/
|
||||
|
||||
virtual int process( double* ip, int l0, double*& op0 ) = 0;
|
||||
};
|
||||
|
||||
} // namespace r8b
|
||||
|
||||
#endif // R8B_CDSPPROCESSOR_INCLUDED
|
820
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h
vendored
Normal file
820
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h
vendored
Normal file
@ -0,0 +1,820 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file CDSPRealFFT.h
|
||||
*
|
||||
* @brief Real-valued FFT transform class.
|
||||
*
|
||||
* This file includes FFT object implementation. All created FFT objects are
|
||||
* kept in a global list after use, for a future reusal. Such approach
|
||||
* minimizes time necessary to initialize the FFT object of the required
|
||||
* length.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8B_CDSPREALFFT_INCLUDED
|
||||
#define R8B_CDSPREALFFT_INCLUDED
|
||||
|
||||
#include "r8bbase.h"
|
||||
|
||||
#if !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
|
||||
#include "fft4g.h"
|
||||
#endif // !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
|
||||
|
||||
#if R8B_PFFFT
|
||||
#include "pffft.h"
|
||||
#endif // R8B_PFFFT
|
||||
|
||||
#if R8B_PFFFT_DOUBLE
|
||||
#include "pffft_double/pffft_double.h"
|
||||
#endif // R8B_PFFFT_DOUBLE
|
||||
|
||||
namespace r8b {
|
||||
|
||||
/**
|
||||
* @brief Real-valued FFT transform class.
|
||||
*
|
||||
* Class implements a wrapper for real-valued discrete fast Fourier transform
|
||||
* functions. The object of this class can only be obtained via the
|
||||
* CDSPRealFFTKeeper class.
|
||||
*
|
||||
* Uses functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA
|
||||
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
|
||||
*
|
||||
* Also uses Intel IPP library functions if available (if the R8B_IPP=1 macro
|
||||
* was defined). Note that IPP library's FFT functions are 2-3 times more
|
||||
* efficient on the modern Intel Core i7-3770K processor than Ooura's
|
||||
* functions. It may be worthwhile investing in IPP. Note, that FFT functions
|
||||
* take less than 20% of the overall sample rate conversion time. However,
|
||||
* when the "power of 2" resampling is used the performance of FFT functions
|
||||
* becomes "everything".
|
||||
*/
|
||||
|
||||
class CDSPRealFFT : public R8B_BASECLASS
|
||||
{
|
||||
R8BNOCTOR( CDSPRealFFT );
|
||||
|
||||
friend class CDSPRealFFTKeeper;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @return A multiplication constant that should be used after inverse
|
||||
* transform to obtain a correct value scale.
|
||||
*/
|
||||
|
||||
double getInvMulConst() const
|
||||
{
|
||||
return( InvMulConst );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The length (the number of real values in a transform) of *this
|
||||
* FFT object, expressed as Nth power of 2.
|
||||
*/
|
||||
|
||||
int getLenBits() const
|
||||
{
|
||||
return( LenBits );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The length (the number of real values in a transform) of *this
|
||||
* FFT object.
|
||||
*/
|
||||
|
||||
int getLen() const
|
||||
{
|
||||
return( Len );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function performs in-place forward FFT.
|
||||
*
|
||||
* @param[in,out] p Pointer to data block to transform, length should be
|
||||
* equal to *this object's getLen().
|
||||
*/
|
||||
|
||||
void forward( double* const p ) const
|
||||
{
|
||||
#if R8B_FLOATFFT
|
||||
|
||||
float* const op = (float*) p;
|
||||
int i;
|
||||
|
||||
for( i = 0; i < Len; i++ )
|
||||
{
|
||||
op[ i ] = (float) p[ i ];
|
||||
}
|
||||
|
||||
#endif // R8B_FLOATFFT
|
||||
|
||||
#if R8B_IPP
|
||||
|
||||
ippsFFTFwd_RToPerm_64f( p, p, SPtr, WorkBuffer );
|
||||
|
||||
#elif R8B_PFFFT
|
||||
|
||||
pffft_transform_ordered( setup, op, op, work, PFFFT_FORWARD );
|
||||
|
||||
#elif R8B_PFFFT_DOUBLE
|
||||
|
||||
pffftd_transform_ordered( setup, p, p, work, PFFFT_FORWARD );
|
||||
|
||||
#else // R8B_PFFFT_DOUBLE
|
||||
|
||||
ooura_fft :: rdft( Len, 1, p, wi.getPtr(), wd.getPtr() );
|
||||
|
||||
#endif // R8B_IPP
|
||||
}
|
||||
|
||||
/**
|
||||
* Function performs in-place inverse FFT.
|
||||
*
|
||||
* @param[in,out] p Pointer to data block to transform, length should be
|
||||
* equal to *this object's getLen().
|
||||
*/
|
||||
|
||||
void inverse( double* const p ) const
|
||||
{
|
||||
#if R8B_IPP
|
||||
|
||||
ippsFFTInv_PermToR_64f( p, p, SPtr, WorkBuffer );
|
||||
|
||||
#elif R8B_PFFFT
|
||||
|
||||
pffft_transform_ordered( setup, (float*) p, (float*) p, work,
|
||||
PFFFT_BACKWARD );
|
||||
|
||||
#elif R8B_PFFFT_DOUBLE
|
||||
|
||||
pffftd_transform_ordered( setup, p, p, work, PFFFT_BACKWARD );
|
||||
|
||||
#else // R8B_PFFFT_DOUBLE
|
||||
|
||||
ooura_fft :: rdft( Len, -1, p, wi.getPtr(), wd.getPtr() );
|
||||
|
||||
#endif // R8B_IPP
|
||||
|
||||
#if R8B_FLOATFFT
|
||||
|
||||
const float* const ip = (const float*) p;
|
||||
int i;
|
||||
|
||||
for( i = Len - 1; i >= 0; i-- )
|
||||
{
|
||||
p[ i ] = ip[ i ];
|
||||
}
|
||||
|
||||
#endif // R8B_FLOATFFT
|
||||
}
|
||||
|
||||
/**
|
||||
* Function multiplies two complex-valued data blocks and places result in
|
||||
* a new data block. Length of all data blocks should be equal to *this
|
||||
* object's block length. Input blocks should have been produced with the
|
||||
* forward() function of *this object.
|
||||
*
|
||||
* @param aip1 Input data block 1.
|
||||
* @param aip2 Input data block 2.
|
||||
* @param[out] aop Output data block, should not be equal to aip1 nor
|
||||
* aip2.
|
||||
*/
|
||||
|
||||
void multiplyBlocks( const double* const aip1, const double* const aip2,
|
||||
double* const aop ) const
|
||||
{
|
||||
#if R8B_FLOATFFT
|
||||
|
||||
const float* const ip1 = (const float*) aip1;
|
||||
const float* const ip2 = (const float*) aip2;
|
||||
float* const op = (float*) aop;
|
||||
|
||||
#else // R8B_FLOATFFT
|
||||
|
||||
const double* const ip1 = aip1;
|
||||
const double* const ip2 = aip2;
|
||||
double* const op = aop;
|
||||
|
||||
#endif // R8B_FLOATFFT
|
||||
|
||||
#if R8B_IPP
|
||||
|
||||
ippsMulPerm_64f( (Ipp64f*) ip1, (Ipp64f*) ip2, (Ipp64f*) op, Len );
|
||||
|
||||
#else // R8B_IPP
|
||||
|
||||
op[ 0 ] = ip1[ 0 ] * ip2[ 0 ];
|
||||
op[ 1 ] = ip1[ 1 ] * ip2[ 1 ];
|
||||
|
||||
int i = 2;
|
||||
|
||||
while( i < Len )
|
||||
{
|
||||
op[ i ] = ip1[ i ] * ip2[ i ] - ip1[ i + 1 ] * ip2[ i + 1 ];
|
||||
op[ i + 1 ] = ip1[ i ] * ip2[ i + 1 ] + ip1[ i + 1 ] * ip2[ i ];
|
||||
i += 2;
|
||||
}
|
||||
|
||||
#endif // R8B_IPP
|
||||
}
|
||||
|
||||
/**
|
||||
* Function multiplies two complex-valued data blocks in-place. Length of
|
||||
* both data blocks should be equal to *this object's block length. Blocks
|
||||
* should have been produced with the forward() function of *this object.
|
||||
*
|
||||
* @param aip Input data block 1.
|
||||
* @param[in,out] aop Output/input data block 2.
|
||||
*/
|
||||
|
||||
void multiplyBlocks( const double* const aip, double* const aop ) const
|
||||
{
|
||||
#if R8B_FLOATFFT
|
||||
|
||||
const float* const ip = (const float*) aip;
|
||||
float* const op = (float*) aop;
|
||||
float t;
|
||||
|
||||
#else // R8B_FLOATFFT
|
||||
|
||||
const double* const ip = aip;
|
||||
double* const op = aop;
|
||||
|
||||
#if !R8B_IPP
|
||||
double t;
|
||||
#endif // !R8B_IPP
|
||||
|
||||
#endif // R8B_FLOATFFT
|
||||
|
||||
#if R8B_IPP
|
||||
|
||||
ippsMulPerm_64f( (Ipp64f*) op, (Ipp64f*) ip, (Ipp64f*) op, Len );
|
||||
|
||||
#else // R8B_IPP
|
||||
|
||||
op[ 0 ] *= ip[ 0 ];
|
||||
op[ 1 ] *= ip[ 1 ];
|
||||
|
||||
int i = 2;
|
||||
|
||||
while( i < Len )
|
||||
{
|
||||
t = op[ i ] * ip[ i ] - op[ i + 1 ] * ip[ i + 1 ];
|
||||
op[ i + 1 ] = op[ i ] * ip[ i + 1 ] + op[ i + 1 ] * ip[ i ];
|
||||
op[ i ] = t;
|
||||
i += 2;
|
||||
}
|
||||
|
||||
#endif // R8B_IPP
|
||||
}
|
||||
|
||||
/**
|
||||
* Function multiplies two complex-valued data blocks in-place,
|
||||
* considering that the "ip" block contains "zero-phase" response. Length
|
||||
* of both data blocks should be equal to *this object's block length.
|
||||
* Blocks should have been produced with the forward() function of *this
|
||||
* object.
|
||||
*
|
||||
* @param aip Input data block 1, "zero-phase" response. This block should
|
||||
* be first transformed via the convertToZP() function.
|
||||
* @param[in,out] aop Output/input data block 2.
|
||||
*/
|
||||
|
||||
void multiplyBlocksZP( const double* const aip, double* const aop ) const
|
||||
{
|
||||
#if R8B_FLOATFFT
|
||||
|
||||
const float* const ip = (const float*) aip;
|
||||
float* const op = (float*) aop;
|
||||
|
||||
#else // R8B_FLOATFFT
|
||||
|
||||
const double* ip = aip;
|
||||
double* op = aop;
|
||||
|
||||
#endif // R8B_FLOATFFT
|
||||
|
||||
// SIMD implementations assume that pointers are address-aligned.
|
||||
|
||||
#if !R8B_FLOATFFT && defined( R8B_SSE2 )
|
||||
|
||||
int c8 = Len >> 3;
|
||||
|
||||
while( c8 != 0 )
|
||||
{
|
||||
const __m128d iv1 = _mm_load_pd( ip );
|
||||
const __m128d iv2 = _mm_load_pd( ip + 2 );
|
||||
const __m128d ov1 = _mm_load_pd( op );
|
||||
const __m128d ov2 = _mm_load_pd( op + 2 );
|
||||
_mm_store_pd( op, _mm_mul_pd( iv1, ov1 ));
|
||||
_mm_store_pd( op + 2, _mm_mul_pd( iv2, ov2 ));
|
||||
|
||||
const __m128d iv3 = _mm_load_pd( ip + 4 );
|
||||
const __m128d ov3 = _mm_load_pd( op + 4 );
|
||||
const __m128d iv4 = _mm_load_pd( ip + 6 );
|
||||
const __m128d ov4 = _mm_load_pd( op + 6 );
|
||||
_mm_store_pd( op + 4, _mm_mul_pd( iv3, ov3 ));
|
||||
_mm_store_pd( op + 6, _mm_mul_pd( iv4, ov4 ));
|
||||
|
||||
ip += 8;
|
||||
op += 8;
|
||||
c8--;
|
||||
}
|
||||
|
||||
int c = Len & 7;
|
||||
|
||||
while( c != 0 )
|
||||
{
|
||||
*op *= *ip;
|
||||
ip++;
|
||||
op++;
|
||||
c--;
|
||||
}
|
||||
|
||||
#elif !R8B_FLOATFFT && defined( R8B_NEON )
|
||||
|
||||
int c8 = Len >> 3;
|
||||
|
||||
while( c8 != 0 )
|
||||
{
|
||||
const float64x2_t iv1 = vld1q_f64( ip );
|
||||
const float64x2_t iv2 = vld1q_f64( ip + 2 );
|
||||
const float64x2_t ov1 = vld1q_f64( op );
|
||||
const float64x2_t ov2 = vld1q_f64( op + 2 );
|
||||
vst1q_f64( op, vmulq_f64( iv1, ov1 ));
|
||||
vst1q_f64( op + 2, vmulq_f64( iv2, ov2 ));
|
||||
|
||||
const float64x2_t iv3 = vld1q_f64( ip + 4 );
|
||||
const float64x2_t iv4 = vld1q_f64( ip + 6 );
|
||||
const float64x2_t ov3 = vld1q_f64( op + 4 );
|
||||
const float64x2_t ov4 = vld1q_f64( op + 6 );
|
||||
vst1q_f64( op + 4, vmulq_f64( iv3, ov3 ));
|
||||
vst1q_f64( op + 6, vmulq_f64( iv4, ov4 ));
|
||||
|
||||
ip += 8;
|
||||
op += 8;
|
||||
c8--;
|
||||
}
|
||||
|
||||
int c = Len & 7;
|
||||
|
||||
while( c != 0 )
|
||||
{
|
||||
*op *= *ip;
|
||||
ip++;
|
||||
op++;
|
||||
c--;
|
||||
}
|
||||
|
||||
#else // SIMD
|
||||
|
||||
int i;
|
||||
|
||||
for( i = 0; i < Len; i++ )
|
||||
{
|
||||
op[ i ] *= ip[ i ];
|
||||
}
|
||||
|
||||
#endif // SIMD
|
||||
}
|
||||
|
||||
/**
|
||||
* Function converts the specified forward-transformed block into
|
||||
* "zero-phase" form suitable for use with the multiplyBlocksZ() function.
|
||||
*
|
||||
* @param[in,out] ap Block to transform.
|
||||
*/
|
||||
|
||||
void convertToZP( double* const ap ) const
|
||||
{
|
||||
#if R8B_FLOATFFT
|
||||
|
||||
float* const p = (float*) ap;
|
||||
|
||||
#else // R8B_FLOATFFT
|
||||
|
||||
double* const p = ap;
|
||||
|
||||
#endif // R8B_FLOATFFT
|
||||
|
||||
int i = 2;
|
||||
|
||||
while( i < Len )
|
||||
{
|
||||
p[ i + 1 ] = p[ i ];
|
||||
i += 2;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int LenBits; ///< Length of FFT block (expressed as Nth power of 2).
|
||||
///<
|
||||
int Len; ///< Length of FFT block (number of real values).
|
||||
///<
|
||||
double InvMulConst; ///< Inverse FFT multiply constant.
|
||||
///<
|
||||
CDSPRealFFT* Next; ///< Next object in a singly-linked list.
|
||||
///<
|
||||
|
||||
#if R8B_IPP
|
||||
IppsFFTSpec_R_64f* SPtr; ///< Pointer to initialized data buffer
|
||||
///< to be passed to IPP's FFT functions.
|
||||
///<
|
||||
CFixedBuffer< unsigned char > SpecBuffer; ///< Working buffer.
|
||||
///<
|
||||
CFixedBuffer< unsigned char > WorkBuffer; ///< Working buffer.
|
||||
///<
|
||||
#elif R8B_PFFFT
|
||||
PFFFT_Setup* setup; ///< PFFFT setup object.
|
||||
///<
|
||||
CFixedBuffer< float > work; ///< Working buffer.
|
||||
///<
|
||||
#elif R8B_PFFFT_DOUBLE
|
||||
PFFFTD_Setup* setup; ///< PFFFTD setup object.
|
||||
///<
|
||||
CFixedBuffer< double > work; ///< Working buffer.
|
||||
///<
|
||||
#else // R8B_PFFFT_DOUBLE
|
||||
CFixedBuffer< int > wi; ///< Working buffer (ints).
|
||||
///<
|
||||
CFixedBuffer< double > wd; ///< Working buffer (doubles).
|
||||
///<
|
||||
#endif // R8B_IPP
|
||||
|
||||
/**
|
||||
* A simple class that keeps the pointer to the object and deletes it
|
||||
* automatically.
|
||||
*/
|
||||
|
||||
class CObjKeeper
|
||||
{
|
||||
R8BNOCTOR( CObjKeeper );
|
||||
|
||||
public:
|
||||
CObjKeeper()
|
||||
: Object( NULL )
|
||||
{
|
||||
}
|
||||
|
||||
~CObjKeeper()
|
||||
{
|
||||
delete Object;
|
||||
}
|
||||
|
||||
CObjKeeper& operator = ( CDSPRealFFT* const aObject )
|
||||
{
|
||||
Object = aObject;
|
||||
return( *this );
|
||||
}
|
||||
|
||||
operator CDSPRealFFT* () const
|
||||
{
|
||||
return( Object );
|
||||
}
|
||||
|
||||
private:
|
||||
CDSPRealFFT* Object; ///< FFT object being kept.
|
||||
///<
|
||||
};
|
||||
|
||||
CDSPRealFFT()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor initializes FFT object.
|
||||
*
|
||||
* @param aLenBits The length of FFT block (Nth power of 2), specifies the
|
||||
* number of real values in a block. Values from 1 to 30 inclusive are
|
||||
* supported.
|
||||
*/
|
||||
|
||||
CDSPRealFFT( const int aLenBits )
|
||||
: LenBits( aLenBits )
|
||||
, Len( 1 << aLenBits )
|
||||
#if R8B_IPP
|
||||
, InvMulConst( 1.0 / Len )
|
||||
#elif R8B_PFFFT
|
||||
, InvMulConst( 1.0 / Len )
|
||||
#elif R8B_PFFFT_DOUBLE
|
||||
, InvMulConst( 1.0 / Len )
|
||||
#else // R8B_PFFFT_DOUBLE
|
||||
, InvMulConst( 2.0 / Len )
|
||||
#endif // R8B_IPP
|
||||
{
|
||||
#if R8B_IPP
|
||||
|
||||
int SpecSize;
|
||||
int SpecBufferSize;
|
||||
int BufferSize;
|
||||
|
||||
ippsFFTGetSize_R_64f( LenBits, IPP_FFT_NODIV_BY_ANY,
|
||||
ippAlgHintFast, &SpecSize, &SpecBufferSize, &BufferSize );
|
||||
|
||||
CFixedBuffer< unsigned char > InitBuffer( SpecBufferSize );
|
||||
SpecBuffer.alloc( SpecSize );
|
||||
WorkBuffer.alloc( BufferSize );
|
||||
|
||||
ippsFFTInit_R_64f( &SPtr, LenBits, IPP_FFT_NODIV_BY_ANY,
|
||||
ippAlgHintFast, SpecBuffer, InitBuffer );
|
||||
|
||||
#elif R8B_PFFFT
|
||||
|
||||
setup = pffft_new_setup( Len, PFFFT_REAL );
|
||||
work.alloc( Len );
|
||||
|
||||
#elif R8B_PFFFT_DOUBLE
|
||||
|
||||
setup = pffftd_new_setup( Len, PFFFT_REAL );
|
||||
work.alloc( Len );
|
||||
|
||||
#else // R8B_PFFFT_DOUBLE
|
||||
|
||||
wi.alloc( (int) ceil( 2.0 + sqrt( (double) ( Len >> 1 ))));
|
||||
wi[ 0 ] = 0;
|
||||
wd.alloc( Len >> 1 );
|
||||
|
||||
#endif // R8B_IPP
|
||||
}
|
||||
|
||||
~CDSPRealFFT()
|
||||
{
|
||||
#if R8B_PFFFT
|
||||
pffft_destroy_setup( setup );
|
||||
#elif R8B_PFFFT_DOUBLE
|
||||
pffftd_destroy_setup( setup );
|
||||
#endif // R8B_PFFFT_DOUBLE
|
||||
|
||||
delete Next;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A "keeper" class for real-valued FFT transform objects.
|
||||
*
|
||||
* Class implements "keeper" functionality for handling CDSPRealFFT objects.
|
||||
* The allocated FFT objects are placed on the global static list of objects
|
||||
* for future reuse instead of deallocation.
|
||||
*/
|
||||
|
||||
class CDSPRealFFTKeeper : public R8B_BASECLASS
|
||||
{
|
||||
R8BNOCTOR( CDSPRealFFTKeeper );
|
||||
|
||||
public:
|
||||
CDSPRealFFTKeeper()
|
||||
: Object( NULL )
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Function acquires FFT object with the specified block length.
|
||||
*
|
||||
* @param LenBits The length of FFT block (Nth power of 2), in the range
|
||||
* [1; 30] inclusive, specifies the number of real values in a FFT block.
|
||||
*/
|
||||
|
||||
CDSPRealFFTKeeper( const int LenBits )
|
||||
{
|
||||
Object = acquire( LenBits );
|
||||
}
|
||||
|
||||
~CDSPRealFFTKeeper()
|
||||
{
|
||||
if( Object != NULL )
|
||||
{
|
||||
release( Object );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Pointer to the acquired FFT object.
|
||||
*/
|
||||
|
||||
const CDSPRealFFT* operator -> () const
|
||||
{
|
||||
R8BASSERT( Object != NULL );
|
||||
|
||||
return( Object );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function acquires FFT object with the specified block length. This
|
||||
* function can be called any number of times.
|
||||
*
|
||||
* @param LenBits The length of FFT block (Nth power of 2), in the range
|
||||
* [1; 30] inclusive, specifies the number of real values in a FFT block.
|
||||
*/
|
||||
|
||||
void init( const int LenBits )
|
||||
{
|
||||
if( Object != NULL )
|
||||
{
|
||||
if( Object -> LenBits == LenBits )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
release( Object );
|
||||
}
|
||||
|
||||
Object = acquire( LenBits );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function releases a previously acquired FFT object.
|
||||
*/
|
||||
|
||||
void reset()
|
||||
{
|
||||
if( Object != NULL )
|
||||
{
|
||||
release( Object );
|
||||
Object = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
CDSPRealFFT* Object; ///< FFT object.
|
||||
///<
|
||||
|
||||
static CSyncObject StateSync; ///< FFTObjects synchronizer.
|
||||
///<
|
||||
static CDSPRealFFT :: CObjKeeper FFTObjects[]; ///< Pool of FFT objects of
|
||||
///< various lengths.
|
||||
///<
|
||||
|
||||
/**
|
||||
* Function acquires FFT object from the global pool.
|
||||
*
|
||||
* @param LenBits FFT block length (expressed as Nth power of 2).
|
||||
*/
|
||||
|
||||
CDSPRealFFT* acquire( const int LenBits )
|
||||
{
|
||||
R8BASSERT( LenBits > 0 && LenBits <= 30 );
|
||||
|
||||
R8BSYNC( StateSync );
|
||||
|
||||
if( FFTObjects[ LenBits ] == NULL )
|
||||
{
|
||||
return( new CDSPRealFFT( LenBits ));
|
||||
}
|
||||
|
||||
CDSPRealFFT* ffto = FFTObjects[ LenBits ];
|
||||
FFTObjects[ LenBits ] = ffto -> Next;
|
||||
|
||||
return( ffto );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function releases a previously acquired FFT object.
|
||||
*
|
||||
* @param ffto FFT object to release.
|
||||
*/
|
||||
|
||||
void release( CDSPRealFFT* const ffto )
|
||||
{
|
||||
R8BSYNC( StateSync );
|
||||
|
||||
ffto -> Next = FFTObjects[ ffto -> LenBits ];
|
||||
FFTObjects[ ffto -> LenBits ] = ffto;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Function calculates the minimum-phase transform of the filter kernel, using
|
||||
* a discrete Hilbert transform in cepstrum domain.
|
||||
*
|
||||
* For more details, see part III.B of
|
||||
* http://www.hpl.hp.com/personal/Niranjan_Damera-Venkata/files/ComplexMinPhase.pdf
|
||||
*
|
||||
* @param[in,out] Kernel Filter kernel buffer.
|
||||
* @param KernelLen Filter kernel's length, in samples.
|
||||
* @param LenMult Kernel length multiplier. Used as a coefficient of the
|
||||
* "oversampling" in the frequency domain. Such oversampling is needed to
|
||||
* improve the precision of the minimum-phase transform. If the filter's
|
||||
* attenuation is high, this multiplier should be increased or otherwise the
|
||||
* required attenuation will not be reached due to "smoothing" effect of this
|
||||
* transform.
|
||||
* @param DoFinalMul "True" if the final multiplication after transform should
|
||||
* be performed or not. Such multiplication returns the gain of the signal to
|
||||
* its original value. This parameter can be set to "false" if normalization
|
||||
* of the resulting filter kernel is planned to be used.
|
||||
* @param[out] DCGroupDelay If not NULL, this variable receives group delay
|
||||
* at DC offset, in samples (can be a non-integer value).
|
||||
*/
|
||||
|
||||
inline void calcMinPhaseTransform( double* const Kernel, const int KernelLen,
|
||||
const int LenMult = 2, const bool DoFinalMul = true,
|
||||
double* const DCGroupDelay = NULL )
|
||||
{
|
||||
R8BASSERT( KernelLen > 0 );
|
||||
R8BASSERT( LenMult >= 2 );
|
||||
|
||||
const int LenBits = getBitOccupancy(( KernelLen * LenMult ) - 1 );
|
||||
const int Len = 1 << LenBits;
|
||||
const int Len2 = Len >> 1;
|
||||
int i;
|
||||
|
||||
CFixedBuffer< double > ip( Len );
|
||||
CFixedBuffer< double > ip2( Len2 + 1 );
|
||||
|
||||
memcpy( &ip[ 0 ], Kernel, KernelLen * sizeof( ip[ 0 ]));
|
||||
memset( &ip[ KernelLen ], 0, ( Len - KernelLen ) * sizeof( ip[ 0 ]));
|
||||
|
||||
CDSPRealFFTKeeper ffto( LenBits );
|
||||
ffto -> forward( ip );
|
||||
|
||||
// Create the "log |c|" spectrum while saving the original power spectrum
|
||||
// in the "ip2" buffer.
|
||||
|
||||
#if R8B_FLOATFFT
|
||||
float* const aip = (float*) &ip[ 0 ];
|
||||
float* const aip2 = (float*) &ip2[ 0 ];
|
||||
const float nzbias = 1e-35;
|
||||
#else // R8B_FLOATFFT
|
||||
double* const aip = &ip[ 0 ];
|
||||
double* const aip2 = &ip2[ 0 ];
|
||||
const double nzbias = 1e-300;
|
||||
#endif // R8B_FLOATFFT
|
||||
|
||||
aip2[ 0 ] = aip[ 0 ];
|
||||
aip[ 0 ] = log( fabs( aip[ 0 ]) + nzbias );
|
||||
aip2[ Len2 ] = aip[ 1 ];
|
||||
aip[ 1 ] = log( fabs( aip[ 1 ]) + nzbias );
|
||||
|
||||
for( i = 1; i < Len2; i++ )
|
||||
{
|
||||
aip2[ i ] = sqrt( aip[ i * 2 ] * aip[ i * 2 ] +
|
||||
aip[ i * 2 + 1 ] * aip[ i * 2 + 1 ]);
|
||||
|
||||
aip[ i * 2 ] = log( aip2[ i ] + nzbias );
|
||||
aip[ i * 2 + 1 ] = 0.0;
|
||||
}
|
||||
|
||||
// Convert to cepstrum and apply discrete Hilbert transform.
|
||||
|
||||
ffto -> inverse( ip );
|
||||
|
||||
const double m1 = ffto -> getInvMulConst();
|
||||
const double m2 = -m1;
|
||||
|
||||
ip[ 0 ] = 0.0;
|
||||
|
||||
for( i = 1; i < Len2; i++ )
|
||||
{
|
||||
ip[ i ] *= m1;
|
||||
}
|
||||
|
||||
ip[ Len2 ] = 0.0;
|
||||
|
||||
for( i = Len2 + 1; i < Len; i++ )
|
||||
{
|
||||
ip[ i ] *= m2;
|
||||
}
|
||||
|
||||
// Convert Hilbert-transformed cepstrum back to the "log |c|" spectrum and
|
||||
// perform its exponentiation, multiplied by the power spectrum previously
|
||||
// saved in the "ip2" buffer.
|
||||
|
||||
ffto -> forward( ip );
|
||||
|
||||
aip[ 0 ] = aip2[ 0 ];
|
||||
aip[ 1 ] = aip2[ Len2 ];
|
||||
|
||||
for( i = 1; i < Len2; i++ )
|
||||
{
|
||||
aip[ i * 2 + 0 ] = cos( aip[ i * 2 + 1 ]) * aip2[ i ];
|
||||
aip[ i * 2 + 1 ] = sin( aip[ i * 2 + 1 ]) * aip2[ i ];
|
||||
}
|
||||
|
||||
ffto -> inverse( ip );
|
||||
|
||||
if( DoFinalMul )
|
||||
{
|
||||
for( i = 0; i < KernelLen; i++ )
|
||||
{
|
||||
Kernel[ i ] = ip[ i ] * m1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy( &Kernel[ 0 ], &ip[ 0 ], KernelLen * sizeof( Kernel[ 0 ]));
|
||||
}
|
||||
|
||||
if( DCGroupDelay != NULL )
|
||||
{
|
||||
double tmp;
|
||||
|
||||
calcFIRFilterResponseAndGroupDelay( Kernel, KernelLen, 0.0,
|
||||
tmp, tmp, *DCGroupDelay );
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace r8b
|
||||
|
||||
#endif // VOX_CDSPREALFFT_INCLUDED
|
769
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h
vendored
Normal file
769
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h
vendored
Normal file
@ -0,0 +1,769 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file CDSPResampler.h
|
||||
*
|
||||
* @brief The master sample rate converter (resampler) class.
|
||||
*
|
||||
* This file includes the master sample rate converter (resampler) class that
|
||||
* combines all elements of this library into a single front-end class.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8B_CDSPRESAMPLER_INCLUDED
|
||||
#define R8B_CDSPRESAMPLER_INCLUDED
|
||||
|
||||
#include "CDSPHBDownsampler.h"
|
||||
#include "CDSPHBUpsampler.h"
|
||||
#include "CDSPBlockConvolver.h"
|
||||
#include "CDSPFracInterpolator.h"
|
||||
|
||||
namespace r8b {
|
||||
|
||||
/**
|
||||
* @brief The master sample rate converter (resampler) class.
|
||||
*
|
||||
* This class can be considered the "master" sample rate converter (resampler)
|
||||
* class since it combines all functionality of this library into a single
|
||||
* front-end class to perform sample rate conversion to/from any sample rate,
|
||||
* including non-integer sample rates.
|
||||
*
|
||||
* Note that objects of this class can be constructed on the stack as it has a
|
||||
* small member data size. The default template parameters of this class are
|
||||
* suited for 27-bit fixed point resampling.
|
||||
*
|
||||
* Use the CDSPResampler16 class for 16-bit resampling.
|
||||
*
|
||||
* Use the CDSPResampler16IR class for 16-bit impulse response resampling.
|
||||
*
|
||||
* Use the CDSPResampler24 class for 24-bit resampling (including 32-bit
|
||||
* floating point resampling).
|
||||
*/
|
||||
|
||||
class CDSPResampler : public CDSPProcessor
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor initalizes the resampler object.
|
||||
*
|
||||
* Note that increasing the transition band and decreasing attenuation
|
||||
* reduces the filter length, this in turn reduces the "input before
|
||||
* output" delay. However, the filter length has only a minor influence on
|
||||
* the overall resampling speed.
|
||||
*
|
||||
* It should be noted that the ReqAtten specifies the minimal difference
|
||||
* between the loudest input signal component and the produced aliasing
|
||||
* artifacts during resampling. For example, if ReqAtten=100 was specified
|
||||
* when performing 2x upsampling, the analysis of the resulting signal may
|
||||
* display high-frequency components which are quieter than the loudest
|
||||
* part of the input signal by only 100 decibel meaning the high-frequency
|
||||
* part did not become "magically" completely silent after resampling. You
|
||||
* have to specify a higher ReqAtten value if you need a totally clean
|
||||
* high-frequency content. On the other hand, it may not be reasonable to
|
||||
* have a high-frequency content cleaner than the input signal itself: if
|
||||
* the input signal is 16-bit, setting ReqAtten to 180 will make its
|
||||
* high-frequency content 24-bit, but the original part of the signal will
|
||||
* remain 16-bit.
|
||||
*
|
||||
* @param SrcSampleRate Source signal sample rate. Both sample rates can
|
||||
* be specified as a ratio, e.g. SrcSampleRate = 1.0, DstSampleRate = 2.0.
|
||||
* @param DstSampleRate Destination signal sample rate. The "power of 2"
|
||||
* ratios between the source and destination sample rates force resampler
|
||||
* to use several fast "power of 2" resampling steps, without using
|
||||
* fractional interpolation at all.
|
||||
* @param aMaxInLen The maximal planned length of the input buffer (in
|
||||
* samples) that will be passed to the resampler. The resampler relies on
|
||||
* this value as it allocates intermediate buffers. Input buffers longer
|
||||
* than this value should never be supplied to the resampler. Note that
|
||||
* upsampling produces more samples than was provided on input, so at
|
||||
* higher upsampling ratios it is advisable to use smaller MaxInLen
|
||||
* values to reduce memory footprint. When downsampling, a larger MaxInLen
|
||||
* is suggested in order to increase downsampling performance.
|
||||
* @param ReqTransBand Required transition band, in percent of the
|
||||
* spectral space of the input signal (or the output signal if
|
||||
* downsampling is performed) between filter's -3 dB point and the Nyquist
|
||||
* frequency. The range is from CDSPFIRFilter::getLPMinTransBand() to
|
||||
* CDSPFIRFilter::getLPMaxTransBand(), inclusive. When upsampling 88200 or
|
||||
* 96000 audio to a higher sample rates the ReqTransBand can be
|
||||
* considerably increased, up to 30. The selection of ReqTransBand depends
|
||||
* on the level of desire to preserve the high-frequency content. While
|
||||
* values 0.5 to 2 are extremely "greedy" settings, not necessary in most
|
||||
* cases, values 2 to 3 can be used in most cases. Values 3 to 4 are
|
||||
* relaxed settings, but they still offer a flat frequency response up to
|
||||
* 21kHz with 44.1k source or destination sample rate.
|
||||
* @param ReqAtten Required stop-band attenuation in decibel, in the
|
||||
* range CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
|
||||
* inclusive. The actual attenuation may be 0.40-4.46 dB higher. The
|
||||
* general formula for selecting the ReqAtten is 6.02 * Bits + 40, where
|
||||
* "Bits" is the bit resolution (e.g. 16, 24), "40" is an added resolution
|
||||
* for dynamic signals; this value can be decreased to 20 to 10 if the
|
||||
* signal being resampled is non-dynamic (e.g., an impulse response or
|
||||
* filter, with a non-steep frequency response).
|
||||
* @param ReqPhase Required filter's phase response. Note that this
|
||||
* setting does not affect interpolator's phase response which is always
|
||||
* linear-phase. Also note that if the "power of 2" resampling was engaged
|
||||
* by the resampler together with the minimum-phase response, the audio
|
||||
* stream may become fractionally delayed, depending on the minimum-phase
|
||||
* filter's actual fractional delay. Linear-phase filters do not have
|
||||
* fractional delay.
|
||||
* @see CDSPFIRFilterCache::getLPFilter()
|
||||
*/
|
||||
|
||||
CDSPResampler( const double SrcSampleRate, const double DstSampleRate,
|
||||
const int aMaxInLen, const double ReqTransBand = 2.0,
|
||||
const double ReqAtten = 206.91,
|
||||
const EDSPFilterPhaseResponse ReqPhase = fprLinearPhase )
|
||||
: StepCapacity( 0 )
|
||||
, StepCount( 0 )
|
||||
, MaxInLen( aMaxInLen )
|
||||
, CurMaxOutLen( aMaxInLen )
|
||||
, LatencyFrac( 0.0 )
|
||||
{
|
||||
R8BASSERT( SrcSampleRate > 0.0 );
|
||||
R8BASSERT( DstSampleRate > 0.0 );
|
||||
R8BASSERT( MaxInLen > 0 );
|
||||
|
||||
R8BCONSOLE( "* CDSPResampler: src=%.1f dst=%.1f len=%i tb=%.1f "
|
||||
"att=%.2f ph=%i\n", SrcSampleRate, DstSampleRate, aMaxInLen,
|
||||
ReqTransBand, ReqAtten, (int) ReqPhase );
|
||||
|
||||
if( SrcSampleRate == DstSampleRate )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
TmpBufCapacities[ 0 ] = 0;
|
||||
TmpBufCapacities[ 1 ] = 0;
|
||||
CurTmpBuf = 0;
|
||||
|
||||
// Try some common efficient ratios requiring only a single step.
|
||||
|
||||
const int CommonRatioCount = 5;
|
||||
const int CommonRatios[ CommonRatioCount ][ 2 ] = {
|
||||
{ 1, 2 },
|
||||
{ 1, 3 },
|
||||
{ 2, 3 },
|
||||
{ 3, 2 },
|
||||
{ 3, 4 }
|
||||
};
|
||||
|
||||
int i;
|
||||
|
||||
for( i = 0; i < CommonRatioCount; i++ )
|
||||
{
|
||||
const int num = CommonRatios[ i ][ 0 ];
|
||||
const int den = CommonRatios[ i ][ 1 ];
|
||||
|
||||
if( SrcSampleRate * num == DstSampleRate * den )
|
||||
{
|
||||
addProcessor( new CDSPBlockConvolver(
|
||||
CDSPFIRFilterCache :: getLPFilter(
|
||||
1.0 / ( num > den ? num : den ), ReqTransBand,
|
||||
ReqAtten, ReqPhase, num ), num, den, LatencyFrac ));
|
||||
|
||||
createTmpBuffers();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Try whole-number power-of-2 or 3*power-of-2 upsampling.
|
||||
|
||||
for( i = 2; i <= 3; i++ )
|
||||
{
|
||||
bool WasFound = false;
|
||||
int c = 0;
|
||||
|
||||
while( true )
|
||||
{
|
||||
const double NewSR = SrcSampleRate * ( i << c );
|
||||
|
||||
if( NewSR == DstSampleRate )
|
||||
{
|
||||
WasFound = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if( NewSR > DstSampleRate )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
c++;
|
||||
}
|
||||
|
||||
if( WasFound )
|
||||
{
|
||||
addProcessor( new CDSPBlockConvolver(
|
||||
CDSPFIRFilterCache :: getLPFilter( 1.0 / i, ReqTransBand,
|
||||
ReqAtten, ReqPhase, i ), i, 1, LatencyFrac ));
|
||||
|
||||
const bool IsThird = ( i == 3 );
|
||||
|
||||
for( i = 0; i < c; i++ )
|
||||
{
|
||||
addProcessor( new CDSPHBUpsampler( ReqAtten, i, IsThird,
|
||||
LatencyFrac ));
|
||||
}
|
||||
|
||||
createTmpBuffers();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if( DstSampleRate * 2.0 > SrcSampleRate )
|
||||
{
|
||||
// Upsampling or fractional downsampling down to 2X.
|
||||
|
||||
const double NormFreq = ( DstSampleRate > SrcSampleRate ? 0.5 :
|
||||
0.5 * DstSampleRate / SrcSampleRate );
|
||||
|
||||
addProcessor( new CDSPBlockConvolver(
|
||||
CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
|
||||
ReqAtten, ReqPhase, 2.0 ), 2, 1, LatencyFrac ));
|
||||
|
||||
// Try intermediate interpolated resampling with subsequent 2X
|
||||
// or 3X upsampling.
|
||||
|
||||
const double tbw = 0.0175; // Intermediate filter's transition
|
||||
// band extension coefficient.
|
||||
const double ThreshSampleRate = SrcSampleRate /
|
||||
( 1.0 - tbw * ReqTransBand ); // Make sure intermediate
|
||||
// filter's transition band is not steeper than ReqTransBand
|
||||
// (this keeps the latency under control).
|
||||
|
||||
int c = 0;
|
||||
int div = 1;
|
||||
|
||||
while( true )
|
||||
{
|
||||
const int ndiv = div * 2;
|
||||
|
||||
if( DstSampleRate < ThreshSampleRate * ndiv )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
div = ndiv;
|
||||
c++;
|
||||
}
|
||||
|
||||
int c2 = 0;
|
||||
int div2 = 1;
|
||||
|
||||
while( true )
|
||||
{
|
||||
const int ndiv = div * ( c2 == 0 ? 3 : 2 );
|
||||
|
||||
if( DstSampleRate < ThreshSampleRate * ndiv )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
div2 = ndiv;
|
||||
c2++;
|
||||
}
|
||||
|
||||
const double SrcSampleRate2 = SrcSampleRate * 2.0;
|
||||
int tmp1;
|
||||
int tmp2;
|
||||
|
||||
if( c == 1 && getWholeStepping( SrcSampleRate2, DstSampleRate,
|
||||
tmp1, tmp2 ))
|
||||
{
|
||||
// Do not use intermediate interpolation if whole stepping is
|
||||
// available as it performs very fast.
|
||||
|
||||
c = 0;
|
||||
}
|
||||
|
||||
if( c > 0 )
|
||||
{
|
||||
// Add steps using intermediate interpolation.
|
||||
|
||||
int num;
|
||||
|
||||
if( c2 > 0 && div2 > div )
|
||||
{
|
||||
div = div2;
|
||||
c = c2;
|
||||
num = 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
num = 2;
|
||||
}
|
||||
|
||||
addProcessor( new CDSPFracInterpolator( SrcSampleRate2 * div,
|
||||
DstSampleRate, ReqAtten, false, LatencyFrac ));
|
||||
|
||||
double tb = ( 1.0 - SrcSampleRate * div / DstSampleRate ) /
|
||||
tbw; // Divide TransBand by a constant that assures a
|
||||
// linear response in the pass-band.
|
||||
|
||||
if( tb > CDSPFIRFilter :: getLPMaxTransBand() )
|
||||
{
|
||||
tb = CDSPFIRFilter :: getLPMaxTransBand();
|
||||
}
|
||||
|
||||
addProcessor( new CDSPBlockConvolver(
|
||||
CDSPFIRFilterCache :: getLPFilter( 1.0 / num, tb,
|
||||
ReqAtten, ReqPhase, num ), num, 1, LatencyFrac ));
|
||||
|
||||
const bool IsThird = ( num == 3 );
|
||||
|
||||
for( i = 1; i < c; i++ )
|
||||
{
|
||||
addProcessor( new CDSPHBUpsampler( ReqAtten, i - 1,
|
||||
IsThird, LatencyFrac ));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
addProcessor( new CDSPFracInterpolator( SrcSampleRate2,
|
||||
DstSampleRate, ReqAtten, false, LatencyFrac ));
|
||||
}
|
||||
|
||||
createTmpBuffers();
|
||||
return;
|
||||
}
|
||||
|
||||
// Use downsampling steps, including power-of-2 downsampling.
|
||||
|
||||
double CheckSR = DstSampleRate * 4.0;
|
||||
int c = 0;
|
||||
double FinGain = 1.0;
|
||||
|
||||
while( CheckSR <= SrcSampleRate )
|
||||
{
|
||||
c++;
|
||||
CheckSR *= 2.0;
|
||||
FinGain *= 0.5;
|
||||
}
|
||||
|
||||
const int SrcSRDiv = ( 1 << c );
|
||||
int downf;
|
||||
double NormFreq = 0.5;
|
||||
bool UseInterp = true;
|
||||
bool IsThird = false;
|
||||
|
||||
for( downf = 2; downf <= 3; downf++ )
|
||||
{
|
||||
if( DstSampleRate * SrcSRDiv * downf == SrcSampleRate )
|
||||
{
|
||||
NormFreq = 1.0 / downf;
|
||||
UseInterp = false;
|
||||
IsThird = ( downf == 3 );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( UseInterp )
|
||||
{
|
||||
downf = 1;
|
||||
NormFreq = DstSampleRate * SrcSRDiv / SrcSampleRate;
|
||||
IsThird = ( NormFreq * 3.0 <= 1.0 );
|
||||
}
|
||||
|
||||
for( i = 0; i < c; i++ )
|
||||
{
|
||||
// Use a fixed very relaxed 2X downsampling filters, that at
|
||||
// the final stage only guarantees stop-band between 0.75 and
|
||||
// pi. 0.5-0.75 range will be aliased to 0.25-0.5 range which
|
||||
// will then be filtered out by the final filter.
|
||||
|
||||
addProcessor( new CDSPHBDownsampler( ReqAtten, c - 1 - i, IsThird,
|
||||
LatencyFrac ));
|
||||
}
|
||||
|
||||
addProcessor( new CDSPBlockConvolver(
|
||||
CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
|
||||
ReqAtten, ReqPhase, FinGain ), 1, downf, LatencyFrac ));
|
||||
|
||||
if( UseInterp )
|
||||
{
|
||||
addProcessor( new CDSPFracInterpolator( SrcSampleRate,
|
||||
DstSampleRate * SrcSRDiv, ReqAtten, IsThird, LatencyFrac ));
|
||||
}
|
||||
|
||||
createTmpBuffers();
|
||||
}
|
||||
|
||||
virtual ~CDSPResampler()
|
||||
{
|
||||
int i;
|
||||
|
||||
for( i = 0; i < StepCount; i++ )
|
||||
{
|
||||
delete Steps[ i ];
|
||||
}
|
||||
}
|
||||
|
||||
virtual int getLatency() const
|
||||
{
|
||||
return( 0 );
|
||||
}
|
||||
|
||||
virtual double getLatencyFrac() const
|
||||
{
|
||||
return( LatencyFrac );
|
||||
}
|
||||
|
||||
/**
|
||||
* This function ignores the supplied parameter and returns the maximal
|
||||
* output buffer length that depends on the MaxInLen supplied to the
|
||||
* constructor.
|
||||
*/
|
||||
|
||||
virtual int getMaxOutLen( const int/* MaxInLen */ ) const
|
||||
{
|
||||
return( CurMaxOutLen );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function clears (resets) the state of *this object and returns it to
|
||||
* the state after construction. All input data accumulated in the
|
||||
* internal buffer so far will be discarded.
|
||||
*
|
||||
* This function makes it possible to use *this object for converting
|
||||
* separate streams from the same source sample rate to the same
|
||||
* destination sample rate without reconstructing the object. It is more
|
||||
* efficient to clear the state of the resampler object than to destroy it
|
||||
* and create a new object.
|
||||
*/
|
||||
|
||||
virtual void clear()
|
||||
{
|
||||
int i;
|
||||
|
||||
for( i = 0; i < StepCount; i++ )
|
||||
{
|
||||
Steps[ i ] -> clear();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function performs sample rate conversion.
|
||||
*
|
||||
* If the source and destination sample rates are equal, the resampler
|
||||
* will do nothing and will simply return the input buffer unchanged.
|
||||
*
|
||||
* You do not need to allocate an intermediate output buffer for use with
|
||||
* this function. If required, the resampler will allocate a suitable
|
||||
* intermediate output buffer itself.
|
||||
*
|
||||
* @param ip0 Input buffer. This buffer is never used as output buffer by
|
||||
* this function. This pointer may be returned in "op0" if no resampling
|
||||
* is happening (source sample rate equals destination sample rate).
|
||||
* @param l The number of samples available in the input buffer. Should
|
||||
* not exceed the MaxInLen supplied in the constructor.
|
||||
* @param[out] op0 This variable receives the pointer to the resampled
|
||||
* data. On function's return, this pointer points to *this object's
|
||||
* internal buffer. In real-time applications it is suggested to pass this
|
||||
* pointer to the next output audio block and consume any data left from
|
||||
* the previous output audio block first before calling the process()
|
||||
* function again. The buffer pointed to by the "op0" on return is owned
|
||||
* by the resampler, so it should not be freed by the caller.
|
||||
* @return The number of samples available in the "op0" output buffer. If
|
||||
* the data from the output buffer "op0" is going to be written to a
|
||||
* bigger output buffer, it is suggested to check the returned number of
|
||||
* samples so that no overflow of the bigger output buffer happens.
|
||||
*/
|
||||
|
||||
virtual int process( double* ip0, int l, double*& op0 )
|
||||
{
|
||||
R8BASSERT( l >= 0 );
|
||||
|
||||
double* ip = ip0;
|
||||
int i;
|
||||
|
||||
for( i = 0; i < StepCount; i++ )
|
||||
{
|
||||
double* op = TmpBufs[ i & 1 ];
|
||||
l = Steps[ i ] -> process( ip, l, op );
|
||||
ip = op;
|
||||
}
|
||||
|
||||
op0 = ip;
|
||||
return( l );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function performs resampling of an input sample buffer of the specified
|
||||
* length in the "one-shot" mode. This function can be useful when impulse
|
||||
* response resampling is required.
|
||||
*
|
||||
* @param ip Input buffer pointer.
|
||||
* @param iplen Length of the input buffer in samples.
|
||||
* @param[out] op Output buffer pointer.
|
||||
* @param oplen Length of the output buffer in samples.
|
||||
* @tparam Tin Input buffer's element type.
|
||||
* @tparam Tout Output buffer's element type.
|
||||
*/
|
||||
|
||||
template< typename Tin, typename Tout >
|
||||
void oneshot( const Tin* ip, int iplen, Tout* op, int oplen )
|
||||
{
|
||||
CFixedBuffer< double > Buf( MaxInLen );
|
||||
bool IsZero = false;
|
||||
|
||||
while( oplen > 0 )
|
||||
{
|
||||
int rc;
|
||||
double* p;
|
||||
int i;
|
||||
|
||||
if( iplen == 0 )
|
||||
{
|
||||
rc = MaxInLen;
|
||||
p = &Buf[ 0 ];
|
||||
|
||||
if( !IsZero )
|
||||
{
|
||||
IsZero = true;
|
||||
memset( p, 0, MaxInLen * sizeof( p[ 0 ]));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
rc = min( iplen, MaxInLen );
|
||||
|
||||
if( sizeof( Tin ) == sizeof( double ))
|
||||
{
|
||||
p = (double*) ip;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = &Buf[ 0 ];
|
||||
|
||||
for( i = 0; i < rc; i++ )
|
||||
{
|
||||
p[ i ] = ip[ i ];
|
||||
}
|
||||
}
|
||||
|
||||
ip += rc;
|
||||
iplen -= rc;
|
||||
}
|
||||
|
||||
double* op0;
|
||||
int wc = process( p, rc, op0 );
|
||||
wc = min( oplen, wc );
|
||||
|
||||
for( i = 0; i < wc; i++ )
|
||||
{
|
||||
op[ i ] = (Tout) op0[ i ];
|
||||
}
|
||||
|
||||
op += wc;
|
||||
oplen -= wc;
|
||||
}
|
||||
|
||||
clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Function obtains overall input sample count required to produce first
|
||||
* output sample. Function works by iteratively passing 1 sample at a time
|
||||
* until output begins. This is a relatively CPU-consuming operation. This
|
||||
* function should be called after the clear() function call or after
|
||||
* object's construction. The function itself calls the clear() function
|
||||
* before return.
|
||||
*
|
||||
* Note that it is advisable to cache the value returned by this function,
|
||||
* for each SrcSampleRate/DstSampleRate pair, if it is called frequently.
|
||||
*/
|
||||
|
||||
int getInLenBeforeOutStart()
|
||||
{
|
||||
int inc = 0;
|
||||
|
||||
while( true )
|
||||
{
|
||||
double ins = 0.0;
|
||||
double* op;
|
||||
|
||||
if( process( &ins, 1, op ) > 0 )
|
||||
{
|
||||
clear();
|
||||
return( inc );
|
||||
}
|
||||
|
||||
inc++;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
CFixedBuffer< CDSPProcessor* > Steps; ///< Array of processing steps.
|
||||
///<
|
||||
int StepCapacity; ///< The capacity of the Steps array.
|
||||
///<
|
||||
int StepCount; ///< The number of created processing steps.
|
||||
///<
|
||||
int MaxInLen; ///< Maximal input length.
|
||||
///<
|
||||
CFixedBuffer< double > TmpBufAll; ///< Buffer containing both temporary
|
||||
///< buffers.
|
||||
///<
|
||||
double* TmpBufs[ 2 ]; ///< Temporary output buffers.
|
||||
///<
|
||||
int TmpBufCapacities[ 2 ]; ///< Capacities of temporary buffers, updated
|
||||
///< during processing steps building.
|
||||
///<
|
||||
int CurTmpBuf; ///< Current temporary buffer.
|
||||
///<
|
||||
int CurMaxOutLen; ///< Current maximal output length.
|
||||
///<
|
||||
double LatencyFrac; ///< Current fractional latency. After object's
|
||||
///< construction, equals to the remaining fractional latency in the
|
||||
///< output.
|
||||
///<
|
||||
|
||||
/**
|
||||
* Function adds processor, updates MaxOutLen variable and adjusts length
|
||||
* of temporary internal buffers.
|
||||
*
|
||||
* @param Proc Processor to add. This pointer is inherited and will be
|
||||
* destroyed on *this object's destruction.
|
||||
*/
|
||||
|
||||
void addProcessor( CDSPProcessor* const Proc )
|
||||
{
|
||||
if( StepCount == StepCapacity )
|
||||
{
|
||||
// Reallocate and increase Steps array's capacity.
|
||||
|
||||
const int NewCapacity = StepCapacity + 8;
|
||||
Steps.realloc( StepCapacity, NewCapacity );
|
||||
StepCapacity = NewCapacity;
|
||||
}
|
||||
|
||||
LatencyFrac = Proc -> getLatencyFrac();
|
||||
CurMaxOutLen = Proc -> getMaxOutLen( CurMaxOutLen );
|
||||
|
||||
if( CurMaxOutLen > TmpBufCapacities[ CurTmpBuf ])
|
||||
{
|
||||
TmpBufCapacities[ CurTmpBuf ] = CurMaxOutLen;
|
||||
}
|
||||
|
||||
CurTmpBuf ^= 1;
|
||||
|
||||
Steps[ StepCount ] = Proc;
|
||||
StepCount++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function creates temporary buffers.
|
||||
*/
|
||||
|
||||
void createTmpBuffers()
|
||||
{
|
||||
const int ol = TmpBufCapacities[ 0 ] + TmpBufCapacities[ 1 ];
|
||||
|
||||
if( ol > 0 )
|
||||
{
|
||||
TmpBufAll.alloc( ol );
|
||||
TmpBufs[ 0 ] = &TmpBufAll[ 0 ];
|
||||
TmpBufs[ 1 ] = &TmpBufAll[ TmpBufCapacities[ 0 ]];
|
||||
}
|
||||
|
||||
R8BCONSOLE( "* CDSPResampler: init done\n" );
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief The resampler class for 16-bit resampling.
|
||||
*
|
||||
* This class defines resampling parameters suitable for 16-bit resampling,
|
||||
* using linear-phase low-pass filter. See the r8b::CDSPResampler class for
|
||||
* details.
|
||||
*/
|
||||
|
||||
class CDSPResampler16 : public CDSPResampler
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor initializes the 16-bit resampler. See the
|
||||
* r8b::CDSPResampler class for details.
|
||||
*
|
||||
* @param SrcSampleRate Source signal sample rate.
|
||||
* @param DstSampleRate Destination signal sample rate.
|
||||
* @param aMaxInLen The maximal planned length of the input buffer (in
|
||||
* samples) that will be passed to the resampler.
|
||||
* @param ReqTransBand Required transition band, in percent.
|
||||
*/
|
||||
|
||||
CDSPResampler16( const double SrcSampleRate, const double DstSampleRate,
|
||||
const int aMaxInLen, const double ReqTransBand = 2.0 )
|
||||
: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
|
||||
136.45, fprLinearPhase )
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief The resampler class for 16-bit impulse response resampling.
|
||||
*
|
||||
* This class defines resampling parameters suitable for 16-bit impulse
|
||||
* response resampling, using linear-phase low-pass filter. Impulse responses
|
||||
* are non-dynamic signals, and thus need resampler with a lesser SNR. See the
|
||||
* r8b::CDSPResampler class for details.
|
||||
*/
|
||||
|
||||
class CDSPResampler16IR : public CDSPResampler
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor initializes the 16-bit impulse response resampler. See the
|
||||
* r8b::CDSPResampler class for details.
|
||||
*
|
||||
* @param SrcSampleRate Source signal sample rate.
|
||||
* @param DstSampleRate Destination signal sample rate.
|
||||
* @param aMaxInLen The maximal planned length of the input buffer (in
|
||||
* samples) that will be passed to the resampler.
|
||||
* @param ReqTransBand Required transition band, in percent.
|
||||
*/
|
||||
|
||||
CDSPResampler16IR( const double SrcSampleRate, const double DstSampleRate,
|
||||
const int aMaxInLen, const double ReqTransBand = 2.0 )
|
||||
: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
|
||||
109.56, fprLinearPhase )
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief The resampler class for 24-bit resampling.
|
||||
*
|
||||
* This class defines resampling parameters suitable for 24-bit resampling
|
||||
* (including 32-bit floating point resampling), using linear-phase low-pass
|
||||
* filter. See the r8b::CDSPResampler class for details.
|
||||
*/
|
||||
|
||||
class CDSPResampler24 : public CDSPResampler
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor initializes the 24-bit resampler (including 32-bit floating
|
||||
* point). See the r8b::CDSPResampler class for details.
|
||||
*
|
||||
* @param SrcSampleRate Source signal sample rate.
|
||||
* @param DstSampleRate Destination signal sample rate.
|
||||
* @param aMaxInLen The maximal planned length of the input buffer (in
|
||||
* samples) that will be passed to the resampler.
|
||||
* @param ReqTransBand Required transition band, in percent.
|
||||
*/
|
||||
|
||||
CDSPResampler24( const double SrcSampleRate, const double DstSampleRate,
|
||||
const int aMaxInLen, const double ReqTransBand = 2.0 )
|
||||
: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
|
||||
180.15, fprLinearPhase )
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace r8b
|
||||
|
||||
#endif // R8B_CDSPRESAMPLER_INCLUDED
|
687
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h
vendored
Normal file
687
Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h
vendored
Normal file
@ -0,0 +1,687 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file CDSPSincFilterGen.h
|
||||
*
|
||||
* @brief Sinc function-based FIR filter generator class.
|
||||
*
|
||||
* This file includes the CDSPSincFilterGen class implementation that
|
||||
* generates FIR filters.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8B_CDSPSINCFILTERGEN_INCLUDED
|
||||
#define R8B_CDSPSINCFILTERGEN_INCLUDED
|
||||
|
||||
#include "r8bbase.h"
|
||||
|
||||
namespace r8b {
|
||||
|
||||
/**
|
||||
* @brief Sinc function-based FIR filter generator class.
|
||||
*
|
||||
* Structure that holds state used to perform generation of sinc functions of
|
||||
* various types, windowed by the Blackman window by default (but the window
|
||||
* function can be changed if necessary).
|
||||
*/
|
||||
|
||||
class CDSPSincFilterGen
|
||||
{
|
||||
public:
|
||||
double Len2; ///< Required half filter kernel's length in samples (can be
|
||||
///< a fractional value). Final physical kernel length will be
|
||||
///< provided in the KernelLen variable. Len2 should be >= 2.
|
||||
///<
|
||||
int KernelLen; ///< Resulting length of the filter kernel, this variable
|
||||
///< is set after the call to one of the "init" functions.
|
||||
///<
|
||||
int fl2; ///< Internal "half kernel length" value. This value can be used
|
||||
///< as filter's latency in samples (taps), this variable is set after
|
||||
///< the call to one of the "init" functions.
|
||||
///<
|
||||
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
double Freq1; ///< Required corner circular frequency 1 [0; pi].
|
||||
///< Used only in the generateBand() function.
|
||||
///<
|
||||
double Freq2; ///< Required corner circular frequency 2 [0; pi].
|
||||
///< Used only in the generateBand() function. The range
|
||||
///< [Freq1; Freq2] defines a pass band for the generateBand()
|
||||
///< function.
|
||||
///<
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
double FracDelay; ///< Fractional delay in the range [0; 1], used
|
||||
///< only in the generateFrac() function. Note that the
|
||||
///< FracDelay parameter is actually inversed. At 0.0 value it
|
||||
///< produces 1 sample delay (with the latency equal to fl2),
|
||||
///< at 1.0 value it produces 0 sample delay (with the latency
|
||||
///< equal to fl2 - 1).
|
||||
///<
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Window function type.
|
||||
*/
|
||||
|
||||
enum EWindowFunctionType
|
||||
{
|
||||
wftCosine, ///< Generalized cosine window function. No parameters
|
||||
///< required. The "Power" parameter is optional.
|
||||
///<
|
||||
wftKaiser, ///< Kaiser window function. Requires the "Beta" parameter.
|
||||
///< The "Power" parameter is optional.
|
||||
///<
|
||||
wftGaussian ///< Gaussian window function. Requires the "Sigma"
|
||||
///< parameter. The "Power" parameter is optional.
|
||||
///<
|
||||
};
|
||||
|
||||
typedef double( CDSPSincFilterGen :: *CWindowFunc )(); ///< Window
|
||||
///< calculation function pointer type.
|
||||
///<
|
||||
|
||||
/**
|
||||
* Function initializes *this structure for generation of a window
|
||||
* function, odd-sized.
|
||||
*
|
||||
* @param WinType Window function type.
|
||||
* @param Params Window function's parameters. If NULL, the table values
|
||||
* may be used.
|
||||
* @param UsePower "True" if the power factor should be used to raise the
|
||||
* window function. If "true", the power factor should be specified as the
|
||||
* last value in the Params array. If Params is NULL, the table or default
|
||||
* value of -1.0 (off) will be used.
|
||||
*/
|
||||
|
||||
void initWindow( const EWindowFunctionType WinType = wftCosine,
|
||||
const double* const Params = NULL, const bool UsePower = false )
|
||||
{
|
||||
R8BASSERT( Len2 >= 2.0 );
|
||||
|
||||
fl2 = (int) floor( Len2 );
|
||||
KernelLen = fl2 + fl2 + 1;
|
||||
|
||||
setWindow( WinType, Params, UsePower, true );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function initializes *this structure for generation of band-limited
|
||||
* sinc filter kernel. The generateBand() function should be used to
|
||||
* calculate the filter.
|
||||
*
|
||||
* @param WinType Window function type.
|
||||
* @param Params Window function's parameters. If NULL, the table values
|
||||
* may be used.
|
||||
* @param UsePower "True" if the power factor should be used to raise the
|
||||
* window function. If "true", the power factor should be specified as the
|
||||
* last value in the Params array. If Params is NULL, the table or default
|
||||
* value of -1.0 (off) will be used.
|
||||
*/
|
||||
|
||||
void initBand( const EWindowFunctionType WinType = wftCosine,
|
||||
const double* const Params = NULL, const bool UsePower = false )
|
||||
{
|
||||
R8BASSERT( Len2 >= 2.0 );
|
||||
|
||||
fl2 = (int) floor( Len2 );
|
||||
KernelLen = fl2 + fl2 + 1;
|
||||
f1.init( Freq1, 0.0 );
|
||||
f2.init( Freq2, 0.0 );
|
||||
|
||||
setWindow( WinType, Params, UsePower, true );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function initializes *this structure for Hilbert transformation filter
|
||||
* calculation. Freq1 and Freq2 variables are not used.
|
||||
* The generateHilbert() function should be used to calculate the filter.
|
||||
*
|
||||
* @param WinType Window function type.
|
||||
* @param Params Window function's parameters. If NULL, the table values
|
||||
* may be used.
|
||||
* @param UsePower "True" if the power factor should be used to raise the
|
||||
* window function. If "true", the power factor should be specified as the
|
||||
* last value in the Params array. If Params is NULL, the table or default
|
||||
* value of -1.0 (off) will be used.
|
||||
*/
|
||||
|
||||
void initHilbert( const EWindowFunctionType WinType = wftCosine,
|
||||
const double* const Params = NULL, const bool UsePower = false )
|
||||
{
|
||||
R8BASSERT( Len2 >= 2.0 );
|
||||
|
||||
fl2 = (int) floor( Len2 );
|
||||
KernelLen = fl2 + fl2 + 1;
|
||||
|
||||
setWindow( WinType, Params, UsePower, true );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function initializes *this structure for generation of full-bandwidth
|
||||
* fractional delay sinc filter kernel. Freq1 and Freq2 variables are not
|
||||
* used. The generateFrac() function should be used to calculate the
|
||||
* filter.
|
||||
*
|
||||
* @param WinType Window function type.
|
||||
* @param Params Window function's parameters. If NULL, the table values
|
||||
* may be used.
|
||||
* @param UsePower "True" if the power factor should be used to raise the
|
||||
* window function. If "true", the power factor should be specified as the
|
||||
* last value in the Params array. If Params is NULL, the table or default
|
||||
* value of -1.0 (off) will be used.
|
||||
*/
|
||||
|
||||
void initFrac( const EWindowFunctionType WinType = wftCosine,
|
||||
const double* const Params = NULL, const bool UsePower = false )
|
||||
{
|
||||
R8BASSERT( Len2 >= 2.0 );
|
||||
|
||||
fl2 = (int) ceil( Len2 );
|
||||
KernelLen = fl2 + fl2;
|
||||
|
||||
setWindow( WinType, Params, UsePower, false, FracDelay );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The next "Hann" window function coefficient.
|
||||
*/
|
||||
|
||||
double calcWindowHann()
|
||||
{
|
||||
return( 0.5 + 0.5 * w1.generate() );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The next "Hamming" window function coefficient.
|
||||
*/
|
||||
|
||||
double calcWindowHamming()
|
||||
{
|
||||
return( 0.54 + 0.46 * w1.generate() );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The next "Blackman" window function coefficient.
|
||||
*/
|
||||
|
||||
double calcWindowBlackman()
|
||||
{
|
||||
return( 0.42 + 0.5 * w1.generate() + 0.08 * w2.generate() );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The next "Nuttall" window function coefficient.
|
||||
*/
|
||||
|
||||
double calcWindowNuttall()
|
||||
{
|
||||
return( 0.355768 + 0.487396 * w1.generate() +
|
||||
0.144232 * w2.generate() + 0.012604 * w3.generate() );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The next "Blackman-Nuttall" window function coefficient.
|
||||
*/
|
||||
|
||||
double calcWindowBlackmanNuttall()
|
||||
{
|
||||
return( 0.3635819 + 0.4891775 * w1.generate() +
|
||||
0.1365995 * w2.generate() + 0.0106411 * w3.generate() );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The next "Kaiser" window function coefficient.
|
||||
*/
|
||||
|
||||
double calcWindowKaiser()
|
||||
{
|
||||
const double n = 1.0 - sqr( wn / Len2 + KaiserLen2Frac );
|
||||
wn++;
|
||||
|
||||
if( n < 0.0 )
|
||||
{
|
||||
return( 0.0 );
|
||||
}
|
||||
|
||||
return( besselI0( KaiserBeta * sqrt( n )) / KaiserDiv );
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The next "Gaussian" window function coefficient.
|
||||
*/
|
||||
|
||||
double calcWindowGaussian()
|
||||
{
|
||||
const double f = exp( -0.5 * sqr( wn / GaussianSigma +
|
||||
GaussianSigmaFrac ));
|
||||
|
||||
wn++;
|
||||
|
||||
return( f );
|
||||
}
|
||||
|
||||
/**
|
||||
* Function calculates window function only.
|
||||
*
|
||||
* @param[out] op Output buffer, length = KernelLen.
|
||||
* @param wfunc Window calculation function to use.
|
||||
*/
|
||||
|
||||
void generateWindow( double* op,
|
||||
CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
|
||||
{
|
||||
op += fl2;
|
||||
double* op2 = op;
|
||||
|
||||
int l = fl2;
|
||||
|
||||
if( Power < 0.0 )
|
||||
{
|
||||
*op = ( *this.*wfunc )();
|
||||
|
||||
while( l > 0 )
|
||||
{
|
||||
const double v = ( *this.*wfunc )();
|
||||
|
||||
op++;
|
||||
op2--;
|
||||
*op = v;
|
||||
*op2 = v;
|
||||
l--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*op = pows(( *this.*wfunc )(), Power );
|
||||
|
||||
while( l > 0 )
|
||||
{
|
||||
const double v = pows(( *this.*wfunc )(), Power );
|
||||
|
||||
op++;
|
||||
op2--;
|
||||
*op = v;
|
||||
*op2 = v;
|
||||
l--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function calculates band-limited windowed sinc function-based filter
|
||||
* kernel.
|
||||
*
|
||||
* @param[out] op Output buffer, length = KernelLen.
|
||||
* @param wfunc Window calculation function to use.
|
||||
*/
|
||||
|
||||
void generateBand( double* op,
|
||||
CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
|
||||
{
|
||||
op += fl2;
|
||||
double* op2 = op;
|
||||
f1.generate();
|
||||
f2.generate();
|
||||
int t = 1;
|
||||
|
||||
if( Power < 0.0 )
|
||||
{
|
||||
*op = ( Freq2 - Freq1 ) * ( *this.*wfunc )() / R8B_PI;
|
||||
|
||||
while( t <= fl2 )
|
||||
{
|
||||
const double v = ( f2.generate() - f1.generate() ) *
|
||||
( *this.*wfunc )() / ( t * R8B_PI );
|
||||
|
||||
op++;
|
||||
op2--;
|
||||
*op = v;
|
||||
*op2 = v;
|
||||
t++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*op = ( Freq2 - Freq1 ) * pows(( *this.*wfunc )(), Power ) /
|
||||
R8B_PI;
|
||||
|
||||
while( t <= fl2 )
|
||||
{
|
||||
const double v = ( f2.generate() - f1.generate() ) *
|
||||
pows(( *this.*wfunc )(), Power ) / ( t * R8B_PI );
|
||||
|
||||
op++;
|
||||
op2--;
|
||||
*op = v;
|
||||
*op2 = v;
|
||||
t++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function calculates windowed Hilbert transformer filter kernel.
|
||||
*
|
||||
* @param[out] op Output buffer, length = KernelLen.
|
||||
* @param wfunc Window calculation function to use.
|
||||
*/
|
||||
|
||||
void generateHilbert( double* op,
|
||||
CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
|
||||
{
|
||||
static const double fvalues[ 2 ] = { 0.0, 2.0 / R8B_PI };
|
||||
op += fl2;
|
||||
double* op2 = op;
|
||||
|
||||
( *this.*wfunc )();
|
||||
*op = 0.0;
|
||||
|
||||
int t = 1;
|
||||
|
||||
if( Power < 0.0 )
|
||||
{
|
||||
while( t <= fl2 )
|
||||
{
|
||||
const double v = fvalues[ t & 1 ] * ( *this.*wfunc )() / t;
|
||||
op++;
|
||||
op2--;
|
||||
*op = v;
|
||||
*op2 = -v;
|
||||
t++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while( t <= fl2 )
|
||||
{
|
||||
const double v = fvalues[ t & 1 ] *
|
||||
pows( ( *this.*wfunc )(), Power ) / t;
|
||||
|
||||
op++;
|
||||
op2--;
|
||||
*op = v;
|
||||
*op2 = -v;
|
||||
t++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function calculates windowed fractional delay filter kernel.
|
||||
*
|
||||
* @param[out] op Output buffer, length = KernelLen.
|
||||
* @param wfunc Window calculation function to use.
|
||||
* @param opinc Output buffer increment, in "op" elements.
|
||||
*/
|
||||
|
||||
void generateFrac( double* op,
|
||||
CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman,
|
||||
const int opinc = 1 )
|
||||
{
|
||||
R8BASSERT( opinc != 0 );
|
||||
|
||||
double f[ 2 ];
|
||||
f[ 0 ] = sin( FracDelay * R8B_PI ) / R8B_PI;
|
||||
f[ 1 ] = -f[ 0 ];
|
||||
|
||||
int t = -fl2;
|
||||
|
||||
if( t + FracDelay < -Len2 )
|
||||
{
|
||||
( *this.*wfunc )();
|
||||
*op = 0.0;
|
||||
op += opinc;
|
||||
t++;
|
||||
}
|
||||
|
||||
int IsZeroX = ( fabs( FracDelay - 1.0 ) < 0x1p-42 );
|
||||
int mt = 0 - IsZeroX;
|
||||
IsZeroX = ( IsZeroX || fabs( FracDelay ) < 0x1p-42 );
|
||||
|
||||
if( Power < 0.0 )
|
||||
{
|
||||
while( t < mt )
|
||||
{
|
||||
*op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
|
||||
op += opinc;
|
||||
t++;
|
||||
}
|
||||
|
||||
if( IsZeroX ) // t+FracDelay==0
|
||||
{
|
||||
*op = ( *this.*wfunc )();
|
||||
}
|
||||
else
|
||||
{
|
||||
*op = f[ t & 1 ] * ( *this.*wfunc )() / FracDelay; // t==0
|
||||
}
|
||||
|
||||
mt = fl2 - 2;
|
||||
|
||||
while( t < mt )
|
||||
{
|
||||
op += opinc;
|
||||
t++;
|
||||
*op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
|
||||
}
|
||||
|
||||
op += opinc;
|
||||
t++;
|
||||
const double ut = t + FracDelay;
|
||||
*op = ( ut > Len2 ? 0.0 : f[ t & 1 ] * ( *this.*wfunc )() / ut );
|
||||
}
|
||||
else
|
||||
{
|
||||
while( t < mt )
|
||||
{
|
||||
*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
|
||||
( t + FracDelay );
|
||||
|
||||
op += opinc;
|
||||
t++;
|
||||
}
|
||||
|
||||
if( IsZeroX ) // t+FracDelay==0
|
||||
{
|
||||
*op = pows( ( *this.*wfunc )(), Power );
|
||||
}
|
||||
else
|
||||
{
|
||||
*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
|
||||
FracDelay; // t==0
|
||||
}
|
||||
|
||||
mt = fl2 - 2;
|
||||
|
||||
while( t < mt )
|
||||
{
|
||||
op += opinc;
|
||||
t++;
|
||||
*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
|
||||
( t + FracDelay );
|
||||
}
|
||||
|
||||
op += opinc;
|
||||
t++;
|
||||
const double ut = t + FracDelay;
|
||||
*op = ( ut > Len2 ? 0.0 : f[ t & 1 ] *
|
||||
pows( ( *this.*wfunc )(), Power ) / ut );
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
double Power; ///< The power factor used to raise the window function.
|
||||
///< Equals a negative value if the power factor should not be used.
|
||||
///<
|
||||
CSineGen f1; ///< Sine function 1. Used in the generateBand() function.
|
||||
///<
|
||||
CSineGen f2; ///< Sine function 2. Used in the generateBand() function.
|
||||
///<
|
||||
int wn; ///< Window function integer position. 0 - center of the window
|
||||
///< function. This variable may not be used by some window functions.
|
||||
///<
|
||||
CSineGen w1; ///< Cosine wave 1 for window function.
|
||||
///<
|
||||
CSineGen w2; ///< Cosine wave 2 for window function.
|
||||
///<
|
||||
CSineGen w3; ///< Cosine wave 3 for window function.
|
||||
///<
|
||||
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
double KaiserBeta; ///< Kaiser window function's "Beta"
|
||||
///< coefficient.
|
||||
///<
|
||||
double KaiserDiv; ///< Kaiser window function's divisor.
|
||||
///<
|
||||
double KaiserLen2Frac; ///< Equals FracDelay / Len2.
|
||||
///<
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
double GaussianSigma; ///< Gaussian window function's "Sigma"
|
||||
///< coefficient.
|
||||
///<
|
||||
double GaussianSigmaFrac; ///< Equals FracDelay / GaussianSigma.
|
||||
///<
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Function initializes Kaiser window function calculation. The FracDelay
|
||||
* variable should be initialized when using this window function.
|
||||
*
|
||||
* @param Params Function parameters. If NULL, the default values will be
|
||||
* used. If not NULL, the first parameter should specify the "Beta" value.
|
||||
* @param UsePower "True" if the power factor should be used to raise the
|
||||
* window function.
|
||||
* @param IsCentered "True" if centered window should be used. This
|
||||
* parameter usually equals to "false" for fractional delay filters only.
|
||||
*/
|
||||
|
||||
void setWindowKaiser( const double* Params, const bool UsePower,
|
||||
const bool IsCentered )
|
||||
{
|
||||
wn = ( IsCentered ? 0 : -fl2 );
|
||||
|
||||
if( Params == NULL )
|
||||
{
|
||||
KaiserBeta = 9.5945013206755156;
|
||||
Power = ( UsePower ? 1.9718457932433306 : -1.0 );
|
||||
}
|
||||
else
|
||||
{
|
||||
KaiserBeta = clampr( Params[ 0 ], 1.0, 350.0 );
|
||||
Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
|
||||
}
|
||||
|
||||
KaiserDiv = besselI0( KaiserBeta );
|
||||
KaiserLen2Frac = FracDelay / Len2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function initializes Gaussian window function calculation. The FracDelay
|
||||
* variable should be initialized when using this window function.
|
||||
*
|
||||
* @param Params Function parameters. If NULL, the table values will be
|
||||
* used. If not NULL, the first parameter should specify the "Sigma"
|
||||
* value.
|
||||
* @param UsePower "True" if the power factor should be used to raise the
|
||||
* window function.
|
||||
* @param IsCentered "True" if centered window should be used. This
|
||||
* parameter usually equals to "false" for fractional delay filters only.
|
||||
*/
|
||||
|
||||
void setWindowGaussian( const double* Params, const bool UsePower,
|
||||
const bool IsCentered )
|
||||
{
|
||||
wn = ( IsCentered ? 0 : -fl2 );
|
||||
|
||||
if( Params == NULL )
|
||||
{
|
||||
GaussianSigma = 1.0;
|
||||
Power = -1.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
GaussianSigma = clampr( fabs( Params[ 0 ]), 1e-1, 100.0 );
|
||||
Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
|
||||
}
|
||||
|
||||
GaussianSigma *= Len2;
|
||||
GaussianSigmaFrac = FracDelay / GaussianSigma;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function initializes calculation of window function of the specified
|
||||
* type.
|
||||
*
|
||||
* @param WinType Window function type.
|
||||
* @param Params Window function's parameters. If NULL, the table values
|
||||
* may be used.
|
||||
* @param UsePower "True" if the power factor should be used to raise the
|
||||
* window function. If "true", the power factor should be specified as the
|
||||
* last value in the Params array. If Params is NULL, the table or default
|
||||
* value of -1.0 (off) will be used.
|
||||
* @param IsCentered "True" if centered window should be used. This
|
||||
* parameter usually equals to "false" for fractional delay filters only.
|
||||
* @param UseFracDelay Fractional delay to use.
|
||||
*/
|
||||
|
||||
void setWindow( const EWindowFunctionType WinType,
|
||||
const double* const Params, const bool UsePower,
|
||||
const bool IsCentered, const double UseFracDelay = 0.0 )
|
||||
{
|
||||
FracDelay = UseFracDelay;
|
||||
|
||||
if( WinType == wftCosine )
|
||||
{
|
||||
if( IsCentered )
|
||||
{
|
||||
w1.init( R8B_PI / Len2, R8B_PId2 );
|
||||
w2.init( R8B_2PI / Len2, R8B_PId2 );
|
||||
w3.init( R8B_3PI / Len2, R8B_PId2 );
|
||||
}
|
||||
else
|
||||
{
|
||||
const double step1 = R8B_PI / Len2;
|
||||
w1.init( step1, R8B_PId2 - step1 * fl2 + step1 * FracDelay );
|
||||
|
||||
const double step2 = R8B_2PI / Len2;
|
||||
w2.init( step2, R8B_PId2 - step2 * fl2 + step2 * FracDelay );
|
||||
|
||||
const double step3 = R8B_3PI / Len2;
|
||||
w3.init( step3, R8B_PId2 - step3 * fl2 + step3 * FracDelay );
|
||||
}
|
||||
|
||||
Power = ( UsePower && Params != NULL ? Params[ 0 ] : -1.0 );
|
||||
}
|
||||
else
|
||||
if( WinType == wftKaiser )
|
||||
{
|
||||
setWindowKaiser( Params, UsePower, IsCentered );
|
||||
}
|
||||
else
|
||||
if( WinType == wftGaussian )
|
||||
{
|
||||
setWindowGaussian( Params, UsePower, IsCentered );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace r8b
|
||||
|
||||
#endif // R8B_CDSPSINCFILTERGEN_INCLUDED
|
21
Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE
vendored
Normal file
21
Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2013-2022 Aleksey Vaneev
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
6
Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt
vendored
Normal file
6
Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
r8brain-free resampling library from https://github.com/avaneev/r8brain-free-src
|
||||
version 5.6.
|
||||
The example program, DLL and "other" folder, PFFFT sources and logo file have been removed.
|
||||
No further local changes have been made.
|
||||
For building, premake is used to generate Visual Studio project files.
|
||||
See ../build/premake/ for details.
|
408
Src/external_dependencies/openmpt-trunk/include/r8brain/README.md
vendored
Normal file
408
Src/external_dependencies/openmpt-trunk/include/r8brain/README.md
vendored
Normal file
@ -0,0 +1,408 @@
|
||||
# r8brain-free-src - High-Quality Resampler #
|
||||
|
||||
## Introduction ##
|
||||
|
||||
Open source (under the MIT license) high-quality professional audio sample
|
||||
rate converter (SRC) / resampler C++ library. Features routines for SRC,
|
||||
both up- and downsampling, to/from any sample rate, including non-integer
|
||||
sample rates: it can be also used for conversion to/from SACD/DSD sample
|
||||
rates, and even go beyond that. SRC routines were implemented in a
|
||||
multi-platform C++ code, and have a high level of optimality. Also suitable
|
||||
for fast general-purpose 1D time-series resampling / interpolation (with
|
||||
relaxed filter parameters).
|
||||
|
||||
The structure of this library's objects is such that they can be frequently
|
||||
created and destroyed in large applications with a minimal performance impact
|
||||
due to a high level of reusability of its most "initialization-expensive"
|
||||
objects: the fast Fourier transform and FIR filter objects.
|
||||
|
||||
The SRC algorithm at first produces 2X oversampled (relative to the source
|
||||
sample rate, or the destination sample rate if the downsampling is performed)
|
||||
signal, then performs interpolation using a bank of short (8 to 30 taps,
|
||||
depending on the required precision) polynomial-interpolated sinc
|
||||
function-based fractional delay filters. This puts the algorithm into the
|
||||
league of the fastest among the most precise SRC algorithms. The more precise
|
||||
alternative being only the whole number-factored SRC, which can be slower.
|
||||
|
||||
P.S. Please credit the creator of this library in your documentation in the
|
||||
following way: "Sample rate converter designed by Aleksey Vaneev of Voxengo".
|
||||
|
||||
## Requirements ##
|
||||
|
||||
C++ compiler and system with the "double" floating point type (53-bit
|
||||
mantissa) support. No explicit code for the "float" type is present in this
|
||||
library, because as practice has shown the "float"-based code performs
|
||||
considerably slower on a modern processor, at least in this library. This
|
||||
library does not have dependencies beside the standard C library, the
|
||||
"windows.h" on Windows and the "pthread.h" on macOS and Linux.
|
||||
|
||||
## Links ##
|
||||
|
||||
* [Documentation](https://www.voxengo.com/public/r8brain-free-src/Documentation/)
|
||||
* [Discussion](https://www.kvraudio.com/forum/viewtopic.php?t=389711)
|
||||
|
||||
## Usage Information ##
|
||||
|
||||
The sample rate converter (resampler) is represented by the
|
||||
**r8b::CDSPResampler** class, which is a single front-end class for the
|
||||
whole library. You do not basically need to use nor understand any other
|
||||
classes beside this class. Several derived classes that have varying levels
|
||||
of precision are also available (for full-resolution 16-bit and 24-bit
|
||||
resampling).
|
||||
|
||||
The code of the library resides in the "r8b" C++ namespace, effectively
|
||||
isolating it from all other code. The code is thread-safe. A separate
|
||||
resampler object should be created for each audio channel or stream being
|
||||
processed concurrently.
|
||||
|
||||
Note that you will need to compile the "r8bbase.cpp" source file and include
|
||||
the resulting object file into your application build. This source file
|
||||
includes definitions of several global static objects used by the library.
|
||||
You may also need to include to your project: the "Kernel32" library (on
|
||||
Windows) and the "pthread" library on macOS and Linux.
|
||||
|
||||
The library is able to process signal of any scale and loudness: it is not
|
||||
limited to just a "usual" -1.0 to 1.0 range.
|
||||
|
||||
By defining the `R8B_IPP` configuration macro it is possible to enable Intel
|
||||
IPP back-end for FFT functions, instead of the default Ooura FFT. IPP FFT
|
||||
makes sample rate conversion faster by 23% on average.
|
||||
|
||||
#define R8B_IPP 1
|
||||
|
||||
If a larger initial processing delay and a very minor sample timing error are
|
||||
not an issue, for the most efficiency you can define these macros at
|
||||
the beginning of the `r8bconf.h` file, or during compilation:
|
||||
|
||||
#define R8B_IPP 1
|
||||
#define R8B_FASTTIMING 1
|
||||
#define R8B_EXTFFT 1
|
||||
|
||||
If you do not have access to the Intel IPP then you may consider enabling the
|
||||
PFFFT which is only slightly slower than Intel IPP FFT in performance. There
|
||||
are two macros available: `R8B_PFFFT` and `R8B_PFFFT_DOUBLE`. The first macro
|
||||
enables PFFFT that works in single-precision resolution, thus limiting the
|
||||
overall resampler's precision to 24-bit sample rate conversions (for
|
||||
mission-critical professional audio applications, using the `R8B_PFFFT` macro
|
||||
is not recommended as its peak error is quite large). The second macro
|
||||
enables PFFFT implementation that works in double-precision resolution, making
|
||||
use of SSE2, AVX, and NEON intrinsics, yielding precision that is equal to
|
||||
both Intel IPP and Ooura FFT implementations.
|
||||
|
||||
To use the PFFFT, define the `R8B_PFFFT` or `R8B_PFFFT_DOUBLE` macro, compile
|
||||
and include the supplied `pffft.cpp` or `pffft_double/pffft_double.c` file to
|
||||
your project build.
|
||||
|
||||
#define R8B_PFFFT 1
|
||||
|
||||
or
|
||||
|
||||
#define R8B_PFFFT_DOUBLE 1
|
||||
|
||||
The code of this library was commented in the [Doxygen](http://www.doxygen.org/)
|
||||
style. To generate the documentation locally you may run the
|
||||
`doxygen ./other/r8bdoxy.txt` command from the library's folder.
|
||||
|
||||
Preliminary tests show that the r8b::CDSPResampler24 resampler class achieves
|
||||
`31.8*n_cores` Mrops (`44*n_cores` for Intel IPP FFT) when converting 1
|
||||
channel of 24-bit audio from 44100 to 96000 sample rate (2% transition band),
|
||||
on a Ryzen 3700X processor-based 64-bit system. This approximately translates
|
||||
to a real-time resampling of `720*n_cores` (`1000*n_cores`) audio streams, at
|
||||
100% CPU load. Speed performance when converting to other sample rates may
|
||||
vary greatly. When comparing performance of this resampler library to another
|
||||
library make sure that the competing library is also tuned to produce a fully
|
||||
linear-phase response, has similar stop-band characteristics and similar
|
||||
sample timing precision.
|
||||
|
||||
## Dynamic Link Library ##
|
||||
|
||||
The functions of this SRC library are also accessible in simplified form via
|
||||
the DLL file on Windows, requiring a processor with SSE2 support (Win64
|
||||
version includes AVX2 auto-dispatch code). Delphi Pascal interface unit file
|
||||
for the DLL file is available. DLL and C LIB files are distributed in the DLL
|
||||
folder on the project's homepage. On non-Windows systems it is preferrable
|
||||
to use the C++ library directly. Note that the DLL was compiled with the
|
||||
Intel IPP enabled.
|
||||
|
||||
## Real-Time Applications ##
|
||||
|
||||
The resampler class of this library was designed as an asynchronous processor:
|
||||
it may produce any number of output samples, depending on the input sample
|
||||
data length and the resampling parameters. The resampler must be fed with the
|
||||
input sample data until enough output sample data were produced, with any
|
||||
excess output samples used before feeding the resampler with more input data.
|
||||
A "relief" factor here is that the resampler removes the initial processing
|
||||
latency automatically, and that after initial moments of processing the output
|
||||
becomes steady, with only minor output sample data length fluctuations.
|
||||
|
||||
So, while for an off-line resampling a "push" method can be used,
|
||||
demonstrated in the `example.cpp` file, for a real-time resampling a "pull"
|
||||
method should be used which calls the resampling process until the output
|
||||
buffer is filled.
|
||||
|
||||
## Notes ##
|
||||
|
||||
When using the **r8b::CDSPResampler** class directly, you may select the
|
||||
transition band/steepness of the low-pass (reconstruction) filter, expressed
|
||||
as a percentage of the full spectral bandwidth of the input signal (or the
|
||||
output signal if the downsampling is performed), and the desired stop-band
|
||||
attenuation in decibel.
|
||||
|
||||
The transition band is specified as the normalized spectral space of the input
|
||||
signal (or the output signal if the downsampling is performed) between the
|
||||
low-pass filter's -3 dB point and the Nyquist frequency, and ranges from 0.5%
|
||||
to 45%. Stop-band attenuation can be specified in the range from 49 to 218
|
||||
decibel. Both the transition band and stop-band attenuation affect
|
||||
resampler's overall speed performance and initial output delay. For your
|
||||
information, transition frequency range spans 175% of the specified transition
|
||||
band, which means that for 2% transition band, frequency response below
|
||||
0.965\*Nyquist is linear.
|
||||
|
||||
This SRC library also implements a much faster "power of 2" resampling (e.g.
|
||||
2X, 4X, 8X, 16X, 3X, 3\*2X, 3\*4X, 3\*8X, etc. upsampling and downsampling),
|
||||
which is engaged automatically if the resampling parameters permit.
|
||||
|
||||
This library was tested for compatibility with [GNU C++](https://gcc.gnu.org/),
|
||||
[Microsoft Visual C++](https://visualstudio.microsoft.com/),
|
||||
[LLVM](https://llvm.org/) and [Intel C++](https://software.intel.com/en-us/c-compilers)
|
||||
compilers, on 32- and 64-bit Windows, macOS, and CentOS Linux.
|
||||
|
||||
Most code is "inline", without the need to compile many source files. The
|
||||
memory footprint is quite modest.
|
||||
|
||||
## Acknowledgements ##
|
||||
|
||||
r8brain-free-src is bundled with the following code:
|
||||
|
||||
* FFT routines Copyright (c) 1996-2001 Takuya OOURA.
|
||||
[Homepage](http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
|
||||
* PFFFT Copyright (c) 2013 Julien Pommier.
|
||||
[Homepage](https://bitbucket.org/jpommier/pffft)
|
||||
* PFFFT DOUBLE Copyright (c) 2020 Hayati Ayguen, Dario Mambro.
|
||||
[Homepage](https://github.com/unevens/pffft)
|
||||
|
||||
## Users ##
|
||||
|
||||
This library is used by:
|
||||
|
||||
* [REAPER](https://reaper.fm/)
|
||||
* [AUDIRVANA](https://audirvana.com/)
|
||||
* [Red Dead Redemption 2](https://www.rockstargames.com/reddeadredemption2/credits)
|
||||
* [Mini Piano Lite](https://play.google.com/store/apps/details?id=umito.android.minipiano)
|
||||
* [OpenMPT](https://openmpt.org/)
|
||||
* [Boogex Guitar Amp audio plugin](https://www.voxengo.com/product/boogex/)
|
||||
* [r8brain free sample rate converter](https://www.voxengo.com/product/r8brain/)
|
||||
* [Voice Aloud Reader](https://play.google.com/store/apps/details?id=com.hyperionics.avarLic)
|
||||
* [Zynewave Podium](https://zynewave.com/podium/)
|
||||
* [Phonometrica](http://www.phonometrica-ling.org/index.html)
|
||||
* [Ripcord](https://cancel.fm/ripcord/)
|
||||
* [TensorVox](https://github.com/ZDisket/TensorVox)
|
||||
* [Curvessor](https://github.com/unevens/Curvessor)
|
||||
|
||||
Please send me a note via aleksey.vaneev@gmail.com and I will include a link
|
||||
to your software product to this list of users. This list is important in
|
||||
maintaining confidence in this library among the interested parties. The
|
||||
inclusion into this list is not mandatory.
|
||||
|
||||
## Change Log ##
|
||||
|
||||
Version 5.6:
|
||||
|
||||
* Added SSE and NEON implementations to `CDSPHBUpsampler` yielding 15%
|
||||
performance improvement of power-of-2 upsampling.
|
||||
* Added SSE and NEON implementations to the `CDSPRealFFT::multiplyBlocksZP`
|
||||
function, for 2-3% performance improvement.
|
||||
* Added intermediate interpolator's transition band limitation, for logical
|
||||
hardness (not practically needed).
|
||||
* Added the `aDoConsumeLatency` parameter to `CDSPHBUpsampler` constructor,
|
||||
for "inline" DSP uses of the class.
|
||||
* Made various minor changes across the codebase.
|
||||
|
||||
Version 5.5:
|
||||
|
||||
* Hardened positional logic of fractional filter calculation, removed
|
||||
redundant multiplications.
|
||||
* Removed unnecessary function templating from the `CDSPSincFilterGen` class.
|
||||
* Added the `__ARM_NEON` macro to NEON availability detection.
|
||||
|
||||
Version 5.4:
|
||||
|
||||
* Added compiler specializations to previously optimized inner loops.
|
||||
"Shuffled" SIMD interpolation code is not efficient on Apple M1. Intel C++
|
||||
Compiler vectorizes "whole stepping" interpolation as good as a
|
||||
manually-written SSE.
|
||||
* Reorganized SIMD instructions for a slightly better performance.
|
||||
* Changed internal buffer sizes of half-band resamplers (1-2% performance
|
||||
boost).
|
||||
* Fixed compiler warnings in PFFFT code.
|
||||
* Added several asserts to the code.
|
||||
|
||||
Version 5.3:
|
||||
|
||||
* Optimized inner loops of the fractional interpolator, added SSE2 and NEON
|
||||
intrinsics, resulting in a measurable (8-25%) performance gain.
|
||||
* Optimized filter calculation functions: changed some divisions by a constant
|
||||
to multiplications.
|
||||
* Renamed M_PI macros to R8B_PI, to avoid macro collisions.
|
||||
* Removed redundant code and macros.
|
||||
|
||||
Version 5.2:
|
||||
|
||||
* Modified `PFFFT` and `PFFFT DOUBLE` conditional pre-processor directives to
|
||||
always enable NEON on `aarch64`/`arm64` (this includes code built for
|
||||
Apple M1).
|
||||
|
||||
Version 5.1:
|
||||
|
||||
* Changed alignment in the `CFixedBuffer` class to 64 bytes. This improves AVX
|
||||
performance of the `PFFFT DOUBLE` implementation by a few percent.
|
||||
* Removed redundant files from the `pffft_double` folder, integrated the
|
||||
`pffft_common.c` file into the `pffft_double.c` file.
|
||||
|
||||
Version 5.0:
|
||||
|
||||
* Removed a long-outdated macros from the `r8bconf.h` file.
|
||||
* Changed a conditional pre-processor directive in the `pf_sse2_double.h` file
|
||||
as per PFFFT DOUBLE author's suggestion, to allow SSE2 intrinsics in most
|
||||
compilers.
|
||||
* Fixed "License.txt" misnaming in the source files to "LICENSE".
|
||||
|
||||
Version 4.10:
|
||||
|
||||
* Added the `PFFFT DOUBLE` implementation support. Now available via the
|
||||
`R8B_PFFFT_DOUBLE` definition macro.
|
||||
|
||||
Version 4.9:
|
||||
|
||||
* Reoptimized half-band and fractional interpolation filters with a stricter
|
||||
frequency response linearity constraints. This did not impact the average
|
||||
speed performance.
|
||||
|
||||
Version 4.8:
|
||||
|
||||
* Added a limit to the intermediate filter's transition band, to keep the
|
||||
latency under control at any resampling ratio.
|
||||
|
||||
Version 4.7:
|
||||
|
||||
* Added `#ifndef _USE_MATH_DEFINES` to `pffft.cpp`.
|
||||
* Moved `#include "pffft.h"` to `CDSPRealFFT.h`.
|
||||
|
||||
Version 4.6:
|
||||
|
||||
* Removed the `MaxInLen` parameter from the `oneshot()` function.
|
||||
* Decreased intermediate low-pass filter's transition band slightly, for more
|
||||
stable quality.
|
||||
|
||||
Version 4.5:
|
||||
|
||||
* Fixed VS2017 compiler warnings.
|
||||
|
||||
Version 4.4:
|
||||
|
||||
* Fixed the "Declaration hides a member" Intel C++ compiler warnings.
|
||||
|
||||
Version 4.3:
|
||||
|
||||
* Added //$ markers for internal debugging purposes.
|
||||
|
||||
Version 4.2:
|
||||
|
||||
* Backed-off max transition band to 45 and MinAtten to 49.
|
||||
* Implemented Wave64 and AIFF file input in the `r8bfreesrc` bench tool. The
|
||||
tool is now compiled with the `R8B_IPP 1` and `R8B_EXTFFT 1` macros to
|
||||
demonstrate the maximal achievable performance.
|
||||
|
||||
Version 4.1:
|
||||
|
||||
* Updated allowed ReqAtten range to 52-218, ReqTransBand 0.5-56. It is
|
||||
possible to specify filter parameters slightly beyond these values, but the
|
||||
resulting filter will be slightly out of specification as well.
|
||||
* Optimized static filter banks allocation.
|
||||
|
||||
Version 4.0:
|
||||
|
||||
* A major overhaul of interpolation classes: now templated parameters are not
|
||||
used, all required parameters are calculated at runtime. Static filter bank
|
||||
object is not used, it is created when necessary, and then cached.
|
||||
* Implemented one-third interpolation filters, however, this did not
|
||||
measurably increase resampler's speed.
|
||||
|
||||
Version 3.7:
|
||||
|
||||
* Used ippsMul_64f_I() in the CDSPRealFFT::multiplyBlockZ() function for a
|
||||
minor conversion speed increase in Intel IPP mode.
|
||||
|
||||
Version 3.6:
|
||||
|
||||
* Added memory alignment to allocated buffers which boosts performance by 1.5%
|
||||
when Intel IPP FFT is in use.
|
||||
* Implemented PFFFT support.
|
||||
|
||||
Version 3.5:
|
||||
|
||||
* Improved resampling speed very slightly.
|
||||
* Updated the `r8bfreesrc` benchmark tool to support RF64 WAV files.
|
||||
|
||||
Version 3.4:
|
||||
|
||||
* Added a more efficient half-band filters for >= 256 resampling ratios.
|
||||
|
||||
Version 3.3:
|
||||
|
||||
* Made minor fix to downsampling for some use cases of CDSPBlockConvolver,
|
||||
did not affect resampler.
|
||||
* Converted CDSPHBUpsampler and CDSPHBDownsampler's inner functions to
|
||||
static functions, which boosted high-ratio resampling performance measurably.
|
||||
|
||||
Version 3.2:
|
||||
|
||||
* Minor fix to the latency consumption mechanism.
|
||||
|
||||
Version 3.1:
|
||||
|
||||
* Reoptimized fractional delay filter's windowing function.
|
||||
|
||||
Version 3.0:
|
||||
|
||||
* Implemented a new variant of the getInLenBeforeOutStart() function.
|
||||
* Reimplemented oneshot() function to support `float` buffer types.
|
||||
* Considerably improved downsampling performance at high resampling ratios.
|
||||
* Implemented intermediate interpolation technique which boosted upsampling
|
||||
performance for most resampling ratios considerably.
|
||||
* Removed the ConvCount constant - now resampler supports virtually any
|
||||
resampling ratios.
|
||||
* Removed the UsePower2 parameter from the resampler constructor.
|
||||
* Now resampler's process() function always returns pointer to the internal
|
||||
buffer, input buffer is returned only if no resampling happens.
|
||||
* Resampler's getMaxOutLen() function can now be used to obtain the maximal
|
||||
output length that can be produced by the resampler in a single call.
|
||||
* Added a more efficient "one-third" filters to half-band upsampler and
|
||||
downsampler.
|
||||
|
||||
Version 2.1:
|
||||
|
||||
* Optimized 2X half-band downsampler.
|
||||
|
||||
Version 2.0:
|
||||
|
||||
* Optimized power-of-2 upsampling.
|
||||
|
||||
Version 1.9:
|
||||
|
||||
* Optimized half-band downsampling filter.
|
||||
* Implemented whole-number stepping resampling.
|
||||
* Added `R8B_EXTFFT` configuration option.
|
||||
* Fixed initial sub-sample offseting on downsampling.
|
||||
|
||||
Version 1.8:
|
||||
|
||||
* Added `R8B_FASTTIMING` configuration option.
|
||||
|
||||
Version 1.7:
|
||||
|
||||
* Improved sample timing precision.
|
||||
* Increased CDSPResampler :: ConvCountMax to 28 to support a lot wider
|
||||
resampling ratios.
|
||||
* Added `bench` tools.
|
||||
* Removed getInLenBeforeOutStart() due to incorrect calculation.
|
1316
Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h
vendored
Normal file
1316
Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
38
Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp
vendored
Normal file
38
Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
/**
|
||||
* @file r8bbase.cpp
|
||||
*
|
||||
* @brief C++ file that should be compiled and included into your application.
|
||||
*
|
||||
* This is a single library file that should be compiled and included into the
|
||||
* project that uses the "r8brain-free-src" sample rate converter. This file
|
||||
* defines several global static objects used by the library.
|
||||
*
|
||||
* You may also need to include to your project: the "Kernel32" library
|
||||
* (on Windows) and the "pthread" library on macOS and Linux.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#include "CDSPFIRFilter.h"
|
||||
#include "CDSPFracInterpolator.h"
|
||||
|
||||
namespace r8b {
|
||||
|
||||
#if R8B_FLTTEST
|
||||
int InterpFilterFracs = -1;
|
||||
#endif // R8B_FLTTEST
|
||||
|
||||
CSyncObject CDSPRealFFTKeeper :: StateSync;
|
||||
CDSPRealFFT :: CObjKeeper CDSPRealFFTKeeper :: FFTObjects[ 31 ];
|
||||
|
||||
CSyncObject CDSPFIRFilterCache :: StateSync;
|
||||
CPtrKeeper< CDSPFIRFilter* > CDSPFIRFilterCache :: Objects;
|
||||
int CDSPFIRFilterCache :: ObjCount = 0;
|
||||
|
||||
CSyncObject CDSPFracDelayFilterBankCache :: StateSync;
|
||||
CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: Objects;
|
||||
CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: StaticObjects;
|
||||
int CDSPFracDelayFilterBankCache :: ObjCount = 0;
|
||||
|
||||
} // namespace r8b
|
1206
Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h
vendored
Normal file
1206
Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
176
Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h
vendored
Normal file
176
Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h
vendored
Normal file
@ -0,0 +1,176 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file r8bconf.h
|
||||
*
|
||||
* @brief The "configuration" inclusion file you can modify.
|
||||
*
|
||||
* This is the "configuration" inclusion file for the "r8brain-free-src"
|
||||
* sample rate converter. You may redefine the macros here as you see fit.
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8BCONF_INCLUDED
|
||||
#define R8BCONF_INCLUDED
|
||||
|
||||
#if !defined( R8B_IPP )
|
||||
/**
|
||||
* Set the R8B_IPP macro definition to 1 to enable the use of Intel IPP's
|
||||
* fast Fourier transform functions. Also uncomment and correct the IPP
|
||||
* header inclusion macros.
|
||||
*
|
||||
* Do not forget to call the ippInit() function at the start of the
|
||||
* application, before using this library's functions.
|
||||
*/
|
||||
|
||||
#define R8B_IPP 0
|
||||
|
||||
// #include <ippcore.h>
|
||||
// #include <ipps.h>
|
||||
#endif // !defined( R8B_IPP )
|
||||
|
||||
#if !defined( R8BASSERT )
|
||||
/**
|
||||
* Assertion macro used to check for certain run-time conditions. By
|
||||
* default no action is taken if assertion fails.
|
||||
*
|
||||
* @param e Expression to check.
|
||||
*/
|
||||
|
||||
#define R8BASSERT( e )
|
||||
#endif // !defined( R8BASSERT )
|
||||
|
||||
#if !defined( R8BCONSOLE )
|
||||
/**
|
||||
* Console output macro, used to output various resampler status strings,
|
||||
* including filter design parameters, convolver parameters.
|
||||
*
|
||||
* @param e Expression to send to the console, usually consists of a
|
||||
* standard "printf" format string followed by several parameters
|
||||
* (__VA_ARGS__).
|
||||
*/
|
||||
|
||||
#define R8BCONSOLE( ... )
|
||||
#endif // !defined( R8BCONSOLE )
|
||||
|
||||
#if !defined( R8B_BASECLASS )
|
||||
/**
|
||||
* Macro defines the name of the class from which all classes that are
|
||||
* designed to be created on heap are derived. The default
|
||||
* r8b::CStdClassAllocator class uses "stdlib" memory allocation
|
||||
* functions.
|
||||
*
|
||||
* The classes that are best placed on stack or as class members are not
|
||||
* derived from any class.
|
||||
*/
|
||||
|
||||
#define R8B_BASECLASS :: r8b :: CStdClassAllocator
|
||||
#endif // !defined( R8B_BASECLASS )
|
||||
|
||||
#if !defined( R8B_MEMALLOCCLASS )
|
||||
/**
|
||||
* Macro defines the name of the class that implements raw memory
|
||||
* allocation functions, see the r8b::CStdMemAllocator class for details.
|
||||
*/
|
||||
|
||||
#define R8B_MEMALLOCCLASS :: r8b :: CStdMemAllocator
|
||||
#endif // !defined( R8B_MEMALLOCCLASS )
|
||||
|
||||
#if !defined( R8B_FILTER_CACHE_MAX )
|
||||
/**
|
||||
* This macro specifies the number of filters kept in the cache at most.
|
||||
* The actual number can be higher if many different filters are in use at
|
||||
* the same time.
|
||||
*/
|
||||
|
||||
#define R8B_FILTER_CACHE_MAX 96
|
||||
#endif // !defined( R8B_FILTER_CACHE_MAX )
|
||||
|
||||
#if !defined( R8B_FRACBANK_CACHE_MAX )
|
||||
/**
|
||||
* This macro specifies the number of whole-number stepping fractional
|
||||
* delay filter banks kept in the cache at most. The actual number can be
|
||||
* higher if many different filter banks are in use at the same time. As
|
||||
* filter banks are usually big objects, it is advisable to keep this
|
||||
* cache size small.
|
||||
*/
|
||||
|
||||
#define R8B_FRACBANK_CACHE_MAX 12
|
||||
#endif // !defined( R8B_FRACBANK_CACHE_MAX )
|
||||
|
||||
#if !defined( R8B_FLTTEST )
|
||||
/**
|
||||
* This macro, when equal to 1, enables fractional delay filter bank
|
||||
* testing: in this mode the filter bank becomes a dynamic member of the
|
||||
* CDSPFracInterpolator object instead of being a global static object.
|
||||
*/
|
||||
|
||||
#define R8B_FLTTEST 0
|
||||
#endif // !defined( R8B_FLTTEST )
|
||||
|
||||
#if !defined( R8B_FASTTIMING )
|
||||
/**
|
||||
* This macro, when equal to 1, enables a fast interpolation sample
|
||||
* timing technique. This technique improves interpolation performance
|
||||
* (by around 10%) at the expense of a minor sample timing drift which is
|
||||
* on the order of 1e-6 samples per 10 billion output samples. This
|
||||
* setting does not apply to whole-number stepping, if it is in use, as
|
||||
* this stepping provides zero timing error without performance impact.
|
||||
* Also does not apply to the cases when a whole-numbered (2X, 3X, etc.)
|
||||
* resampling is in the actual use.
|
||||
*/
|
||||
|
||||
#define R8B_FASTTIMING 0
|
||||
#endif // !defined( R8B_FASTTIMING )
|
||||
|
||||
#if !defined( R8B_EXTFFT )
|
||||
/**
|
||||
* This macro, when equal to 1, extends length of low-pass filters' FFT
|
||||
* block by a factor of 2, by zero-padding it. This usually improves the
|
||||
* overall time performance of the resampler at the expense of a higher
|
||||
* overall latency (initial processing delay). If such delay is not an
|
||||
* issue, setting this macro to 1 is preferrable. This macro can only have
|
||||
* a value of 0 or 1.
|
||||
*/
|
||||
|
||||
#define R8B_EXTFFT 0
|
||||
#endif // !defined( R8B_EXTFFT )
|
||||
|
||||
#if !defined( R8B_PFFFT )
|
||||
/**
|
||||
* When defined as 1, enables PFFFT routines which are fast, but which
|
||||
* are limited to 24-bit precision. May be a good choice for time-series
|
||||
* interpolation, when stop-band attenuation higher than 120 dB is not
|
||||
* required.
|
||||
*/
|
||||
|
||||
#define R8B_PFFFT 0
|
||||
#endif // !defined( R8B_PFFFT )
|
||||
|
||||
#if R8B_PFFFT
|
||||
#define R8B_FLOATFFT 1
|
||||
#endif // R8B_PFFFT
|
||||
|
||||
#if !defined( R8B_PFFFT_DOUBLE )
|
||||
/**
|
||||
* When defined as 1, enables PFFFT "double" routines which are fast, and
|
||||
* which provide the highest precision.
|
||||
*/
|
||||
|
||||
#define R8B_PFFFT_DOUBLE 0
|
||||
#endif // !defined( R8B_PFFFT_DOUBLE )
|
||||
|
||||
#if !defined( R8B_FLOATFFT )
|
||||
/**
|
||||
* The R8B_FLOATFFT definition enables double-to-float buffer conversions
|
||||
* for FFT operations, for algorithms that work with "float" values. This
|
||||
* macro should not be changed from the default "0" here.
|
||||
*/
|
||||
|
||||
#define R8B_FLOATFFT 0
|
||||
#endif // !defined( R8B_FLOATFFT )
|
||||
|
||||
#endif // R8BCONF_INCLUDED
|
306
Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h
vendored
Normal file
306
Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h
vendored
Normal file
@ -0,0 +1,306 @@
|
||||
//$ nobt
|
||||
//$ nocpp
|
||||
|
||||
/**
|
||||
* @file r8butil.h
|
||||
*
|
||||
* @brief The inclusion file with several utility functions.
|
||||
*
|
||||
* This file includes several utility functions used by various utility
|
||||
* programs like "calcErrorTable.cpp".
|
||||
*
|
||||
* r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
|
||||
* See the "LICENSE" file for license.
|
||||
*/
|
||||
|
||||
#ifndef R8BUTIL_INCLUDED
|
||||
#define R8BUTIL_INCLUDED
|
||||
|
||||
#include "r8bbase.h"
|
||||
|
||||
namespace r8b {
|
||||
|
||||
/**
|
||||
* @param re Real part of the frequency response.
|
||||
* @param im Imaginary part of the frequency response.
|
||||
* @return A magnitude response value converted from the linear scale to the
|
||||
* logarithmic scale.
|
||||
*/
|
||||
|
||||
inline double convertResponseToLog( const double re, const double im )
|
||||
{
|
||||
return( 4.34294481903251828 * log( re * re + im * im + 1e-100 ));
|
||||
}
|
||||
|
||||
/**
|
||||
* An utility function that performs frequency response scanning step update
|
||||
* based on the current magnitude response's slope.
|
||||
*
|
||||
* @param[in,out] step The current scanning step. Will be updated on
|
||||
* function's return. Must be a positive value.
|
||||
* @param curg Squared magnitude response at the current frequency point.
|
||||
* @param[in,out] prevg_log Previous magnitude response, log scale. Will be
|
||||
* updated on function's return.
|
||||
* @param prec Precision multiplier, affects the size of the step.
|
||||
* @param maxstep The maximal allowed step.
|
||||
* @param minstep The minimal allowed step.
|
||||
*/
|
||||
|
||||
inline void updateScanStep( double& step, const double curg,
|
||||
double& prevg_log, const double prec, const double maxstep,
|
||||
const double minstep = 1e-11 )
|
||||
{
|
||||
double curg_log = 4.34294481903251828 * log( curg + 1e-100 );
|
||||
curg_log += ( prevg_log - curg_log ) * 0.7;
|
||||
|
||||
const double slope = fabs( curg_log - prevg_log );
|
||||
prevg_log = curg_log;
|
||||
|
||||
if( slope > 0.0 )
|
||||
{
|
||||
step /= prec * slope;
|
||||
step = max( min( step, maxstep ), minstep );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function locates normalized frequency at which the minimum filter gain
|
||||
* is reached. The scanning is performed from lower (left) to higher
|
||||
* (right) frequencies, the whole range is scanned.
|
||||
*
|
||||
* Function expects that the magnitude response is always reducing from lower
|
||||
* to high frequencies, starting at "minth".
|
||||
*
|
||||
* @param flt Filter response.
|
||||
* @param fltlen Filter response's length in samples (taps).
|
||||
* @param[out] ming The current minimal gain (squared). On function's return
|
||||
* will contain the minimal gain value found (squared).
|
||||
* @param[out] minth The normalized frequency where the minimal gain is
|
||||
* currently at. On function's return will point to the normalized frequency
|
||||
* where the new minimum was found.
|
||||
* @param thend The ending frequency, inclusive.
|
||||
*/
|
||||
|
||||
inline void findFIRFilterResponseMinLtoR( const double* const flt,
|
||||
const int fltlen, double& ming, double& minth, const double thend )
|
||||
{
|
||||
const double maxstep = minth * 2e-3;
|
||||
double curth = minth;
|
||||
double re;
|
||||
double im;
|
||||
calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
|
||||
double prevg_log = convertResponseToLog( re, im );
|
||||
double step = 1e-11;
|
||||
|
||||
while( true )
|
||||
{
|
||||
curth += step;
|
||||
|
||||
if( curth > thend )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
|
||||
const double curg = re * re + im * im;
|
||||
|
||||
if( curg > ming )
|
||||
{
|
||||
ming = curg;
|
||||
minth = curth;
|
||||
break;
|
||||
}
|
||||
|
||||
ming = curg;
|
||||
minth = curth;
|
||||
|
||||
updateScanStep( step, curg, prevg_log, 0.31, maxstep );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function locates normalized frequency at which the maximal filter gain
|
||||
* is reached. The scanning is performed from lower (left) to higher
|
||||
* (right) frequencies, the whole range is scanned.
|
||||
*
|
||||
* Note: this function may "stall" in very rare cases if the magnitude
|
||||
* response happens to be "saw-tooth" like, requiring a very small stepping to
|
||||
* be used. If this happens, it may take dozens of seconds to complete.
|
||||
*
|
||||
* @param flt Filter response.
|
||||
* @param fltlen Filter response's length in samples (taps).
|
||||
* @param[out] maxg The current maximal gain (squared). On function's return
|
||||
* will contain the maximal gain value (squared).
|
||||
* @param[out] maxth The normalized frequency where the maximal gain is
|
||||
* currently at. On function's return will point to the normalized frequency
|
||||
* where the maximum was reached.
|
||||
* @param thend The ending frequency, inclusive.
|
||||
*/
|
||||
|
||||
inline void findFIRFilterResponseMaxLtoR( const double* const flt,
|
||||
const int fltlen, double& maxg, double& maxth, const double thend )
|
||||
{
|
||||
const double maxstep = maxth * 1e-4;
|
||||
double premaxth = maxth;
|
||||
double premaxg = maxg;
|
||||
double postmaxth = maxth;
|
||||
double postmaxg = maxg;
|
||||
|
||||
double prevth = maxth;
|
||||
double prevg = maxg;
|
||||
double curth = maxth;
|
||||
double re;
|
||||
double im;
|
||||
calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
|
||||
double prevg_log = convertResponseToLog( re, im );
|
||||
double step = 1e-11;
|
||||
|
||||
bool WasPeak = false;
|
||||
int AfterPeakCount = 0;
|
||||
|
||||
while( true )
|
||||
{
|
||||
curth += step;
|
||||
|
||||
if( curth > thend )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
|
||||
const double curg = re * re + im * im;
|
||||
|
||||
if( curg > maxg )
|
||||
{
|
||||
premaxth = prevth;
|
||||
premaxg = prevg;
|
||||
maxg = curg;
|
||||
maxth = curth;
|
||||
WasPeak = true;
|
||||
AfterPeakCount = 0;
|
||||
}
|
||||
else
|
||||
if( WasPeak )
|
||||
{
|
||||
if( AfterPeakCount == 0 )
|
||||
{
|
||||
postmaxth = curth;
|
||||
postmaxg = curg;
|
||||
}
|
||||
|
||||
if( AfterPeakCount == 5 )
|
||||
{
|
||||
// Perform 2 approximate binary searches.
|
||||
|
||||
int k;
|
||||
|
||||
for( k = 0; k < 2; k++ )
|
||||
{
|
||||
double l = ( k == 0 ? premaxth : maxth );
|
||||
double curgl = ( k == 0 ? premaxg : maxg );
|
||||
double r = ( k == 0 ? maxth : postmaxth );
|
||||
double curgr = ( k == 0 ? maxg : postmaxg );
|
||||
|
||||
while( true )
|
||||
{
|
||||
const double c = ( l + r ) * 0.5;
|
||||
calcFIRFilterResponse( flt, fltlen, R8B_PI * c,
|
||||
re, im );
|
||||
|
||||
const double curg = re * re + im * im;
|
||||
|
||||
if( curgl > curgr )
|
||||
{
|
||||
r = c;
|
||||
curgr = curg;
|
||||
}
|
||||
else
|
||||
{
|
||||
l = c;
|
||||
curgl = curg;
|
||||
}
|
||||
|
||||
if( r - l < 1e-11 )
|
||||
{
|
||||
if( curgl > curgr )
|
||||
{
|
||||
maxth = l;
|
||||
maxg = curgl;
|
||||
}
|
||||
else
|
||||
{
|
||||
maxth = r;
|
||||
maxg = curgr;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
AfterPeakCount++;
|
||||
}
|
||||
|
||||
prevth = curth;
|
||||
prevg = curg;
|
||||
|
||||
updateScanStep( step, curg, prevg_log, 1.0, maxstep );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Function locates normalized frequency at which the specified maximum
|
||||
* filter gain is reached. The scanning is performed from higher (right)
|
||||
* to lower (left) frequencies, scanning stops when the required gain
|
||||
* value was crossed. Function uses an extremely efficient binary search and
|
||||
* thus expects that the magnitude response has the "main lobe" form produced
|
||||
* by windowing, with a minimal pass-band ripple.
|
||||
*
|
||||
* @param flt Filter response.
|
||||
* @param fltlen Filter response's length in samples (taps).
|
||||
* @param maxg Maximal gain (squared).
|
||||
* @param[out] th The current normalized frequency. On function's return will
|
||||
* point to the normalized frequency where "maxg" is reached.
|
||||
* @param thend The leftmost frequency to scan, inclusive.
|
||||
*/
|
||||
|
||||
inline void findFIRFilterResponseLevelRtoL( const double* const flt,
|
||||
const int fltlen, const double maxg, double& th, const double thend )
|
||||
{
|
||||
// Perform exact binary search.
|
||||
|
||||
double l = thend;
|
||||
double r = th;
|
||||
|
||||
while( true )
|
||||
{
|
||||
const double c = ( l + r ) * 0.5;
|
||||
|
||||
if( r - l < 1e-14 )
|
||||
{
|
||||
th = c;
|
||||
break;
|
||||
}
|
||||
|
||||
double re;
|
||||
double im;
|
||||
calcFIRFilterResponse( flt, fltlen, R8B_PI * c, re, im );
|
||||
const double curg = re * re + im * im;
|
||||
|
||||
if( curg > maxg )
|
||||
{
|
||||
l = c;
|
||||
}
|
||||
else
|
||||
{
|
||||
r = c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace r8b
|
||||
|
||||
#endif // R8BUTIL_INCLUDED
|
Reference in New Issue
Block a user