Initial community commit

This commit is contained in:
Jef
2024-09-24 14:54:57 +02:00
parent 537bcbc862
commit 20d28e80a5
16810 changed files with 4640254 additions and 2 deletions

View File

@ -0,0 +1,642 @@
//$ nobt
//$ nocpp
/**
* @file CDSPBlockConvolver.h
*
* @brief Single-block overlap-save convolution processor class.
*
* This file includes single-block overlap-save convolution processor class.
*
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED
#define R8B_CDSPBLOCKCONVOLVER_INCLUDED
#include "CDSPFIRFilter.h"
#include "CDSPProcessor.h"
namespace r8b {
/**
* @brief Single-block overlap-save convolution processing class.
*
* Class that implements single-block overlap-save convolution processing. The
* length of a single FFT block used depends on the length of the filter
* kernel.
*
* The rationale behind "single-block" processing is that increasing the FFT
* block length by 2 is more efficient than performing convolution at the same
* FFT block length but using two blocks.
*
* This class also implements a built-in resampling by any whole-number
* factor, which simplifies the overall resampling objects topology.
*/
class CDSPBlockConvolver : public CDSPProcessor
{
public:
/**
* Constructor initializes internal variables and constants of *this
* object.
*
* @param aFilter Pre-calculated filter data. Reference to this object is
* inhertied by *this object, and the object will be released when *this
* object is destroyed. If upsampling is used, filter's gain should be
* equal to the upsampling factor.
* @param aUpFactor The upsampling factor, positive value. E.g. value of 2
* means 2x upsampling should be performed over the input data.
* @param aDownFactor The downsampling factor, positive value. E.g. value
* of 2 means 2x downsampling should be performed over the output data.
* @param PrevLatency Latency, in samples (any value >=0), which was left
* in the output signal by a previous process. This value is usually
* non-zero if the minimum-phase filters are in use. This value is always
* zero if the linear-phase filters are in use.
* @param aDoConsumeLatency "True" if the output latency should be
* consumed. Does not apply to the fractional part of the latency (if such
* part is available).
*/
CDSPBlockConvolver( CDSPFIRFilter& aFilter, const int aUpFactor,
const int aDownFactor, const double PrevLatency = 0.0,
const bool aDoConsumeLatency = true )
: Filter( &aFilter )
, UpFactor( aUpFactor )
, DownFactor( aDownFactor )
, DoConsumeLatency( aDoConsumeLatency )
, BlockLen2( 2 << Filter -> getBlockLenBits() )
{
R8BASSERT( UpFactor > 0 );
R8BASSERT( DownFactor > 0 );
R8BASSERT( PrevLatency >= 0.0 );
int fftinBits;
UpShift = getBitOccupancy( UpFactor ) - 1;
if(( 1 << UpShift ) == UpFactor )
{
fftinBits = Filter -> getBlockLenBits() + 1 - UpShift;
PrevInputLen = ( Filter -> getKernelLen() - 1 + UpFactor - 1 ) /
UpFactor;
InputLen = BlockLen2 - PrevInputLen * UpFactor;
}
else
{
UpShift = -1;
fftinBits = Filter -> getBlockLenBits() + 1;
PrevInputLen = Filter -> getKernelLen() - 1;
InputLen = BlockLen2 - PrevInputLen;
}
OutOffset = ( Filter -> isZeroPhase() ? Filter -> getLatency() : 0 );
LatencyFrac = Filter -> getLatencyFrac() + PrevLatency * UpFactor;
Latency = (int) LatencyFrac;
const int InLatency = Latency + Filter -> getLatency() - OutOffset;
LatencyFrac -= Latency;
LatencyFrac /= DownFactor;
Latency += InputLen + Filter -> getLatency();
int fftoutBits;
InputDelay = 0;
DownSkipInit = 0;
DownShift = getBitOccupancy( DownFactor ) - 1;
if(( 1 << DownShift ) == DownFactor )
{
fftoutBits = Filter -> getBlockLenBits() + 1 - DownShift;
if( DownFactor > 1 )
{
if( UpShift > 0 )
{
// This case never happens in practice due to mutual
// exclusion of "power of 2" DownFactor and UpFactor
// values.
R8BASSERT( UpShift == 0 );
}
else
{
// Make sure InputLen is divisible by DownFactor.
const int ilc = InputLen & ( DownFactor - 1 );
PrevInputLen += ilc;
InputLen -= ilc;
Latency -= ilc;
// Correct InputDelay for input and filter's latency.
const int lc = InLatency & ( DownFactor - 1 );
if( lc > 0 )
{
InputDelay = DownFactor - lc;
}
if( !DoConsumeLatency )
{
Latency /= DownFactor;
}
}
}
}
else
{
fftoutBits = Filter -> getBlockLenBits() + 1;
DownShift = -1;
if( !DoConsumeLatency && DownFactor > 1 )
{
DownSkipInit = Latency % DownFactor;
Latency /= DownFactor;
}
}
R8BASSERT( Latency >= 0 );
fftin = new CDSPRealFFTKeeper( fftinBits );
if( fftoutBits == fftinBits )
{
fftout = fftin;
}
else
{
ffto2 = new CDSPRealFFTKeeper( fftoutBits );
fftout = ffto2;
}
WorkBlocks.alloc( BlockLen2 * 2 + PrevInputLen );
CurInput = &WorkBlocks[ 0 ];
CurOutput = &WorkBlocks[ BlockLen2 ]; // CurInput and
// CurOutput are address-aligned.
PrevInput = &WorkBlocks[ BlockLen2 * 2 ];
clear();
R8BCONSOLE( "CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i "
"fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen,
UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(),
getLatency() );
}
virtual ~CDSPBlockConvolver()
{
Filter -> unref();
}
virtual int getLatency() const
{
return( DoConsumeLatency ? 0 : Latency );
}
virtual double getLatencyFrac() const
{
return( LatencyFrac );
}
virtual int getMaxOutLen( const int MaxInLen ) const
{
R8BASSERT( MaxInLen >= 0 );
return(( MaxInLen * UpFactor + DownFactor - 1 ) / DownFactor );
}
virtual void clear()
{
memset( &PrevInput[ 0 ], 0, PrevInputLen * sizeof( PrevInput[ 0 ]));
if( DoConsumeLatency )
{
LatencyLeft = Latency;
}
else
{
LatencyLeft = 0;
if( DownShift > 0 )
{
memset( &CurOutput[ 0 ], 0, ( BlockLen2 >> DownShift ) *
sizeof( CurOutput[ 0 ]));
}
else
{
memset( &CurOutput[ BlockLen2 - OutOffset ], 0, OutOffset *
sizeof( CurOutput[ 0 ]));
memset( &CurOutput[ 0 ], 0, ( InputLen - OutOffset ) *
sizeof( CurOutput[ 0 ]));
}
}
memset( CurInput, 0, InputDelay * sizeof( CurInput[ 0 ]));
InDataLeft = InputLen - InputDelay;
UpSkip = 0;
DownSkip = DownSkipInit;
}
virtual int process( double* ip, int l0, double*& op0 )
{
R8BASSERT( l0 >= 0 );
R8BASSERT( UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0 );
double* op = op0;
int l = l0 * UpFactor;
l0 = 0;
while( l > 0 )
{
const int Offs = InputLen - InDataLeft;
if( l < InDataLeft )
{
InDataLeft -= l;
if( UpShift >= 0 )
{
memcpy( &CurInput[ Offs >> UpShift ], ip,
( l >> UpShift ) * sizeof( CurInput[ 0 ]));
}
else
{
copyUpsample( ip, &CurInput[ Offs ], l );
}
copyToOutput( Offs - OutOffset, op, l, l0 );
break;
}
const int b = InDataLeft;
l -= b;
InDataLeft = InputLen;
int ilu;
if( UpShift >= 0 )
{
const int bu = b >> UpShift;
memcpy( &CurInput[ Offs >> UpShift ], ip,
bu * sizeof( CurInput[ 0 ]));
ip += bu;
ilu = InputLen >> UpShift;
}
else
{
copyUpsample( ip, &CurInput[ Offs ], b );
ilu = InputLen;
}
const size_t pil = PrevInputLen * sizeof( CurInput[ 0 ]);
memcpy( &CurInput[ ilu ], PrevInput, pil );
memcpy( PrevInput, &CurInput[ ilu - PrevInputLen ], pil );
(*fftin) -> forward( CurInput );
if( UpShift > 0 )
{
#if R8B_FLOATFFT
mirrorInputSpectrum( (float*) CurInput );
#else // R8B_FLOATFFT
mirrorInputSpectrum( CurInput );
#endif // R8B_FLOATFFT
}
if( Filter -> isZeroPhase() )
{
(*fftout) -> multiplyBlocksZP( Filter -> getKernelBlock(),
CurInput );
}
else
{
(*fftout) -> multiplyBlocks( Filter -> getKernelBlock(),
CurInput );
}
if( DownShift > 0 )
{
const int z = BlockLen2 >> DownShift;
#if R8B_FLOATFFT
float* const kb = (float*) Filter -> getKernelBlock();
float* const p = (float*) CurInput;
#else // R8B_FLOATFFT
const double* const kb = Filter -> getKernelBlock();
double* const p = CurInput;
#endif // R8B_FLOATFFT
p[ 1 ] = kb[ z ] * p[ z ] - kb[ z + 1 ] * p[ z + 1 ];
}
(*fftout) -> inverse( CurInput );
copyToOutput( Offs - OutOffset, op, b, l0 );
double* const tmp = CurInput;
CurInput = CurOutput;
CurOutput = tmp;
}
return( l0 );
}
private:
CDSPFIRFilter* Filter; ///< Filter in use.
///<
CPtrKeeper< CDSPRealFFTKeeper* > fftin; ///< FFT object 1, used to produce
///< the input spectrum (can embed the "power of 2" upsampling).
///<
CPtrKeeper< CDSPRealFFTKeeper* > ffto2; ///< FFT object 2 (can be NULL).
///<
CDSPRealFFTKeeper* fftout; ///< FFT object used to produce the output
///< signal (can embed the "power of 2" downsampling), may point to
///< either "fftin" or "ffto2".
///<
int UpFactor; ///< Upsampling factor.
///<
int DownFactor; ///< Downsampling factor.
///<
bool DoConsumeLatency; ///< "True" if the output latency should be
///< consumed. Does not apply to the fractional part of the latency
///< (if such part is available).
///<
int BlockLen2; ///< Equals block length * 2.
///<
int OutOffset; ///< Output offset, depends on filter's introduced latency.
///<
int PrevInputLen; ///< The length of previous input data saved, used for
///< overlap.
///<
int InputLen; ///< The number of input samples that should be accumulated
///< before the input block is processed.
///<
int Latency; ///< Processing latency, in samples.
///<
double LatencyFrac; ///< Fractional latency, in samples, that is left in
///< the output signal.
///<
int UpShift; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is
///< not a "power of 2" value. Equals 0 if UpFactor equals 1.
///<
int DownShift; ///< "Power of 2" downsampling shift. Equals -1 if
///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals
///< 1.
///<
int InputDelay; ///< Additional input delay, in samples. Used to make the
///< output delay divisible by DownShift. Used only if UpShift <= 0
///< and DownShift > 0.
///<
CFixedBuffer< double > WorkBlocks; ///< Previous input data, input and
///< output data blocks, overall capacity = BlockLen2 * 2 +
///< PrevInputLen. Used in the flip-flop manner.
///<
double* PrevInput; ///< Previous input data buffer, capacity = BlockLen.
///<
double* CurInput; ///< Input data buffer, capacity = BlockLen2.
///<
double* CurOutput; ///< Output data buffer, capacity = BlockLen2.
///<
int InDataLeft; ///< Samples left before processing input and output FFT
///< blocks. Initialized to InputLen on clear.
///<
int LatencyLeft; ///< Latency in samples left to skip.
///<
int UpSkip; ///< The current upsampling sample skip (value in the range
///< 0 to UpFactor - 1).
///<
int DownSkip; ///< The current downsampling sample skip (value in the
///< range 0 to DownFactor - 1). Not used if DownShift > 0.
///<
int DownSkipInit; ///< The initial DownSkip value after clear().
///<
/**
* Function copies samples from the input buffer to the output buffer
* while inserting zeros inbetween them to perform the whole-numbered
* upsampling.
*
* @param[in,out] ip0 Input buffer. Will be advanced on function's return.
* @param[out] op Output buffer.
* @param l0 The number of samples to fill in the output buffer, including
* both input samples and interpolation (zero) samples.
*/
void copyUpsample( double*& ip0, double* op, int l0 )
{
int b = min( UpSkip, l0 );
if( b != 0 )
{
UpSkip -= b;
l0 -= b;
*op = 0.0;
op++;
while( --b != 0 )
{
*op = 0.0;
op++;
}
}
double* ip = ip0;
const int upf = UpFactor;
int l = l0 / upf;
int lz = l0 - l * upf;
if( upf == 3 )
{
while( l != 0 )
{
op[ 0 ] = *ip;
op[ 1 ] = 0.0;
op[ 2 ] = 0.0;
ip++;
op += upf;
l--;
}
}
else
if( upf == 5 )
{
while( l != 0 )
{
op[ 0 ] = *ip;
op[ 1 ] = 0.0;
op[ 2 ] = 0.0;
op[ 3 ] = 0.0;
op[ 4 ] = 0.0;
ip++;
op += upf;
l--;
}
}
else
{
const size_t zc = ( upf - 1 ) * sizeof( op[ 0 ]);
while( l != 0 )
{
*op = *ip;
ip++;
memset( op + 1, 0, zc );
op += upf;
l--;
}
}
if( lz != 0 )
{
*op = *ip;
ip++;
op++;
UpSkip = upf - lz;
while( --lz != 0 )
{
*op = 0.0;
op++;
}
}
ip0 = ip;
}
/**
* Function copies sample data from the CurOutput buffer to the specified
* output buffer and advances its position. If necessary, this function
* "consumes" latency and performs downsampling.
*
* @param Offs CurOutput buffer offset, can be negative.
* @param[out] op0 Output buffer pointer, will be advanced.
* @param b The number of output samples available, including those which
* are discarded during whole-number downsampling.
* @param l0 The overall output sample count, will be increased.
*/
void copyToOutput( int Offs, double*& op0, int b, int& l0 )
{
if( Offs < 0 )
{
if( Offs + b <= 0 )
{
Offs += BlockLen2;
}
else
{
copyToOutput( Offs + BlockLen2, op0, -Offs, l0 );
b += Offs;
Offs = 0;
}
}
if( LatencyLeft != 0 )
{
if( LatencyLeft >= b )
{
LatencyLeft -= b;
return;
}
Offs += LatencyLeft;
b -= LatencyLeft;
LatencyLeft = 0;
}
const int df = DownFactor;
if( DownShift > 0 )
{
int Skip = Offs & ( df - 1 );
if( Skip > 0 )
{
Skip = df - Skip;
b -= Skip;
Offs += Skip;
}
if( b > 0 )
{
b = ( b + df - 1 ) >> DownShift;
memcpy( op0, &CurOutput[ Offs >> DownShift ],
b * sizeof( op0[ 0 ]));
op0 += b;
l0 += b;
}
}
else
{
if( df > 1 )
{
const double* ip = &CurOutput[ Offs + DownSkip ];
int l = ( b + df - 1 - DownSkip ) / df;
DownSkip += l * df - b;
double* op = op0;
l0 += l;
op0 += l;
while( l > 0 )
{
*op = *ip;
ip += df;
op++;
l--;
}
}
else
{
memcpy( op0, &CurOutput[ Offs ], b * sizeof( op0[ 0 ]));
op0 += b;
l0 += b;
}
}
}
/**
* Function performs input spectrum mirroring which is used to perform a
* fast "power of 2" upsampling. Such mirroring is equivalent to insertion
* of zeros into the input signal.
*
* @param p Spectrum data block to mirror.
* @tparam T Buffer's element type.
*/
template< typename T >
void mirrorInputSpectrum( T* const p )
{
const int bl1 = BlockLen2 >> UpShift;
const int bl2 = bl1 + bl1;
int i;
for( i = bl1 + 2; i < bl2; i += 2 )
{
p[ i ] = p[ bl2 - i ];
p[ i + 1 ] = -p[ bl2 - i + 1 ];
}
p[ bl1 ] = p[ 1 ];
p[ bl1 + 1 ] = 0.0;
p[ 1 ] = p[ 0 ];
for( i = 1; i < UpShift; i++ )
{
const int z = bl1 << i;
memcpy( &p[ z ], p, z * sizeof( p[ 0 ]));
p[ z + 1 ] = 0.0;
}
}
};
} // namespace r8b
#endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED

View File

@ -0,0 +1,719 @@
//$ nobt
//$ nocpp
/**
* @file CDSPFIRFilter.h
*
* @brief FIR filter generator and filter cache classes.
*
* This file includes low-pass FIR filter generator and filter cache.
*
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8B_CDSPFIRFILTER_INCLUDED
#define R8B_CDSPFIRFILTER_INCLUDED
#include "CDSPSincFilterGen.h"
#include "CDSPRealFFT.h"
namespace r8b {
/**
* Enumeration of filter's phase responses.
*/
enum EDSPFilterPhaseResponse
{
fprLinearPhase = 0, ///< Linear-phase response. Features a linear-phase
///< high-latency response, with the latency expressed as integer
///< value.
///<
fprMinPhase ///< Minimum-phase response. Features a minimal latency
///< response, but the response's phase is non-linear. The latency is
///< usually expressed as non-integer value, and usually is small, but
///< is never equal to zero. The minimum-phase filter is transformed
///< from the linear-phase filter. The transformation has precision
///< limits which may skew both the -3 dB point and attenuation of the
///< filter being transformed: as it was measured, the skew happens
///< purely at random, and in most cases it is within tolerable range.
///< In a small (1%) random subset of cases the skew is bigger and
///< cannot be predicted. Minimum-phase transform requires 64-bit
///< floating point FFT precision, results with 32-bit float FFT are
///< far from optimal.
///<
};
/**
* @brief Calculation and storage class for FIR filters.
*
* Class that implements calculation and storing of a FIR filter (currently
* contains low-pass filter calculation routine designed for sample rate
* conversion). Objects of this class cannot be created directly, but can be
* obtained via the CDSPFilterCache::getLPFilter() static function.
*/
class CDSPFIRFilter : public R8B_BASECLASS
{
R8BNOCTOR( CDSPFIRFilter );
friend class CDSPFIRFilterCache;
public:
~CDSPFIRFilter()
{
R8BASSERT( RefCount == 0 );
delete Next;
}
/**
* @return The minimal allowed low-pass filter's transition band, in
* percent.
*/
static double getLPMinTransBand()
{
return( 0.5 );
}
/**
* @return The maximal allowed low-pass filter's transition band, in
* percent.
*/
static double getLPMaxTransBand()
{
return( 45.0 );
}
/**
* @return The minimal allowed low-pass filter's stop-band attenuation, in
* decibel.
*/
static double getLPMinAtten()
{
return( 49.0 );
}
/**
* @return The maximal allowed low-pass filter's stop-band attenuation, in
* decibel.
*/
static double getLPMaxAtten()
{
return( 218.0 );
}
/**
* @return "True" if kernel block of *this filter has zero-phase response.
*/
bool isZeroPhase() const
{
return( IsZeroPhase );
}
/**
* @return Filter's latency, in samples (integer part).
*/
int getLatency() const
{
return( Latency );
}
/**
* @return Filter's latency, in samples (fractional part). Always zero for
* linear-phase filters.
*/
double getLatencyFrac() const
{
return( LatencyFrac );
}
/**
* @return Filter kernel length, in samples. Not to be confused with the
* block length.
*/
int getKernelLen() const
{
return( KernelLen );
}
/**
* @return Filter's block length, espressed as Nth power of 2. The actual
* length is twice as large due to zero-padding.
*/
int getBlockLenBits() const
{
return( BlockLenBits );
}
/**
* @return Filter's kernel block, in complex-numbered form obtained via
* the CDSPRealFFT::forward() function call, zero-padded, gain-adjusted
* with the CDSPRealFFT::getInvMulConst() * ReqGain constant, immediately
* suitable for convolution. Kernel block may have "zero-phase" response,
* depending on the isZeroPhase() function's result.
*/
const double* getKernelBlock() const
{
return( KernelBlock );
}
/**
* This function should be called when the filter obtained via the
* filter cache is no longer needed.
*/
void unref();
private:
double ReqNormFreq; ///< Required normalized frequency, 0 to 1 inclusive.
///<
double ReqTransBand; ///< Required transition band in percent, as passed
///< by the user.
///<
double ReqAtten; ///< Required stop-band attenuation in decibel, as passed
///< by the user (positive value).
///<
EDSPFilterPhaseResponse ReqPhase; ///< Required filter's phase response.
///<
double ReqGain; ///< Required overall filter's gain.
///<
CDSPFIRFilter* Next; ///< Next FIR filter in cache's list.
///<
int RefCount; ///< The number of references made to *this FIR filter.
///<
bool IsZeroPhase; ///< "True" if kernel block of *this filter has
///< zero-phase response.
///<
int Latency; ///< Filter's latency in samples (integer part).
///<
double LatencyFrac; ///< Filter's latency in samples (fractional part).
///<
int KernelLen; ///< Filter kernel length, in samples.
///<
int BlockLenBits; ///< Block length used to store *this FIR filter,
///< expressed as Nth power of 2. This value is used directly by the
///< convolver.
///<
CFixedBuffer< double > KernelBlock; ///< FIR filter buffer, capacity
///< equals to 1 << ( BlockLenBits + 1 ). Second part of the buffer
///< contains zero-padding to allow alias-free convolution.
///< Address-aligned.
///<
CDSPFIRFilter()
: RefCount( 1 )
{
}
/**
* Function builds filter kernel based on the "Req" parameters.
*
* @param ExtAttenCorrs External attentuation correction table, for
* internal use.
*/
void buildLPFilter( const double* const ExtAttenCorrs )
{
const double tb = ReqTransBand * 0.01;
double pwr;
double fo1;
double hl;
double atten = -ReqAtten;
if( tb >= 0.25 )
{
if( ReqAtten >= 117.0 )
{
atten -= 1.60;
}
else
if( ReqAtten >= 60.0 )
{
atten -= 1.91;
}
else
{
atten -= 2.25;
}
}
else
if( tb >= 0.10 )
{
if( ReqAtten >= 117.0 )
{
atten -= 0.69;
}
else
if( ReqAtten >= 60.0 )
{
atten -= 0.73;
}
else
{
atten -= 1.13;
}
}
else
{
if( ReqAtten >= 117.0 )
{
atten -= 0.21;
}
else
if( ReqAtten >= 60.0 )
{
atten -= 0.25;
}
else
{
atten -= 0.36;
}
}
static const int AttenCorrCount = 264;
static const double AttenCorrMin = 49.0;
static const double AttenCorrDiff = 176.25;
int AttenCorr = (int) floor(( -atten - AttenCorrMin ) *
AttenCorrCount / AttenCorrDiff + 0.5 );
AttenCorr = min( AttenCorrCount, max( 0, AttenCorr ));
if( ExtAttenCorrs != NULL )
{
atten -= ExtAttenCorrs[ AttenCorr ];
}
else
if( tb >= 0.25 )
{
static const double AttenCorrScale = 101.0;
static const signed char AttenCorrs[] = {
-127, -127, -125, -125, -122, -119, -115, -110, -104, -97,
-91, -82, -75, -24, -16, -6, 4, 14, 24, 29, 30, 32, 37, 44,
51, 57, 63, 67, 65, 50, 53, 56, 58, 60, 63, 64, 66, 68, 74,
77, 78, 78, 78, 79, 79, 60, 60, 60, 61, 59, 52, 47, 41, 36,
30, 24, 17, 9, 0, -8, -10, -11, -14, -13, -18, -25, -31, -38,
-44, -50, -57, -63, -68, -74, -81, -89, -96, -101, -104, -107,
-109, -110, -86, -84, -85, -82, -80, -77, -73, -67, -62, -55,
-48, -42, -35, -30, -20, -11, -2, 5, 6, 6, 7, 11, 16, 21, 26,
34, 41, 46, 49, 52, 55, 56, 48, 49, 51, 51, 52, 52, 52, 52,
52, 51, 51, 50, 47, 47, 50, 48, 46, 42, 38, 35, 31, 27, 24,
20, 16, 12, 11, 12, 10, 8, 4, -1, -6, -11, -16, -19, -17, -21,
-24, -27, -32, -34, -37, -38, -40, -41, -40, -40, -42, -41,
-44, -45, -43, -41, -34, -31, -28, -24, -21, -18, -14, -10,
-5, -1, 2, 5, 8, 7, 4, 3, 2, 2, 4, 6, 8, 9, 9, 10, 10, 10, 10,
9, 8, 9, 11, 14, 13, 12, 11, 10, 8, 7, 6, 5, 3, 2, 2, -1, -1,
-3, -3, -4, -4, -5, -4, -6, -7, -9, -5, -1, -1, 0, 1, 0, -2,
-3, -4, -5, -5, -8, -13, -13, -13, -12, -13, -12, -11, -11,
-9, -8, -7, -5, -3, -1, 2, 4, 6, 9, 10, 11, 14, 18, 21, 24,
27, 30, 34, 37, 37, 39, 40 };
atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
}
else
if( tb >= 0.10 )
{
static const double AttenCorrScale = 210.0;
static const signed char AttenCorrs[] = {
-113, -118, -122, -125, -126, -97, -95, -92, -92, -89, -82,
-75, -69, -48, -42, -36, -30, -22, -14, -5, -2, 1, 6, 13, 22,
28, 35, 41, 48, 55, 56, 56, 61, 65, 71, 77, 81, 83, 85, 85,
74, 74, 73, 72, 71, 70, 68, 64, 59, 56, 49, 52, 46, 42, 36,
32, 26, 20, 13, 7, -2, -6, -10, -15, -20, -27, -33, -38, -44,
-43, -48, -53, -57, -63, -69, -73, -75, -79, -81, -74, -76,
-77, -77, -78, -81, -80, -80, -78, -76, -65, -62, -59, -56,
-51, -48, -44, -38, -33, -25, -19, -13, -5, -1, 2, 7, 13, 17,
21, 25, 30, 35, 40, 45, 50, 53, 56, 57, 55, 58, 59, 62, 64,
67, 67, 68, 68, 62, 61, 61, 59, 59, 57, 57, 55, 52, 48, 42,
38, 35, 31, 26, 20, 15, 13, 10, 7, 3, -2, -8, -13, -17, -23,
-28, -34, -37, -40, -41, -45, -48, -50, -53, -57, -59, -62,
-63, -63, -57, -57, -56, -56, -54, -54, -53, -49, -48, -41,
-38, -33, -31, -26, -23, -18, -12, -9, -7, -7, -3, 0, 5, 9,
14, 16, 20, 22, 21, 23, 25, 27, 28, 29, 34, 33, 35, 33, 31,
30, 29, 29, 26, 26, 25, 24, 20, 19, 15, 10, 8, 4, 1, -2, -6,
-10, -16, -19, -23, -26, -27, -30, -34, -39, -43, -47, -51,
-52, -54, -56, -58, -59, -62, -63, -66, -65, -65, -64, -59,
-57, -54, -52, -48, -44, -42, -37, -32, -22, -17, -10, -3, 5,
13, 22, 30, 40, 50, 60, 72 };
atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
}
else
{
static const double AttenCorrScale = 196.0;
static const signed char AttenCorrs[] = {
-15, -17, -20, -20, -20, -21, -20, -16, -17, -18, -17, -13,
-12, -11, -9, -7, -5, -4, -1, 1, 3, 4, 5, 6, 7, 9, 9, 10, 10,
10, 11, 11, 11, 12, 12, 12, 10, 11, 10, 10, 8, 10, 11, 10, 11,
11, 13, 14, 15, 19, 27, 26, 23, 18, 14, 8, 4, -2, -6, -12,
-17, -23, -28, -33, -37, -42, -46, -49, -53, -57, -60, -61,
-64, -65, -67, -66, -66, -66, -65, -64, -61, -59, -56, -52,
-48, -42, -38, -31, -27, -19, -13, -7, -1, 8, 14, 22, 29, 37,
45, 52, 59, 66, 73, 80, 86, 91, 96, 100, 104, 108, 111, 114,
115, 117, 118, 120, 120, 118, 117, 114, 113, 111, 107, 103,
99, 95, 89, 84, 78, 72, 66, 60, 52, 44, 37, 30, 21, 14, 6, -3,
-11, -18, -26, -34, -43, -51, -58, -65, -73, -78, -85, -90,
-97, -102, -107, -113, -115, -118, -121, -125, -125, -126,
-126, -126, -125, -124, -121, -119, -115, -111, -109, -101,
-102, -95, -88, -81, -73, -67, -63, -54, -47, -40, -33, -26,
-18, -11, -5, 2, 8, 14, 19, 25, 31, 36, 37, 43, 47, 49, 51,
52, 57, 57, 56, 57, 58, 58, 58, 57, 56, 52, 52, 50, 48, 44,
41, 39, 37, 33, 31, 26, 24, 21, 18, 14, 11, 8, 4, 2, -2, -5,
-7, -9, -11, -13, -15, -16, -18, -19, -20, -23, -24, -24, -25,
-27, -26, -27, -29, -30, -31, -32, -35, -36, -39, -40, -44,
-46, -51, -54, -59, -63, -69, -76, -83, -91, -98 };
atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
}
pwr = 7.43932822146293e-8 * sqr( atten ) + 0.000102747434588003 *
cos( 0.00785021930010397 * atten ) * cos( 0.633854318781239 +
0.103208573657699 * atten ) - 0.00798132247867036 -
0.000903555213543865 * atten - 0.0969365532127236 * exp(
0.0779275237937911 * atten ) - 1.37304948662012e-5 * atten * cos(
0.00785021930010397 * atten );
if( pwr <= 0.067665322581 )
{
if( tb >= 0.25 )
{
hl = 2.6778150875894 / tb + 300.547590563091 * atan( atan(
2.68959772209918 * pwr )) / ( 5.5099277187035 * tb - tb *
tanh( cos( asinh( atten ))));
fo1 = 0.987205355829873 * tb + 1.00011788929851 * atan2(
-0.321432067051302 - 6.19131357321578 * sqrt( pwr ),
hl + -1.14861472207245 / ( hl - 14.1821147585957 ) + pow(
0.9521145021664, pow( atan2( 1.12018764830637, tb ),
2.10988901686912 * hl - 20.9691278378345 )));
}
else
if( tb >= 0.10 )
{
hl = ( 1.56688617018066 + 142.064321294568 * pwr +
0.00419441117131136 * cos( 243.633511747297 * pwr ) -
0.022953443903576 * atten - 0.026629568860284 * cos(
127.715550622571 * pwr )) / tb;
fo1 = 0.982299356642411 * tb + 0.999441744774215 * asinh((
-0.361783054039583 - 5.80540593623676 * sqrt( pwr )) /
hl );
}
else
{
hl = ( 2.45739657014937 + 269.183679500541 * pwr * cos(
5.73225668178813 + atan2( cosh( 0.988861169868941 -
17.2201556280744 * pwr ), 1.08340138240431 * pwr ))) / tb;
fo1 = 2.291956939 * tb + 0.01942450693 * sqr( tb ) * hl -
4.67538973161837 * pwr * tb - 1.668433124 * tb *
pow( pwr, pwr );
}
}
else
{
if( tb >= 0.25 )
{
hl = ( 1.50258368698213 + 158.556968859477 * asinh( pwr ) *
tanh( 57.9466246871383 * tanh( pwr )) -
0.0105440479814834 * atten ) / tb;
fo1 = 0.994024401639321 * tb + ( -0.236282717577215 -
6.8724924545387 * sqrt( sin( pwr ))) / hl;
}
else
if( tb >= 0.10 )
{
hl = ( 1.50277377248945 + 158.222625721046 * asinh( pwr ) *
tanh( 1.02875299001715 + 42.072277322604 * pwr ) -
0.0108380943845632 * atten ) / tb;
fo1 = 0.992539376734551 * tb + ( -0.251747813037178 -
6.74159892452584 * sqrt( tanh( tanh( tan( pwr ))))) / hl;
}
else
{
hl = ( 1.15990238966306 * pwr - 5.02124037125213 * sqr(
pwr ) - 0.158676856669827 * atten * cos( 1.1609073390614 *
pwr - 6.33932586197475 * pwr * sqr( pwr ))) / tb;
fo1 = 0.867344453126885 * tb + 0.052693817907757 * tb * log(
pwr ) + 0.0895511178735932 * tb * atan( 59.7538527741309 *
pwr ) - 0.0745653568081453 * pwr * tb;
}
}
double WinParams[ 2 ];
WinParams[ 0 ] = 125.0;
WinParams[ 1 ] = pwr;
CDSPSincFilterGen sinc;
sinc.Len2 = 0.25 * hl / ReqNormFreq;
sinc.Freq1 = 0.0;
sinc.Freq2 = R8B_PI * ( 1.0 - fo1 ) * ReqNormFreq;
sinc.initBand( CDSPSincFilterGen :: wftKaiser, WinParams, true );
KernelLen = sinc.KernelLen;
BlockLenBits = getBitOccupancy( KernelLen - 1 ) + R8B_EXTFFT;
const int BlockLen = 1 << BlockLenBits;
KernelBlock.alloc( BlockLen * 2 );
sinc.generateBand( &KernelBlock[ 0 ],
&CDSPSincFilterGen :: calcWindowKaiser );
if( ReqPhase == fprLinearPhase )
{
IsZeroPhase = true;
Latency = sinc.fl2;
LatencyFrac = 0.0;
}
else
{
IsZeroPhase = false;
double DCGroupDelay;
calcMinPhaseTransform( &KernelBlock[ 0 ], KernelLen, 16, false,
&DCGroupDelay );
Latency = (int) DCGroupDelay;
LatencyFrac = DCGroupDelay - Latency;
}
CDSPRealFFTKeeper ffto( BlockLenBits + 1 );
if( IsZeroPhase )
{
// Calculate DC gain.
double s = 0.0;
int i;
for( i = 0; i < KernelLen; i++ )
{
s += KernelBlock[ i ];
}
s = ffto -> getInvMulConst() * ReqGain / s;
// Time-shift the filter so that zero-phase response is produced.
// Simultaneously multiply by "s".
for( i = 0; i <= sinc.fl2; i++ )
{
KernelBlock[ i ] = KernelBlock[ sinc.fl2 + i ] * s;
}
for( i = 1; i <= sinc.fl2; i++ )
{
KernelBlock[ BlockLen * 2 - i ] = KernelBlock[ i ];
}
memset( &KernelBlock[ sinc.fl2 + 1 ], 0,
( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
ffto -> forward( KernelBlock );
ffto -> convertToZP( KernelBlock );
}
else
{
normalizeFIRFilter( &KernelBlock[ 0 ], KernelLen,
ffto -> getInvMulConst() * ReqGain );
memset( &KernelBlock[ KernelLen ], 0,
( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
ffto -> forward( KernelBlock );
}
R8BCONSOLE( "CDSPFIRFilter: flt_len=%i latency=%i nfreq=%.4f "
"tb=%.1f att=%.1f gain=%.3f\n", KernelLen, Latency,
ReqNormFreq, ReqTransBand, ReqAtten, ReqGain );
}
};
/**
* @brief FIR filter cache class.
*
* Class that implements cache for calculated FIR filters. The required FIR
* filter should be obtained via the getLPFilter() static function.
*/
class CDSPFIRFilterCache : public R8B_BASECLASS
{
R8BNOCTOR( CDSPFIRFilterCache );
friend class CDSPFIRFilter;
public:
/**
* @return The number of filters present in the cache now. This value can
* be monitored for debugging "forgotten" filters.
*/
static int getObjCount()
{
R8BSYNC( StateSync );
return( ObjCount );
}
/**
* Function calculates or returns reference to a previously calculated
* (cached) low-pass FIR filter. Note that the real transition band and
* attenuation achieved by the filter varies with the magnitude of the
* required attenuation, and are never 100% exact.
*
* @param ReqNormFreq Required normalized frequency, in the range 0 to 1,
* inclusive. This is the point after which the stop-band spans.
* @param ReqTransBand Required transition band, in percent of the
* 0 to ReqNormFreq spectral bandwidth, in the range
* CDSPFIRFilter::getLPMinTransBand() to
* CDSPFIRFilter::getLPMaxTransBand(), inclusive. The transition band
* specifies the part of the spectrum between the -3 dB and ReqNormFreq
* points. The real resulting -3 dB point varies in the range from -3.00
* to -3.05 dB, but is generally very close to -3 dB.
* @param ReqAtten Required stop-band attenuation in decibel, in the range
* CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
* inclusive. Note that the actual stop-band attenuation of the resulting
* filter may be 0.40-4.46 dB higher.
* @param ReqPhase Required filter's phase response.
* @param ReqGain Required overall filter's gain (1.0 for unity gain).
* @param AttenCorrs Attentuation correction table, to pass to the filter
* generation function. For internal use.
* @return A reference to a new or a previously calculated low-pass FIR
* filter object with the required characteristics. A reference count is
* incremented in the returned filter object which should be released
* after use via the CDSPFIRFilter::unref() function.
*/
static CDSPFIRFilter& getLPFilter( const double ReqNormFreq,
const double ReqTransBand, const double ReqAtten,
const EDSPFilterPhaseResponse ReqPhase, const double ReqGain,
const double* const AttenCorrs = NULL )
{
R8BASSERT( ReqNormFreq > 0.0 && ReqNormFreq <= 1.0 );
R8BASSERT( ReqTransBand >= CDSPFIRFilter :: getLPMinTransBand() );
R8BASSERT( ReqTransBand <= CDSPFIRFilter :: getLPMaxTransBand() );
R8BASSERT( ReqAtten >= CDSPFIRFilter :: getLPMinAtten() );
R8BASSERT( ReqAtten <= CDSPFIRFilter :: getLPMaxAtten() );
R8BASSERT( ReqGain > 0.0 );
R8BSYNC( StateSync );
CDSPFIRFilter* PrevObj = NULL;
CDSPFIRFilter* CurObj = Objects;
while( CurObj != NULL )
{
if( CurObj -> ReqNormFreq == ReqNormFreq &&
CurObj -> ReqTransBand == ReqTransBand &&
CurObj -> ReqAtten == ReqAtten &&
CurObj -> ReqPhase == ReqPhase &&
CurObj -> ReqGain == ReqGain )
{
break;
}
if( CurObj -> Next == NULL && ObjCount >= R8B_FILTER_CACHE_MAX )
{
if( CurObj -> RefCount == 0 )
{
// Delete the last filter which is not used.
PrevObj -> Next = NULL;
delete CurObj;
ObjCount--;
}
else
{
// Move the last filter to the top of the list since it
// seems to be in use for a long time.
PrevObj -> Next = NULL;
CurObj -> Next = Objects.unkeep();
Objects = CurObj;
}
CurObj = NULL;
break;
}
PrevObj = CurObj;
CurObj = CurObj -> Next;
}
if( CurObj != NULL )
{
CurObj -> RefCount++;
if( PrevObj == NULL )
{
return( *CurObj );
}
// Remove the filter from the list temporarily.
PrevObj -> Next = CurObj -> Next;
}
else
{
// Create a new filter object (with RefCount == 1) and build the
// filter kernel.
CurObj = new CDSPFIRFilter();
CurObj -> ReqNormFreq = ReqNormFreq;
CurObj -> ReqTransBand = ReqTransBand;
CurObj -> ReqAtten = ReqAtten;
CurObj -> ReqPhase = ReqPhase;
CurObj -> ReqGain = ReqGain;
ObjCount++;
CurObj -> buildLPFilter( AttenCorrs );
}
// Insert the filter at the start of the list.
CurObj -> Next = Objects.unkeep();
Objects = CurObj;
return( *CurObj );
}
private:
static CSyncObject StateSync; ///< Cache state synchronizer.
///<
static CPtrKeeper< CDSPFIRFilter* > Objects; ///< The chain of cached
///< objects.
///<
static int ObjCount; ///< The number of objects currently preset in the
///< cache.
///<
};
// ---------------------------------------------------------------------------
// CDSPFIRFilter PUBLIC
// ---------------------------------------------------------------------------
inline void CDSPFIRFilter :: unref()
{
R8BSYNC( CDSPFIRFilterCache :: StateSync );
RefCount--;
}
// ---------------------------------------------------------------------------
} // namespace r8b
#endif // R8B_CDSPFIRFILTER_INCLUDED

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,401 @@
//$ nobt
//$ nocpp
/**
* @file CDSPHBDownsampler.h
*
* @brief Half-band downsampling convolver class.
*
* This file includes half-band downsampling convolver class.
*
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8B_CDSPHBDOWNSAMPLER_INCLUDED
#define R8B_CDSPHBDOWNSAMPLER_INCLUDED
#include "CDSPHBUpsampler.h"
namespace r8b {
/**
* @brief Half-band downsampler class.
*
* Class implements brute-force half-band 2X downsampling that uses small
* sparse symmetric FIR filters. The output has 2.0 gain.
*/
class CDSPHBDownsampler : public CDSPProcessor
{
public:
/**
* Constructor initalizes the half-band downsampler.
*
* @param ReqAtten Required half-band filter attentuation.
* @param SteepIndex Steepness index - 0=steepest. Corresponds to general
* downsampling ratio, e.g. at 4x downsampling 0 is used, at 8x
* downsampling 1 is used, etc.
* @param IsThird "True" if 1/3 resampling is performed.
* @param PrevLatency Latency, in samples (any value >=0), which was left
* in the output signal by a previous process. Whole-number latency will
* be consumed by *this object while remaining fractional latency can be
* obtained via the getLatencyFrac() function.
*/
CDSPHBDownsampler( const double ReqAtten, const int SteepIndex,
const bool IsThird, const double PrevLatency )
{
static const CConvolveFn FltConvFn[ 14 ] = {
&CDSPHBDownsampler :: convolve1, &CDSPHBDownsampler :: convolve2,
&CDSPHBDownsampler :: convolve3, &CDSPHBDownsampler :: convolve4,
&CDSPHBDownsampler :: convolve5, &CDSPHBDownsampler :: convolve6,
&CDSPHBDownsampler :: convolve7, &CDSPHBDownsampler :: convolve8,
&CDSPHBDownsampler :: convolve9, &CDSPHBDownsampler :: convolve10,
&CDSPHBDownsampler :: convolve11, &CDSPHBDownsampler :: convolve12,
&CDSPHBDownsampler :: convolve13,
&CDSPHBDownsampler :: convolve14 };
int fltt;
double att;
if( IsThird )
{
CDSPHBUpsampler :: getHBFilterThird( ReqAtten, SteepIndex, fltp,
fltt, att );
}
else
{
CDSPHBUpsampler :: getHBFilter( ReqAtten, SteepIndex, fltp, fltt,
att );
}
convfn = FltConvFn[ fltt - 1 ];
fll = fltt * 2 - 1;
fl2 = fll;
flo = fll + fl2;
flb = BufLen - fll;
BufRP = Buf + fll;
LatencyFrac = PrevLatency * 0.5;
Latency = (int) LatencyFrac;
LatencyFrac -= Latency;
R8BASSERT( Latency >= 0 );
R8BCONSOLE( "CDSPHBDownsampler: taps=%i third=%i att=%.1f io=1/2\n",
fltt, (int) IsThird, att );
clear();
}
virtual int getLatency() const
{
return( 0 );
}
virtual double getLatencyFrac() const
{
return( LatencyFrac );
}
virtual int getMaxOutLen( const int MaxInLen ) const
{
R8BASSERT( MaxInLen >= 0 );
return(( MaxInLen + 1 ) >> 1 );
}
virtual void clear()
{
LatencyLeft = Latency;
BufLeft = 0;
WritePos = 0;
ReadPos = flb; // Set "read" position to account for filter's latency.
memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
}
virtual int process( double* ip, int l, double*& op0 )
{
R8BASSERT( l >= 0 );
double* op = op0;
while( l > 0 )
{
// Add new input samples to both halves of the ring buffer.
const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
double* const wp1 = Buf + WritePos;
memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
if( WritePos < flo )
{
const int c = min( b, flo - WritePos );
memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
}
ip += b;
WritePos = ( WritePos + b ) & BufLenMask;
l -= b;
BufLeft += b;
// Produce output.
if( BufLeft > fl2 )
{
const int c = ( BufLeft - fl2 + 1 ) >> 1;
double* const opend = op + c;
( *convfn )( op, opend, fltp, BufRP, ReadPos );
op = opend;
const int c2 = c + c;
ReadPos = ( ReadPos + c2 ) & BufLenMask;
BufLeft -= c2;
}
}
int ol = (int) ( op - op0 );
if( LatencyLeft != 0 )
{
if( LatencyLeft >= ol )
{
LatencyLeft -= ol;
return( 0 );
}
ol -= LatencyLeft;
op0 += LatencyLeft;
LatencyLeft = 0;
}
return( ol );
}
private:
static const int BufLenBits = 10; ///< The length of the ring buffer,
///< expressed as Nth power of 2. This value can be reduced if it is
///< known that only short input buffers will be passed to the
///< interpolator. The minimum value of this parameter is 5, and
///< 1 << BufLenBits should be at least 3 times larger than the
///< FilterLen.
///<
static const int BufLen = 1 << BufLenBits; ///< The length of the ring
///< buffer. The actual length is twice as long to allow "beyond max
///< position" positioning.
///<
static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
///< position wrapping.
///<
double Buf[ BufLen + 54 ]; ///< The ring buffer, including overrun
///< protection for the largest filter.
///<
const double* fltp; ///< Half-band filter taps.
///<
int fll; ///< Input latency.
///<
int fl2; ///< Right-side filter length.
///<
int flo; ///< Overrun length.
///<
int flb; ///< Initial read position and maximal buffer write length.
///<
const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
///<
int Latency; ///< Initial latency that should be removed from the output.
///<
double LatencyFrac; ///< Fractional latency left on the output.
///<
int BufLeft; ///< The number of samples left in the buffer to process.
///< When this value is below FilterLenD2Plus1, the interpolation
///< cycle ends.
///<
int WritePos; ///< The current buffer write position. Incremented together
///< with the BufLeft variable.
///<
int ReadPos; ///< The current buffer read position.
///<
int LatencyLeft; ///< Latency left to remove.
///<
typedef void( *CConvolveFn )( double* op, double* const opend,
const double* const flt, const double* const rp0, int rpos ); ///<
///< Convolution function type.
///<
CConvolveFn convfn; ///< Convolution function in use.
///<
#define R8BHBC1( fn ) \
static void fn( double* op, double* const opend, const double* const flt, \
const double* const rp0, int rpos ) \
{ \
while( op != opend ) \
{ \
const double* const rp = rp0 + rpos; \
*op = rp[ 0 ] +
#define R8BHBC2 \
rpos = ( rpos + 2 ) & BufLenMask; \
op++; \
} \
}
R8BHBC1( convolve1 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]);
R8BHBC2
R8BHBC1( convolve2 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]);
R8BHBC2
R8BHBC1( convolve3 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]);
R8BHBC2
R8BHBC1( convolve4 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]);
R8BHBC2
R8BHBC1( convolve5 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]);
R8BHBC2
R8BHBC1( convolve6 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]);
R8BHBC2
R8BHBC1( convolve7 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]);
R8BHBC2
R8BHBC1( convolve8 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]);
R8BHBC2
R8BHBC1( convolve9 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]);
R8BHBC2
R8BHBC1( convolve10 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]);
R8BHBC2
R8BHBC1( convolve11 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]);
R8BHBC2
R8BHBC1( convolve12 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]);
R8BHBC2
R8BHBC1( convolve13 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]);
R8BHBC2
R8BHBC1( convolve14 )
flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]) +
flt[ 13 ] * ( rp[ 27 ] + rp[ -27 ]);
R8BHBC2
#undef R8BHBC1
#undef R8BHBC2
};
// ---------------------------------------------------------------------------
} // namespace r8b
#endif // R8B_CDSPHBDOWNSAMPLER_INCLUDED

View File

@ -0,0 +1,804 @@
//$ nobt
//$ nocpp
/**
* @file CDSPHBUpsampler.h
*
* @brief Half-band upsampling class.
*
* This file includes half-band upsampling class.
*
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8B_CDSPHBUPSAMPLER_INCLUDED
#define R8B_CDSPHBUPSAMPLER_INCLUDED
#include "CDSPProcessor.h"
namespace r8b {
/**
* @brief Half-band upsampling class.
*
* Class implements brute-force half-band 2X upsampling that uses small
* sparse symmetric FIR filters. It is very efficient and should be used at
* latter upsampling steps after initial steep 2X upsampling.
*/
class CDSPHBUpsampler : public CDSPProcessor
{
public:
/**
* Function that provides filter data for various steepness indices and
* attenuations.
*
* @param ReqAtten Required half-band filter attentuation.
* @param SteepIndex Steepness index - 0=steepest. Corresponds to general
* upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
* etc.
*/
static void getHBFilter( const double ReqAtten, const int SteepIndex,
const double*& flt, int& fltt, double& att )
{
static const double HBKernel_4A[ 4 ] = { // -54.5176 dB, 4
6.1729335650971517e-001, -1.5963945620743250e-001,
5.5073370934086312e-002, -1.4603578989932850e-002,};
static const double HBKernel_5A[ 5 ] = { // -66.3075 dB, 4
6.2068807424902472e-001, -1.6827573634467302e-001,
6.5263016720721170e-002, -2.2483331611592005e-002,
5.2917326684281110e-003,};
static const double HBKernel_6A[ 6 ] = { // -89.5271 dB, 4
6.2187202340480707e-001, -1.7132842113816371e-001,
6.9019169178765674e-002, -2.5799728312695277e-002,
7.4880112525741666e-003, -1.2844465869952567e-003,};
static const double HBKernel_7A[ 7 ] = { // -105.2842 dB, 4
6.2354494135775851e-001, -1.7571220703702045e-001,
7.4529843603968457e-002, -3.0701736822442153e-002,
1.0716755639039573e-002, -2.7833422930759735e-003,
4.1118797093875510e-004,};
static const double HBKernel_8A[ 8 ] = { // -121.0063 dB, 4
6.2488363107953926e-001, -1.7924942606514119e-001,
7.9068155655640557e-002, -3.4907523415495731e-002,
1.3710256799907897e-002, -4.3991142586987933e-003,
1.0259190163889602e-003, -1.3278941979339359e-004,};
static const double HBKernel_9A[ 9 ] = { // -136.6982 dB, 4
6.2597763804021977e-001, -1.8216414325139055e-001,
8.2879104876726728e-002, -3.8563442248249404e-002,
1.6471530499739394e-002, -6.0489108881335227e-003,
1.7805283804140392e-003, -3.7533200112729561e-004,
4.3172840558735476e-005,};
static const double HBKernel_10A[ 10 ] = { // -152.3572 dB, 4
6.2688767582974092e-001, -1.8460766807559420e-001,
8.6128943000481864e-002, -4.1774474147006607e-002,
1.9014801985747346e-002, -7.6870397465866507e-003,
2.6264590175341853e-003, -7.1106660285478562e-004,
1.3645852036179345e-004, -1.4113888783332969e-005,};
static const double HBKernel_11A[ 11 ] = { // -183.7962 dB, 4
6.2667167706948146e-001, -1.8407153342635879e-001,
8.5529995610836046e-002, -4.1346831462361310e-002,
1.8844831691322637e-002, -7.7125170365394992e-003,
2.7268674860562087e-003, -7.9745028501057233e-004,
1.8116344606360795e-004, -2.8569149754241848e-005,
2.3667022010173616e-006,};
static const double HBKernel_12A[ 12 ] = { // -199.4768 dB, 4
6.2747849730367999e-001, -1.8623616784506747e-001,
8.8409755898467945e-002, -4.4207468821462342e-002,
2.1149175945115381e-002, -9.2551508371115209e-003,
3.5871562170822330e-003, -1.1923167653750219e-003,
3.2627812189920129e-004, -6.9106902511490413e-005,
1.0122897863125124e-005, -7.7531878906846174e-007,};
static const double HBKernel_13A[ 13 ] = { // -215.1364 dB, 4
6.2816416252367324e-001, -1.8809076955230414e-001,
9.0918539867353029e-002, -4.6765502683599310e-002,
2.3287520498995663e-002, -1.0760627245014184e-002,
4.4853922948425683e-003, -1.6438775426910800e-003,
5.1441312354764978e-004, -1.3211725685765050e-004,
2.6191319837779187e-005, -3.5802430606313093e-006,
2.5491278270628601e-007,};
static const double HBKernel_14A[ 14 ] = { // -230.7526 dB, 4
6.2875473120929948e-001, -1.8969941936903847e-001,
9.3126094480960403e-002, -4.9067251179869126e-002,
2.5273008851199916e-002, -1.2218646153393291e-002,
5.4048942085580280e-003, -2.1409919546078581e-003,
7.4250292812927973e-004, -2.1924542206832172e-004,
5.3015808983125091e-005, -9.8743034923598196e-006,
1.2650391141650221e-006, -8.4146674637474946e-008,};
static const int FltCountA = 11;
static const int FlttBaseA = 4;
static const double FltAttensA[ FltCountA ] = {
54.5176, 66.3075, 89.5271, 105.2842, 121.0063, 136.6982, 152.3572, 183.7962, 199.4768, 215.1364, 230.7526, };
static const double* const FltPtrsA[ FltCountA ] = {
HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, HBKernel_10A, HBKernel_11A, HBKernel_12A, HBKernel_13A, HBKernel_14A, };
static const double HBKernel_2B[ 2 ] = { // -56.6007 dB, 8
5.7361525854329076e-001, -7.5092074924827903e-002,};
static const double HBKernel_3B[ 3 ] = { // -83.0295 dB, 8
5.9277038608066912e-001, -1.0851340190268854e-001,
1.5813570475513079e-002,};
static const double HBKernel_4B[ 4 ] = { // -123.4724 dB, 8
6.0140277542879617e-001, -1.2564483854574138e-001,
2.7446500598038322e-002, -3.2051079559057435e-003,};
static const double HBKernel_5B[ 5 ] = { // -152.4411 dB, 8
6.0818642429088932e-001, -1.3981140187175697e-001,
3.8489164054503623e-002, -7.6218861797853104e-003,
7.5772358130952392e-004,};
static const double HBKernel_6B[ 6 ] = { // -181.2501 dB, 8
6.1278392271464355e-001, -1.5000053762513338e-001,
4.7575323511364960e-002, -1.2320702802243476e-002,
2.1462442592348487e-003, -1.8425092381892940e-004,};
static const double HBKernel_7B[ 7 ] = { // -209.9472 dB, 8
6.1610372263478952e-001, -1.5767891882524138e-001,
5.5089691170294691e-002, -1.6895755656366061e-002,
3.9416643438213977e-003, -6.0603623791604668e-004,
4.5632602433393365e-005,};
static const double HBKernel_8B[ 8 ] = { // -238.5616 dB, 8
6.1861282914465976e-001, -1.6367179451225150e-001,
6.1369861342939716e-002, -2.1184466539006987e-002,
5.9623357510842061e-003, -1.2483098507454090e-003,
1.7099297537964702e-004, -1.1448313239478885e-005,};
static const int FltCountB = 7;
static const int FlttBaseB = 2;
static const double FltAttensB[ FltCountB ] = {
56.6007, 83.0295, 123.4724, 152.4411, 181.2501, 209.9472, 238.5616, };
static const double* const FltPtrsB[ FltCountB ] = {
HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, HBKernel_7B, HBKernel_8B, };
static const double HBKernel_2C[ 2 ] = { // -89.0473 dB, 16
5.6430278013478008e-001, -6.4338068855763375e-002,};
static const double HBKernel_3C[ 3 ] = { // -130.8951 dB, 16
5.8706402915551448e-001, -9.9362380958670449e-002,
1.2298637065869358e-002,};
static const double HBKernel_4C[ 4 ] = { // -172.3192 dB, 16
5.9896586134984675e-001, -1.2111680603434927e-001,
2.4763118076458895e-002, -2.6121758132212989e-003,};
static const double HBKernel_5C[ 5 ] = { // -213.4984 dB, 16
6.0626808285230716e-001, -1.3588224032740795e-001,
3.5544305238309003e-002, -6.5127022377289654e-003,
5.8255449565950768e-004,};
static const double HBKernel_6C[ 6 ] = { // -254.5186 dB, 16
6.1120171263351242e-001, -1.4654486853757870e-001,
4.4582959299131253e-002, -1.0840543858123995e-002,
1.7343706485509962e-003, -1.3363018567985596e-004,};
static const int FltCountC = 5;
static const int FlttBaseC = 2;
static const double FltAttensC[ FltCountC ] = {
89.0473, 130.8951, 172.3192, 213.4984, 254.5186, };
static const double* const FltPtrsC[ FltCountC ] = {
HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, HBKernel_6C, };
static const double HBKernel_1D[ 1 ] = { // -54.4754 dB, 32
5.0188900022775451e-001,};
static const double HBKernel_2D[ 2 ] = { // -113.2139 dB, 32
5.6295152180538044e-001, -6.2953706070191726e-002,};
static const double HBKernel_3D[ 3 ] = { // -167.1447 dB, 32
5.8621968728755036e-001, -9.8080551656524531e-002,
1.1860868761997080e-002,};
static const double HBKernel_4D[ 4 ] = { // -220.6519 dB, 32
5.9835028657163591e-001, -1.1999986086623511e-001,
2.4132530854004228e-002, -2.4829565686819706e-003,};
static const int FltCountD = 4;
static const int FlttBaseD = 1;
static const double FltAttensD[ FltCountD ] = {
54.4754, 113.2139, 167.1447, 220.6519, };
static const double* const FltPtrsD[ FltCountD ] = {
HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
static const double HBKernel_1E[ 1 ] = { // -66.5391 dB, 64
5.0047102586416625e-001,};
static const double HBKernel_2E[ 2 ] = { // -137.3173 dB, 64
5.6261293163933568e-001, -6.2613067826620017e-002,};
static const double HBKernel_3E[ 3 ] = { // -203.2997 dB, 64
5.8600808139396787e-001, -9.7762185880067784e-002,
1.1754104554493029e-002,};
static const double HBKernel_4E[ 4 ] = { // -268.8550 dB, 64
5.9819599352772002e-001, -1.1972157555011861e-001,
2.3977305567947922e-002, -2.4517235455853992e-003,};
static const int FltCountE = 4;
static const int FlttBaseE = 1;
static const double FltAttensE[ FltCountE ] = {
66.5391, 137.3173, 203.2997, 268.8550, };
static const double* const FltPtrsE[ FltCountE ] = {
HBKernel_1E, HBKernel_2E, HBKernel_3E, HBKernel_4E, };
static const double HBKernel_1F[ 1 ] = { // -82.4633 dB, 128
5.0007530666642896e-001,};
static const double HBKernel_2F[ 2 ] = { // -161.4049 dB, 128
5.6252823610146030e-001, -6.2528244608044792e-002,};
static const double HBKernel_3F[ 3 ] = { // -239.4313 dB, 128
5.8595514744674237e-001, -9.7682725156791952e-002,
1.1727577711117231e-002,};
static const int FltCountF = 3;
static const int FlttBaseF = 1;
static const double FltAttensF[ FltCountF ] = {
82.4633, 161.4049, 239.4313, };
static const double* const FltPtrsF[ FltCountF ] = {
HBKernel_1F, HBKernel_2F, HBKernel_3F, };
static const double HBKernel_1G[ 1 ] = { // -94.5052 dB, 256
5.0001882524896712e-001,};
static const double HBKernel_2G[ 2 ] = { // -185.4886 dB, 256
5.6250705922479682e-001, -6.2507059756378394e-002,};
static const double HBKernel_3G[ 3 ] = { // -275.5501 dB, 256
5.8594191201187384e-001, -9.7662868266991207e-002,
1.1720956255134043e-002,};
static const int FltCountG = 3;
static const int FlttBaseG = 1;
static const double FltAttensG[ FltCountG ] = {
94.5052, 185.4886, 275.5501, };
static const double* const FltPtrsG[ FltCountG ] = {
HBKernel_1G, HBKernel_2G, HBKernel_3G, };
int k = 0;
if( SteepIndex <= 0 )
{
while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsA[ k ];
fltt = FlttBaseA + k;
att = FltAttensA[ k ];
}
else
if( SteepIndex == 1 )
{
while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsB[ k ];
fltt = FlttBaseB + k;
att = FltAttensB[ k ];
}
else
if( SteepIndex == 2 )
{
while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsC[ k ];
fltt = FlttBaseC + k;
att = FltAttensC[ k ];
}
else
if( SteepIndex == 3 )
{
while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsD[ k ];
fltt = FlttBaseD + k;
att = FltAttensD[ k ];
}
else
if( SteepIndex == 4 )
{
while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsE[ k ];
fltt = FlttBaseE + k;
att = FltAttensE[ k ];
}
else
if( SteepIndex == 5 )
{
while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsF[ k ];
fltt = FlttBaseF + k;
att = FltAttensF[ k ];
}
else
{
while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsG[ k ];
fltt = FlttBaseG + k;
att = FltAttensG[ k ];
}
}
/**
* Function that provides filter data for various steepness indices and
* attenuations. For 1/3 resamplings.
*
* @param ReqAtten Required half-band filter attentuation.
* @param SteepIndex Steepness index - 0=steepest. Corresponds to general
* upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
* etc.
*/
static void getHBFilterThird( const double ReqAtten, const int SteepIndex,
const double*& flt, int& fltt, double& att )
{
static const double HBKernel_3A[ 3 ] = { // -66.3726 dB, 6
5.9811355069551475e-001, -1.1793396656733847e-001,
2.0300557211946322e-002,};
static const double HBKernel_4A[ 4 ] = { // -90.2546 dB, 6
6.0645499250612578e-001, -1.3555496505481171e-001,
3.4022804962365975e-002, -4.9535418595798757e-003,};
static const double HBKernel_5A[ 5 ] = { // -126.5507 dB, 6
6.1014115058940210e-001, -1.4393081816629907e-001,
4.1760642892852244e-002, -8.9692183234056175e-003,
9.9871340618342070e-004,};
static const double HBKernel_6A[ 6 ] = { // -150.1839 dB, 6
6.1439563420546972e-001, -1.5360187826905250e-001,
5.0840891345687034e-002, -1.4053648740561121e-002,
2.6771286587305727e-003, -2.5815816044823123e-004,};
static const double HBKernel_7A[ 7 ] = { // -173.7068 dB, 6
6.1747493476329918e-001, -1.6087373733313212e-001,
5.8263075641409430e-002, -1.8872408173431318e-002,
4.7421376543513687e-003, -8.0196529612267474e-004,
6.7964807393798996e-005,};
static const double HBKernel_8A[ 8 ] = { // -197.1454 dB, 6
6.1980610947775050e-001, -1.6654070578314714e-001,
6.4416567441730327e-002, -2.3307744348719822e-002,
6.9909157372312443e-003, -1.5871946293364403e-003,
2.4017727382382763e-004, -1.8125308241541697e-005,};
static const double HBKernel_9A[ 9 ] = { // -220.5199 dB, 6
6.2163188951899306e-001, -1.7108115323810941e-001,
6.9588370095600260e-002, -2.7339625080613838e-002,
9.2954469183791771e-003, -2.5537179959555429e-003,
5.2572290897951021e-004, -7.1813356135154921e-005,
4.8802382808892154e-006,};
static const int FltCountA = 7;
static const int FlttBaseA = 3;
static const double FltAttensA[ FltCountA ] = {
66.3726, 90.2546, 126.5507, 150.1839, 173.7068, 197.1454, 220.5199, };
static const double* const FltPtrsA[ FltCountA ] = {
HBKernel_3A, HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, };
static const double HBKernel_2B[ 2 ] = { // -71.0965 dB, 12
5.6748544264806311e-001, -6.7764090509431732e-002,};
static const double HBKernel_3B[ 3 ] = { // -115.7707 dB, 12
5.8793612182667199e-001, -1.0070583248877293e-001,
1.2771337947163834e-002,};
static const double HBKernel_4B[ 4 ] = { // -152.1535 dB, 12
5.9960155600862808e-001, -1.2228154335199336e-001,
2.5433718917694709e-002, -2.7537562530837154e-003,};
static const double HBKernel_5B[ 5 ] = { // -188.2914 dB, 12
6.0676859170554343e-001, -1.3689667009876413e-001,
3.6288512631926818e-002, -6.7838855305035351e-003,
6.2345167677087547e-004,};
static const double HBKernel_6B[ 6 ] = { // -224.2705 dB, 12
6.1161456341904397e-001, -1.4743901958274458e-001,
4.5344160157313275e-002, -1.1207371780924531e-002,
1.8328497112594935e-003, -1.4518193006359589e-004,};
static const int FltCountB = 5;
static const int FlttBaseB = 2;
static const double FltAttensB[ FltCountB ] = {
71.0965, 115.7707, 152.1535, 188.2914, 224.2705, };
static const double* const FltPtrsB[ FltCountB ] = {
HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, };
static const double HBKernel_1C[ 1 ] = { // -49.4544 dB, 24
5.0336730531430562e-001,};
static const double HBKernel_2C[ 2 ] = { // -103.1970 dB, 24
5.6330232648142819e-001, -6.3309247177420452e-002,};
static const double HBKernel_3C[ 3 ] = { // -152.1195 dB, 24
5.8643891113580415e-001, -9.8411593011583087e-002,
1.1972706651483846e-002,};
static const double HBKernel_4C[ 4 ] = { // -200.6182 dB, 24
5.9851012363917222e-001, -1.2028885239978220e-001,
2.4294521083140615e-002, -2.5157924156609776e-003,};
static const double HBKernel_5C[ 5 ] = { // -248.8730 dB, 24
6.0590922882030196e-001, -1.3515953438018685e-001,
3.5020857107815606e-002, -6.3256196990467053e-003,
5.5506815147598793e-004,};
static const int FltCountC = 5;
static const int FlttBaseC = 1;
static const double FltAttensC[ FltCountC ] = {
49.4544, 103.1970, 152.1195, 200.6182, 248.8730, };
static const double* const FltPtrsC[ FltCountC ] = {
HBKernel_1C, HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, };
static const double HBKernel_1D[ 1 ] = { // -61.5357 dB, 48
5.0083794231068057e-001,};
static const double HBKernel_2D[ 2 ] = { // -127.3167 dB, 48
5.6270074379958690e-001, -6.2701174487726344e-002,};
static const double HBKernel_3D[ 3 ] = { // -188.2990 dB, 48
5.8606296210323228e-001, -9.7844644765123029e-002,
1.1781683046528768e-002,};
static const double HBKernel_4D[ 4 ] = { // -248.8580 dB, 48
5.9823601243162516e-001, -1.1979368994739022e-001,
2.4017458606412575e-002, -2.4597810910081913e-003,};
static const int FltCountD = 4;
static const int FlttBaseD = 1;
static const double FltAttensD[ FltCountD ] = {
61.5357, 127.3167, 188.2990, 248.8580, };
static const double* const FltPtrsD[ FltCountD ] = {
HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
static const double HBKernel_1E[ 1 ] = { // -77.4651 dB, 96
5.0013388897382527e-001,};
static const double HBKernel_2E[ 2 ] = { // -151.4084 dB, 96
5.6255019604317880e-001, -6.2550222932381064e-002,};
static const double HBKernel_3E[ 3 ] = { // -224.4365 dB, 96
5.8596887234201078e-001, -9.7703321113080305e-002,
1.1734448777069783e-002,};
static const int FltCountE = 3;
static const int FlttBaseE = 1;
static const double FltAttensE[ FltCountE ] = {
77.4651, 151.4084, 224.4365, };
static const double* const FltPtrsE[ FltCountE ] = {
HBKernel_1E, HBKernel_2E, HBKernel_3E, };
static const double HBKernel_1F[ 1 ] = { // -89.5075 dB, 192
5.0003346776264190e-001,};
static const double HBKernel_2F[ 2 ] = { // -175.4932 dB, 192
5.6251254964097952e-001, -6.2512551321105267e-002,};
static const double HBKernel_3F[ 3 ] = { // -260.5645 dB, 192
5.8594534336747051e-001, -9.7668015838639821e-002,
1.1722672471262996e-002,};
static const int FltCountF = 3;
static const int FlttBaseF = 1;
static const double FltAttensF[ FltCountF ] = {
89.5075, 175.4932, 260.5645, };
static const double* const FltPtrsF[ FltCountF ] = {
HBKernel_1F, HBKernel_2F, HBKernel_3F, };
static const double HBKernel_1G[ 1 ] = { // -101.5490 dB, 384
5.0000836666064941e-001,};
static const double HBKernel_2G[ 2 ] = { // -199.5761 dB, 384
5.6250313744943459e-001, -6.2503137554435345e-002,};
static const double HBKernel_3G[ 3 ] = { // -296.5185 dB, 384
5.8593945786963764e-001, -9.7659186853499613e-002,
1.1719728983863425e-002,};
static const int FltCountG = 3;
static const int FlttBaseG = 1;
static const double FltAttensG[ FltCountG ] = {
101.5490, 199.5761, 296.5185, };
static const double* const FltPtrsG[ FltCountG ] = {
HBKernel_1G, HBKernel_2G, HBKernel_3G, };
int k = 0;
if( SteepIndex <= 0 )
{
while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsA[ k ];
fltt = FlttBaseA + k;
att = FltAttensA[ k ];
}
else
if( SteepIndex == 1 )
{
while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsB[ k ];
fltt = FlttBaseB + k;
att = FltAttensB[ k ];
}
else
if( SteepIndex == 2 )
{
while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsC[ k ];
fltt = FlttBaseC + k;
att = FltAttensC[ k ];
}
else
if( SteepIndex == 3 )
{
while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsD[ k ];
fltt = FlttBaseD + k;
att = FltAttensD[ k ];
}
else
if( SteepIndex == 4 )
{
while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsE[ k ];
fltt = FlttBaseE + k;
att = FltAttensE[ k ];
}
else
if( SteepIndex == 5 )
{
while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsF[ k ];
fltt = FlttBaseF + k;
att = FltAttensF[ k ];
}
else
{
while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
{
k++;
}
flt = FltPtrsG[ k ];
fltt = FlttBaseG + k;
att = FltAttensG[ k ];
}
}
/**
* Constructor initalizes the half-band upsampler.
*
* @param ReqAtten Required half-band filter attentuation.
* @param SteepIndex Steepness index - 0=steepest. Corresponds to general
* upsampling ratio, e.g. at 4x upsampling 0 is used, at 8x upsampling 1
* is used, etc.
* @param IsThird "True" if 1/3 of frequency response resampling is
* performed.
* @param PrevLatency Latency, in samples (any value >=0), which was left
* in the output signal by a previous process. Whole-number latency will
* be consumed by *this object while remaining fractional latency can be
* obtained via the getLatencyFrac() function.
* @param aDoConsumeLatency "True" if the output latency should be
* consumed. Does not apply to the fractional part of the latency (if such
* part is available).
*/
CDSPHBUpsampler( const double ReqAtten, const int SteepIndex,
const bool IsThird, const double PrevLatency,
const bool aDoConsumeLatency = true )
: DoConsumeLatency( aDoConsumeLatency )
{
static const CConvolveFn FltConvFn[ 14 ] = {
&CDSPHBUpsampler :: convolve1, &CDSPHBUpsampler :: convolve2,
&CDSPHBUpsampler :: convolve3, &CDSPHBUpsampler :: convolve4,
&CDSPHBUpsampler :: convolve5, &CDSPHBUpsampler :: convolve6,
&CDSPHBUpsampler :: convolve7, &CDSPHBUpsampler :: convolve8,
&CDSPHBUpsampler :: convolve9, &CDSPHBUpsampler :: convolve10,
&CDSPHBUpsampler :: convolve11, &CDSPHBUpsampler :: convolve12,
&CDSPHBUpsampler :: convolve13, &CDSPHBUpsampler :: convolve14 };
const double* fltp0;
int fltt;
double att;
if( IsThird )
{
getHBFilterThird( ReqAtten, SteepIndex, fltp0, fltt, att );
}
else
{
getHBFilter( ReqAtten, SteepIndex, fltp0, fltt, att );
}
// Copy obtained filter to address-aligned buffer.
fltp = alignptr( FltBuf, 16 );
memcpy( fltp, fltp0, fltt * sizeof( fltp[ 0 ]));
convfn = FltConvFn[ fltt - 1 ];
fll = fltt - 1;
fl2 = fltt;
flo = fll + fl2;
BufRP = Buf + fll;
LatencyFrac = PrevLatency * 2.0;
Latency = (int) LatencyFrac;
LatencyFrac -= Latency;
R8BASSERT( Latency >= 0 );
if( DoConsumeLatency )
{
flb = BufLen - fll;
}
else
{
Latency += fl2 + fl2;
flb = BufLen - flo;
}
R8BCONSOLE( "CDSPHBUpsampler: sti=%i third=%i taps=%i att=%.1f "
"io=2/1\n", SteepIndex, (int) IsThird, fltt, att );
clear();
}
virtual int getLatency() const
{
return( DoConsumeLatency ? 0 : Latency );
}
virtual double getLatencyFrac() const
{
return( LatencyFrac );
}
virtual int getMaxOutLen( const int MaxInLen ) const
{
R8BASSERT( MaxInLen >= 0 );
return( MaxInLen << 1 );
}
virtual void clear()
{
if( DoConsumeLatency )
{
LatencyLeft = Latency;
BufLeft = 0;
}
else
{
LatencyLeft = 0;
BufLeft = fl2;
}
WritePos = 0;
ReadPos = flb; // Set "read" position to account for filter's latency.
memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
}
virtual int process( double* ip, int l, double*& op0 )
{
R8BASSERT( l >= 0 );
double* op = op0;
while( l > 0 )
{
// Add new input samples to both halves of the ring buffer.
const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
double* const wp1 = Buf + WritePos;
memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
if( WritePos < flo )
{
const int c = min( b, flo - WritePos );
memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
}
ip += b;
WritePos = ( WritePos + b ) & BufLenMask;
l -= b;
BufLeft += b;
// Produce output.
if( BufLeft > fl2 )
{
const int c = BufLeft - fl2;
double* const opend = op + ( c + c );
( *convfn )( op, opend, fltp, BufRP, ReadPos );
op = opend;
ReadPos = ( ReadPos + c ) & BufLenMask;
BufLeft -= c;
}
}
int ol = (int) ( op - op0 );
if( LatencyLeft != 0 )
{
if( LatencyLeft >= ol )
{
LatencyLeft -= ol;
return( 0 );
}
ol -= LatencyLeft;
op0 += LatencyLeft;
LatencyLeft = 0;
}
return( ol );
}
private:
static const int BufLenBits = 9; ///< The length of the ring buffer,
///< expressed as Nth power of 2. This value can be reduced if it is
///< known that only short input buffers will be passed to the
///< interpolator. The minimum value of this parameter is 5, and
///< 1 << BufLenBits should be at least 3 times larger than the
///< FilterLen.
///<
static const int BufLen = 1 << BufLenBits; ///< The length of the ring
///< buffer. The actual length is twice as long to allow "beyond max
///< position" positioning.
///<
static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
///< position wrapping.
///<
double Buf[ BufLen + 27 ]; ///< The ring buffer, including overrun
///< protection for the largest filter.
///<
double FltBuf[ 14 + 2 ]; ///< Holder for half-band filter taps, used with
///< 16-byte address-aligning, for SIMD use.
///<
double* fltp; ///< Half-band filter taps, points to FltBuf.
///<
int fll; ///< Input latency.
///<
int fl2; ///< Right-side filter length.
///<
int flo; ///< Overrrun length.
///<
int flb; ///< Initial read position and maximal buffer write length.
///<
const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
///<
bool DoConsumeLatency; ///< "True" if the output latency should be
///< consumed. Does not apply to the fractional part of the latency
///< (if such part is available).
///<
int Latency; ///< Initial latency that should be removed from the output.
///<
double LatencyFrac; ///< Fractional latency left on the output.
///<
int BufLeft; ///< The number of samples left in the buffer to process.
///< When this value is below FilterLenD2Plus1, the interpolation
///< cycle ends.
///<
int WritePos; ///< The current buffer write position. Incremented together
///< with the BufLeft variable.
///<
int ReadPos; ///< The current buffer read position.
///<
int LatencyLeft; ///< Latency left to remove.
///<
typedef void( *CConvolveFn )( double* op, double* const opend,
const double* const flt, const double* const rp0, int rpos ); ///<
///< Convolution function type.
///<
CConvolveFn convfn; ///< Convolution function in use.
///<
#define R8BHBC1( fn ) \
static void fn( double* op, double* const opend, const double* const flt, \
const double* const rp0, int rpos ) \
{ \
while( op != opend ) \
{ \
const double* const rp = rp0 + rpos; \
op[ 0 ] = rp[ 0 ];
#define R8BHBC2 \
rpos = ( rpos + 1 ) & BufLenMask; \
op += 2; \
} \
}
#include "CDSPHBUpsampler.inc"
#undef R8BHBC1
#undef R8BHBC2
};
// ---------------------------------------------------------------------------
} // namespace r8b
#endif // R8B_CDSPHBUPSAMPLER_INCLUDED

View File

@ -0,0 +1,711 @@
// Auto-generated by `genhbc`, do not edit!
#if defined( R8B_SSE2 )
R8BHBC1( convolve1 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
R8BHBC2
R8BHBC1( convolve2 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2
R8BHBC1( convolve3 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
op[ 1 ] += flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
R8BHBC2
R8BHBC1( convolve4 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2
R8BHBC1( convolve5 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
op[ 1 ] += flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
R8BHBC2
R8BHBC1( convolve6 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2
R8BHBC1( convolve7 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
op[ 1 ] += flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
R8BHBC2
R8BHBC1( convolve8 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2
R8BHBC1( convolve9 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
op[ 1 ] += flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
R8BHBC2
R8BHBC1( convolve10 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2
R8BHBC1( convolve11 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
op[ 1 ] += flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
R8BHBC2
R8BHBC1( convolve12 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2
R8BHBC1( convolve13 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
op[ 1 ] += flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
R8BHBC2
R8BHBC1( convolve14 )
__m128d v1, v2, m1, s1;
v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = m1;
__m128d v3, v4, m3, s3;
v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = m3;
v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
s3 = _mm_add_pd( s3, m3 );
v2 = _mm_loadu_pd( rp - 13 ); v1 = _mm_loadu_pd( rp + 13 );
m1 = _mm_mul_pd( _mm_load_pd( flt + 12 ),
_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
s1 = _mm_add_pd( s1, m1 );
s1 = _mm_add_pd( s1, s3 );
_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
R8BHBC2
#elif defined( R8B_NEON )
R8BHBC1( convolve1 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
R8BHBC2
R8BHBC1( convolve2 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2
R8BHBC1( convolve3 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
R8BHBC2
R8BHBC1( convolve4 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2
R8BHBC1( convolve5 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
R8BHBC2
R8BHBC1( convolve6 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2
R8BHBC1( convolve7 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
R8BHBC2
R8BHBC1( convolve8 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2
R8BHBC1( convolve9 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
R8BHBC2
R8BHBC1( convolve10 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2
R8BHBC1( convolve11 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
R8BHBC2
R8BHBC1( convolve12 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2
R8BHBC1( convolve13 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 ) + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
R8BHBC2
R8BHBC1( convolve14 )
float64x2_t v1, v2, s1;
s1 = vdupq_n_f64( 0.0 );
v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
float64x2_t v3, v4, s3;
s3 = vdupq_n_f64( 0.0 );
v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
v2 = vld1q_f64( rp - 13 ); v1 = vld1q_f64( rp + 13 );
s1 = vmlaq_f64( s1, vld1q_f64( flt + 12 ),
vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
s1 = vaddq_f64( s1, s3 );
op[ 1 ] = vaddvq_f64( s1 );
R8BHBC2
#else // SIMD
R8BHBC1( convolve1 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
R8BHBC2
R8BHBC1( convolve2 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]);
R8BHBC2
R8BHBC1( convolve3 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
R8BHBC2
R8BHBC1( convolve4 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]);
R8BHBC2
R8BHBC1( convolve5 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
R8BHBC2
R8BHBC1( convolve6 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]);
R8BHBC2
R8BHBC1( convolve7 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
R8BHBC2
R8BHBC1( convolve8 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]);
R8BHBC2
R8BHBC1( convolve9 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
R8BHBC2
R8BHBC1( convolve10 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]);
R8BHBC2
R8BHBC1( convolve11 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
R8BHBC2
R8BHBC1( convolve12 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ]);
R8BHBC2
R8BHBC1( convolve13 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
R8BHBC2
R8BHBC1( convolve14 )
op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ])
+ flt[ 13 ] * ( rp[ 14 ] + rp[ -13 ]);
R8BHBC2
#endif // SIMD

View File

@ -0,0 +1,103 @@
//$ nobt
//$ nocpp
/**
* @file CDSPProcessor.h
*
* @brief The base virtual class for DSP processing algorithms.
*
* This file includes the base virtual class for DSP processing algorithm
* classes like FIR filtering and interpolation.
*
* r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8B_CDSPPROCESSOR_INCLUDED
#define R8B_CDSPPROCESSOR_INCLUDED
#include "r8bbase.h"
namespace r8b {
/**
* @brief The base virtual class for DSP processing algorithms.
*
* This class can be used as a base class for various DSP processing
* algorithms (processors). DSP processors that are derived from this class
* can be seamlessly integrated into various DSP processing graphs.
*/
class CDSPProcessor : public R8B_BASECLASS
{
R8BNOCTOR( CDSPProcessor );
public:
CDSPProcessor()
{
}
virtual ~CDSPProcessor()
{
}
/**
* @return The latency, in samples, which is present in the output signal.
* This value is usually zero if the DSP processor "consumes" the latency
* automatically.
*/
virtual int getLatency() const = 0;
/**
* @return Fractional latency, in samples, which is present in the output
* signal. This value is usually zero if a linear-phase filtering is used.
* With minimum-phase filters in use, this value can be non-zero even if
* the getLatency() function returns zero.
*/
virtual double getLatencyFrac() const = 0;
/**
* @param MaxInLen The number of samples planned to process at once, at
* most.
* @return The maximal length of the output buffer required when
* processing the "MaxInLen" number of input samples.
*/
virtual int getMaxOutLen( const int MaxInLen ) const = 0;
/**
* Function clears (resets) the state of *this object and returns it to
* the state after construction. All input data accumulated in the
* internal buffer so far will be discarded.
*/
virtual void clear() = 0;
/**
* Function performs DSP processing.
*
* @param ip Input data pointer.
* @param l0 How many samples to process.
* @param[out] op0 Output data pointer. The capacity of this buffer should
* be equal to the value returned by the getMaxOutLen() function for the
* given "l0". This buffer can be equal to "ip" only if the
* getMaxOutLen( l0 ) function returned a value lesser than "l0". This
* pointer can be incremented on function's return if latency compensation
* was performed by the processor. Note that on function's return, this
* pointer may point to some internal buffers, including the "ip" buffer,
* ignoring the originally passed value.
* @return The number of output samples written to the "op0" buffer and
* available after processing. This value can be smaller or larger in
* comparison to the original "l0" value due to processing and filter's
* latency compensation that took place, and due to resampling if it was
* performed.
*/
virtual int process( double* ip, int l0, double*& op0 ) = 0;
};
} // namespace r8b
#endif // R8B_CDSPPROCESSOR_INCLUDED

View File

@ -0,0 +1,820 @@
//$ nobt
//$ nocpp
/**
* @file CDSPRealFFT.h
*
* @brief Real-valued FFT transform class.
*
* This file includes FFT object implementation. All created FFT objects are
* kept in a global list after use, for a future reusal. Such approach
* minimizes time necessary to initialize the FFT object of the required
* length.
*
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8B_CDSPREALFFT_INCLUDED
#define R8B_CDSPREALFFT_INCLUDED
#include "r8bbase.h"
#if !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
#include "fft4g.h"
#endif // !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
#if R8B_PFFFT
#include "pffft.h"
#endif // R8B_PFFFT
#if R8B_PFFFT_DOUBLE
#include "pffft_double/pffft_double.h"
#endif // R8B_PFFFT_DOUBLE
namespace r8b {
/**
* @brief Real-valued FFT transform class.
*
* Class implements a wrapper for real-valued discrete fast Fourier transform
* functions. The object of this class can only be obtained via the
* CDSPRealFFTKeeper class.
*
* Uses functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
*
* Also uses Intel IPP library functions if available (if the R8B_IPP=1 macro
* was defined). Note that IPP library's FFT functions are 2-3 times more
* efficient on the modern Intel Core i7-3770K processor than Ooura's
* functions. It may be worthwhile investing in IPP. Note, that FFT functions
* take less than 20% of the overall sample rate conversion time. However,
* when the "power of 2" resampling is used the performance of FFT functions
* becomes "everything".
*/
class CDSPRealFFT : public R8B_BASECLASS
{
R8BNOCTOR( CDSPRealFFT );
friend class CDSPRealFFTKeeper;
public:
/**
* @return A multiplication constant that should be used after inverse
* transform to obtain a correct value scale.
*/
double getInvMulConst() const
{
return( InvMulConst );
}
/**
* @return The length (the number of real values in a transform) of *this
* FFT object, expressed as Nth power of 2.
*/
int getLenBits() const
{
return( LenBits );
}
/**
* @return The length (the number of real values in a transform) of *this
* FFT object.
*/
int getLen() const
{
return( Len );
}
/**
* Function performs in-place forward FFT.
*
* @param[in,out] p Pointer to data block to transform, length should be
* equal to *this object's getLen().
*/
void forward( double* const p ) const
{
#if R8B_FLOATFFT
float* const op = (float*) p;
int i;
for( i = 0; i < Len; i++ )
{
op[ i ] = (float) p[ i ];
}
#endif // R8B_FLOATFFT
#if R8B_IPP
ippsFFTFwd_RToPerm_64f( p, p, SPtr, WorkBuffer );
#elif R8B_PFFFT
pffft_transform_ordered( setup, op, op, work, PFFFT_FORWARD );
#elif R8B_PFFFT_DOUBLE
pffftd_transform_ordered( setup, p, p, work, PFFFT_FORWARD );
#else // R8B_PFFFT_DOUBLE
ooura_fft :: rdft( Len, 1, p, wi.getPtr(), wd.getPtr() );
#endif // R8B_IPP
}
/**
* Function performs in-place inverse FFT.
*
* @param[in,out] p Pointer to data block to transform, length should be
* equal to *this object's getLen().
*/
void inverse( double* const p ) const
{
#if R8B_IPP
ippsFFTInv_PermToR_64f( p, p, SPtr, WorkBuffer );
#elif R8B_PFFFT
pffft_transform_ordered( setup, (float*) p, (float*) p, work,
PFFFT_BACKWARD );
#elif R8B_PFFFT_DOUBLE
pffftd_transform_ordered( setup, p, p, work, PFFFT_BACKWARD );
#else // R8B_PFFFT_DOUBLE
ooura_fft :: rdft( Len, -1, p, wi.getPtr(), wd.getPtr() );
#endif // R8B_IPP
#if R8B_FLOATFFT
const float* const ip = (const float*) p;
int i;
for( i = Len - 1; i >= 0; i-- )
{
p[ i ] = ip[ i ];
}
#endif // R8B_FLOATFFT
}
/**
* Function multiplies two complex-valued data blocks and places result in
* a new data block. Length of all data blocks should be equal to *this
* object's block length. Input blocks should have been produced with the
* forward() function of *this object.
*
* @param aip1 Input data block 1.
* @param aip2 Input data block 2.
* @param[out] aop Output data block, should not be equal to aip1 nor
* aip2.
*/
void multiplyBlocks( const double* const aip1, const double* const aip2,
double* const aop ) const
{
#if R8B_FLOATFFT
const float* const ip1 = (const float*) aip1;
const float* const ip2 = (const float*) aip2;
float* const op = (float*) aop;
#else // R8B_FLOATFFT
const double* const ip1 = aip1;
const double* const ip2 = aip2;
double* const op = aop;
#endif // R8B_FLOATFFT
#if R8B_IPP
ippsMulPerm_64f( (Ipp64f*) ip1, (Ipp64f*) ip2, (Ipp64f*) op, Len );
#else // R8B_IPP
op[ 0 ] = ip1[ 0 ] * ip2[ 0 ];
op[ 1 ] = ip1[ 1 ] * ip2[ 1 ];
int i = 2;
while( i < Len )
{
op[ i ] = ip1[ i ] * ip2[ i ] - ip1[ i + 1 ] * ip2[ i + 1 ];
op[ i + 1 ] = ip1[ i ] * ip2[ i + 1 ] + ip1[ i + 1 ] * ip2[ i ];
i += 2;
}
#endif // R8B_IPP
}
/**
* Function multiplies two complex-valued data blocks in-place. Length of
* both data blocks should be equal to *this object's block length. Blocks
* should have been produced with the forward() function of *this object.
*
* @param aip Input data block 1.
* @param[in,out] aop Output/input data block 2.
*/
void multiplyBlocks( const double* const aip, double* const aop ) const
{
#if R8B_FLOATFFT
const float* const ip = (const float*) aip;
float* const op = (float*) aop;
float t;
#else // R8B_FLOATFFT
const double* const ip = aip;
double* const op = aop;
#if !R8B_IPP
double t;
#endif // !R8B_IPP
#endif // R8B_FLOATFFT
#if R8B_IPP
ippsMulPerm_64f( (Ipp64f*) op, (Ipp64f*) ip, (Ipp64f*) op, Len );
#else // R8B_IPP
op[ 0 ] *= ip[ 0 ];
op[ 1 ] *= ip[ 1 ];
int i = 2;
while( i < Len )
{
t = op[ i ] * ip[ i ] - op[ i + 1 ] * ip[ i + 1 ];
op[ i + 1 ] = op[ i ] * ip[ i + 1 ] + op[ i + 1 ] * ip[ i ];
op[ i ] = t;
i += 2;
}
#endif // R8B_IPP
}
/**
* Function multiplies two complex-valued data blocks in-place,
* considering that the "ip" block contains "zero-phase" response. Length
* of both data blocks should be equal to *this object's block length.
* Blocks should have been produced with the forward() function of *this
* object.
*
* @param aip Input data block 1, "zero-phase" response. This block should
* be first transformed via the convertToZP() function.
* @param[in,out] aop Output/input data block 2.
*/
void multiplyBlocksZP( const double* const aip, double* const aop ) const
{
#if R8B_FLOATFFT
const float* const ip = (const float*) aip;
float* const op = (float*) aop;
#else // R8B_FLOATFFT
const double* ip = aip;
double* op = aop;
#endif // R8B_FLOATFFT
// SIMD implementations assume that pointers are address-aligned.
#if !R8B_FLOATFFT && defined( R8B_SSE2 )
int c8 = Len >> 3;
while( c8 != 0 )
{
const __m128d iv1 = _mm_load_pd( ip );
const __m128d iv2 = _mm_load_pd( ip + 2 );
const __m128d ov1 = _mm_load_pd( op );
const __m128d ov2 = _mm_load_pd( op + 2 );
_mm_store_pd( op, _mm_mul_pd( iv1, ov1 ));
_mm_store_pd( op + 2, _mm_mul_pd( iv2, ov2 ));
const __m128d iv3 = _mm_load_pd( ip + 4 );
const __m128d ov3 = _mm_load_pd( op + 4 );
const __m128d iv4 = _mm_load_pd( ip + 6 );
const __m128d ov4 = _mm_load_pd( op + 6 );
_mm_store_pd( op + 4, _mm_mul_pd( iv3, ov3 ));
_mm_store_pd( op + 6, _mm_mul_pd( iv4, ov4 ));
ip += 8;
op += 8;
c8--;
}
int c = Len & 7;
while( c != 0 )
{
*op *= *ip;
ip++;
op++;
c--;
}
#elif !R8B_FLOATFFT && defined( R8B_NEON )
int c8 = Len >> 3;
while( c8 != 0 )
{
const float64x2_t iv1 = vld1q_f64( ip );
const float64x2_t iv2 = vld1q_f64( ip + 2 );
const float64x2_t ov1 = vld1q_f64( op );
const float64x2_t ov2 = vld1q_f64( op + 2 );
vst1q_f64( op, vmulq_f64( iv1, ov1 ));
vst1q_f64( op + 2, vmulq_f64( iv2, ov2 ));
const float64x2_t iv3 = vld1q_f64( ip + 4 );
const float64x2_t iv4 = vld1q_f64( ip + 6 );
const float64x2_t ov3 = vld1q_f64( op + 4 );
const float64x2_t ov4 = vld1q_f64( op + 6 );
vst1q_f64( op + 4, vmulq_f64( iv3, ov3 ));
vst1q_f64( op + 6, vmulq_f64( iv4, ov4 ));
ip += 8;
op += 8;
c8--;
}
int c = Len & 7;
while( c != 0 )
{
*op *= *ip;
ip++;
op++;
c--;
}
#else // SIMD
int i;
for( i = 0; i < Len; i++ )
{
op[ i ] *= ip[ i ];
}
#endif // SIMD
}
/**
* Function converts the specified forward-transformed block into
* "zero-phase" form suitable for use with the multiplyBlocksZ() function.
*
* @param[in,out] ap Block to transform.
*/
void convertToZP( double* const ap ) const
{
#if R8B_FLOATFFT
float* const p = (float*) ap;
#else // R8B_FLOATFFT
double* const p = ap;
#endif // R8B_FLOATFFT
int i = 2;
while( i < Len )
{
p[ i + 1 ] = p[ i ];
i += 2;
}
}
private:
int LenBits; ///< Length of FFT block (expressed as Nth power of 2).
///<
int Len; ///< Length of FFT block (number of real values).
///<
double InvMulConst; ///< Inverse FFT multiply constant.
///<
CDSPRealFFT* Next; ///< Next object in a singly-linked list.
///<
#if R8B_IPP
IppsFFTSpec_R_64f* SPtr; ///< Pointer to initialized data buffer
///< to be passed to IPP's FFT functions.
///<
CFixedBuffer< unsigned char > SpecBuffer; ///< Working buffer.
///<
CFixedBuffer< unsigned char > WorkBuffer; ///< Working buffer.
///<
#elif R8B_PFFFT
PFFFT_Setup* setup; ///< PFFFT setup object.
///<
CFixedBuffer< float > work; ///< Working buffer.
///<
#elif R8B_PFFFT_DOUBLE
PFFFTD_Setup* setup; ///< PFFFTD setup object.
///<
CFixedBuffer< double > work; ///< Working buffer.
///<
#else // R8B_PFFFT_DOUBLE
CFixedBuffer< int > wi; ///< Working buffer (ints).
///<
CFixedBuffer< double > wd; ///< Working buffer (doubles).
///<
#endif // R8B_IPP
/**
* A simple class that keeps the pointer to the object and deletes it
* automatically.
*/
class CObjKeeper
{
R8BNOCTOR( CObjKeeper );
public:
CObjKeeper()
: Object( NULL )
{
}
~CObjKeeper()
{
delete Object;
}
CObjKeeper& operator = ( CDSPRealFFT* const aObject )
{
Object = aObject;
return( *this );
}
operator CDSPRealFFT* () const
{
return( Object );
}
private:
CDSPRealFFT* Object; ///< FFT object being kept.
///<
};
CDSPRealFFT()
{
}
/**
* Constructor initializes FFT object.
*
* @param aLenBits The length of FFT block (Nth power of 2), specifies the
* number of real values in a block. Values from 1 to 30 inclusive are
* supported.
*/
CDSPRealFFT( const int aLenBits )
: LenBits( aLenBits )
, Len( 1 << aLenBits )
#if R8B_IPP
, InvMulConst( 1.0 / Len )
#elif R8B_PFFFT
, InvMulConst( 1.0 / Len )
#elif R8B_PFFFT_DOUBLE
, InvMulConst( 1.0 / Len )
#else // R8B_PFFFT_DOUBLE
, InvMulConst( 2.0 / Len )
#endif // R8B_IPP
{
#if R8B_IPP
int SpecSize;
int SpecBufferSize;
int BufferSize;
ippsFFTGetSize_R_64f( LenBits, IPP_FFT_NODIV_BY_ANY,
ippAlgHintFast, &SpecSize, &SpecBufferSize, &BufferSize );
CFixedBuffer< unsigned char > InitBuffer( SpecBufferSize );
SpecBuffer.alloc( SpecSize );
WorkBuffer.alloc( BufferSize );
ippsFFTInit_R_64f( &SPtr, LenBits, IPP_FFT_NODIV_BY_ANY,
ippAlgHintFast, SpecBuffer, InitBuffer );
#elif R8B_PFFFT
setup = pffft_new_setup( Len, PFFFT_REAL );
work.alloc( Len );
#elif R8B_PFFFT_DOUBLE
setup = pffftd_new_setup( Len, PFFFT_REAL );
work.alloc( Len );
#else // R8B_PFFFT_DOUBLE
wi.alloc( (int) ceil( 2.0 + sqrt( (double) ( Len >> 1 ))));
wi[ 0 ] = 0;
wd.alloc( Len >> 1 );
#endif // R8B_IPP
}
~CDSPRealFFT()
{
#if R8B_PFFFT
pffft_destroy_setup( setup );
#elif R8B_PFFFT_DOUBLE
pffftd_destroy_setup( setup );
#endif // R8B_PFFFT_DOUBLE
delete Next;
}
};
/**
* @brief A "keeper" class for real-valued FFT transform objects.
*
* Class implements "keeper" functionality for handling CDSPRealFFT objects.
* The allocated FFT objects are placed on the global static list of objects
* for future reuse instead of deallocation.
*/
class CDSPRealFFTKeeper : public R8B_BASECLASS
{
R8BNOCTOR( CDSPRealFFTKeeper );
public:
CDSPRealFFTKeeper()
: Object( NULL )
{
}
/**
* Function acquires FFT object with the specified block length.
*
* @param LenBits The length of FFT block (Nth power of 2), in the range
* [1; 30] inclusive, specifies the number of real values in a FFT block.
*/
CDSPRealFFTKeeper( const int LenBits )
{
Object = acquire( LenBits );
}
~CDSPRealFFTKeeper()
{
if( Object != NULL )
{
release( Object );
}
}
/**
* @return Pointer to the acquired FFT object.
*/
const CDSPRealFFT* operator -> () const
{
R8BASSERT( Object != NULL );
return( Object );
}
/**
* Function acquires FFT object with the specified block length. This
* function can be called any number of times.
*
* @param LenBits The length of FFT block (Nth power of 2), in the range
* [1; 30] inclusive, specifies the number of real values in a FFT block.
*/
void init( const int LenBits )
{
if( Object != NULL )
{
if( Object -> LenBits == LenBits )
{
return;
}
release( Object );
}
Object = acquire( LenBits );
}
/**
* Function releases a previously acquired FFT object.
*/
void reset()
{
if( Object != NULL )
{
release( Object );
Object = NULL;
}
}
private:
CDSPRealFFT* Object; ///< FFT object.
///<
static CSyncObject StateSync; ///< FFTObjects synchronizer.
///<
static CDSPRealFFT :: CObjKeeper FFTObjects[]; ///< Pool of FFT objects of
///< various lengths.
///<
/**
* Function acquires FFT object from the global pool.
*
* @param LenBits FFT block length (expressed as Nth power of 2).
*/
CDSPRealFFT* acquire( const int LenBits )
{
R8BASSERT( LenBits > 0 && LenBits <= 30 );
R8BSYNC( StateSync );
if( FFTObjects[ LenBits ] == NULL )
{
return( new CDSPRealFFT( LenBits ));
}
CDSPRealFFT* ffto = FFTObjects[ LenBits ];
FFTObjects[ LenBits ] = ffto -> Next;
return( ffto );
}
/**
* Function releases a previously acquired FFT object.
*
* @param ffto FFT object to release.
*/
void release( CDSPRealFFT* const ffto )
{
R8BSYNC( StateSync );
ffto -> Next = FFTObjects[ ffto -> LenBits ];
FFTObjects[ ffto -> LenBits ] = ffto;
}
};
/**
* Function calculates the minimum-phase transform of the filter kernel, using
* a discrete Hilbert transform in cepstrum domain.
*
* For more details, see part III.B of
* http://www.hpl.hp.com/personal/Niranjan_Damera-Venkata/files/ComplexMinPhase.pdf
*
* @param[in,out] Kernel Filter kernel buffer.
* @param KernelLen Filter kernel's length, in samples.
* @param LenMult Kernel length multiplier. Used as a coefficient of the
* "oversampling" in the frequency domain. Such oversampling is needed to
* improve the precision of the minimum-phase transform. If the filter's
* attenuation is high, this multiplier should be increased or otherwise the
* required attenuation will not be reached due to "smoothing" effect of this
* transform.
* @param DoFinalMul "True" if the final multiplication after transform should
* be performed or not. Such multiplication returns the gain of the signal to
* its original value. This parameter can be set to "false" if normalization
* of the resulting filter kernel is planned to be used.
* @param[out] DCGroupDelay If not NULL, this variable receives group delay
* at DC offset, in samples (can be a non-integer value).
*/
inline void calcMinPhaseTransform( double* const Kernel, const int KernelLen,
const int LenMult = 2, const bool DoFinalMul = true,
double* const DCGroupDelay = NULL )
{
R8BASSERT( KernelLen > 0 );
R8BASSERT( LenMult >= 2 );
const int LenBits = getBitOccupancy(( KernelLen * LenMult ) - 1 );
const int Len = 1 << LenBits;
const int Len2 = Len >> 1;
int i;
CFixedBuffer< double > ip( Len );
CFixedBuffer< double > ip2( Len2 + 1 );
memcpy( &ip[ 0 ], Kernel, KernelLen * sizeof( ip[ 0 ]));
memset( &ip[ KernelLen ], 0, ( Len - KernelLen ) * sizeof( ip[ 0 ]));
CDSPRealFFTKeeper ffto( LenBits );
ffto -> forward( ip );
// Create the "log |c|" spectrum while saving the original power spectrum
// in the "ip2" buffer.
#if R8B_FLOATFFT
float* const aip = (float*) &ip[ 0 ];
float* const aip2 = (float*) &ip2[ 0 ];
const float nzbias = 1e-35;
#else // R8B_FLOATFFT
double* const aip = &ip[ 0 ];
double* const aip2 = &ip2[ 0 ];
const double nzbias = 1e-300;
#endif // R8B_FLOATFFT
aip2[ 0 ] = aip[ 0 ];
aip[ 0 ] = log( fabs( aip[ 0 ]) + nzbias );
aip2[ Len2 ] = aip[ 1 ];
aip[ 1 ] = log( fabs( aip[ 1 ]) + nzbias );
for( i = 1; i < Len2; i++ )
{
aip2[ i ] = sqrt( aip[ i * 2 ] * aip[ i * 2 ] +
aip[ i * 2 + 1 ] * aip[ i * 2 + 1 ]);
aip[ i * 2 ] = log( aip2[ i ] + nzbias );
aip[ i * 2 + 1 ] = 0.0;
}
// Convert to cepstrum and apply discrete Hilbert transform.
ffto -> inverse( ip );
const double m1 = ffto -> getInvMulConst();
const double m2 = -m1;
ip[ 0 ] = 0.0;
for( i = 1; i < Len2; i++ )
{
ip[ i ] *= m1;
}
ip[ Len2 ] = 0.0;
for( i = Len2 + 1; i < Len; i++ )
{
ip[ i ] *= m2;
}
// Convert Hilbert-transformed cepstrum back to the "log |c|" spectrum and
// perform its exponentiation, multiplied by the power spectrum previously
// saved in the "ip2" buffer.
ffto -> forward( ip );
aip[ 0 ] = aip2[ 0 ];
aip[ 1 ] = aip2[ Len2 ];
for( i = 1; i < Len2; i++ )
{
aip[ i * 2 + 0 ] = cos( aip[ i * 2 + 1 ]) * aip2[ i ];
aip[ i * 2 + 1 ] = sin( aip[ i * 2 + 1 ]) * aip2[ i ];
}
ffto -> inverse( ip );
if( DoFinalMul )
{
for( i = 0; i < KernelLen; i++ )
{
Kernel[ i ] = ip[ i ] * m1;
}
}
else
{
memcpy( &Kernel[ 0 ], &ip[ 0 ], KernelLen * sizeof( Kernel[ 0 ]));
}
if( DCGroupDelay != NULL )
{
double tmp;
calcFIRFilterResponseAndGroupDelay( Kernel, KernelLen, 0.0,
tmp, tmp, *DCGroupDelay );
}
}
} // namespace r8b
#endif // VOX_CDSPREALFFT_INCLUDED

View File

@ -0,0 +1,769 @@
//$ nobt
//$ nocpp
/**
* @file CDSPResampler.h
*
* @brief The master sample rate converter (resampler) class.
*
* This file includes the master sample rate converter (resampler) class that
* combines all elements of this library into a single front-end class.
*
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8B_CDSPRESAMPLER_INCLUDED
#define R8B_CDSPRESAMPLER_INCLUDED
#include "CDSPHBDownsampler.h"
#include "CDSPHBUpsampler.h"
#include "CDSPBlockConvolver.h"
#include "CDSPFracInterpolator.h"
namespace r8b {
/**
* @brief The master sample rate converter (resampler) class.
*
* This class can be considered the "master" sample rate converter (resampler)
* class since it combines all functionality of this library into a single
* front-end class to perform sample rate conversion to/from any sample rate,
* including non-integer sample rates.
*
* Note that objects of this class can be constructed on the stack as it has a
* small member data size. The default template parameters of this class are
* suited for 27-bit fixed point resampling.
*
* Use the CDSPResampler16 class for 16-bit resampling.
*
* Use the CDSPResampler16IR class for 16-bit impulse response resampling.
*
* Use the CDSPResampler24 class for 24-bit resampling (including 32-bit
* floating point resampling).
*/
class CDSPResampler : public CDSPProcessor
{
public:
/**
* Constructor initalizes the resampler object.
*
* Note that increasing the transition band and decreasing attenuation
* reduces the filter length, this in turn reduces the "input before
* output" delay. However, the filter length has only a minor influence on
* the overall resampling speed.
*
* It should be noted that the ReqAtten specifies the minimal difference
* between the loudest input signal component and the produced aliasing
* artifacts during resampling. For example, if ReqAtten=100 was specified
* when performing 2x upsampling, the analysis of the resulting signal may
* display high-frequency components which are quieter than the loudest
* part of the input signal by only 100 decibel meaning the high-frequency
* part did not become "magically" completely silent after resampling. You
* have to specify a higher ReqAtten value if you need a totally clean
* high-frequency content. On the other hand, it may not be reasonable to
* have a high-frequency content cleaner than the input signal itself: if
* the input signal is 16-bit, setting ReqAtten to 180 will make its
* high-frequency content 24-bit, but the original part of the signal will
* remain 16-bit.
*
* @param SrcSampleRate Source signal sample rate. Both sample rates can
* be specified as a ratio, e.g. SrcSampleRate = 1.0, DstSampleRate = 2.0.
* @param DstSampleRate Destination signal sample rate. The "power of 2"
* ratios between the source and destination sample rates force resampler
* to use several fast "power of 2" resampling steps, without using
* fractional interpolation at all.
* @param aMaxInLen The maximal planned length of the input buffer (in
* samples) that will be passed to the resampler. The resampler relies on
* this value as it allocates intermediate buffers. Input buffers longer
* than this value should never be supplied to the resampler. Note that
* upsampling produces more samples than was provided on input, so at
* higher upsampling ratios it is advisable to use smaller MaxInLen
* values to reduce memory footprint. When downsampling, a larger MaxInLen
* is suggested in order to increase downsampling performance.
* @param ReqTransBand Required transition band, in percent of the
* spectral space of the input signal (or the output signal if
* downsampling is performed) between filter's -3 dB point and the Nyquist
* frequency. The range is from CDSPFIRFilter::getLPMinTransBand() to
* CDSPFIRFilter::getLPMaxTransBand(), inclusive. When upsampling 88200 or
* 96000 audio to a higher sample rates the ReqTransBand can be
* considerably increased, up to 30. The selection of ReqTransBand depends
* on the level of desire to preserve the high-frequency content. While
* values 0.5 to 2 are extremely "greedy" settings, not necessary in most
* cases, values 2 to 3 can be used in most cases. Values 3 to 4 are
* relaxed settings, but they still offer a flat frequency response up to
* 21kHz with 44.1k source or destination sample rate.
* @param ReqAtten Required stop-band attenuation in decibel, in the
* range CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
* inclusive. The actual attenuation may be 0.40-4.46 dB higher. The
* general formula for selecting the ReqAtten is 6.02 * Bits + 40, where
* "Bits" is the bit resolution (e.g. 16, 24), "40" is an added resolution
* for dynamic signals; this value can be decreased to 20 to 10 if the
* signal being resampled is non-dynamic (e.g., an impulse response or
* filter, with a non-steep frequency response).
* @param ReqPhase Required filter's phase response. Note that this
* setting does not affect interpolator's phase response which is always
* linear-phase. Also note that if the "power of 2" resampling was engaged
* by the resampler together with the minimum-phase response, the audio
* stream may become fractionally delayed, depending on the minimum-phase
* filter's actual fractional delay. Linear-phase filters do not have
* fractional delay.
* @see CDSPFIRFilterCache::getLPFilter()
*/
CDSPResampler( const double SrcSampleRate, const double DstSampleRate,
const int aMaxInLen, const double ReqTransBand = 2.0,
const double ReqAtten = 206.91,
const EDSPFilterPhaseResponse ReqPhase = fprLinearPhase )
: StepCapacity( 0 )
, StepCount( 0 )
, MaxInLen( aMaxInLen )
, CurMaxOutLen( aMaxInLen )
, LatencyFrac( 0.0 )
{
R8BASSERT( SrcSampleRate > 0.0 );
R8BASSERT( DstSampleRate > 0.0 );
R8BASSERT( MaxInLen > 0 );
R8BCONSOLE( "* CDSPResampler: src=%.1f dst=%.1f len=%i tb=%.1f "
"att=%.2f ph=%i\n", SrcSampleRate, DstSampleRate, aMaxInLen,
ReqTransBand, ReqAtten, (int) ReqPhase );
if( SrcSampleRate == DstSampleRate )
{
return;
}
TmpBufCapacities[ 0 ] = 0;
TmpBufCapacities[ 1 ] = 0;
CurTmpBuf = 0;
// Try some common efficient ratios requiring only a single step.
const int CommonRatioCount = 5;
const int CommonRatios[ CommonRatioCount ][ 2 ] = {
{ 1, 2 },
{ 1, 3 },
{ 2, 3 },
{ 3, 2 },
{ 3, 4 }
};
int i;
for( i = 0; i < CommonRatioCount; i++ )
{
const int num = CommonRatios[ i ][ 0 ];
const int den = CommonRatios[ i ][ 1 ];
if( SrcSampleRate * num == DstSampleRate * den )
{
addProcessor( new CDSPBlockConvolver(
CDSPFIRFilterCache :: getLPFilter(
1.0 / ( num > den ? num : den ), ReqTransBand,
ReqAtten, ReqPhase, num ), num, den, LatencyFrac ));
createTmpBuffers();
return;
}
}
// Try whole-number power-of-2 or 3*power-of-2 upsampling.
for( i = 2; i <= 3; i++ )
{
bool WasFound = false;
int c = 0;
while( true )
{
const double NewSR = SrcSampleRate * ( i << c );
if( NewSR == DstSampleRate )
{
WasFound = true;
break;
}
if( NewSR > DstSampleRate )
{
break;
}
c++;
}
if( WasFound )
{
addProcessor( new CDSPBlockConvolver(
CDSPFIRFilterCache :: getLPFilter( 1.0 / i, ReqTransBand,
ReqAtten, ReqPhase, i ), i, 1, LatencyFrac ));
const bool IsThird = ( i == 3 );
for( i = 0; i < c; i++ )
{
addProcessor( new CDSPHBUpsampler( ReqAtten, i, IsThird,
LatencyFrac ));
}
createTmpBuffers();
return;
}
}
if( DstSampleRate * 2.0 > SrcSampleRate )
{
// Upsampling or fractional downsampling down to 2X.
const double NormFreq = ( DstSampleRate > SrcSampleRate ? 0.5 :
0.5 * DstSampleRate / SrcSampleRate );
addProcessor( new CDSPBlockConvolver(
CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
ReqAtten, ReqPhase, 2.0 ), 2, 1, LatencyFrac ));
// Try intermediate interpolated resampling with subsequent 2X
// or 3X upsampling.
const double tbw = 0.0175; // Intermediate filter's transition
// band extension coefficient.
const double ThreshSampleRate = SrcSampleRate /
( 1.0 - tbw * ReqTransBand ); // Make sure intermediate
// filter's transition band is not steeper than ReqTransBand
// (this keeps the latency under control).
int c = 0;
int div = 1;
while( true )
{
const int ndiv = div * 2;
if( DstSampleRate < ThreshSampleRate * ndiv )
{
break;
}
div = ndiv;
c++;
}
int c2 = 0;
int div2 = 1;
while( true )
{
const int ndiv = div * ( c2 == 0 ? 3 : 2 );
if( DstSampleRate < ThreshSampleRate * ndiv )
{
break;
}
div2 = ndiv;
c2++;
}
const double SrcSampleRate2 = SrcSampleRate * 2.0;
int tmp1;
int tmp2;
if( c == 1 && getWholeStepping( SrcSampleRate2, DstSampleRate,
tmp1, tmp2 ))
{
// Do not use intermediate interpolation if whole stepping is
// available as it performs very fast.
c = 0;
}
if( c > 0 )
{
// Add steps using intermediate interpolation.
int num;
if( c2 > 0 && div2 > div )
{
div = div2;
c = c2;
num = 3;
}
else
{
num = 2;
}
addProcessor( new CDSPFracInterpolator( SrcSampleRate2 * div,
DstSampleRate, ReqAtten, false, LatencyFrac ));
double tb = ( 1.0 - SrcSampleRate * div / DstSampleRate ) /
tbw; // Divide TransBand by a constant that assures a
// linear response in the pass-band.
if( tb > CDSPFIRFilter :: getLPMaxTransBand() )
{
tb = CDSPFIRFilter :: getLPMaxTransBand();
}
addProcessor( new CDSPBlockConvolver(
CDSPFIRFilterCache :: getLPFilter( 1.0 / num, tb,
ReqAtten, ReqPhase, num ), num, 1, LatencyFrac ));
const bool IsThird = ( num == 3 );
for( i = 1; i < c; i++ )
{
addProcessor( new CDSPHBUpsampler( ReqAtten, i - 1,
IsThird, LatencyFrac ));
}
}
else
{
addProcessor( new CDSPFracInterpolator( SrcSampleRate2,
DstSampleRate, ReqAtten, false, LatencyFrac ));
}
createTmpBuffers();
return;
}
// Use downsampling steps, including power-of-2 downsampling.
double CheckSR = DstSampleRate * 4.0;
int c = 0;
double FinGain = 1.0;
while( CheckSR <= SrcSampleRate )
{
c++;
CheckSR *= 2.0;
FinGain *= 0.5;
}
const int SrcSRDiv = ( 1 << c );
int downf;
double NormFreq = 0.5;
bool UseInterp = true;
bool IsThird = false;
for( downf = 2; downf <= 3; downf++ )
{
if( DstSampleRate * SrcSRDiv * downf == SrcSampleRate )
{
NormFreq = 1.0 / downf;
UseInterp = false;
IsThird = ( downf == 3 );
break;
}
}
if( UseInterp )
{
downf = 1;
NormFreq = DstSampleRate * SrcSRDiv / SrcSampleRate;
IsThird = ( NormFreq * 3.0 <= 1.0 );
}
for( i = 0; i < c; i++ )
{
// Use a fixed very relaxed 2X downsampling filters, that at
// the final stage only guarantees stop-band between 0.75 and
// pi. 0.5-0.75 range will be aliased to 0.25-0.5 range which
// will then be filtered out by the final filter.
addProcessor( new CDSPHBDownsampler( ReqAtten, c - 1 - i, IsThird,
LatencyFrac ));
}
addProcessor( new CDSPBlockConvolver(
CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
ReqAtten, ReqPhase, FinGain ), 1, downf, LatencyFrac ));
if( UseInterp )
{
addProcessor( new CDSPFracInterpolator( SrcSampleRate,
DstSampleRate * SrcSRDiv, ReqAtten, IsThird, LatencyFrac ));
}
createTmpBuffers();
}
virtual ~CDSPResampler()
{
int i;
for( i = 0; i < StepCount; i++ )
{
delete Steps[ i ];
}
}
virtual int getLatency() const
{
return( 0 );
}
virtual double getLatencyFrac() const
{
return( LatencyFrac );
}
/**
* This function ignores the supplied parameter and returns the maximal
* output buffer length that depends on the MaxInLen supplied to the
* constructor.
*/
virtual int getMaxOutLen( const int/* MaxInLen */ ) const
{
return( CurMaxOutLen );
}
/**
* Function clears (resets) the state of *this object and returns it to
* the state after construction. All input data accumulated in the
* internal buffer so far will be discarded.
*
* This function makes it possible to use *this object for converting
* separate streams from the same source sample rate to the same
* destination sample rate without reconstructing the object. It is more
* efficient to clear the state of the resampler object than to destroy it
* and create a new object.
*/
virtual void clear()
{
int i;
for( i = 0; i < StepCount; i++ )
{
Steps[ i ] -> clear();
}
}
/**
* Function performs sample rate conversion.
*
* If the source and destination sample rates are equal, the resampler
* will do nothing and will simply return the input buffer unchanged.
*
* You do not need to allocate an intermediate output buffer for use with
* this function. If required, the resampler will allocate a suitable
* intermediate output buffer itself.
*
* @param ip0 Input buffer. This buffer is never used as output buffer by
* this function. This pointer may be returned in "op0" if no resampling
* is happening (source sample rate equals destination sample rate).
* @param l The number of samples available in the input buffer. Should
* not exceed the MaxInLen supplied in the constructor.
* @param[out] op0 This variable receives the pointer to the resampled
* data. On function's return, this pointer points to *this object's
* internal buffer. In real-time applications it is suggested to pass this
* pointer to the next output audio block and consume any data left from
* the previous output audio block first before calling the process()
* function again. The buffer pointed to by the "op0" on return is owned
* by the resampler, so it should not be freed by the caller.
* @return The number of samples available in the "op0" output buffer. If
* the data from the output buffer "op0" is going to be written to a
* bigger output buffer, it is suggested to check the returned number of
* samples so that no overflow of the bigger output buffer happens.
*/
virtual int process( double* ip0, int l, double*& op0 )
{
R8BASSERT( l >= 0 );
double* ip = ip0;
int i;
for( i = 0; i < StepCount; i++ )
{
double* op = TmpBufs[ i & 1 ];
l = Steps[ i ] -> process( ip, l, op );
ip = op;
}
op0 = ip;
return( l );
}
/**
* Function performs resampling of an input sample buffer of the specified
* length in the "one-shot" mode. This function can be useful when impulse
* response resampling is required.
*
* @param ip Input buffer pointer.
* @param iplen Length of the input buffer in samples.
* @param[out] op Output buffer pointer.
* @param oplen Length of the output buffer in samples.
* @tparam Tin Input buffer's element type.
* @tparam Tout Output buffer's element type.
*/
template< typename Tin, typename Tout >
void oneshot( const Tin* ip, int iplen, Tout* op, int oplen )
{
CFixedBuffer< double > Buf( MaxInLen );
bool IsZero = false;
while( oplen > 0 )
{
int rc;
double* p;
int i;
if( iplen == 0 )
{
rc = MaxInLen;
p = &Buf[ 0 ];
if( !IsZero )
{
IsZero = true;
memset( p, 0, MaxInLen * sizeof( p[ 0 ]));
}
}
else
{
rc = min( iplen, MaxInLen );
if( sizeof( Tin ) == sizeof( double ))
{
p = (double*) ip;
}
else
{
p = &Buf[ 0 ];
for( i = 0; i < rc; i++ )
{
p[ i ] = ip[ i ];
}
}
ip += rc;
iplen -= rc;
}
double* op0;
int wc = process( p, rc, op0 );
wc = min( oplen, wc );
for( i = 0; i < wc; i++ )
{
op[ i ] = (Tout) op0[ i ];
}
op += wc;
oplen -= wc;
}
clear();
}
/**
* Function obtains overall input sample count required to produce first
* output sample. Function works by iteratively passing 1 sample at a time
* until output begins. This is a relatively CPU-consuming operation. This
* function should be called after the clear() function call or after
* object's construction. The function itself calls the clear() function
* before return.
*
* Note that it is advisable to cache the value returned by this function,
* for each SrcSampleRate/DstSampleRate pair, if it is called frequently.
*/
int getInLenBeforeOutStart()
{
int inc = 0;
while( true )
{
double ins = 0.0;
double* op;
if( process( &ins, 1, op ) > 0 )
{
clear();
return( inc );
}
inc++;
}
}
private:
CFixedBuffer< CDSPProcessor* > Steps; ///< Array of processing steps.
///<
int StepCapacity; ///< The capacity of the Steps array.
///<
int StepCount; ///< The number of created processing steps.
///<
int MaxInLen; ///< Maximal input length.
///<
CFixedBuffer< double > TmpBufAll; ///< Buffer containing both temporary
///< buffers.
///<
double* TmpBufs[ 2 ]; ///< Temporary output buffers.
///<
int TmpBufCapacities[ 2 ]; ///< Capacities of temporary buffers, updated
///< during processing steps building.
///<
int CurTmpBuf; ///< Current temporary buffer.
///<
int CurMaxOutLen; ///< Current maximal output length.
///<
double LatencyFrac; ///< Current fractional latency. After object's
///< construction, equals to the remaining fractional latency in the
///< output.
///<
/**
* Function adds processor, updates MaxOutLen variable and adjusts length
* of temporary internal buffers.
*
* @param Proc Processor to add. This pointer is inherited and will be
* destroyed on *this object's destruction.
*/
void addProcessor( CDSPProcessor* const Proc )
{
if( StepCount == StepCapacity )
{
// Reallocate and increase Steps array's capacity.
const int NewCapacity = StepCapacity + 8;
Steps.realloc( StepCapacity, NewCapacity );
StepCapacity = NewCapacity;
}
LatencyFrac = Proc -> getLatencyFrac();
CurMaxOutLen = Proc -> getMaxOutLen( CurMaxOutLen );
if( CurMaxOutLen > TmpBufCapacities[ CurTmpBuf ])
{
TmpBufCapacities[ CurTmpBuf ] = CurMaxOutLen;
}
CurTmpBuf ^= 1;
Steps[ StepCount ] = Proc;
StepCount++;
}
/**
* Function creates temporary buffers.
*/
void createTmpBuffers()
{
const int ol = TmpBufCapacities[ 0 ] + TmpBufCapacities[ 1 ];
if( ol > 0 )
{
TmpBufAll.alloc( ol );
TmpBufs[ 0 ] = &TmpBufAll[ 0 ];
TmpBufs[ 1 ] = &TmpBufAll[ TmpBufCapacities[ 0 ]];
}
R8BCONSOLE( "* CDSPResampler: init done\n" );
}
};
/**
* @brief The resampler class for 16-bit resampling.
*
* This class defines resampling parameters suitable for 16-bit resampling,
* using linear-phase low-pass filter. See the r8b::CDSPResampler class for
* details.
*/
class CDSPResampler16 : public CDSPResampler
{
public:
/**
* Constructor initializes the 16-bit resampler. See the
* r8b::CDSPResampler class for details.
*
* @param SrcSampleRate Source signal sample rate.
* @param DstSampleRate Destination signal sample rate.
* @param aMaxInLen The maximal planned length of the input buffer (in
* samples) that will be passed to the resampler.
* @param ReqTransBand Required transition band, in percent.
*/
CDSPResampler16( const double SrcSampleRate, const double DstSampleRate,
const int aMaxInLen, const double ReqTransBand = 2.0 )
: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
136.45, fprLinearPhase )
{
}
};
/**
* @brief The resampler class for 16-bit impulse response resampling.
*
* This class defines resampling parameters suitable for 16-bit impulse
* response resampling, using linear-phase low-pass filter. Impulse responses
* are non-dynamic signals, and thus need resampler with a lesser SNR. See the
* r8b::CDSPResampler class for details.
*/
class CDSPResampler16IR : public CDSPResampler
{
public:
/**
* Constructor initializes the 16-bit impulse response resampler. See the
* r8b::CDSPResampler class for details.
*
* @param SrcSampleRate Source signal sample rate.
* @param DstSampleRate Destination signal sample rate.
* @param aMaxInLen The maximal planned length of the input buffer (in
* samples) that will be passed to the resampler.
* @param ReqTransBand Required transition band, in percent.
*/
CDSPResampler16IR( const double SrcSampleRate, const double DstSampleRate,
const int aMaxInLen, const double ReqTransBand = 2.0 )
: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
109.56, fprLinearPhase )
{
}
};
/**
* @brief The resampler class for 24-bit resampling.
*
* This class defines resampling parameters suitable for 24-bit resampling
* (including 32-bit floating point resampling), using linear-phase low-pass
* filter. See the r8b::CDSPResampler class for details.
*/
class CDSPResampler24 : public CDSPResampler
{
public:
/**
* Constructor initializes the 24-bit resampler (including 32-bit floating
* point). See the r8b::CDSPResampler class for details.
*
* @param SrcSampleRate Source signal sample rate.
* @param DstSampleRate Destination signal sample rate.
* @param aMaxInLen The maximal planned length of the input buffer (in
* samples) that will be passed to the resampler.
* @param ReqTransBand Required transition band, in percent.
*/
CDSPResampler24( const double SrcSampleRate, const double DstSampleRate,
const int aMaxInLen, const double ReqTransBand = 2.0 )
: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
180.15, fprLinearPhase )
{
}
};
} // namespace r8b
#endif // R8B_CDSPRESAMPLER_INCLUDED

View File

@ -0,0 +1,687 @@
//$ nobt
//$ nocpp
/**
* @file CDSPSincFilterGen.h
*
* @brief Sinc function-based FIR filter generator class.
*
* This file includes the CDSPSincFilterGen class implementation that
* generates FIR filters.
*
* r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8B_CDSPSINCFILTERGEN_INCLUDED
#define R8B_CDSPSINCFILTERGEN_INCLUDED
#include "r8bbase.h"
namespace r8b {
/**
* @brief Sinc function-based FIR filter generator class.
*
* Structure that holds state used to perform generation of sinc functions of
* various types, windowed by the Blackman window by default (but the window
* function can be changed if necessary).
*/
class CDSPSincFilterGen
{
public:
double Len2; ///< Required half filter kernel's length in samples (can be
///< a fractional value). Final physical kernel length will be
///< provided in the KernelLen variable. Len2 should be >= 2.
///<
int KernelLen; ///< Resulting length of the filter kernel, this variable
///< is set after the call to one of the "init" functions.
///<
int fl2; ///< Internal "half kernel length" value. This value can be used
///< as filter's latency in samples (taps), this variable is set after
///< the call to one of the "init" functions.
///<
union
{
struct
{
double Freq1; ///< Required corner circular frequency 1 [0; pi].
///< Used only in the generateBand() function.
///<
double Freq2; ///< Required corner circular frequency 2 [0; pi].
///< Used only in the generateBand() function. The range
///< [Freq1; Freq2] defines a pass band for the generateBand()
///< function.
///<
};
struct
{
double FracDelay; ///< Fractional delay in the range [0; 1], used
///< only in the generateFrac() function. Note that the
///< FracDelay parameter is actually inversed. At 0.0 value it
///< produces 1 sample delay (with the latency equal to fl2),
///< at 1.0 value it produces 0 sample delay (with the latency
///< equal to fl2 - 1).
///<
};
};
/**
* Window function type.
*/
enum EWindowFunctionType
{
wftCosine, ///< Generalized cosine window function. No parameters
///< required. The "Power" parameter is optional.
///<
wftKaiser, ///< Kaiser window function. Requires the "Beta" parameter.
///< The "Power" parameter is optional.
///<
wftGaussian ///< Gaussian window function. Requires the "Sigma"
///< parameter. The "Power" parameter is optional.
///<
};
typedef double( CDSPSincFilterGen :: *CWindowFunc )(); ///< Window
///< calculation function pointer type.
///<
/**
* Function initializes *this structure for generation of a window
* function, odd-sized.
*
* @param WinType Window function type.
* @param Params Window function's parameters. If NULL, the table values
* may be used.
* @param UsePower "True" if the power factor should be used to raise the
* window function. If "true", the power factor should be specified as the
* last value in the Params array. If Params is NULL, the table or default
* value of -1.0 (off) will be used.
*/
void initWindow( const EWindowFunctionType WinType = wftCosine,
const double* const Params = NULL, const bool UsePower = false )
{
R8BASSERT( Len2 >= 2.0 );
fl2 = (int) floor( Len2 );
KernelLen = fl2 + fl2 + 1;
setWindow( WinType, Params, UsePower, true );
}
/**
* Function initializes *this structure for generation of band-limited
* sinc filter kernel. The generateBand() function should be used to
* calculate the filter.
*
* @param WinType Window function type.
* @param Params Window function's parameters. If NULL, the table values
* may be used.
* @param UsePower "True" if the power factor should be used to raise the
* window function. If "true", the power factor should be specified as the
* last value in the Params array. If Params is NULL, the table or default
* value of -1.0 (off) will be used.
*/
void initBand( const EWindowFunctionType WinType = wftCosine,
const double* const Params = NULL, const bool UsePower = false )
{
R8BASSERT( Len2 >= 2.0 );
fl2 = (int) floor( Len2 );
KernelLen = fl2 + fl2 + 1;
f1.init( Freq1, 0.0 );
f2.init( Freq2, 0.0 );
setWindow( WinType, Params, UsePower, true );
}
/**
* Function initializes *this structure for Hilbert transformation filter
* calculation. Freq1 and Freq2 variables are not used.
* The generateHilbert() function should be used to calculate the filter.
*
* @param WinType Window function type.
* @param Params Window function's parameters. If NULL, the table values
* may be used.
* @param UsePower "True" if the power factor should be used to raise the
* window function. If "true", the power factor should be specified as the
* last value in the Params array. If Params is NULL, the table or default
* value of -1.0 (off) will be used.
*/
void initHilbert( const EWindowFunctionType WinType = wftCosine,
const double* const Params = NULL, const bool UsePower = false )
{
R8BASSERT( Len2 >= 2.0 );
fl2 = (int) floor( Len2 );
KernelLen = fl2 + fl2 + 1;
setWindow( WinType, Params, UsePower, true );
}
/**
* Function initializes *this structure for generation of full-bandwidth
* fractional delay sinc filter kernel. Freq1 and Freq2 variables are not
* used. The generateFrac() function should be used to calculate the
* filter.
*
* @param WinType Window function type.
* @param Params Window function's parameters. If NULL, the table values
* may be used.
* @param UsePower "True" if the power factor should be used to raise the
* window function. If "true", the power factor should be specified as the
* last value in the Params array. If Params is NULL, the table or default
* value of -1.0 (off) will be used.
*/
void initFrac( const EWindowFunctionType WinType = wftCosine,
const double* const Params = NULL, const bool UsePower = false )
{
R8BASSERT( Len2 >= 2.0 );
fl2 = (int) ceil( Len2 );
KernelLen = fl2 + fl2;
setWindow( WinType, Params, UsePower, false, FracDelay );
}
/**
* @return The next "Hann" window function coefficient.
*/
double calcWindowHann()
{
return( 0.5 + 0.5 * w1.generate() );
}
/**
* @return The next "Hamming" window function coefficient.
*/
double calcWindowHamming()
{
return( 0.54 + 0.46 * w1.generate() );
}
/**
* @return The next "Blackman" window function coefficient.
*/
double calcWindowBlackman()
{
return( 0.42 + 0.5 * w1.generate() + 0.08 * w2.generate() );
}
/**
* @return The next "Nuttall" window function coefficient.
*/
double calcWindowNuttall()
{
return( 0.355768 + 0.487396 * w1.generate() +
0.144232 * w2.generate() + 0.012604 * w3.generate() );
}
/**
* @return The next "Blackman-Nuttall" window function coefficient.
*/
double calcWindowBlackmanNuttall()
{
return( 0.3635819 + 0.4891775 * w1.generate() +
0.1365995 * w2.generate() + 0.0106411 * w3.generate() );
}
/**
* @return The next "Kaiser" window function coefficient.
*/
double calcWindowKaiser()
{
const double n = 1.0 - sqr( wn / Len2 + KaiserLen2Frac );
wn++;
if( n < 0.0 )
{
return( 0.0 );
}
return( besselI0( KaiserBeta * sqrt( n )) / KaiserDiv );
}
/**
* @return The next "Gaussian" window function coefficient.
*/
double calcWindowGaussian()
{
const double f = exp( -0.5 * sqr( wn / GaussianSigma +
GaussianSigmaFrac ));
wn++;
return( f );
}
/**
* Function calculates window function only.
*
* @param[out] op Output buffer, length = KernelLen.
* @param wfunc Window calculation function to use.
*/
void generateWindow( double* op,
CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
{
op += fl2;
double* op2 = op;
int l = fl2;
if( Power < 0.0 )
{
*op = ( *this.*wfunc )();
while( l > 0 )
{
const double v = ( *this.*wfunc )();
op++;
op2--;
*op = v;
*op2 = v;
l--;
}
}
else
{
*op = pows(( *this.*wfunc )(), Power );
while( l > 0 )
{
const double v = pows(( *this.*wfunc )(), Power );
op++;
op2--;
*op = v;
*op2 = v;
l--;
}
}
}
/**
* Function calculates band-limited windowed sinc function-based filter
* kernel.
*
* @param[out] op Output buffer, length = KernelLen.
* @param wfunc Window calculation function to use.
*/
void generateBand( double* op,
CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
{
op += fl2;
double* op2 = op;
f1.generate();
f2.generate();
int t = 1;
if( Power < 0.0 )
{
*op = ( Freq2 - Freq1 ) * ( *this.*wfunc )() / R8B_PI;
while( t <= fl2 )
{
const double v = ( f2.generate() - f1.generate() ) *
( *this.*wfunc )() / ( t * R8B_PI );
op++;
op2--;
*op = v;
*op2 = v;
t++;
}
}
else
{
*op = ( Freq2 - Freq1 ) * pows(( *this.*wfunc )(), Power ) /
R8B_PI;
while( t <= fl2 )
{
const double v = ( f2.generate() - f1.generate() ) *
pows(( *this.*wfunc )(), Power ) / ( t * R8B_PI );
op++;
op2--;
*op = v;
*op2 = v;
t++;
}
}
}
/**
* Function calculates windowed Hilbert transformer filter kernel.
*
* @param[out] op Output buffer, length = KernelLen.
* @param wfunc Window calculation function to use.
*/
void generateHilbert( double* op,
CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
{
static const double fvalues[ 2 ] = { 0.0, 2.0 / R8B_PI };
op += fl2;
double* op2 = op;
( *this.*wfunc )();
*op = 0.0;
int t = 1;
if( Power < 0.0 )
{
while( t <= fl2 )
{
const double v = fvalues[ t & 1 ] * ( *this.*wfunc )() / t;
op++;
op2--;
*op = v;
*op2 = -v;
t++;
}
}
else
{
while( t <= fl2 )
{
const double v = fvalues[ t & 1 ] *
pows( ( *this.*wfunc )(), Power ) / t;
op++;
op2--;
*op = v;
*op2 = -v;
t++;
}
}
}
/**
* Function calculates windowed fractional delay filter kernel.
*
* @param[out] op Output buffer, length = KernelLen.
* @param wfunc Window calculation function to use.
* @param opinc Output buffer increment, in "op" elements.
*/
void generateFrac( double* op,
CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman,
const int opinc = 1 )
{
R8BASSERT( opinc != 0 );
double f[ 2 ];
f[ 0 ] = sin( FracDelay * R8B_PI ) / R8B_PI;
f[ 1 ] = -f[ 0 ];
int t = -fl2;
if( t + FracDelay < -Len2 )
{
( *this.*wfunc )();
*op = 0.0;
op += opinc;
t++;
}
int IsZeroX = ( fabs( FracDelay - 1.0 ) < 0x1p-42 );
int mt = 0 - IsZeroX;
IsZeroX = ( IsZeroX || fabs( FracDelay ) < 0x1p-42 );
if( Power < 0.0 )
{
while( t < mt )
{
*op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
op += opinc;
t++;
}
if( IsZeroX ) // t+FracDelay==0
{
*op = ( *this.*wfunc )();
}
else
{
*op = f[ t & 1 ] * ( *this.*wfunc )() / FracDelay; // t==0
}
mt = fl2 - 2;
while( t < mt )
{
op += opinc;
t++;
*op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
}
op += opinc;
t++;
const double ut = t + FracDelay;
*op = ( ut > Len2 ? 0.0 : f[ t & 1 ] * ( *this.*wfunc )() / ut );
}
else
{
while( t < mt )
{
*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
( t + FracDelay );
op += opinc;
t++;
}
if( IsZeroX ) // t+FracDelay==0
{
*op = pows( ( *this.*wfunc )(), Power );
}
else
{
*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
FracDelay; // t==0
}
mt = fl2 - 2;
while( t < mt )
{
op += opinc;
t++;
*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
( t + FracDelay );
}
op += opinc;
t++;
const double ut = t + FracDelay;
*op = ( ut > Len2 ? 0.0 : f[ t & 1 ] *
pows( ( *this.*wfunc )(), Power ) / ut );
}
}
private:
double Power; ///< The power factor used to raise the window function.
///< Equals a negative value if the power factor should not be used.
///<
CSineGen f1; ///< Sine function 1. Used in the generateBand() function.
///<
CSineGen f2; ///< Sine function 2. Used in the generateBand() function.
///<
int wn; ///< Window function integer position. 0 - center of the window
///< function. This variable may not be used by some window functions.
///<
CSineGen w1; ///< Cosine wave 1 for window function.
///<
CSineGen w2; ///< Cosine wave 2 for window function.
///<
CSineGen w3; ///< Cosine wave 3 for window function.
///<
union
{
struct
{
double KaiserBeta; ///< Kaiser window function's "Beta"
///< coefficient.
///<
double KaiserDiv; ///< Kaiser window function's divisor.
///<
double KaiserLen2Frac; ///< Equals FracDelay / Len2.
///<
};
struct
{
double GaussianSigma; ///< Gaussian window function's "Sigma"
///< coefficient.
///<
double GaussianSigmaFrac; ///< Equals FracDelay / GaussianSigma.
///<
};
};
/**
* Function initializes Kaiser window function calculation. The FracDelay
* variable should be initialized when using this window function.
*
* @param Params Function parameters. If NULL, the default values will be
* used. If not NULL, the first parameter should specify the "Beta" value.
* @param UsePower "True" if the power factor should be used to raise the
* window function.
* @param IsCentered "True" if centered window should be used. This
* parameter usually equals to "false" for fractional delay filters only.
*/
void setWindowKaiser( const double* Params, const bool UsePower,
const bool IsCentered )
{
wn = ( IsCentered ? 0 : -fl2 );
if( Params == NULL )
{
KaiserBeta = 9.5945013206755156;
Power = ( UsePower ? 1.9718457932433306 : -1.0 );
}
else
{
KaiserBeta = clampr( Params[ 0 ], 1.0, 350.0 );
Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
}
KaiserDiv = besselI0( KaiserBeta );
KaiserLen2Frac = FracDelay / Len2;
}
/**
* Function initializes Gaussian window function calculation. The FracDelay
* variable should be initialized when using this window function.
*
* @param Params Function parameters. If NULL, the table values will be
* used. If not NULL, the first parameter should specify the "Sigma"
* value.
* @param UsePower "True" if the power factor should be used to raise the
* window function.
* @param IsCentered "True" if centered window should be used. This
* parameter usually equals to "false" for fractional delay filters only.
*/
void setWindowGaussian( const double* Params, const bool UsePower,
const bool IsCentered )
{
wn = ( IsCentered ? 0 : -fl2 );
if( Params == NULL )
{
GaussianSigma = 1.0;
Power = -1.0;
}
else
{
GaussianSigma = clampr( fabs( Params[ 0 ]), 1e-1, 100.0 );
Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
}
GaussianSigma *= Len2;
GaussianSigmaFrac = FracDelay / GaussianSigma;
}
/**
* Function initializes calculation of window function of the specified
* type.
*
* @param WinType Window function type.
* @param Params Window function's parameters. If NULL, the table values
* may be used.
* @param UsePower "True" if the power factor should be used to raise the
* window function. If "true", the power factor should be specified as the
* last value in the Params array. If Params is NULL, the table or default
* value of -1.0 (off) will be used.
* @param IsCentered "True" if centered window should be used. This
* parameter usually equals to "false" for fractional delay filters only.
* @param UseFracDelay Fractional delay to use.
*/
void setWindow( const EWindowFunctionType WinType,
const double* const Params, const bool UsePower,
const bool IsCentered, const double UseFracDelay = 0.0 )
{
FracDelay = UseFracDelay;
if( WinType == wftCosine )
{
if( IsCentered )
{
w1.init( R8B_PI / Len2, R8B_PId2 );
w2.init( R8B_2PI / Len2, R8B_PId2 );
w3.init( R8B_3PI / Len2, R8B_PId2 );
}
else
{
const double step1 = R8B_PI / Len2;
w1.init( step1, R8B_PId2 - step1 * fl2 + step1 * FracDelay );
const double step2 = R8B_2PI / Len2;
w2.init( step2, R8B_PId2 - step2 * fl2 + step2 * FracDelay );
const double step3 = R8B_3PI / Len2;
w3.init( step3, R8B_PId2 - step3 * fl2 + step3 * FracDelay );
}
Power = ( UsePower && Params != NULL ? Params[ 0 ] : -1.0 );
}
else
if( WinType == wftKaiser )
{
setWindowKaiser( Params, UsePower, IsCentered );
}
else
if( WinType == wftGaussian )
{
setWindowGaussian( Params, UsePower, IsCentered );
}
}
};
} // namespace r8b
#endif // R8B_CDSPSINCFILTERGEN_INCLUDED

View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2013-2022 Aleksey Vaneev
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,6 @@
r8brain-free resampling library from https://github.com/avaneev/r8brain-free-src
version 5.6.
The example program, DLL and "other" folder, PFFFT sources and logo file have been removed.
No further local changes have been made.
For building, premake is used to generate Visual Studio project files.
See ../build/premake/ for details.

View File

@ -0,0 +1,408 @@
# r8brain-free-src - High-Quality Resampler #
## Introduction ##
Open source (under the MIT license) high-quality professional audio sample
rate converter (SRC) / resampler C++ library. Features routines for SRC,
both up- and downsampling, to/from any sample rate, including non-integer
sample rates: it can be also used for conversion to/from SACD/DSD sample
rates, and even go beyond that. SRC routines were implemented in a
multi-platform C++ code, and have a high level of optimality. Also suitable
for fast general-purpose 1D time-series resampling / interpolation (with
relaxed filter parameters).
The structure of this library's objects is such that they can be frequently
created and destroyed in large applications with a minimal performance impact
due to a high level of reusability of its most "initialization-expensive"
objects: the fast Fourier transform and FIR filter objects.
The SRC algorithm at first produces 2X oversampled (relative to the source
sample rate, or the destination sample rate if the downsampling is performed)
signal, then performs interpolation using a bank of short (8 to 30 taps,
depending on the required precision) polynomial-interpolated sinc
function-based fractional delay filters. This puts the algorithm into the
league of the fastest among the most precise SRC algorithms. The more precise
alternative being only the whole number-factored SRC, which can be slower.
P.S. Please credit the creator of this library in your documentation in the
following way: "Sample rate converter designed by Aleksey Vaneev of Voxengo".
## Requirements ##
C++ compiler and system with the "double" floating point type (53-bit
mantissa) support. No explicit code for the "float" type is present in this
library, because as practice has shown the "float"-based code performs
considerably slower on a modern processor, at least in this library. This
library does not have dependencies beside the standard C library, the
"windows.h" on Windows and the "pthread.h" on macOS and Linux.
## Links ##
* [Documentation](https://www.voxengo.com/public/r8brain-free-src/Documentation/)
* [Discussion](https://www.kvraudio.com/forum/viewtopic.php?t=389711)
## Usage Information ##
The sample rate converter (resampler) is represented by the
**r8b::CDSPResampler** class, which is a single front-end class for the
whole library. You do not basically need to use nor understand any other
classes beside this class. Several derived classes that have varying levels
of precision are also available (for full-resolution 16-bit and 24-bit
resampling).
The code of the library resides in the "r8b" C++ namespace, effectively
isolating it from all other code. The code is thread-safe. A separate
resampler object should be created for each audio channel or stream being
processed concurrently.
Note that you will need to compile the "r8bbase.cpp" source file and include
the resulting object file into your application build. This source file
includes definitions of several global static objects used by the library.
You may also need to include to your project: the "Kernel32" library (on
Windows) and the "pthread" library on macOS and Linux.
The library is able to process signal of any scale and loudness: it is not
limited to just a "usual" -1.0 to 1.0 range.
By defining the `R8B_IPP` configuration macro it is possible to enable Intel
IPP back-end for FFT functions, instead of the default Ooura FFT. IPP FFT
makes sample rate conversion faster by 23% on average.
#define R8B_IPP 1
If a larger initial processing delay and a very minor sample timing error are
not an issue, for the most efficiency you can define these macros at
the beginning of the `r8bconf.h` file, or during compilation:
#define R8B_IPP 1
#define R8B_FASTTIMING 1
#define R8B_EXTFFT 1
If you do not have access to the Intel IPP then you may consider enabling the
PFFFT which is only slightly slower than Intel IPP FFT in performance. There
are two macros available: `R8B_PFFFT` and `R8B_PFFFT_DOUBLE`. The first macro
enables PFFFT that works in single-precision resolution, thus limiting the
overall resampler's precision to 24-bit sample rate conversions (for
mission-critical professional audio applications, using the `R8B_PFFFT` macro
is not recommended as its peak error is quite large). The second macro
enables PFFFT implementation that works in double-precision resolution, making
use of SSE2, AVX, and NEON intrinsics, yielding precision that is equal to
both Intel IPP and Ooura FFT implementations.
To use the PFFFT, define the `R8B_PFFFT` or `R8B_PFFFT_DOUBLE` macro, compile
and include the supplied `pffft.cpp` or `pffft_double/pffft_double.c` file to
your project build.
#define R8B_PFFFT 1
or
#define R8B_PFFFT_DOUBLE 1
The code of this library was commented in the [Doxygen](http://www.doxygen.org/)
style. To generate the documentation locally you may run the
`doxygen ./other/r8bdoxy.txt` command from the library's folder.
Preliminary tests show that the r8b::CDSPResampler24 resampler class achieves
`31.8*n_cores` Mrops (`44*n_cores` for Intel IPP FFT) when converting 1
channel of 24-bit audio from 44100 to 96000 sample rate (2% transition band),
on a Ryzen 3700X processor-based 64-bit system. This approximately translates
to a real-time resampling of `720*n_cores` (`1000*n_cores`) audio streams, at
100% CPU load. Speed performance when converting to other sample rates may
vary greatly. When comparing performance of this resampler library to another
library make sure that the competing library is also tuned to produce a fully
linear-phase response, has similar stop-band characteristics and similar
sample timing precision.
## Dynamic Link Library ##
The functions of this SRC library are also accessible in simplified form via
the DLL file on Windows, requiring a processor with SSE2 support (Win64
version includes AVX2 auto-dispatch code). Delphi Pascal interface unit file
for the DLL file is available. DLL and C LIB files are distributed in the DLL
folder on the project's homepage. On non-Windows systems it is preferrable
to use the C++ library directly. Note that the DLL was compiled with the
Intel IPP enabled.
## Real-Time Applications ##
The resampler class of this library was designed as an asynchronous processor:
it may produce any number of output samples, depending on the input sample
data length and the resampling parameters. The resampler must be fed with the
input sample data until enough output sample data were produced, with any
excess output samples used before feeding the resampler with more input data.
A "relief" factor here is that the resampler removes the initial processing
latency automatically, and that after initial moments of processing the output
becomes steady, with only minor output sample data length fluctuations.
So, while for an off-line resampling a "push" method can be used,
demonstrated in the `example.cpp` file, for a real-time resampling a "pull"
method should be used which calls the resampling process until the output
buffer is filled.
## Notes ##
When using the **r8b::CDSPResampler** class directly, you may select the
transition band/steepness of the low-pass (reconstruction) filter, expressed
as a percentage of the full spectral bandwidth of the input signal (or the
output signal if the downsampling is performed), and the desired stop-band
attenuation in decibel.
The transition band is specified as the normalized spectral space of the input
signal (or the output signal if the downsampling is performed) between the
low-pass filter's -3 dB point and the Nyquist frequency, and ranges from 0.5%
to 45%. Stop-band attenuation can be specified in the range from 49 to 218
decibel. Both the transition band and stop-band attenuation affect
resampler's overall speed performance and initial output delay. For your
information, transition frequency range spans 175% of the specified transition
band, which means that for 2% transition band, frequency response below
0.965\*Nyquist is linear.
This SRC library also implements a much faster "power of 2" resampling (e.g.
2X, 4X, 8X, 16X, 3X, 3\*2X, 3\*4X, 3\*8X, etc. upsampling and downsampling),
which is engaged automatically if the resampling parameters permit.
This library was tested for compatibility with [GNU C++](https://gcc.gnu.org/),
[Microsoft Visual C++](https://visualstudio.microsoft.com/),
[LLVM](https://llvm.org/) and [Intel C++](https://software.intel.com/en-us/c-compilers)
compilers, on 32- and 64-bit Windows, macOS, and CentOS Linux.
Most code is "inline", without the need to compile many source files. The
memory footprint is quite modest.
## Acknowledgements ##
r8brain-free-src is bundled with the following code:
* FFT routines Copyright (c) 1996-2001 Takuya OOURA.
[Homepage](http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
* PFFFT Copyright (c) 2013 Julien Pommier.
[Homepage](https://bitbucket.org/jpommier/pffft)
* PFFFT DOUBLE Copyright (c) 2020 Hayati Ayguen, Dario Mambro.
[Homepage](https://github.com/unevens/pffft)
## Users ##
This library is used by:
* [REAPER](https://reaper.fm/)
* [AUDIRVANA](https://audirvana.com/)
* [Red Dead Redemption 2](https://www.rockstargames.com/reddeadredemption2/credits)
* [Mini Piano Lite](https://play.google.com/store/apps/details?id=umito.android.minipiano)
* [OpenMPT](https://openmpt.org/)
* [Boogex Guitar Amp audio plugin](https://www.voxengo.com/product/boogex/)
* [r8brain free sample rate converter](https://www.voxengo.com/product/r8brain/)
* [Voice Aloud Reader](https://play.google.com/store/apps/details?id=com.hyperionics.avarLic)
* [Zynewave Podium](https://zynewave.com/podium/)
* [Phonometrica](http://www.phonometrica-ling.org/index.html)
* [Ripcord](https://cancel.fm/ripcord/)
* [TensorVox](https://github.com/ZDisket/TensorVox)
* [Curvessor](https://github.com/unevens/Curvessor)
Please send me a note via aleksey.vaneev@gmail.com and I will include a link
to your software product to this list of users. This list is important in
maintaining confidence in this library among the interested parties. The
inclusion into this list is not mandatory.
## Change Log ##
Version 5.6:
* Added SSE and NEON implementations to `CDSPHBUpsampler` yielding 15%
performance improvement of power-of-2 upsampling.
* Added SSE and NEON implementations to the `CDSPRealFFT::multiplyBlocksZP`
function, for 2-3% performance improvement.
* Added intermediate interpolator's transition band limitation, for logical
hardness (not practically needed).
* Added the `aDoConsumeLatency` parameter to `CDSPHBUpsampler` constructor,
for "inline" DSP uses of the class.
* Made various minor changes across the codebase.
Version 5.5:
* Hardened positional logic of fractional filter calculation, removed
redundant multiplications.
* Removed unnecessary function templating from the `CDSPSincFilterGen` class.
* Added the `__ARM_NEON` macro to NEON availability detection.
Version 5.4:
* Added compiler specializations to previously optimized inner loops.
"Shuffled" SIMD interpolation code is not efficient on Apple M1. Intel C++
Compiler vectorizes "whole stepping" interpolation as good as a
manually-written SSE.
* Reorganized SIMD instructions for a slightly better performance.
* Changed internal buffer sizes of half-band resamplers (1-2% performance
boost).
* Fixed compiler warnings in PFFFT code.
* Added several asserts to the code.
Version 5.3:
* Optimized inner loops of the fractional interpolator, added SSE2 and NEON
intrinsics, resulting in a measurable (8-25%) performance gain.
* Optimized filter calculation functions: changed some divisions by a constant
to multiplications.
* Renamed M_PI macros to R8B_PI, to avoid macro collisions.
* Removed redundant code and macros.
Version 5.2:
* Modified `PFFFT` and `PFFFT DOUBLE` conditional pre-processor directives to
always enable NEON on `aarch64`/`arm64` (this includes code built for
Apple M1).
Version 5.1:
* Changed alignment in the `CFixedBuffer` class to 64 bytes. This improves AVX
performance of the `PFFFT DOUBLE` implementation by a few percent.
* Removed redundant files from the `pffft_double` folder, integrated the
`pffft_common.c` file into the `pffft_double.c` file.
Version 5.0:
* Removed a long-outdated macros from the `r8bconf.h` file.
* Changed a conditional pre-processor directive in the `pf_sse2_double.h` file
as per PFFFT DOUBLE author's suggestion, to allow SSE2 intrinsics in most
compilers.
* Fixed "License.txt" misnaming in the source files to "LICENSE".
Version 4.10:
* Added the `PFFFT DOUBLE` implementation support. Now available via the
`R8B_PFFFT_DOUBLE` definition macro.
Version 4.9:
* Reoptimized half-band and fractional interpolation filters with a stricter
frequency response linearity constraints. This did not impact the average
speed performance.
Version 4.8:
* Added a limit to the intermediate filter's transition band, to keep the
latency under control at any resampling ratio.
Version 4.7:
* Added `#ifndef _USE_MATH_DEFINES` to `pffft.cpp`.
* Moved `#include "pffft.h"` to `CDSPRealFFT.h`.
Version 4.6:
* Removed the `MaxInLen` parameter from the `oneshot()` function.
* Decreased intermediate low-pass filter's transition band slightly, for more
stable quality.
Version 4.5:
* Fixed VS2017 compiler warnings.
Version 4.4:
* Fixed the "Declaration hides a member" Intel C++ compiler warnings.
Version 4.3:
* Added //$ markers for internal debugging purposes.
Version 4.2:
* Backed-off max transition band to 45 and MinAtten to 49.
* Implemented Wave64 and AIFF file input in the `r8bfreesrc` bench tool. The
tool is now compiled with the `R8B_IPP 1` and `R8B_EXTFFT 1` macros to
demonstrate the maximal achievable performance.
Version 4.1:
* Updated allowed ReqAtten range to 52-218, ReqTransBand 0.5-56. It is
possible to specify filter parameters slightly beyond these values, but the
resulting filter will be slightly out of specification as well.
* Optimized static filter banks allocation.
Version 4.0:
* A major overhaul of interpolation classes: now templated parameters are not
used, all required parameters are calculated at runtime. Static filter bank
object is not used, it is created when necessary, and then cached.
* Implemented one-third interpolation filters, however, this did not
measurably increase resampler's speed.
Version 3.7:
* Used ippsMul_64f_I() in the CDSPRealFFT::multiplyBlockZ() function for a
minor conversion speed increase in Intel IPP mode.
Version 3.6:
* Added memory alignment to allocated buffers which boosts performance by 1.5%
when Intel IPP FFT is in use.
* Implemented PFFFT support.
Version 3.5:
* Improved resampling speed very slightly.
* Updated the `r8bfreesrc` benchmark tool to support RF64 WAV files.
Version 3.4:
* Added a more efficient half-band filters for >= 256 resampling ratios.
Version 3.3:
* Made minor fix to downsampling for some use cases of CDSPBlockConvolver,
did not affect resampler.
* Converted CDSPHBUpsampler and CDSPHBDownsampler's inner functions to
static functions, which boosted high-ratio resampling performance measurably.
Version 3.2:
* Minor fix to the latency consumption mechanism.
Version 3.1:
* Reoptimized fractional delay filter's windowing function.
Version 3.0:
* Implemented a new variant of the getInLenBeforeOutStart() function.
* Reimplemented oneshot() function to support `float` buffer types.
* Considerably improved downsampling performance at high resampling ratios.
* Implemented intermediate interpolation technique which boosted upsampling
performance for most resampling ratios considerably.
* Removed the ConvCount constant - now resampler supports virtually any
resampling ratios.
* Removed the UsePower2 parameter from the resampler constructor.
* Now resampler's process() function always returns pointer to the internal
buffer, input buffer is returned only if no resampling happens.
* Resampler's getMaxOutLen() function can now be used to obtain the maximal
output length that can be produced by the resampler in a single call.
* Added a more efficient "one-third" filters to half-band upsampler and
downsampler.
Version 2.1:
* Optimized 2X half-band downsampler.
Version 2.0:
* Optimized power-of-2 upsampling.
Version 1.9:
* Optimized half-band downsampling filter.
* Implemented whole-number stepping resampling.
* Added `R8B_EXTFFT` configuration option.
* Fixed initial sub-sample offseting on downsampling.
Version 1.8:
* Added `R8B_FASTTIMING` configuration option.
Version 1.7:
* Improved sample timing precision.
* Increased CDSPResampler :: ConvCountMax to 28 to support a lot wider
resampling ratios.
* Added `bench` tools.
* Removed getInLenBeforeOutStart() due to incorrect calculation.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,38 @@
/**
* @file r8bbase.cpp
*
* @brief C++ file that should be compiled and included into your application.
*
* This is a single library file that should be compiled and included into the
* project that uses the "r8brain-free-src" sample rate converter. This file
* defines several global static objects used by the library.
*
* You may also need to include to your project: the "Kernel32" library
* (on Windows) and the "pthread" library on macOS and Linux.
*
* r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#include "CDSPFIRFilter.h"
#include "CDSPFracInterpolator.h"
namespace r8b {
#if R8B_FLTTEST
int InterpFilterFracs = -1;
#endif // R8B_FLTTEST
CSyncObject CDSPRealFFTKeeper :: StateSync;
CDSPRealFFT :: CObjKeeper CDSPRealFFTKeeper :: FFTObjects[ 31 ];
CSyncObject CDSPFIRFilterCache :: StateSync;
CPtrKeeper< CDSPFIRFilter* > CDSPFIRFilterCache :: Objects;
int CDSPFIRFilterCache :: ObjCount = 0;
CSyncObject CDSPFracDelayFilterBankCache :: StateSync;
CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: Objects;
CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: StaticObjects;
int CDSPFracDelayFilterBankCache :: ObjCount = 0;
} // namespace r8b

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,176 @@
//$ nobt
//$ nocpp
/**
* @file r8bconf.h
*
* @brief The "configuration" inclusion file you can modify.
*
* This is the "configuration" inclusion file for the "r8brain-free-src"
* sample rate converter. You may redefine the macros here as you see fit.
*
* r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8BCONF_INCLUDED
#define R8BCONF_INCLUDED
#if !defined( R8B_IPP )
/**
* Set the R8B_IPP macro definition to 1 to enable the use of Intel IPP's
* fast Fourier transform functions. Also uncomment and correct the IPP
* header inclusion macros.
*
* Do not forget to call the ippInit() function at the start of the
* application, before using this library's functions.
*/
#define R8B_IPP 0
// #include <ippcore.h>
// #include <ipps.h>
#endif // !defined( R8B_IPP )
#if !defined( R8BASSERT )
/**
* Assertion macro used to check for certain run-time conditions. By
* default no action is taken if assertion fails.
*
* @param e Expression to check.
*/
#define R8BASSERT( e )
#endif // !defined( R8BASSERT )
#if !defined( R8BCONSOLE )
/**
* Console output macro, used to output various resampler status strings,
* including filter design parameters, convolver parameters.
*
* @param e Expression to send to the console, usually consists of a
* standard "printf" format string followed by several parameters
* (__VA_ARGS__).
*/
#define R8BCONSOLE( ... )
#endif // !defined( R8BCONSOLE )
#if !defined( R8B_BASECLASS )
/**
* Macro defines the name of the class from which all classes that are
* designed to be created on heap are derived. The default
* r8b::CStdClassAllocator class uses "stdlib" memory allocation
* functions.
*
* The classes that are best placed on stack or as class members are not
* derived from any class.
*/
#define R8B_BASECLASS :: r8b :: CStdClassAllocator
#endif // !defined( R8B_BASECLASS )
#if !defined( R8B_MEMALLOCCLASS )
/**
* Macro defines the name of the class that implements raw memory
* allocation functions, see the r8b::CStdMemAllocator class for details.
*/
#define R8B_MEMALLOCCLASS :: r8b :: CStdMemAllocator
#endif // !defined( R8B_MEMALLOCCLASS )
#if !defined( R8B_FILTER_CACHE_MAX )
/**
* This macro specifies the number of filters kept in the cache at most.
* The actual number can be higher if many different filters are in use at
* the same time.
*/
#define R8B_FILTER_CACHE_MAX 96
#endif // !defined( R8B_FILTER_CACHE_MAX )
#if !defined( R8B_FRACBANK_CACHE_MAX )
/**
* This macro specifies the number of whole-number stepping fractional
* delay filter banks kept in the cache at most. The actual number can be
* higher if many different filter banks are in use at the same time. As
* filter banks are usually big objects, it is advisable to keep this
* cache size small.
*/
#define R8B_FRACBANK_CACHE_MAX 12
#endif // !defined( R8B_FRACBANK_CACHE_MAX )
#if !defined( R8B_FLTTEST )
/**
* This macro, when equal to 1, enables fractional delay filter bank
* testing: in this mode the filter bank becomes a dynamic member of the
* CDSPFracInterpolator object instead of being a global static object.
*/
#define R8B_FLTTEST 0
#endif // !defined( R8B_FLTTEST )
#if !defined( R8B_FASTTIMING )
/**
* This macro, when equal to 1, enables a fast interpolation sample
* timing technique. This technique improves interpolation performance
* (by around 10%) at the expense of a minor sample timing drift which is
* on the order of 1e-6 samples per 10 billion output samples. This
* setting does not apply to whole-number stepping, if it is in use, as
* this stepping provides zero timing error without performance impact.
* Also does not apply to the cases when a whole-numbered (2X, 3X, etc.)
* resampling is in the actual use.
*/
#define R8B_FASTTIMING 0
#endif // !defined( R8B_FASTTIMING )
#if !defined( R8B_EXTFFT )
/**
* This macro, when equal to 1, extends length of low-pass filters' FFT
* block by a factor of 2, by zero-padding it. This usually improves the
* overall time performance of the resampler at the expense of a higher
* overall latency (initial processing delay). If such delay is not an
* issue, setting this macro to 1 is preferrable. This macro can only have
* a value of 0 or 1.
*/
#define R8B_EXTFFT 0
#endif // !defined( R8B_EXTFFT )
#if !defined( R8B_PFFFT )
/**
* When defined as 1, enables PFFFT routines which are fast, but which
* are limited to 24-bit precision. May be a good choice for time-series
* interpolation, when stop-band attenuation higher than 120 dB is not
* required.
*/
#define R8B_PFFFT 0
#endif // !defined( R8B_PFFFT )
#if R8B_PFFFT
#define R8B_FLOATFFT 1
#endif // R8B_PFFFT
#if !defined( R8B_PFFFT_DOUBLE )
/**
* When defined as 1, enables PFFFT "double" routines which are fast, and
* which provide the highest precision.
*/
#define R8B_PFFFT_DOUBLE 0
#endif // !defined( R8B_PFFFT_DOUBLE )
#if !defined( R8B_FLOATFFT )
/**
* The R8B_FLOATFFT definition enables double-to-float buffer conversions
* for FFT operations, for algorithms that work with "float" values. This
* macro should not be changed from the default "0" here.
*/
#define R8B_FLOATFFT 0
#endif // !defined( R8B_FLOATFFT )
#endif // R8BCONF_INCLUDED

View File

@ -0,0 +1,306 @@
//$ nobt
//$ nocpp
/**
* @file r8butil.h
*
* @brief The inclusion file with several utility functions.
*
* This file includes several utility functions used by various utility
* programs like "calcErrorTable.cpp".
*
* r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
* See the "LICENSE" file for license.
*/
#ifndef R8BUTIL_INCLUDED
#define R8BUTIL_INCLUDED
#include "r8bbase.h"
namespace r8b {
/**
* @param re Real part of the frequency response.
* @param im Imaginary part of the frequency response.
* @return A magnitude response value converted from the linear scale to the
* logarithmic scale.
*/
inline double convertResponseToLog( const double re, const double im )
{
return( 4.34294481903251828 * log( re * re + im * im + 1e-100 ));
}
/**
* An utility function that performs frequency response scanning step update
* based on the current magnitude response's slope.
*
* @param[in,out] step The current scanning step. Will be updated on
* function's return. Must be a positive value.
* @param curg Squared magnitude response at the current frequency point.
* @param[in,out] prevg_log Previous magnitude response, log scale. Will be
* updated on function's return.
* @param prec Precision multiplier, affects the size of the step.
* @param maxstep The maximal allowed step.
* @param minstep The minimal allowed step.
*/
inline void updateScanStep( double& step, const double curg,
double& prevg_log, const double prec, const double maxstep,
const double minstep = 1e-11 )
{
double curg_log = 4.34294481903251828 * log( curg + 1e-100 );
curg_log += ( prevg_log - curg_log ) * 0.7;
const double slope = fabs( curg_log - prevg_log );
prevg_log = curg_log;
if( slope > 0.0 )
{
step /= prec * slope;
step = max( min( step, maxstep ), minstep );
}
}
/**
* Function locates normalized frequency at which the minimum filter gain
* is reached. The scanning is performed from lower (left) to higher
* (right) frequencies, the whole range is scanned.
*
* Function expects that the magnitude response is always reducing from lower
* to high frequencies, starting at "minth".
*
* @param flt Filter response.
* @param fltlen Filter response's length in samples (taps).
* @param[out] ming The current minimal gain (squared). On function's return
* will contain the minimal gain value found (squared).
* @param[out] minth The normalized frequency where the minimal gain is
* currently at. On function's return will point to the normalized frequency
* where the new minimum was found.
* @param thend The ending frequency, inclusive.
*/
inline void findFIRFilterResponseMinLtoR( const double* const flt,
const int fltlen, double& ming, double& minth, const double thend )
{
const double maxstep = minth * 2e-3;
double curth = minth;
double re;
double im;
calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
double prevg_log = convertResponseToLog( re, im );
double step = 1e-11;
while( true )
{
curth += step;
if( curth > thend )
{
break;
}
calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
const double curg = re * re + im * im;
if( curg > ming )
{
ming = curg;
minth = curth;
break;
}
ming = curg;
minth = curth;
updateScanStep( step, curg, prevg_log, 0.31, maxstep );
}
}
/**
* Function locates normalized frequency at which the maximal filter gain
* is reached. The scanning is performed from lower (left) to higher
* (right) frequencies, the whole range is scanned.
*
* Note: this function may "stall" in very rare cases if the magnitude
* response happens to be "saw-tooth" like, requiring a very small stepping to
* be used. If this happens, it may take dozens of seconds to complete.
*
* @param flt Filter response.
* @param fltlen Filter response's length in samples (taps).
* @param[out] maxg The current maximal gain (squared). On function's return
* will contain the maximal gain value (squared).
* @param[out] maxth The normalized frequency where the maximal gain is
* currently at. On function's return will point to the normalized frequency
* where the maximum was reached.
* @param thend The ending frequency, inclusive.
*/
inline void findFIRFilterResponseMaxLtoR( const double* const flt,
const int fltlen, double& maxg, double& maxth, const double thend )
{
const double maxstep = maxth * 1e-4;
double premaxth = maxth;
double premaxg = maxg;
double postmaxth = maxth;
double postmaxg = maxg;
double prevth = maxth;
double prevg = maxg;
double curth = maxth;
double re;
double im;
calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
double prevg_log = convertResponseToLog( re, im );
double step = 1e-11;
bool WasPeak = false;
int AfterPeakCount = 0;
while( true )
{
curth += step;
if( curth > thend )
{
break;
}
calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
const double curg = re * re + im * im;
if( curg > maxg )
{
premaxth = prevth;
premaxg = prevg;
maxg = curg;
maxth = curth;
WasPeak = true;
AfterPeakCount = 0;
}
else
if( WasPeak )
{
if( AfterPeakCount == 0 )
{
postmaxth = curth;
postmaxg = curg;
}
if( AfterPeakCount == 5 )
{
// Perform 2 approximate binary searches.
int k;
for( k = 0; k < 2; k++ )
{
double l = ( k == 0 ? premaxth : maxth );
double curgl = ( k == 0 ? premaxg : maxg );
double r = ( k == 0 ? maxth : postmaxth );
double curgr = ( k == 0 ? maxg : postmaxg );
while( true )
{
const double c = ( l + r ) * 0.5;
calcFIRFilterResponse( flt, fltlen, R8B_PI * c,
re, im );
const double curg = re * re + im * im;
if( curgl > curgr )
{
r = c;
curgr = curg;
}
else
{
l = c;
curgl = curg;
}
if( r - l < 1e-11 )
{
if( curgl > curgr )
{
maxth = l;
maxg = curgl;
}
else
{
maxth = r;
maxg = curgr;
}
break;
}
}
}
break;
}
AfterPeakCount++;
}
prevth = curth;
prevg = curg;
updateScanStep( step, curg, prevg_log, 1.0, maxstep );
}
}
/**
* Function locates normalized frequency at which the specified maximum
* filter gain is reached. The scanning is performed from higher (right)
* to lower (left) frequencies, scanning stops when the required gain
* value was crossed. Function uses an extremely efficient binary search and
* thus expects that the magnitude response has the "main lobe" form produced
* by windowing, with a minimal pass-band ripple.
*
* @param flt Filter response.
* @param fltlen Filter response's length in samples (taps).
* @param maxg Maximal gain (squared).
* @param[out] th The current normalized frequency. On function's return will
* point to the normalized frequency where "maxg" is reached.
* @param thend The leftmost frequency to scan, inclusive.
*/
inline void findFIRFilterResponseLevelRtoL( const double* const flt,
const int fltlen, const double maxg, double& th, const double thend )
{
// Perform exact binary search.
double l = thend;
double r = th;
while( true )
{
const double c = ( l + r ) * 0.5;
if( r - l < 1e-14 )
{
th = c;
break;
}
double re;
double im;
calcFIRFilterResponse( flt, fltlen, R8B_PI * c, re, im );
const double curg = re * re + im * im;
if( curg > maxg )
{
l = c;
}
else
{
r = c;
}
}
}
} // namespace r8b
#endif // R8BUTIL_INCLUDED