Initial community commit

2025-06-17 12:35:45 -04:00 · 2024-09-24 14:54:57 +02:00
parent 537bcbc862
commit 20d28e80a5
16810 changed files with 4640254 additions and 2 deletions
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h
@ -0,0 +1,642 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPBlockConvolver.h
+ *
+ * @brief Single-block overlap-save convolution processor class.
+ *
+ * This file includes single-block overlap-save convolution processor class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED
+#define R8B_CDSPBLOCKCONVOLVER_INCLUDED
+
+#include "CDSPFIRFilter.h"
+#include "CDSPProcessor.h"
+
+namespace r8b {
+
+/**
+ * @brief Single-block overlap-save convolution processing class.
+ *
+ * Class that implements single-block overlap-save convolution processing. The
+ * length of a single FFT block used depends on the length of the filter
+ * kernel.
+ *
+ * The rationale behind "single-block" processing is that increasing the FFT
+ * block length by 2 is more efficient than performing convolution at the same
+ * FFT block length but using two blocks.
+ *
+ * This class also implements a built-in resampling by any whole-number
+ * factor, which simplifies the overall resampling objects topology.
+ */
+
+class CDSPBlockConvolver : public CDSPProcessor
+{
+public:
+	/**
+	 * Constructor initializes internal variables and constants of *this
+	 * object.
+	 *
+	 * @param aFilter Pre-calculated filter data. Reference to this object is
+	 * inhertied by *this object, and the object will be released when *this
+	 * object is destroyed. If upsampling is used, filter's gain should be
+	 * equal to the upsampling factor.
+	 * @param aUpFactor The upsampling factor, positive value. E.g. value of 2
+	 * means 2x upsampling should be performed over the input data.
+	 * @param aDownFactor The downsampling factor, positive value. E.g. value
+	 * of 2 means 2x downsampling should be performed over the output data.
+	 * @param PrevLatency Latency, in samples (any value >=0), which was left
+	 * in the output signal by a previous process. This value is usually
+	 * non-zero if the minimum-phase filters are in use. This value is always
+	 * zero if the linear-phase filters are in use.
+	 * @param aDoConsumeLatency "True" if the output latency should be
+	 * consumed. Does not apply to the fractional part of the latency (if such
+	 * part is available).
+	 */
+
+	CDSPBlockConvolver( CDSPFIRFilter& aFilter, const int aUpFactor,
+		const int aDownFactor, const double PrevLatency = 0.0,
+		const bool aDoConsumeLatency = true )
+		: Filter( &aFilter )
+		, UpFactor( aUpFactor )
+		, DownFactor( aDownFactor )
+		, DoConsumeLatency( aDoConsumeLatency )
+		, BlockLen2( 2 << Filter -> getBlockLenBits() )
+	{
+		R8BASSERT( UpFactor > 0 );
+		R8BASSERT( DownFactor > 0 );
+		R8BASSERT( PrevLatency >= 0.0 );
+
+		int fftinBits;
+		UpShift = getBitOccupancy( UpFactor ) - 1;
+
+		if(( 1 << UpShift ) == UpFactor )
+		{
+			fftinBits = Filter -> getBlockLenBits() + 1 - UpShift;
+			PrevInputLen = ( Filter -> getKernelLen() - 1 + UpFactor - 1 ) /
+				UpFactor;
+
+			InputLen = BlockLen2 - PrevInputLen * UpFactor;
+		}
+		else
+		{
+			UpShift = -1;
+			fftinBits = Filter -> getBlockLenBits() + 1;
+			PrevInputLen = Filter -> getKernelLen() - 1;
+			InputLen = BlockLen2 - PrevInputLen;
+		}
+
+		OutOffset = ( Filter -> isZeroPhase() ? Filter -> getLatency() : 0 );
+		LatencyFrac = Filter -> getLatencyFrac() + PrevLatency * UpFactor;
+		Latency = (int) LatencyFrac;
+		const int InLatency = Latency + Filter -> getLatency() - OutOffset;
+		LatencyFrac -= Latency;
+		LatencyFrac /= DownFactor;
+
+		Latency += InputLen + Filter -> getLatency();
+
+		int fftoutBits;
+		InputDelay = 0;
+		DownSkipInit = 0;
+		DownShift = getBitOccupancy( DownFactor ) - 1;
+
+		if(( 1 << DownShift ) == DownFactor )
+		{
+			fftoutBits = Filter -> getBlockLenBits() + 1 - DownShift;
+
+			if( DownFactor > 1 )
+			{
+				if( UpShift > 0 )
+				{
+					// This case never happens in practice due to mutual
+					// exclusion of "power of 2" DownFactor and UpFactor
+					// values.
+
+					R8BASSERT( UpShift == 0 );
+				}
+				else
+				{
+					// Make sure InputLen is divisible by DownFactor.
+
+					const int ilc = InputLen & ( DownFactor - 1 );
+					PrevInputLen += ilc;
+					InputLen -= ilc;
+					Latency -= ilc;
+
+					// Correct InputDelay for input and filter's latency.
+
+					const int lc = InLatency & ( DownFactor - 1 );
+
+					if( lc > 0 )
+					{
+						InputDelay = DownFactor - lc;
+					}
+
+					if( !DoConsumeLatency )
+					{
+						Latency /= DownFactor;
+					}
+				}
+			}
+		}
+		else
+		{
+			fftoutBits = Filter -> getBlockLenBits() + 1;
+			DownShift = -1;
+
+			if( !DoConsumeLatency && DownFactor > 1 )
+			{
+				DownSkipInit = Latency % DownFactor;
+				Latency /= DownFactor;
+			}
+		}
+
+		R8BASSERT( Latency >= 0 );
+
+		fftin = new CDSPRealFFTKeeper( fftinBits );
+
+		if( fftoutBits == fftinBits )
+		{
+			fftout = fftin;
+		}
+		else
+		{
+			ffto2 = new CDSPRealFFTKeeper( fftoutBits );
+			fftout = ffto2;
+		}
+
+		WorkBlocks.alloc( BlockLen2 * 2 + PrevInputLen );
+		CurInput = &WorkBlocks[ 0 ];
+		CurOutput = &WorkBlocks[ BlockLen2 ]; // CurInput and
+			// CurOutput are address-aligned.
+		PrevInput = &WorkBlocks[ BlockLen2 * 2 ];
+
+		clear();
+
+		R8BCONSOLE( "CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i "
+			"fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen,
+			UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(),
+			getLatency() );
+	}
+
+	virtual ~CDSPBlockConvolver()
+	{
+		Filter -> unref();
+	}
+
+	virtual int getLatency() const
+	{
+		return( DoConsumeLatency ? 0 : Latency );
+	}
+
+	virtual double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	virtual int getMaxOutLen( const int MaxInLen ) const
+	{
+		R8BASSERT( MaxInLen >= 0 );
+
+		return(( MaxInLen * UpFactor + DownFactor - 1 ) / DownFactor );
+	}
+
+	virtual void clear()
+	{
+		memset( &PrevInput[ 0 ], 0, PrevInputLen * sizeof( PrevInput[ 0 ]));
+
+		if( DoConsumeLatency )
+		{
+			LatencyLeft = Latency;
+		}
+		else
+		{
+			LatencyLeft = 0;
+
+			if( DownShift > 0 )
+			{
+				memset( &CurOutput[ 0 ], 0, ( BlockLen2 >> DownShift ) *
+					sizeof( CurOutput[ 0 ]));
+			}
+			else
+			{
+				memset( &CurOutput[ BlockLen2 - OutOffset ], 0, OutOffset *
+					sizeof( CurOutput[ 0 ]));
+
+				memset( &CurOutput[ 0 ], 0, ( InputLen - OutOffset ) *
+					sizeof( CurOutput[ 0 ]));
+			}
+		}
+
+		memset( CurInput, 0, InputDelay * sizeof( CurInput[ 0 ]));
+
+		InDataLeft = InputLen - InputDelay;
+		UpSkip = 0;
+		DownSkip = DownSkipInit;
+	}
+
+	virtual int process( double* ip, int l0, double*& op0 )
+	{
+		R8BASSERT( l0 >= 0 );
+		R8BASSERT( UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0 );
+
+		double* op = op0;
+		int l = l0 * UpFactor;
+		l0 = 0;
+
+		while( l > 0 )
+		{
+			const int Offs = InputLen - InDataLeft;
+
+			if( l < InDataLeft )
+			{
+				InDataLeft -= l;
+
+				if( UpShift >= 0 )
+				{
+					memcpy( &CurInput[ Offs >> UpShift ], ip,
+						( l >> UpShift ) * sizeof( CurInput[ 0 ]));
+				}
+				else
+				{
+					copyUpsample( ip, &CurInput[ Offs ], l );
+				}
+
+				copyToOutput( Offs - OutOffset, op, l, l0 );
+				break;
+			}
+
+			const int b = InDataLeft;
+			l -= b;
+			InDataLeft = InputLen;
+			int ilu;
+
+			if( UpShift >= 0 )
+			{
+				const int bu = b >> UpShift;
+				memcpy( &CurInput[ Offs >> UpShift ], ip,
+					bu * sizeof( CurInput[ 0 ]));
+
+				ip += bu;
+				ilu = InputLen >> UpShift;
+			}
+			else
+			{
+				copyUpsample( ip, &CurInput[ Offs ], b );
+				ilu = InputLen;
+			}
+
+			const size_t pil = PrevInputLen * sizeof( CurInput[ 0 ]);
+			memcpy( &CurInput[ ilu ], PrevInput, pil );
+			memcpy( PrevInput, &CurInput[ ilu - PrevInputLen ], pil );
+
+			(*fftin) -> forward( CurInput );
+
+			if( UpShift > 0 )
+			{
+				#if R8B_FLOATFFT
+					mirrorInputSpectrum( (float*) CurInput );
+				#else // R8B_FLOATFFT
+					mirrorInputSpectrum( CurInput );
+				#endif // R8B_FLOATFFT
+			}
+
+			if( Filter -> isZeroPhase() )
+			{
+				(*fftout) -> multiplyBlocksZP( Filter -> getKernelBlock(),
+					CurInput );
+			}
+			else
+			{
+				(*fftout) -> multiplyBlocks( Filter -> getKernelBlock(),
+					CurInput );
+			}
+
+			if( DownShift > 0 )
+			{
+				const int z = BlockLen2 >> DownShift;
+
+				#if R8B_FLOATFFT
+					float* const kb = (float*) Filter -> getKernelBlock();
+					float* const p = (float*) CurInput;
+				#else // R8B_FLOATFFT
+					const double* const kb = Filter -> getKernelBlock();
+					double* const p = CurInput;
+				#endif // R8B_FLOATFFT
+
+				p[ 1 ] = kb[ z ] * p[ z ] - kb[ z + 1 ] * p[ z + 1 ];
+			}
+
+			(*fftout) -> inverse( CurInput );
+
+			copyToOutput( Offs - OutOffset, op, b, l0 );
+
+			double* const tmp = CurInput;
+			CurInput = CurOutput;
+			CurOutput = tmp;
+		}
+
+		return( l0 );
+	}
+
+private:
+	CDSPFIRFilter* Filter; ///< Filter in use.
+		///<
+	CPtrKeeper< CDSPRealFFTKeeper* > fftin; ///< FFT object 1, used to produce
+		///< the input spectrum (can embed the "power of 2" upsampling).
+		///<
+	CPtrKeeper< CDSPRealFFTKeeper* > ffto2; ///< FFT object 2 (can be NULL).
+		///<
+	CDSPRealFFTKeeper* fftout; ///< FFT object used to produce the output
+		///< signal (can embed the "power of 2" downsampling), may point to
+		///< either "fftin" or "ffto2".
+		///<
+	int UpFactor; ///< Upsampling factor.
+		///<
+	int DownFactor; ///< Downsampling factor.
+		///<
+	bool DoConsumeLatency; ///< "True" if the output latency should be
+		///< consumed. Does not apply to the fractional part of the latency
+		///< (if such part is available).
+		///<
+	int BlockLen2; ///< Equals block length * 2.
+		///<
+	int OutOffset; ///< Output offset, depends on filter's introduced latency.
+		///<
+	int PrevInputLen; ///< The length of previous input data saved, used for
+		///< overlap.
+		///<
+	int InputLen; ///< The number of input samples that should be accumulated
+		///< before the input block is processed.
+		///<
+	int Latency; ///< Processing latency, in samples.
+		///<
+	double LatencyFrac; ///< Fractional latency, in samples, that is left in
+		///< the output signal.
+		///<
+	int UpShift; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is
+		///< not a "power of 2" value. Equals 0 if UpFactor equals 1.
+		///<
+	int DownShift; ///< "Power of 2" downsampling shift. Equals -1 if
+		///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals
+		///< 1.
+		///<
+	int InputDelay; ///< Additional input delay, in samples. Used to make the
+		///< output delay divisible by DownShift. Used only if UpShift <= 0
+		///< and DownShift > 0.
+		///<
+	CFixedBuffer< double > WorkBlocks; ///< Previous input data, input and
+		///< output data blocks, overall capacity = BlockLen2 * 2 +
+		///< PrevInputLen. Used in the flip-flop manner.
+		///<
+	double* PrevInput; ///< Previous input data buffer, capacity = BlockLen.
+		///<
+	double* CurInput; ///< Input data buffer, capacity = BlockLen2.
+		///<
+	double* CurOutput; ///< Output data buffer, capacity = BlockLen2.
+		///<
+	int InDataLeft; ///< Samples left before processing input and output FFT
+		///< blocks. Initialized to InputLen on clear.
+		///<
+	int LatencyLeft; ///< Latency in samples left to skip.
+		///<
+	int UpSkip; ///< The current upsampling sample skip (value in the range
+		///< 0 to UpFactor - 1).
+		///<
+	int DownSkip; ///< The current downsampling sample skip (value in the
+		///< range 0 to DownFactor - 1). Not used if DownShift > 0.
+		///<
+	int DownSkipInit; ///< The initial DownSkip value after clear().
+		///<
+
+	/**
+	 * Function copies samples from the input buffer to the output buffer
+	 * while inserting zeros inbetween them to perform the whole-numbered
+	 * upsampling.
+	 *
+	 * @param[in,out] ip0 Input buffer. Will be advanced on function's return.
+	 * @param[out] op Output buffer.
+	 * @param l0 The number of samples to fill in the output buffer, including
+	 * both input samples and interpolation (zero) samples.
+	 */
+
+	void copyUpsample( double*& ip0, double* op, int l0 )
+	{
+		int b = min( UpSkip, l0 );
+
+		if( b != 0 )
+		{
+			UpSkip -= b;
+			l0 -= b;
+
+			*op = 0.0;
+			op++;
+
+			while( --b != 0 )
+			{
+				*op = 0.0;
+				op++;
+			}
+		}
+
+		double* ip = ip0;
+		const int upf = UpFactor;
+		int l = l0 / upf;
+		int lz = l0 - l * upf;
+
+		if( upf == 3 )
+		{
+			while( l != 0 )
+			{
+				op[ 0 ] = *ip;
+				op[ 1 ] = 0.0;
+				op[ 2 ] = 0.0;
+				ip++;
+				op += upf;
+				l--;
+			}
+		}
+		else
+		if( upf == 5 )
+		{
+			while( l != 0 )
+			{
+				op[ 0 ] = *ip;
+				op[ 1 ] = 0.0;
+				op[ 2 ] = 0.0;
+				op[ 3 ] = 0.0;
+				op[ 4 ] = 0.0;
+				ip++;
+				op += upf;
+				l--;
+			}
+		}
+		else
+		{
+			const size_t zc = ( upf - 1 ) * sizeof( op[ 0 ]);
+
+			while( l != 0 )
+			{
+				*op = *ip;
+				ip++;
+
+				memset( op + 1, 0, zc );
+				op += upf;
+				l--;
+			}
+		}
+
+		if( lz != 0 )
+		{
+			*op = *ip;
+			ip++;
+			op++;
+
+			UpSkip = upf - lz;
+
+			while( --lz != 0 )
+			{
+				*op = 0.0;
+				op++;
+			}
+		}
+
+		ip0 = ip;
+	}
+
+	/**
+	 * Function copies sample data from the CurOutput buffer to the specified
+	 * output buffer and advances its position. If necessary, this function
+	 * "consumes" latency and performs downsampling.
+	 *
+	 * @param Offs CurOutput buffer offset, can be negative.
+	 * @param[out] op0 Output buffer pointer, will be advanced.
+	 * @param b The number of output samples available, including those which
+	 * are discarded during whole-number downsampling.
+	 * @param l0 The overall output sample count, will be increased.
+	 */
+
+	void copyToOutput( int Offs, double*& op0, int b, int& l0 )
+	{
+		if( Offs < 0 )
+		{
+			if( Offs + b <= 0 )
+			{
+				Offs += BlockLen2;
+			}
+			else
+			{
+				copyToOutput( Offs + BlockLen2, op0, -Offs, l0 );
+				b += Offs;
+				Offs = 0;
+			}
+		}
+
+		if( LatencyLeft != 0 )
+		{
+			if( LatencyLeft >= b )
+			{
+				LatencyLeft -= b;
+				return;
+			}
+
+			Offs += LatencyLeft;
+			b -= LatencyLeft;
+			LatencyLeft = 0;
+		}
+
+		const int df = DownFactor;
+
+		if( DownShift > 0 )
+		{
+			int Skip = Offs & ( df - 1 );
+
+			if( Skip > 0 )
+			{
+				Skip = df - Skip;
+				b -= Skip;
+				Offs += Skip;
+			}
+
+			if( b > 0 )
+			{
+				b = ( b + df - 1 ) >> DownShift;
+				memcpy( op0, &CurOutput[ Offs >> DownShift ],
+					b * sizeof( op0[ 0 ]));
+
+				op0 += b;
+				l0 += b;
+			}
+		}
+		else
+		{
+			if( df > 1 )
+			{
+				const double* ip = &CurOutput[ Offs + DownSkip ];
+				int l = ( b + df - 1 - DownSkip ) / df;
+				DownSkip += l * df - b;
+
+				double* op = op0;
+				l0 += l;
+				op0 += l;
+
+				while( l > 0 )
+				{
+					*op = *ip;
+					ip += df;
+					op++;
+					l--;
+				}
+			}
+			else
+			{
+				memcpy( op0, &CurOutput[ Offs ], b * sizeof( op0[ 0 ]));
+				op0 += b;
+				l0 += b;
+			}
+		}
+	}
+
+	/**
+	 * Function performs input spectrum mirroring which is used to perform a
+	 * fast "power of 2" upsampling. Such mirroring is equivalent to insertion
+	 * of zeros into the input signal.
+	 *
+	 * @param p Spectrum data block to mirror.
+	 * @tparam T Buffer's element type.
+	 */
+
+	template< typename T >
+	void mirrorInputSpectrum( T* const p )
+	{
+		const int bl1 = BlockLen2 >> UpShift;
+		const int bl2 = bl1 + bl1;
+		int i;
+
+		for( i = bl1 + 2; i < bl2; i += 2 )
+		{
+			p[ i ] = p[ bl2 - i ];
+			p[ i + 1 ] = -p[ bl2 - i + 1 ];
+		}
+
+		p[ bl1 ] = p[ 1 ];
+		p[ bl1 + 1 ] = 0.0;
+		p[ 1 ] = p[ 0 ];
+
+		for( i = 1; i < UpShift; i++ )
+		{
+			const int z = bl1 << i;
+			memcpy( &p[ z ], p, z * sizeof( p[ 0 ]));
+			p[ z + 1 ] = 0.0;
+		}
+	}
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h
@ -0,0 +1,719 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPFIRFilter.h
+ *
+ * @brief FIR filter generator and filter cache classes.
+ *
+ * This file includes low-pass FIR filter generator and filter cache.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPFIRFILTER_INCLUDED
+#define R8B_CDSPFIRFILTER_INCLUDED
+
+#include "CDSPSincFilterGen.h"
+#include "CDSPRealFFT.h"
+
+namespace r8b {
+
+/**
+ * Enumeration of filter's phase responses.
+ */
+
+enum EDSPFilterPhaseResponse
+{
+	fprLinearPhase = 0, ///< Linear-phase response. Features a linear-phase
+		///< high-latency response, with the latency expressed as integer
+		///< value.
+		///<
+	fprMinPhase ///< Minimum-phase response. Features a minimal latency
+		///< response, but the response's phase is non-linear. The latency is
+		///< usually expressed as non-integer value, and usually is small, but
+		///< is never equal to zero. The minimum-phase filter is transformed
+		///< from the linear-phase filter. The transformation has precision
+		///< limits which may skew both the -3 dB point and attenuation of the
+		///< filter being transformed: as it was measured, the skew happens
+		///< purely at random, and in most cases it is within tolerable range.
+		///< In a small (1%) random subset of cases the skew is bigger and
+		///< cannot be predicted. Minimum-phase transform requires 64-bit
+		///< floating point FFT precision, results with 32-bit float FFT are
+		///< far from optimal.
+		///<
+};
+
+/**
+ * @brief Calculation and storage class for FIR filters.
+ *
+ * Class that implements calculation and storing of a FIR filter (currently
+ * contains low-pass filter calculation routine designed for sample rate
+ * conversion). Objects of this class cannot be created directly, but can be
+ * obtained via the CDSPFilterCache::getLPFilter() static function.
+ */
+
+class CDSPFIRFilter : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPFIRFilter );
+
+	friend class CDSPFIRFilterCache;
+
+public:
+	~CDSPFIRFilter()
+	{
+		R8BASSERT( RefCount == 0 );
+
+		delete Next;
+	}
+
+	/**
+	 * @return The minimal allowed low-pass filter's transition band, in
+	 * percent.
+	 */
+
+	static double getLPMinTransBand()
+	{
+		return( 0.5 );
+	}
+
+	/**
+	 * @return The maximal allowed low-pass filter's transition band, in
+	 * percent.
+	 */
+
+	static double getLPMaxTransBand()
+	{
+		return( 45.0 );
+	}
+
+	/**
+	 * @return The minimal allowed low-pass filter's stop-band attenuation, in
+	 * decibel.
+	 */
+
+	static double getLPMinAtten()
+	{
+		return( 49.0 );
+	}
+
+	/**
+	 * @return The maximal allowed low-pass filter's stop-band attenuation, in
+	 * decibel.
+	 */
+
+	static double getLPMaxAtten()
+	{
+		return( 218.0 );
+	}
+
+	/**
+	 * @return "True" if kernel block of *this filter has zero-phase response.
+	 */
+
+	bool isZeroPhase() const
+	{
+		return( IsZeroPhase );
+	}
+
+	/**
+	 * @return Filter's latency, in samples (integer part).
+	 */
+
+	int getLatency() const
+	{
+		return( Latency );
+	}
+
+	/**
+	 * @return Filter's latency, in samples (fractional part). Always zero for
+	 * linear-phase filters.
+	 */
+
+	double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	/**
+	 * @return Filter kernel length, in samples. Not to be confused with the
+	 * block length.
+	 */
+
+	int getKernelLen() const
+	{
+		return( KernelLen );
+	}
+
+	/**
+	 * @return Filter's block length, espressed as Nth power of 2. The actual
+	 * length is twice as large due to zero-padding.
+	 */
+
+	int getBlockLenBits() const
+	{
+		return( BlockLenBits );
+	}
+
+	/**
+	 * @return Filter's kernel block, in complex-numbered form obtained via
+	 * the CDSPRealFFT::forward() function call, zero-padded, gain-adjusted
+	 * with the CDSPRealFFT::getInvMulConst() * ReqGain constant, immediately
+	 * suitable for convolution. Kernel block may have "zero-phase" response,
+	 * depending on the isZeroPhase() function's result.
+	 */
+
+	const double* getKernelBlock() const
+	{
+		return( KernelBlock );
+	}
+
+	/**
+	 * This function should be called when the filter obtained via the
+	 * filter cache is no longer needed.
+	 */
+
+	void unref();
+
+private:
+	double ReqNormFreq; ///< Required normalized frequency, 0 to 1 inclusive.
+		///<
+	double ReqTransBand; ///< Required transition band in percent, as passed
+		///< by the user.
+		///<
+	double ReqAtten; ///< Required stop-band attenuation in decibel, as passed
+		///< by the user (positive value).
+		///<
+	EDSPFilterPhaseResponse ReqPhase; ///< Required filter's phase response.
+		///<
+	double ReqGain; ///< Required overall filter's gain.
+		///<
+	CDSPFIRFilter* Next; ///< Next FIR filter in cache's list.
+		///<
+	int RefCount; ///< The number of references made to *this FIR filter.
+		///<
+	bool IsZeroPhase; ///< "True" if kernel block of *this filter has
+		///< zero-phase response.
+		///<
+	int Latency; ///< Filter's latency in samples (integer part).
+		///<
+	double LatencyFrac; ///< Filter's latency in samples (fractional part).
+		///<
+	int KernelLen; ///< Filter kernel length, in samples.
+		///<
+	int BlockLenBits; ///< Block length used to store *this FIR filter,
+		///< expressed as Nth power of 2. This value is used directly by the
+		///< convolver.
+		///<
+	CFixedBuffer< double > KernelBlock; ///< FIR filter buffer, capacity
+		///< equals to 1 << ( BlockLenBits + 1 ). Second part of the buffer
+		///< contains zero-padding to allow alias-free convolution.
+		///< Address-aligned.
+		///<
+
+	CDSPFIRFilter()
+		: RefCount( 1 )
+	{
+	}
+
+	/**
+	 * Function builds filter kernel based on the "Req" parameters.
+	 *
+	 * @param ExtAttenCorrs External attentuation correction table, for
+	 * internal use.
+	 */
+
+	void buildLPFilter( const double* const ExtAttenCorrs )
+	{
+		const double tb = ReqTransBand * 0.01;
+		double pwr;
+		double fo1;
+		double hl;
+		double atten = -ReqAtten;
+
+		if( tb >= 0.25 )
+		{
+			if( ReqAtten >= 117.0 )
+			{
+				atten -= 1.60;
+			}
+			else
+			if( ReqAtten >= 60.0 )
+			{
+				atten -= 1.91;
+			}
+			else
+			{
+				atten -= 2.25;
+			}
+		}
+		else
+		if( tb >= 0.10 )
+		{
+			if( ReqAtten >= 117.0 )
+			{
+				atten -= 0.69;
+			}
+			else
+			if( ReqAtten >= 60.0 )
+			{
+				atten -= 0.73;
+			}
+			else
+			{
+				atten -= 1.13;
+			}
+		}
+		else
+		{
+			if( ReqAtten >= 117.0 )
+			{
+				atten -= 0.21;
+			}
+			else
+			if( ReqAtten >= 60.0 )
+			{
+				atten -= 0.25;
+			}
+			else
+			{
+				atten -= 0.36;
+			}
+		}
+
+		static const int AttenCorrCount = 264;
+		static const double AttenCorrMin = 49.0;
+		static const double AttenCorrDiff = 176.25;
+		int AttenCorr = (int) floor(( -atten - AttenCorrMin ) *
+			AttenCorrCount / AttenCorrDiff + 0.5 );
+
+		AttenCorr = min( AttenCorrCount, max( 0, AttenCorr ));
+
+		if( ExtAttenCorrs != NULL )
+		{
+			atten -= ExtAttenCorrs[ AttenCorr ];
+		}
+		else
+		if( tb >= 0.25 )
+		{
+			static const double AttenCorrScale = 101.0;
+			static const signed char AttenCorrs[] = {
+				-127, -127, -125, -125, -122, -119, -115, -110, -104, -97,
+				-91, -82, -75, -24, -16, -6, 4, 14, 24, 29, 30, 32, 37, 44,
+				51, 57, 63, 67, 65, 50, 53, 56, 58, 60, 63, 64, 66, 68, 74,
+				77, 78, 78, 78, 79, 79, 60, 60, 60, 61, 59, 52, 47, 41, 36,
+				30, 24, 17, 9, 0, -8, -10, -11, -14, -13, -18, -25, -31, -38,
+				-44, -50, -57, -63, -68, -74, -81, -89, -96, -101, -104, -107,
+				-109, -110, -86, -84, -85, -82, -80, -77, -73, -67, -62, -55,
+				-48, -42, -35, -30, -20, -11, -2, 5, 6, 6, 7, 11, 16, 21, 26,
+				34, 41, 46, 49, 52, 55, 56, 48, 49, 51, 51, 52, 52, 52, 52,
+				52, 51, 51, 50, 47, 47, 50, 48, 46, 42, 38, 35, 31, 27, 24,
+				20, 16, 12, 11, 12, 10, 8, 4, -1, -6, -11, -16, -19, -17, -21,
+				-24, -27, -32, -34, -37, -38, -40, -41, -40, -40, -42, -41,
+				-44, -45, -43, -41, -34, -31, -28, -24, -21, -18, -14, -10,
+				-5, -1, 2, 5, 8, 7, 4, 3, 2, 2, 4, 6, 8, 9, 9, 10, 10, 10, 10,
+				9, 8, 9, 11, 14, 13, 12, 11, 10, 8, 7, 6, 5, 3, 2, 2, -1, -1,
+				-3, -3, -4, -4, -5, -4, -6, -7, -9, -5, -1, -1, 0, 1, 0, -2,
+				-3, -4, -5, -5, -8, -13, -13, -13, -12, -13, -12, -11, -11,
+				-9, -8, -7, -5, -3, -1, 2, 4, 6, 9, 10, 11, 14, 18, 21, 24,
+				27, 30, 34, 37, 37, 39, 40 };
+
+			atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
+		}
+		else
+		if( tb >= 0.10 )
+		{
+			static const double AttenCorrScale = 210.0;
+			static const signed char AttenCorrs[] = {
+				-113, -118, -122, -125, -126, -97, -95, -92, -92, -89, -82,
+				-75, -69, -48, -42, -36, -30, -22, -14, -5, -2, 1, 6, 13, 22,
+				28, 35, 41, 48, 55, 56, 56, 61, 65, 71, 77, 81, 83, 85, 85,
+				74, 74, 73, 72, 71, 70, 68, 64, 59, 56, 49, 52, 46, 42, 36,
+				32, 26, 20, 13, 7, -2, -6, -10, -15, -20, -27, -33, -38, -44,
+				-43, -48, -53, -57, -63, -69, -73, -75, -79, -81, -74, -76,
+				-77, -77, -78, -81, -80, -80, -78, -76, -65, -62, -59, -56,
+				-51, -48, -44, -38, -33, -25, -19, -13, -5, -1, 2, 7, 13, 17,
+				21, 25, 30, 35, 40, 45, 50, 53, 56, 57, 55, 58, 59, 62, 64,
+				67, 67, 68, 68, 62, 61, 61, 59, 59, 57, 57, 55, 52, 48, 42,
+				38, 35, 31, 26, 20, 15, 13, 10, 7, 3, -2, -8, -13, -17, -23,
+				-28, -34, -37, -40, -41, -45, -48, -50, -53, -57, -59, -62,
+				-63, -63, -57, -57, -56, -56, -54, -54, -53, -49, -48, -41,
+				-38, -33, -31, -26, -23, -18, -12, -9, -7, -7, -3, 0, 5, 9,
+				14, 16, 20, 22, 21, 23, 25, 27, 28, 29, 34, 33, 35, 33, 31,
+				30, 29, 29, 26, 26, 25, 24, 20, 19, 15, 10, 8, 4, 1, -2, -6,
+				-10, -16, -19, -23, -26, -27, -30, -34, -39, -43, -47, -51,
+				-52, -54, -56, -58, -59, -62, -63, -66, -65, -65, -64, -59,
+				-57, -54, -52, -48, -44, -42, -37, -32, -22, -17, -10, -3, 5,
+				13, 22, 30, 40, 50, 60, 72 };
+
+			atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
+		}
+		else
+		{
+			static const double AttenCorrScale = 196.0;
+			static const signed char AttenCorrs[] = {
+				-15, -17, -20, -20, -20, -21, -20, -16, -17, -18, -17, -13,
+				-12, -11, -9, -7, -5, -4, -1, 1, 3, 4, 5, 6, 7, 9, 9, 10, 10,
+				10, 11, 11, 11, 12, 12, 12, 10, 11, 10, 10, 8, 10, 11, 10, 11,
+				11, 13, 14, 15, 19, 27, 26, 23, 18, 14, 8, 4, -2, -6, -12,
+				-17, -23, -28, -33, -37, -42, -46, -49, -53, -57, -60, -61,
+				-64, -65, -67, -66, -66, -66, -65, -64, -61, -59, -56, -52,
+				-48, -42, -38, -31, -27, -19, -13, -7, -1, 8, 14, 22, 29, 37,
+				45, 52, 59, 66, 73, 80, 86, 91, 96, 100, 104, 108, 111, 114,
+				115, 117, 118, 120, 120, 118, 117, 114, 113, 111, 107, 103,
+				99, 95, 89, 84, 78, 72, 66, 60, 52, 44, 37, 30, 21, 14, 6, -3,
+				-11, -18, -26, -34, -43, -51, -58, -65, -73, -78, -85, -90,
+				-97, -102, -107, -113, -115, -118, -121, -125, -125, -126,
+				-126, -126, -125, -124, -121, -119, -115, -111, -109, -101,
+				-102, -95, -88, -81, -73, -67, -63, -54, -47, -40, -33, -26,
+				-18, -11, -5, 2, 8, 14, 19, 25, 31, 36, 37, 43, 47, 49, 51,
+				52, 57, 57, 56, 57, 58, 58, 58, 57, 56, 52, 52, 50, 48, 44,
+				41, 39, 37, 33, 31, 26, 24, 21, 18, 14, 11, 8, 4, 2, -2, -5,
+				-7, -9, -11, -13, -15, -16, -18, -19, -20, -23, -24, -24, -25,
+				-27, -26, -27, -29, -30, -31, -32, -35, -36, -39, -40, -44,
+				-46, -51, -54, -59, -63, -69, -76, -83, -91, -98 };
+
+			atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
+		}
+
+		pwr = 7.43932822146293e-8 * sqr( atten ) + 0.000102747434588003 *
+			cos( 0.00785021930010397 * atten ) * cos( 0.633854318781239 +
+			0.103208573657699 * atten ) - 0.00798132247867036 -
+			0.000903555213543865 * atten - 0.0969365532127236 * exp(
+			0.0779275237937911 * atten ) - 1.37304948662012e-5 * atten * cos(
+			0.00785021930010397 * atten );
+
+		if( pwr <= 0.067665322581 )
+		{
+			if( tb >= 0.25 )
+			{
+				hl = 2.6778150875894 / tb + 300.547590563091 * atan( atan(
+					2.68959772209918 * pwr )) / ( 5.5099277187035 * tb - tb *
+					tanh( cos( asinh( atten ))));
+
+				fo1 = 0.987205355829873 * tb + 1.00011788929851 * atan2(
+					-0.321432067051302 - 6.19131357321578 * sqrt( pwr ),
+					hl + -1.14861472207245 / ( hl - 14.1821147585957 ) + pow(
+					0.9521145021664, pow( atan2( 1.12018764830637, tb ),
+					2.10988901686912 * hl - 20.9691278378345 )));
+			}
+			else
+			if( tb >= 0.10 )
+			{
+				hl = ( 1.56688617018066 + 142.064321294568 * pwr +
+					0.00419441117131136 * cos( 243.633511747297 * pwr ) -
+					0.022953443903576 * atten - 0.026629568860284 * cos(
+					127.715550622571 * pwr )) / tb;
+
+				fo1 = 0.982299356642411 * tb + 0.999441744774215 * asinh((
+					-0.361783054039583 - 5.80540593623676 * sqrt( pwr )) /
+					hl );
+			}
+			else
+			{
+				hl = ( 2.45739657014937 + 269.183679500541 * pwr * cos(
+					5.73225668178813 + atan2( cosh( 0.988861169868941 -
+					17.2201556280744 * pwr ), 1.08340138240431 * pwr ))) / tb;
+
+				fo1 = 2.291956939 * tb + 0.01942450693 * sqr( tb ) * hl -
+					4.67538973161837 * pwr * tb - 1.668433124 * tb *
+					pow( pwr, pwr );
+			}
+		}
+		else
+		{
+			if( tb >= 0.25 )
+			{
+				hl = ( 1.50258368698213 + 158.556968859477 * asinh( pwr ) *
+					tanh( 57.9466246871383 * tanh( pwr )) -
+					0.0105440479814834 * atten ) / tb;
+
+				fo1 = 0.994024401639321 * tb + ( -0.236282717577215 -
+					6.8724924545387 * sqrt( sin( pwr ))) / hl;
+			}
+			else
+			if( tb >= 0.10 )
+			{
+				hl = ( 1.50277377248945 + 158.222625721046 * asinh( pwr ) *
+					tanh( 1.02875299001715 + 42.072277322604 * pwr ) -
+					0.0108380943845632 * atten ) / tb;
+
+				fo1 = 0.992539376734551 * tb + ( -0.251747813037178 -
+					6.74159892452584 * sqrt( tanh( tanh( tan( pwr ))))) / hl;
+			}
+			else
+			{
+				hl = ( 1.15990238966306 * pwr - 5.02124037125213 * sqr(
+					pwr ) - 0.158676856669827 * atten * cos( 1.1609073390614 *
+					pwr - 6.33932586197475 * pwr * sqr( pwr ))) / tb;
+
+				fo1 = 0.867344453126885 * tb + 0.052693817907757 * tb * log(
+					pwr ) + 0.0895511178735932 * tb * atan( 59.7538527741309 *
+					pwr ) - 0.0745653568081453 * pwr * tb;
+			}
+		}
+
+		double WinParams[ 2 ];
+		WinParams[ 0 ] = 125.0;
+		WinParams[ 1 ] = pwr;
+
+		CDSPSincFilterGen sinc;
+		sinc.Len2 = 0.25 * hl / ReqNormFreq;
+		sinc.Freq1 = 0.0;
+		sinc.Freq2 = R8B_PI * ( 1.0 - fo1 ) * ReqNormFreq;
+		sinc.initBand( CDSPSincFilterGen :: wftKaiser, WinParams, true );
+
+		KernelLen = sinc.KernelLen;
+		BlockLenBits = getBitOccupancy( KernelLen - 1 ) + R8B_EXTFFT;
+		const int BlockLen = 1 << BlockLenBits;
+
+		KernelBlock.alloc( BlockLen * 2 );
+		sinc.generateBand( &KernelBlock[ 0 ],
+			&CDSPSincFilterGen :: calcWindowKaiser );
+
+		if( ReqPhase == fprLinearPhase )
+		{
+			IsZeroPhase = true;
+			Latency = sinc.fl2;
+			LatencyFrac = 0.0;
+		}
+		else
+		{
+			IsZeroPhase = false;
+			double DCGroupDelay;
+
+			calcMinPhaseTransform( &KernelBlock[ 0 ], KernelLen, 16, false,
+				&DCGroupDelay );
+
+			Latency = (int) DCGroupDelay;
+			LatencyFrac = DCGroupDelay - Latency;
+		}
+
+		CDSPRealFFTKeeper ffto( BlockLenBits + 1 );
+
+		if( IsZeroPhase )
+		{
+			// Calculate DC gain.
+
+			double s = 0.0;
+			int i;
+
+			for( i = 0; i < KernelLen; i++ )
+			{
+				s += KernelBlock[ i ];
+			}
+
+			s = ffto -> getInvMulConst() * ReqGain / s;
+
+			// Time-shift the filter so that zero-phase response is produced.
+			// Simultaneously multiply by "s".
+
+			for( i = 0; i <= sinc.fl2; i++ )
+			{
+				KernelBlock[ i ] = KernelBlock[ sinc.fl2 + i ] * s;
+			}
+
+			for( i = 1; i <= sinc.fl2; i++ )
+			{
+				KernelBlock[ BlockLen * 2 - i ] = KernelBlock[ i ];
+			}
+
+			memset( &KernelBlock[ sinc.fl2 + 1 ], 0,
+				( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
+
+			ffto -> forward( KernelBlock );
+			ffto -> convertToZP( KernelBlock );
+		}
+		else
+		{
+			normalizeFIRFilter( &KernelBlock[ 0 ], KernelLen,
+				ffto -> getInvMulConst() * ReqGain );
+
+			memset( &KernelBlock[ KernelLen ], 0,
+				( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
+
+			ffto -> forward( KernelBlock );
+		}
+
+		R8BCONSOLE( "CDSPFIRFilter: flt_len=%i latency=%i nfreq=%.4f "
+			"tb=%.1f att=%.1f gain=%.3f\n", KernelLen, Latency,
+			ReqNormFreq, ReqTransBand, ReqAtten, ReqGain );
+	}
+};
+
+/**
+ * @brief FIR filter cache class.
+ *
+ * Class that implements cache for calculated FIR filters. The required FIR
+ * filter should be obtained via the getLPFilter() static function.
+ */
+
+class CDSPFIRFilterCache : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPFIRFilterCache );
+
+	friend class CDSPFIRFilter;
+
+public:
+	/**
+	 * @return The number of filters present in the cache now. This value can
+	 * be monitored for debugging "forgotten" filters.
+	 */
+
+	static int getObjCount()
+	{
+		R8BSYNC( StateSync );
+
+		return( ObjCount );
+	}
+
+	/**
+	 * Function calculates or returns reference to a previously calculated
+	 * (cached) low-pass FIR filter. Note that the real transition band and
+	 * attenuation achieved by the filter varies with the magnitude of the
+	 * required attenuation, and are never 100% exact.
+	 *
+	 * @param ReqNormFreq Required normalized frequency, in the range 0 to 1,
+	 * inclusive. This is the point after which the stop-band spans.
+	 * @param ReqTransBand Required transition band, in percent of the
+	 * 0 to ReqNormFreq spectral bandwidth, in the range
+	 * CDSPFIRFilter::getLPMinTransBand() to
+	 * CDSPFIRFilter::getLPMaxTransBand(), inclusive. The transition band
+	 * specifies the part of the spectrum between the -3 dB and ReqNormFreq
+	 * points. The real resulting -3 dB point varies in the range from -3.00
+	 * to -3.05 dB, but is generally very close to -3 dB.
+	 * @param ReqAtten Required stop-band attenuation in decibel, in the range
+	 * CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
+	 * inclusive. Note that the actual stop-band attenuation of the resulting
+	 * filter may be 0.40-4.46 dB higher.
+	 * @param ReqPhase Required filter's phase response.
+	 * @param ReqGain Required overall filter's gain (1.0 for unity gain).
+	 * @param AttenCorrs Attentuation correction table, to pass to the filter
+	 * generation function. For internal use.
+	 * @return A reference to a new or a previously calculated low-pass FIR
+	 * filter object with the required characteristics. A reference count is
+	 * incremented in the returned filter object which should be released
+	 * after use via the CDSPFIRFilter::unref() function.
+	 */
+
+	static CDSPFIRFilter& getLPFilter( const double ReqNormFreq,
+		const double ReqTransBand, const double ReqAtten,
+		const EDSPFilterPhaseResponse ReqPhase, const double ReqGain,
+		const double* const AttenCorrs = NULL )
+	{
+		R8BASSERT( ReqNormFreq > 0.0 && ReqNormFreq <= 1.0 );
+		R8BASSERT( ReqTransBand >= CDSPFIRFilter :: getLPMinTransBand() );
+		R8BASSERT( ReqTransBand <= CDSPFIRFilter :: getLPMaxTransBand() );
+		R8BASSERT( ReqAtten >= CDSPFIRFilter :: getLPMinAtten() );
+		R8BASSERT( ReqAtten <= CDSPFIRFilter :: getLPMaxAtten() );
+		R8BASSERT( ReqGain > 0.0 );
+
+		R8BSYNC( StateSync );
+
+		CDSPFIRFilter* PrevObj = NULL;
+		CDSPFIRFilter* CurObj = Objects;
+
+		while( CurObj != NULL )
+		{
+			if( CurObj -> ReqNormFreq == ReqNormFreq &&
+				CurObj -> ReqTransBand == ReqTransBand &&
+				CurObj -> ReqAtten == ReqAtten &&
+				CurObj -> ReqPhase == ReqPhase &&
+				CurObj -> ReqGain == ReqGain )
+			{
+				break;
+			}
+
+			if( CurObj -> Next == NULL && ObjCount >= R8B_FILTER_CACHE_MAX )
+			{
+				if( CurObj -> RefCount == 0 )
+				{
+					// Delete the last filter which is not used.
+
+					PrevObj -> Next = NULL;
+					delete CurObj;
+					ObjCount--;
+				}
+				else
+				{
+					// Move the last filter to the top of the list since it
+					// seems to be in use for a long time.
+
+					PrevObj -> Next = NULL;
+					CurObj -> Next = Objects.unkeep();
+					Objects = CurObj;
+				}
+
+				CurObj = NULL;
+				break;
+			}
+
+			PrevObj = CurObj;
+			CurObj = CurObj -> Next;
+		}
+
+		if( CurObj != NULL )
+		{
+			CurObj -> RefCount++;
+
+			if( PrevObj == NULL )
+			{
+				return( *CurObj );
+			}
+
+			// Remove the filter from the list temporarily.
+
+			PrevObj -> Next = CurObj -> Next;
+		}
+		else
+		{
+			// Create a new filter object (with RefCount == 1) and build the
+			// filter kernel.
+
+			CurObj = new CDSPFIRFilter();
+			CurObj -> ReqNormFreq = ReqNormFreq;
+			CurObj -> ReqTransBand = ReqTransBand;
+			CurObj -> ReqAtten = ReqAtten;
+			CurObj -> ReqPhase = ReqPhase;
+			CurObj -> ReqGain = ReqGain;
+			ObjCount++;
+
+			CurObj -> buildLPFilter( AttenCorrs );
+		}
+
+		// Insert the filter at the start of the list.
+
+		CurObj -> Next = Objects.unkeep();
+		Objects = CurObj;
+
+		return( *CurObj );
+	}
+
+private:
+	static CSyncObject StateSync; ///< Cache state synchronizer.
+		///<
+	static CPtrKeeper< CDSPFIRFilter* > Objects; ///< The chain of cached
+		///< objects.
+		///<
+	static int ObjCount; ///< The number of objects currently preset in the
+		///< cache.
+		///<
+};
+
+// ---------------------------------------------------------------------------
+// CDSPFIRFilter PUBLIC
+// ---------------------------------------------------------------------------
+
+inline void CDSPFIRFilter :: unref()
+{
+	R8BSYNC( CDSPFIRFilterCache :: StateSync );
+
+	RefCount--;
+}
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPFIRFILTER_INCLUDED
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h
@ -0,0 +1,401 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPHBDownsampler.h
+ *
+ * @brief Half-band downsampling convolver class.
+ *
+ * This file includes half-band downsampling convolver class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPHBDOWNSAMPLER_INCLUDED
+#define R8B_CDSPHBDOWNSAMPLER_INCLUDED
+
+#include "CDSPHBUpsampler.h"
+
+namespace r8b {
+
+/**
+ * @brief Half-band downsampler class.
+ *
+ * Class implements brute-force half-band 2X downsampling that uses small
+ * sparse symmetric FIR filters. The output has 2.0 gain.
+ */
+
+class CDSPHBDownsampler : public CDSPProcessor
+{
+public:
+	/**
+	 * Constructor initalizes the half-band downsampler.
+	 *
+	 * @param ReqAtten Required half-band filter attentuation.
+	 * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+	 * downsampling ratio, e.g. at 4x downsampling 0 is used, at 8x
+	 * downsampling 1 is used, etc.
+	 * @param IsThird "True" if 1/3 resampling is performed.
+	 * @param PrevLatency Latency, in samples (any value >=0), which was left
+	 * in the output signal by a previous process. Whole-number latency will
+	 * be consumed by *this object while remaining fractional latency can be
+	 * obtained via the getLatencyFrac() function.
+	 */
+
+	CDSPHBDownsampler( const double ReqAtten, const int SteepIndex,
+		const bool IsThird, const double PrevLatency )
+	{
+		static const CConvolveFn FltConvFn[ 14 ] = {
+			&CDSPHBDownsampler :: convolve1, &CDSPHBDownsampler :: convolve2,
+			&CDSPHBDownsampler :: convolve3, &CDSPHBDownsampler :: convolve4,
+			&CDSPHBDownsampler :: convolve5, &CDSPHBDownsampler :: convolve6,
+			&CDSPHBDownsampler :: convolve7, &CDSPHBDownsampler :: convolve8,
+			&CDSPHBDownsampler :: convolve9, &CDSPHBDownsampler :: convolve10,
+			&CDSPHBDownsampler :: convolve11, &CDSPHBDownsampler :: convolve12,
+			&CDSPHBDownsampler :: convolve13,
+			&CDSPHBDownsampler :: convolve14 };
+
+		int fltt;
+		double att;
+
+		if( IsThird )
+		{
+			CDSPHBUpsampler :: getHBFilterThird( ReqAtten, SteepIndex, fltp,
+				fltt, att );
+		}
+		else
+		{
+			CDSPHBUpsampler :: getHBFilter( ReqAtten, SteepIndex, fltp, fltt,
+				att );
+		}
+
+		convfn = FltConvFn[ fltt - 1 ];
+		fll = fltt * 2 - 1;
+		fl2 = fll;
+		flo = fll + fl2;
+		flb = BufLen - fll;
+		BufRP = Buf + fll;
+
+		LatencyFrac = PrevLatency * 0.5;
+		Latency = (int) LatencyFrac;
+		LatencyFrac -= Latency;
+
+		R8BASSERT( Latency >= 0 );
+
+		R8BCONSOLE( "CDSPHBDownsampler: taps=%i third=%i att=%.1f io=1/2\n",
+			fltt, (int) IsThird, att );
+
+		clear();
+	}
+
+	virtual int getLatency() const
+	{
+		return( 0 );
+	}
+
+	virtual double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	virtual int getMaxOutLen( const int MaxInLen ) const
+	{
+		R8BASSERT( MaxInLen >= 0 );
+
+		return(( MaxInLen + 1 ) >> 1 );
+	}
+
+	virtual void clear()
+	{
+		LatencyLeft = Latency;
+		BufLeft = 0;
+		WritePos = 0;
+		ReadPos = flb; // Set "read" position to account for filter's latency.
+
+		memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
+	}
+
+	virtual int process( double* ip, int l, double*& op0 )
+	{
+		R8BASSERT( l >= 0 );
+
+		double* op = op0;
+
+		while( l > 0 )
+		{
+			// Add new input samples to both halves of the ring buffer.
+
+			const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
+
+			double* const wp1 = Buf + WritePos;
+			memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
+
+			if( WritePos < flo )
+			{
+				const int c = min( b, flo - WritePos );
+				memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
+			}
+
+			ip += b;
+			WritePos = ( WritePos + b ) & BufLenMask;
+			l -= b;
+			BufLeft += b;
+
+			// Produce output.
+
+			if( BufLeft > fl2 )
+			{
+				const int c = ( BufLeft - fl2 + 1 ) >> 1;
+
+				double* const opend = op + c;
+				( *convfn )( op, opend, fltp, BufRP, ReadPos );
+
+				op = opend;
+				const int c2 = c + c;
+				ReadPos = ( ReadPos + c2 ) & BufLenMask;
+				BufLeft -= c2;
+			}
+		}
+
+		int ol = (int) ( op - op0 );
+
+		if( LatencyLeft != 0 )
+		{
+			if( LatencyLeft >= ol )
+			{
+				LatencyLeft -= ol;
+				return( 0 );
+			}
+
+			ol -= LatencyLeft;
+			op0 += LatencyLeft;
+			LatencyLeft = 0;
+		}
+
+		return( ol );
+	}
+
+private:
+	static const int BufLenBits = 10; ///< The length of the ring buffer,
+		///< expressed as Nth power of 2. This value can be reduced if it is
+		///< known that only short input buffers will be passed to the
+		///< interpolator. The minimum value of this parameter is 5, and
+		///< 1 << BufLenBits should be at least 3 times larger than the
+		///< FilterLen.
+		///<
+	static const int BufLen = 1 << BufLenBits; ///< The length of the ring
+		///< buffer. The actual length is twice as long to allow "beyond max
+		///< position" positioning.
+		///<
+	static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
+		///< position wrapping.
+		///<
+	double Buf[ BufLen + 54 ]; ///< The ring buffer, including overrun
+		///< protection for the largest filter.
+		///<
+	const double* fltp; ///< Half-band filter taps.
+		///<
+	int fll; ///< Input latency.
+		///<
+	int fl2; ///< Right-side filter length.
+		///<
+	int flo; ///< Overrun length.
+		///<
+	int flb; ///< Initial read position and maximal buffer write length.
+		///<
+	const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
+		///<
+	int Latency; ///< Initial latency that should be removed from the output.
+		///<
+	double LatencyFrac; ///< Fractional latency left on the output.
+		///<
+	int BufLeft; ///< The number of samples left in the buffer to process.
+		///< When this value is below FilterLenD2Plus1, the interpolation
+		///< cycle ends.
+		///<
+	int WritePos; ///< The current buffer write position. Incremented together
+		///< with the BufLeft variable.
+		///<
+	int ReadPos; ///< The current buffer read position.
+		///<
+	int LatencyLeft; ///< Latency left to remove.
+		///<
+	typedef void( *CConvolveFn )( double* op, double* const opend,
+		const double* const flt, const double* const rp0, int rpos ); ///<
+		///< Convolution function type.
+		///<
+	CConvolveFn convfn; ///< Convolution function in use.
+		///<
+
+#define R8BHBC1( fn ) \
+	static void fn( double* op, double* const opend, const double* const flt, \
+		const double* const rp0, int rpos ) \
+	{ \
+		while( op != opend ) \
+		{ \
+			const double* const rp = rp0 + rpos; \
+			*op = rp[ 0 ] +
+
+#define R8BHBC2 \
+			rpos = ( rpos + 2 ) & BufLenMask; \
+			op++; \
+		} \
+	}
+
+	R8BHBC1( convolve1 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve2 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve3 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve4 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve5 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve6 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve7 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve8 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve9 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve10 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve11 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+				flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve12 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+				flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
+				flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve13 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+				flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
+				flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
+				flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve14 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+				flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
+				flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
+				flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]) +
+				flt[ 13 ] * ( rp[ 27 ] + rp[ -27 ]);
+	R8BHBC2
+
+#undef R8BHBC1
+#undef R8BHBC2
+};
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPHBDOWNSAMPLER_INCLUDED
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h
@ -0,0 +1,804 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPHBUpsampler.h
+ *
+ * @brief Half-band upsampling class.
+ *
+ * This file includes half-band upsampling class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPHBUPSAMPLER_INCLUDED
+#define R8B_CDSPHBUPSAMPLER_INCLUDED
+
+#include "CDSPProcessor.h"
+
+namespace r8b {
+
+/**
+ * @brief Half-band upsampling class.
+ *
+ * Class implements brute-force half-band 2X upsampling that uses small
+ * sparse symmetric FIR filters. It is very efficient and should be used at
+ * latter upsampling steps after initial steep 2X upsampling.
+ */
+
+class CDSPHBUpsampler : public CDSPProcessor
+{
+public:
+	/**
+	 * Function that provides filter data for various steepness indices and
+	 * attenuations.
+	 *
+	 * @param ReqAtten Required half-band filter attentuation.
+	 * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+	 * upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
+	 * etc.
+	 */
+
+	static void getHBFilter( const double ReqAtten, const int SteepIndex,
+		const double*& flt, int& fltt, double& att )
+	{
+		static const double HBKernel_4A[ 4 ] = { // -54.5176 dB, 4
+			6.1729335650971517e-001, -1.5963945620743250e-001,
+			5.5073370934086312e-002, -1.4603578989932850e-002,};
+		static const double HBKernel_5A[ 5 ] = { // -66.3075 dB, 4
+			6.2068807424902472e-001, -1.6827573634467302e-001,
+			6.5263016720721170e-002, -2.2483331611592005e-002,
+			5.2917326684281110e-003,};
+		static const double HBKernel_6A[ 6 ] = { // -89.5271 dB, 4
+			6.2187202340480707e-001, -1.7132842113816371e-001,
+			6.9019169178765674e-002, -2.5799728312695277e-002,
+			7.4880112525741666e-003, -1.2844465869952567e-003,};
+		static const double HBKernel_7A[ 7 ] = { // -105.2842 dB, 4
+			6.2354494135775851e-001, -1.7571220703702045e-001,
+			7.4529843603968457e-002, -3.0701736822442153e-002,
+			1.0716755639039573e-002, -2.7833422930759735e-003,
+			4.1118797093875510e-004,};
+		static const double HBKernel_8A[ 8 ] = { // -121.0063 dB, 4
+			6.2488363107953926e-001, -1.7924942606514119e-001,
+			7.9068155655640557e-002, -3.4907523415495731e-002,
+			1.3710256799907897e-002, -4.3991142586987933e-003,
+			1.0259190163889602e-003, -1.3278941979339359e-004,};
+		static const double HBKernel_9A[ 9 ] = { // -136.6982 dB, 4
+			6.2597763804021977e-001, -1.8216414325139055e-001,
+			8.2879104876726728e-002, -3.8563442248249404e-002,
+			1.6471530499739394e-002, -6.0489108881335227e-003,
+			1.7805283804140392e-003, -3.7533200112729561e-004,
+			4.3172840558735476e-005,};
+		static const double HBKernel_10A[ 10 ] = { // -152.3572 dB, 4
+			6.2688767582974092e-001, -1.8460766807559420e-001,
+			8.6128943000481864e-002, -4.1774474147006607e-002,
+			1.9014801985747346e-002, -7.6870397465866507e-003,
+			2.6264590175341853e-003, -7.1106660285478562e-004,
+			1.3645852036179345e-004, -1.4113888783332969e-005,};
+		static const double HBKernel_11A[ 11 ] = { // -183.7962 dB, 4
+			6.2667167706948146e-001, -1.8407153342635879e-001,
+			8.5529995610836046e-002, -4.1346831462361310e-002,
+			1.8844831691322637e-002, -7.7125170365394992e-003,
+			2.7268674860562087e-003, -7.9745028501057233e-004,
+			1.8116344606360795e-004, -2.8569149754241848e-005,
+			2.3667022010173616e-006,};
+		static const double HBKernel_12A[ 12 ] = { // -199.4768 dB, 4
+			6.2747849730367999e-001, -1.8623616784506747e-001,
+			8.8409755898467945e-002, -4.4207468821462342e-002,
+			2.1149175945115381e-002, -9.2551508371115209e-003,
+			3.5871562170822330e-003, -1.1923167653750219e-003,
+			3.2627812189920129e-004, -6.9106902511490413e-005,
+			1.0122897863125124e-005, -7.7531878906846174e-007,};
+		static const double HBKernel_13A[ 13 ] = { // -215.1364 dB, 4
+			6.2816416252367324e-001, -1.8809076955230414e-001,
+			9.0918539867353029e-002, -4.6765502683599310e-002,
+			2.3287520498995663e-002, -1.0760627245014184e-002,
+			4.4853922948425683e-003, -1.6438775426910800e-003,
+			5.1441312354764978e-004, -1.3211725685765050e-004,
+			2.6191319837779187e-005, -3.5802430606313093e-006,
+			2.5491278270628601e-007,};
+		static const double HBKernel_14A[ 14 ] = { // -230.7526 dB, 4
+			6.2875473120929948e-001, -1.8969941936903847e-001,
+			9.3126094480960403e-002, -4.9067251179869126e-002,
+			2.5273008851199916e-002, -1.2218646153393291e-002,
+			5.4048942085580280e-003, -2.1409919546078581e-003,
+			7.4250292812927973e-004, -2.1924542206832172e-004,
+			5.3015808983125091e-005, -9.8743034923598196e-006,
+			1.2650391141650221e-006, -8.4146674637474946e-008,};
+		static const int FltCountA = 11;
+		static const int FlttBaseA = 4;
+		static const double FltAttensA[ FltCountA ] = {
+			54.5176, 66.3075, 89.5271, 105.2842, 121.0063, 136.6982, 152.3572, 183.7962, 199.4768, 215.1364, 230.7526, };
+		static const double* const FltPtrsA[ FltCountA ] = {
+			HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, HBKernel_10A, HBKernel_11A, HBKernel_12A, HBKernel_13A, HBKernel_14A, };
+		static const double HBKernel_2B[ 2 ] = { // -56.6007 dB, 8
+			5.7361525854329076e-001, -7.5092074924827903e-002,};
+		static const double HBKernel_3B[ 3 ] = { // -83.0295 dB, 8
+			5.9277038608066912e-001, -1.0851340190268854e-001,
+			1.5813570475513079e-002,};
+		static const double HBKernel_4B[ 4 ] = { // -123.4724 dB, 8
+			6.0140277542879617e-001, -1.2564483854574138e-001,
+			2.7446500598038322e-002, -3.2051079559057435e-003,};
+		static const double HBKernel_5B[ 5 ] = { // -152.4411 dB, 8
+			6.0818642429088932e-001, -1.3981140187175697e-001,
+			3.8489164054503623e-002, -7.6218861797853104e-003,
+			7.5772358130952392e-004,};
+		static const double HBKernel_6B[ 6 ] = { // -181.2501 dB, 8
+			6.1278392271464355e-001, -1.5000053762513338e-001,
+			4.7575323511364960e-002, -1.2320702802243476e-002,
+			2.1462442592348487e-003, -1.8425092381892940e-004,};
+		static const double HBKernel_7B[ 7 ] = { // -209.9472 dB, 8
+			6.1610372263478952e-001, -1.5767891882524138e-001,
+			5.5089691170294691e-002, -1.6895755656366061e-002,
+			3.9416643438213977e-003, -6.0603623791604668e-004,
+			4.5632602433393365e-005,};
+		static const double HBKernel_8B[ 8 ] = { // -238.5616 dB, 8
+			6.1861282914465976e-001, -1.6367179451225150e-001,
+			6.1369861342939716e-002, -2.1184466539006987e-002,
+			5.9623357510842061e-003, -1.2483098507454090e-003,
+			1.7099297537964702e-004, -1.1448313239478885e-005,};
+		static const int FltCountB = 7;
+		static const int FlttBaseB = 2;
+		static const double FltAttensB[ FltCountB ] = {
+			56.6007, 83.0295, 123.4724, 152.4411, 181.2501, 209.9472, 238.5616, };
+		static const double* const FltPtrsB[ FltCountB ] = {
+			HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, HBKernel_7B, HBKernel_8B, };
+		static const double HBKernel_2C[ 2 ] = { // -89.0473 dB, 16
+			5.6430278013478008e-001, -6.4338068855763375e-002,};
+		static const double HBKernel_3C[ 3 ] = { // -130.8951 dB, 16
+			5.8706402915551448e-001, -9.9362380958670449e-002,
+			1.2298637065869358e-002,};
+		static const double HBKernel_4C[ 4 ] = { // -172.3192 dB, 16
+			5.9896586134984675e-001, -1.2111680603434927e-001,
+			2.4763118076458895e-002, -2.6121758132212989e-003,};
+		static const double HBKernel_5C[ 5 ] = { // -213.4984 dB, 16
+			6.0626808285230716e-001, -1.3588224032740795e-001,
+			3.5544305238309003e-002, -6.5127022377289654e-003,
+			5.8255449565950768e-004,};
+		static const double HBKernel_6C[ 6 ] = { // -254.5186 dB, 16
+			6.1120171263351242e-001, -1.4654486853757870e-001,
+			4.4582959299131253e-002, -1.0840543858123995e-002,
+			1.7343706485509962e-003, -1.3363018567985596e-004,};
+		static const int FltCountC = 5;
+		static const int FlttBaseC = 2;
+		static const double FltAttensC[ FltCountC ] = {
+			89.0473, 130.8951, 172.3192, 213.4984, 254.5186, };
+		static const double* const FltPtrsC[ FltCountC ] = {
+			HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, HBKernel_6C, };
+		static const double HBKernel_1D[ 1 ] = { // -54.4754 dB, 32
+			5.0188900022775451e-001,};
+		static const double HBKernel_2D[ 2 ] = { // -113.2139 dB, 32
+			5.6295152180538044e-001, -6.2953706070191726e-002,};
+		static const double HBKernel_3D[ 3 ] = { // -167.1447 dB, 32
+			5.8621968728755036e-001, -9.8080551656524531e-002,
+			1.1860868761997080e-002,};
+		static const double HBKernel_4D[ 4 ] = { // -220.6519 dB, 32
+			5.9835028657163591e-001, -1.1999986086623511e-001,
+			2.4132530854004228e-002, -2.4829565686819706e-003,};
+		static const int FltCountD = 4;
+		static const int FlttBaseD = 1;
+		static const double FltAttensD[ FltCountD ] = {
+			54.4754, 113.2139, 167.1447, 220.6519, };
+		static const double* const FltPtrsD[ FltCountD ] = {
+			HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
+		static const double HBKernel_1E[ 1 ] = { // -66.5391 dB, 64
+			5.0047102586416625e-001,};
+		static const double HBKernel_2E[ 2 ] = { // -137.3173 dB, 64
+			5.6261293163933568e-001, -6.2613067826620017e-002,};
+		static const double HBKernel_3E[ 3 ] = { // -203.2997 dB, 64
+			5.8600808139396787e-001, -9.7762185880067784e-002,
+			1.1754104554493029e-002,};
+		static const double HBKernel_4E[ 4 ] = { // -268.8550 dB, 64
+			5.9819599352772002e-001, -1.1972157555011861e-001,
+			2.3977305567947922e-002, -2.4517235455853992e-003,};
+		static const int FltCountE = 4;
+		static const int FlttBaseE = 1;
+		static const double FltAttensE[ FltCountE ] = {
+			66.5391, 137.3173, 203.2997, 268.8550, };
+		static const double* const FltPtrsE[ FltCountE ] = {
+			HBKernel_1E, HBKernel_2E, HBKernel_3E, HBKernel_4E, };
+		static const double HBKernel_1F[ 1 ] = { // -82.4633 dB, 128
+			5.0007530666642896e-001,};
+		static const double HBKernel_2F[ 2 ] = { // -161.4049 dB, 128
+			5.6252823610146030e-001, -6.2528244608044792e-002,};
+		static const double HBKernel_3F[ 3 ] = { // -239.4313 dB, 128
+			5.8595514744674237e-001, -9.7682725156791952e-002,
+			1.1727577711117231e-002,};
+		static const int FltCountF = 3;
+		static const int FlttBaseF = 1;
+		static const double FltAttensF[ FltCountF ] = {
+			82.4633, 161.4049, 239.4313, };
+		static const double* const FltPtrsF[ FltCountF ] = {
+			HBKernel_1F, HBKernel_2F, HBKernel_3F, };
+		static const double HBKernel_1G[ 1 ] = { // -94.5052 dB, 256
+			5.0001882524896712e-001,};
+		static const double HBKernel_2G[ 2 ] = { // -185.4886 dB, 256
+			5.6250705922479682e-001, -6.2507059756378394e-002,};
+		static const double HBKernel_3G[ 3 ] = { // -275.5501 dB, 256
+			5.8594191201187384e-001, -9.7662868266991207e-002,
+			1.1720956255134043e-002,};
+		static const int FltCountG = 3;
+		static const int FlttBaseG = 1;
+		static const double FltAttensG[ FltCountG ] = {
+			94.5052, 185.4886, 275.5501, };
+		static const double* const FltPtrsG[ FltCountG ] = {
+			HBKernel_1G, HBKernel_2G, HBKernel_3G, };
+
+		int k = 0;
+
+		if( SteepIndex <= 0 )
+		{
+			while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsA[ k ];
+			fltt = FlttBaseA + k;
+			att = FltAttensA[ k ];
+		}
+		else
+		if( SteepIndex == 1 )
+		{
+			while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsB[ k ];
+			fltt = FlttBaseB + k;
+			att = FltAttensB[ k ];
+		}
+		else
+		if( SteepIndex == 2 )
+		{
+			while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsC[ k ];
+			fltt = FlttBaseC + k;
+			att = FltAttensC[ k ];
+		}
+		else
+		if( SteepIndex == 3 )
+		{
+			while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsD[ k ];
+			fltt = FlttBaseD + k;
+			att = FltAttensD[ k ];
+		}
+		else
+		if( SteepIndex == 4 )
+		{
+			while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsE[ k ];
+			fltt = FlttBaseE + k;
+			att = FltAttensE[ k ];
+		}
+		else
+		if( SteepIndex == 5 )
+		{
+			while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsF[ k ];
+			fltt = FlttBaseF + k;
+			att = FltAttensF[ k ];
+		}
+		else
+		{
+			while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsG[ k ];
+			fltt = FlttBaseG + k;
+			att = FltAttensG[ k ];
+		}
+	}
+
+	/**
+	 * Function that provides filter data for various steepness indices and
+	 * attenuations. For 1/3 resamplings.
+	 *
+	 * @param ReqAtten Required half-band filter attentuation.
+	 * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+	 * upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
+	 * etc.
+	 */
+
+	static void getHBFilterThird( const double ReqAtten, const int SteepIndex,
+		const double*& flt, int& fltt, double& att )
+	{
+		static const double HBKernel_3A[ 3 ] = { // -66.3726 dB, 6
+			5.9811355069551475e-001, -1.1793396656733847e-001,
+			2.0300557211946322e-002,};
+		static const double HBKernel_4A[ 4 ] = { // -90.2546 dB, 6
+			6.0645499250612578e-001, -1.3555496505481171e-001,
+			3.4022804962365975e-002, -4.9535418595798757e-003,};
+		static const double HBKernel_5A[ 5 ] = { // -126.5507 dB, 6
+			6.1014115058940210e-001, -1.4393081816629907e-001,
+			4.1760642892852244e-002, -8.9692183234056175e-003,
+			9.9871340618342070e-004,};
+		static const double HBKernel_6A[ 6 ] = { // -150.1839 dB, 6
+			6.1439563420546972e-001, -1.5360187826905250e-001,
+			5.0840891345687034e-002, -1.4053648740561121e-002,
+			2.6771286587305727e-003, -2.5815816044823123e-004,};
+		static const double HBKernel_7A[ 7 ] = { // -173.7068 dB, 6
+			6.1747493476329918e-001, -1.6087373733313212e-001,
+			5.8263075641409430e-002, -1.8872408173431318e-002,
+			4.7421376543513687e-003, -8.0196529612267474e-004,
+			6.7964807393798996e-005,};
+		static const double HBKernel_8A[ 8 ] = { // -197.1454 dB, 6
+			6.1980610947775050e-001, -1.6654070578314714e-001,
+			6.4416567441730327e-002, -2.3307744348719822e-002,
+			6.9909157372312443e-003, -1.5871946293364403e-003,
+			2.4017727382382763e-004, -1.8125308241541697e-005,};
+		static const double HBKernel_9A[ 9 ] = { // -220.5199 dB, 6
+			6.2163188951899306e-001, -1.7108115323810941e-001,
+			6.9588370095600260e-002, -2.7339625080613838e-002,
+			9.2954469183791771e-003, -2.5537179959555429e-003,
+			5.2572290897951021e-004, -7.1813356135154921e-005,
+			4.8802382808892154e-006,};
+		static const int FltCountA = 7;
+		static const int FlttBaseA = 3;
+		static const double FltAttensA[ FltCountA ] = {
+			66.3726, 90.2546, 126.5507, 150.1839, 173.7068, 197.1454, 220.5199, };
+		static const double* const FltPtrsA[ FltCountA ] = {
+			HBKernel_3A, HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, };
+		static const double HBKernel_2B[ 2 ] = { // -71.0965 dB, 12
+			5.6748544264806311e-001, -6.7764090509431732e-002,};
+		static const double HBKernel_3B[ 3 ] = { // -115.7707 dB, 12
+			5.8793612182667199e-001, -1.0070583248877293e-001,
+			1.2771337947163834e-002,};
+		static const double HBKernel_4B[ 4 ] = { // -152.1535 dB, 12
+			5.9960155600862808e-001, -1.2228154335199336e-001,
+			2.5433718917694709e-002, -2.7537562530837154e-003,};
+		static const double HBKernel_5B[ 5 ] = { // -188.2914 dB, 12
+			6.0676859170554343e-001, -1.3689667009876413e-001,
+			3.6288512631926818e-002, -6.7838855305035351e-003,
+			6.2345167677087547e-004,};
+		static const double HBKernel_6B[ 6 ] = { // -224.2705 dB, 12
+			6.1161456341904397e-001, -1.4743901958274458e-001,
+			4.5344160157313275e-002, -1.1207371780924531e-002,
+			1.8328497112594935e-003, -1.4518193006359589e-004,};
+		static const int FltCountB = 5;
+		static const int FlttBaseB = 2;
+		static const double FltAttensB[ FltCountB ] = {
+			71.0965, 115.7707, 152.1535, 188.2914, 224.2705, };
+		static const double* const FltPtrsB[ FltCountB ] = {
+			HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, };
+		static const double HBKernel_1C[ 1 ] = { // -49.4544 dB, 24
+			5.0336730531430562e-001,};
+		static const double HBKernel_2C[ 2 ] = { // -103.1970 dB, 24
+			5.6330232648142819e-001, -6.3309247177420452e-002,};
+		static const double HBKernel_3C[ 3 ] = { // -152.1195 dB, 24
+			5.8643891113580415e-001, -9.8411593011583087e-002,
+			1.1972706651483846e-002,};
+		static const double HBKernel_4C[ 4 ] = { // -200.6182 dB, 24
+			5.9851012363917222e-001, -1.2028885239978220e-001,
+			2.4294521083140615e-002, -2.5157924156609776e-003,};
+		static const double HBKernel_5C[ 5 ] = { // -248.8730 dB, 24
+			6.0590922882030196e-001, -1.3515953438018685e-001,
+			3.5020857107815606e-002, -6.3256196990467053e-003,
+			5.5506815147598793e-004,};
+		static const int FltCountC = 5;
+		static const int FlttBaseC = 1;
+		static const double FltAttensC[ FltCountC ] = {
+			49.4544, 103.1970, 152.1195, 200.6182, 248.8730, };
+		static const double* const FltPtrsC[ FltCountC ] = {
+			HBKernel_1C, HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, };
+		static const double HBKernel_1D[ 1 ] = { // -61.5357 dB, 48
+			5.0083794231068057e-001,};
+		static const double HBKernel_2D[ 2 ] = { // -127.3167 dB, 48
+			5.6270074379958690e-001, -6.2701174487726344e-002,};
+		static const double HBKernel_3D[ 3 ] = { // -188.2990 dB, 48
+			5.8606296210323228e-001, -9.7844644765123029e-002,
+			1.1781683046528768e-002,};
+		static const double HBKernel_4D[ 4 ] = { // -248.8580 dB, 48
+			5.9823601243162516e-001, -1.1979368994739022e-001,
+			2.4017458606412575e-002, -2.4597810910081913e-003,};
+		static const int FltCountD = 4;
+		static const int FlttBaseD = 1;
+		static const double FltAttensD[ FltCountD ] = {
+			61.5357, 127.3167, 188.2990, 248.8580, };
+		static const double* const FltPtrsD[ FltCountD ] = {
+			HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
+		static const double HBKernel_1E[ 1 ] = { // -77.4651 dB, 96
+			5.0013388897382527e-001,};
+		static const double HBKernel_2E[ 2 ] = { // -151.4084 dB, 96
+			5.6255019604317880e-001, -6.2550222932381064e-002,};
+		static const double HBKernel_3E[ 3 ] = { // -224.4365 dB, 96
+			5.8596887234201078e-001, -9.7703321113080305e-002,
+			1.1734448777069783e-002,};
+		static const int FltCountE = 3;
+		static const int FlttBaseE = 1;
+		static const double FltAttensE[ FltCountE ] = {
+			77.4651, 151.4084, 224.4365, };
+		static const double* const FltPtrsE[ FltCountE ] = {
+			HBKernel_1E, HBKernel_2E, HBKernel_3E, };
+		static const double HBKernel_1F[ 1 ] = { // -89.5075 dB, 192
+			5.0003346776264190e-001,};
+		static const double HBKernel_2F[ 2 ] = { // -175.4932 dB, 192
+			5.6251254964097952e-001, -6.2512551321105267e-002,};
+		static const double HBKernel_3F[ 3 ] = { // -260.5645 dB, 192
+			5.8594534336747051e-001, -9.7668015838639821e-002,
+			1.1722672471262996e-002,};
+		static const int FltCountF = 3;
+		static const int FlttBaseF = 1;
+		static const double FltAttensF[ FltCountF ] = {
+			89.5075, 175.4932, 260.5645, };
+		static const double* const FltPtrsF[ FltCountF ] = {
+			HBKernel_1F, HBKernel_2F, HBKernel_3F, };
+		static const double HBKernel_1G[ 1 ] = { // -101.5490 dB, 384
+			5.0000836666064941e-001,};
+		static const double HBKernel_2G[ 2 ] = { // -199.5761 dB, 384
+			5.6250313744943459e-001, -6.2503137554435345e-002,};
+		static const double HBKernel_3G[ 3 ] = { // -296.5185 dB, 384
+			5.8593945786963764e-001, -9.7659186853499613e-002,
+			1.1719728983863425e-002,};
+		static const int FltCountG = 3;
+		static const int FlttBaseG = 1;
+		static const double FltAttensG[ FltCountG ] = {
+			101.5490, 199.5761, 296.5185, };
+		static const double* const FltPtrsG[ FltCountG ] = {
+			HBKernel_1G, HBKernel_2G, HBKernel_3G, };
+
+		int k = 0;
+
+		if( SteepIndex <= 0 )
+		{
+			while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsA[ k ];
+			fltt = FlttBaseA + k;
+			att = FltAttensA[ k ];
+		}
+		else
+		if( SteepIndex == 1 )
+		{
+			while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsB[ k ];
+			fltt = FlttBaseB + k;
+			att = FltAttensB[ k ];
+		}
+		else
+		if( SteepIndex == 2 )
+		{
+			while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsC[ k ];
+			fltt = FlttBaseC + k;
+			att = FltAttensC[ k ];
+		}
+		else
+		if( SteepIndex == 3 )
+		{
+			while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsD[ k ];
+			fltt = FlttBaseD + k;
+			att = FltAttensD[ k ];
+		}
+		else
+		if( SteepIndex == 4 )
+		{
+			while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsE[ k ];
+			fltt = FlttBaseE + k;
+			att = FltAttensE[ k ];
+		}
+		else
+		if( SteepIndex == 5 )
+		{
+			while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsF[ k ];
+			fltt = FlttBaseF + k;
+			att = FltAttensF[ k ];
+		}
+		else
+		{
+			while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsG[ k ];
+			fltt = FlttBaseG + k;
+			att = FltAttensG[ k ];
+		}
+	}
+
+	/**
+	 * Constructor initalizes the half-band upsampler.
+	 *
+	 * @param ReqAtten Required half-band filter attentuation.
+	 * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+	 * upsampling ratio, e.g. at 4x upsampling 0 is used, at 8x upsampling 1
+	 * is used, etc.
+	 * @param IsThird "True" if 1/3 of frequency response resampling is
+	 * performed.
+	 * @param PrevLatency Latency, in samples (any value >=0), which was left
+	 * in the output signal by a previous process. Whole-number latency will
+	 * be consumed by *this object while remaining fractional latency can be
+	 * obtained via the getLatencyFrac() function.
+	 * @param aDoConsumeLatency "True" if the output latency should be
+	 * consumed. Does not apply to the fractional part of the latency (if such
+	 * part is available).
+	 */
+
+	CDSPHBUpsampler( const double ReqAtten, const int SteepIndex,
+		const bool IsThird, const double PrevLatency,
+		const bool aDoConsumeLatency = true )
+		: DoConsumeLatency( aDoConsumeLatency )
+	{
+		static const CConvolveFn FltConvFn[ 14 ] = {
+			&CDSPHBUpsampler :: convolve1, &CDSPHBUpsampler :: convolve2,
+			&CDSPHBUpsampler :: convolve3, &CDSPHBUpsampler :: convolve4,
+			&CDSPHBUpsampler :: convolve5, &CDSPHBUpsampler :: convolve6,
+			&CDSPHBUpsampler :: convolve7, &CDSPHBUpsampler :: convolve8,
+			&CDSPHBUpsampler :: convolve9, &CDSPHBUpsampler :: convolve10,
+			&CDSPHBUpsampler :: convolve11, &CDSPHBUpsampler :: convolve12,
+			&CDSPHBUpsampler :: convolve13, &CDSPHBUpsampler :: convolve14 };
+
+		const double* fltp0;
+		int fltt;
+		double att;
+
+		if( IsThird )
+		{
+			getHBFilterThird( ReqAtten, SteepIndex, fltp0, fltt, att );
+		}
+		else
+		{
+			getHBFilter( ReqAtten, SteepIndex, fltp0, fltt, att );
+		}
+
+		// Copy obtained filter to address-aligned buffer.
+
+		fltp = alignptr( FltBuf, 16 );
+		memcpy( fltp, fltp0, fltt * sizeof( fltp[ 0 ]));
+
+		convfn = FltConvFn[ fltt - 1 ];
+		fll = fltt - 1;
+		fl2 = fltt;
+		flo = fll + fl2;
+		BufRP = Buf + fll;
+
+		LatencyFrac = PrevLatency * 2.0;
+		Latency = (int) LatencyFrac;
+		LatencyFrac -= Latency;
+
+		R8BASSERT( Latency >= 0 );
+
+		if( DoConsumeLatency )
+		{
+			flb = BufLen - fll;
+		}
+		else
+		{
+			Latency += fl2 + fl2;
+			flb = BufLen - flo;
+		}
+
+		R8BCONSOLE( "CDSPHBUpsampler: sti=%i third=%i taps=%i att=%.1f "
+			"io=2/1\n", SteepIndex, (int) IsThird, fltt, att );
+
+		clear();
+	}
+
+	virtual int getLatency() const
+	{
+		return( DoConsumeLatency ? 0 : Latency );
+	}
+
+	virtual double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	virtual int getMaxOutLen( const int MaxInLen ) const
+	{
+		R8BASSERT( MaxInLen >= 0 );
+
+		return( MaxInLen << 1 );
+	}
+
+	virtual void clear()
+	{
+		if( DoConsumeLatency )
+		{
+			LatencyLeft = Latency;
+			BufLeft = 0;
+		}
+		else
+		{
+			LatencyLeft = 0;
+			BufLeft = fl2;
+		}
+
+		WritePos = 0;
+		ReadPos = flb; // Set "read" position to account for filter's latency.
+
+		memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
+	}
+
+	virtual int process( double* ip, int l, double*& op0 )
+	{
+		R8BASSERT( l >= 0 );
+
+		double* op = op0;
+
+		while( l > 0 )
+		{
+			// Add new input samples to both halves of the ring buffer.
+
+			const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
+
+			double* const wp1 = Buf + WritePos;
+			memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
+
+			if( WritePos < flo )
+			{
+				const int c = min( b, flo - WritePos );
+				memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
+			}
+
+			ip += b;
+			WritePos = ( WritePos + b ) & BufLenMask;
+			l -= b;
+			BufLeft += b;
+
+			// Produce output.
+
+			if( BufLeft > fl2 )
+			{
+				const int c = BufLeft - fl2;
+
+				double* const opend = op + ( c + c );
+				( *convfn )( op, opend, fltp, BufRP, ReadPos );
+
+				op = opend;
+				ReadPos = ( ReadPos + c ) & BufLenMask;
+				BufLeft -= c;
+			}
+		}
+
+		int ol = (int) ( op - op0 );
+
+		if( LatencyLeft != 0 )
+		{
+			if( LatencyLeft >= ol )
+			{
+				LatencyLeft -= ol;
+				return( 0 );
+			}
+
+			ol -= LatencyLeft;
+			op0 += LatencyLeft;
+			LatencyLeft = 0;
+		}
+
+		return( ol );
+	}
+
+private:
+	static const int BufLenBits = 9; ///< The length of the ring buffer,
+		///< expressed as Nth power of 2. This value can be reduced if it is
+		///< known that only short input buffers will be passed to the
+		///< interpolator. The minimum value of this parameter is 5, and
+		///< 1 << BufLenBits should be at least 3 times larger than the
+		///< FilterLen.
+		///<
+	static const int BufLen = 1 << BufLenBits; ///< The length of the ring
+		///< buffer. The actual length is twice as long to allow "beyond max
+		///< position" positioning.
+		///<
+	static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
+		///< position wrapping.
+		///<
+	double Buf[ BufLen + 27 ]; ///< The ring buffer, including overrun
+		///< protection for the largest filter.
+		///<
+	double FltBuf[ 14 + 2 ]; ///< Holder for half-band filter taps, used with
+		///< 16-byte address-aligning, for SIMD use.
+		///<
+	double* fltp; ///< Half-band filter taps, points to FltBuf.
+		///<
+	int fll; ///< Input latency.
+		///<
+	int fl2; ///< Right-side filter length.
+		///<
+	int flo; ///< Overrrun length.
+		///<
+	int flb; ///< Initial read position and maximal buffer write length.
+		///<
+	const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
+		///<
+	bool DoConsumeLatency; ///< "True" if the output latency should be
+		///< consumed. Does not apply to the fractional part of the latency
+		///< (if such part is available).
+		///<
+	int Latency; ///< Initial latency that should be removed from the output.
+		///<
+	double LatencyFrac; ///< Fractional latency left on the output.
+		///<
+	int BufLeft; ///< The number of samples left in the buffer to process.
+		///< When this value is below FilterLenD2Plus1, the interpolation
+		///< cycle ends.
+		///<
+	int WritePos; ///< The current buffer write position. Incremented together
+		///< with the BufLeft variable.
+		///<
+	int ReadPos; ///< The current buffer read position.
+		///<
+	int LatencyLeft; ///< Latency left to remove.
+		///<
+	typedef void( *CConvolveFn )( double* op, double* const opend,
+		const double* const flt, const double* const rp0, int rpos ); ///<
+		///< Convolution function type.
+		///<
+	CConvolveFn convfn; ///< Convolution function in use.
+		///<
+
+#define R8BHBC1( fn ) \
+	static void fn( double* op, double* const opend, const double* const flt, \
+		const double* const rp0, int rpos ) \
+	{ \
+		while( op != opend ) \
+		{ \
+			const double* const rp = rp0 + rpos; \
+			op[ 0 ] = rp[ 0 ];
+
+#define R8BHBC2 \
+			rpos = ( rpos + 1 ) & BufLenMask; \
+			op += 2; \
+		} \
+	}
+
+#include "CDSPHBUpsampler.inc"
+
+#undef R8BHBC1
+#undef R8BHBC2
+};
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPHBUPSAMPLER_INCLUDED
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc
@ -0,0 +1,711 @@
+// Auto-generated by `genhbc`, do not edit!
+
+#if defined( R8B_SSE2 )
+
+R8BHBC1( convolve1 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
+R8BHBC2
+
+R8BHBC1( convolve2 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve3 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
+R8BHBC2
+
+R8BHBC1( convolve4 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve5 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
+R8BHBC2
+
+R8BHBC1( convolve6 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve7 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
+R8BHBC2
+
+R8BHBC1( convolve8 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve9 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
+R8BHBC2
+
+R8BHBC1( convolve10 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve11 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
+R8BHBC2
+
+R8BHBC1( convolve12 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve13 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
+R8BHBC2
+
+R8BHBC1( convolve14 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 13 ); v1 = _mm_loadu_pd( rp + 13 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 12 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+#elif defined( R8B_NEON )
+
+R8BHBC1( convolve1 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
+R8BHBC2
+
+R8BHBC1( convolve2 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve3 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
+R8BHBC2
+
+R8BHBC1( convolve4 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve5 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
+R8BHBC2
+
+R8BHBC1( convolve6 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve7 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
+R8BHBC2
+
+R8BHBC1( convolve8 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve9 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
+R8BHBC2
+
+R8BHBC1( convolve10 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve11 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
+R8BHBC2
+
+R8BHBC1( convolve12 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve13 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
+R8BHBC2
+
+R8BHBC1( convolve14 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 13 ); v1 = vld1q_f64( rp + 13 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 12 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+#else // SIMD
+
+R8BHBC1( convolve1 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
+R8BHBC2
+
+R8BHBC1( convolve2 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]);
+R8BHBC2
+
+R8BHBC1( convolve3 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
+R8BHBC2
+
+R8BHBC1( convolve4 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]);
+R8BHBC2
+
+R8BHBC1( convolve5 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
+R8BHBC2
+
+R8BHBC1( convolve6 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]);
+R8BHBC2
+
+R8BHBC1( convolve7 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
+R8BHBC2
+
+R8BHBC1( convolve8 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]);
+R8BHBC2
+
+R8BHBC1( convolve9 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
+R8BHBC2
+
+R8BHBC1( convolve10 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]);
+R8BHBC2
+
+R8BHBC1( convolve11 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+	+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
+R8BHBC2
+
+R8BHBC1( convolve12 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+	+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+	+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ]);
+R8BHBC2
+
+R8BHBC1( convolve13 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+	+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+	+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
+	+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
+R8BHBC2
+
+R8BHBC1( convolve14 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+	+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+	+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
+	+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ])
+	+ flt[ 13 ] * ( rp[ 14 ] + rp[ -13 ]);
+R8BHBC2
+
+#endif // SIMD
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h
@ -0,0 +1,103 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPProcessor.h
+ *
+ * @brief The base virtual class for DSP processing algorithms.
+ *
+ * This file includes the base virtual class for DSP processing algorithm
+ * classes like FIR filtering and interpolation.
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPPROCESSOR_INCLUDED
+#define R8B_CDSPPROCESSOR_INCLUDED
+
+#include "r8bbase.h"
+
+namespace r8b {
+
+/**
+ * @brief The base virtual class for DSP processing algorithms.
+ *
+ * This class can be used as a base class for various DSP processing
+ * algorithms (processors). DSP processors that are derived from this class
+ * can be seamlessly integrated into various DSP processing graphs.
+ */
+
+class CDSPProcessor : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPProcessor );
+
+public:
+	CDSPProcessor()
+	{
+	}
+
+	virtual ~CDSPProcessor()
+	{
+	}
+
+	/**
+	 * @return The latency, in samples, which is present in the output signal.
+	 * This value is usually zero if the DSP processor "consumes" the latency
+	 * automatically.
+	 */
+
+	virtual int getLatency() const = 0;
+
+	/**
+	 * @return Fractional latency, in samples, which is present in the output
+	 * signal. This value is usually zero if a linear-phase filtering is used.
+	 * With minimum-phase filters in use, this value can be non-zero even if
+	 * the getLatency() function returns zero.
+	 */
+
+	virtual double getLatencyFrac() const = 0;
+
+	/**
+	 * @param MaxInLen The number of samples planned to process at once, at
+	 * most.
+	 * @return The maximal length of the output buffer required when
+	 * processing the "MaxInLen" number of input samples.
+	 */
+
+	virtual int getMaxOutLen( const int MaxInLen ) const = 0;
+
+	/**
+	 * Function clears (resets) the state of *this object and returns it to
+	 * the state after construction. All input data accumulated in the
+	 * internal buffer so far will be discarded.
+	 */
+
+	virtual void clear() = 0;
+
+	/**
+	 * Function performs DSP processing.
+	 *
+	 * @param ip Input data pointer.
+	 * @param l0 How many samples to process.
+	 * @param[out] op0 Output data pointer. The capacity of this buffer should
+	 * be equal to the value returned by the getMaxOutLen() function for the
+	 * given "l0". This buffer can be equal to "ip" only if the
+	 * getMaxOutLen( l0 ) function returned a value lesser than "l0". This
+	 * pointer can be incremented on function's return if latency compensation
+	 * was performed by the processor. Note that on function's return, this
+	 * pointer may point to some internal buffers, including the "ip" buffer,
+	 * ignoring the originally passed value.
+	 * @return The number of output samples written to the "op0" buffer and
+	 * available after processing. This value can be smaller or larger in
+	 * comparison to the original "l0" value due to processing and filter's
+	 * latency compensation that took place, and due to resampling if it was
+	 * performed.
+	 */
+
+	virtual int process( double* ip, int l0, double*& op0 ) = 0;
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPPROCESSOR_INCLUDED
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h
@ -0,0 +1,820 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPRealFFT.h
+ *
+ * @brief Real-valued FFT transform class.
+ *
+ * This file includes FFT object implementation. All created FFT objects are
+ * kept in a global list after use, for a future reusal. Such approach
+ * minimizes time necessary to initialize the FFT object of the required
+ * length.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPREALFFT_INCLUDED
+#define R8B_CDSPREALFFT_INCLUDED
+
+#include "r8bbase.h"
+
+#if !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
+	#include "fft4g.h"
+#endif // !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
+
+#if R8B_PFFFT
+	#include "pffft.h"
+#endif // R8B_PFFFT
+
+#if R8B_PFFFT_DOUBLE
+	#include "pffft_double/pffft_double.h"
+#endif // R8B_PFFFT_DOUBLE
+
+namespace r8b {
+
+/**
+ * @brief Real-valued FFT transform class.
+ *
+ * Class implements a wrapper for real-valued discrete fast Fourier transform
+ * functions. The object of this class can only be obtained via the
+ * CDSPRealFFTKeeper class.
+ *
+ * Uses functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ *
+ * Also uses Intel IPP library functions if available (if the R8B_IPP=1 macro
+ * was defined). Note that IPP library's FFT functions are 2-3 times more
+ * efficient on the modern Intel Core i7-3770K processor than Ooura's
+ * functions. It may be worthwhile investing in IPP. Note, that FFT functions
+ * take less than 20% of the overall sample rate conversion time. However,
+ * when the "power of 2" resampling is used the performance of FFT functions
+ * becomes "everything".
+ */
+
+class CDSPRealFFT : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPRealFFT );
+
+	friend class CDSPRealFFTKeeper;
+
+public:
+	/**
+	 * @return A multiplication constant that should be used after inverse
+	 * transform to obtain a correct value scale.
+	 */
+
+	double getInvMulConst() const
+	{
+		return( InvMulConst );
+	}
+
+	/**
+	 * @return The length (the number of real values in a transform) of *this
+	 * FFT object, expressed as Nth power of 2.
+	 */
+
+	int getLenBits() const
+	{
+		return( LenBits );
+	}
+
+	/**
+	 * @return The length (the number of real values in a transform) of *this
+	 * FFT object.
+	 */
+
+	int getLen() const
+	{
+		return( Len );
+	}
+
+	/**
+	 * Function performs in-place forward FFT.
+	 *
+	 * @param[in,out] p Pointer to data block to transform, length should be
+	 * equal to *this object's getLen().
+	 */
+
+	void forward( double* const p ) const
+	{
+	#if R8B_FLOATFFT
+
+		float* const op = (float*) p;
+		int i;
+
+		for( i = 0; i < Len; i++ )
+		{
+			op[ i ] = (float) p[ i ];
+		}
+
+	#endif // R8B_FLOATFFT
+
+	#if R8B_IPP
+
+		ippsFFTFwd_RToPerm_64f( p, p, SPtr, WorkBuffer );
+
+	#elif R8B_PFFFT
+
+		pffft_transform_ordered( setup, op, op, work, PFFFT_FORWARD );
+
+	#elif R8B_PFFFT_DOUBLE
+
+		pffftd_transform_ordered( setup, p, p, work, PFFFT_FORWARD );
+
+	#else // R8B_PFFFT_DOUBLE
+
+		ooura_fft :: rdft( Len, 1, p, wi.getPtr(), wd.getPtr() );
+
+	#endif // R8B_IPP
+	}
+
+	/**
+	 * Function performs in-place inverse FFT.
+	 *
+	 * @param[in,out] p Pointer to data block to transform, length should be
+	 * equal to *this object's getLen().
+	 */
+
+	void inverse( double* const p ) const
+	{
+	#if R8B_IPP
+
+		ippsFFTInv_PermToR_64f( p, p, SPtr, WorkBuffer );
+
+	#elif R8B_PFFFT
+
+		pffft_transform_ordered( setup, (float*) p, (float*) p, work,
+			PFFFT_BACKWARD );
+
+	#elif R8B_PFFFT_DOUBLE
+
+		pffftd_transform_ordered( setup, p, p, work, PFFFT_BACKWARD );
+
+	#else // R8B_PFFFT_DOUBLE
+
+		ooura_fft :: rdft( Len, -1, p, wi.getPtr(), wd.getPtr() );
+
+	#endif // R8B_IPP
+
+	#if R8B_FLOATFFT
+
+		const float* const ip = (const float*) p;
+		int i;
+
+		for( i = Len - 1; i >= 0; i-- )
+		{
+			p[ i ] = ip[ i ];
+		}
+
+	#endif // R8B_FLOATFFT
+	}
+
+	/**
+	 * Function multiplies two complex-valued data blocks and places result in
+	 * a new data block. Length of all data blocks should be equal to *this
+	 * object's block length. Input blocks should have been produced with the
+	 * forward() function of *this object.
+	 *
+	 * @param aip1 Input data block 1.
+	 * @param aip2 Input data block 2.
+	 * @param[out] aop Output data block, should not be equal to aip1 nor
+	 * aip2.
+	 */
+
+	void multiplyBlocks( const double* const aip1, const double* const aip2,
+		double* const aop ) const
+	{
+	#if R8B_FLOATFFT
+
+		const float* const ip1 = (const float*) aip1;
+		const float* const ip2 = (const float*) aip2;
+		float* const op = (float*) aop;
+
+	#else // R8B_FLOATFFT
+
+		const double* const ip1 = aip1;
+		const double* const ip2 = aip2;
+		double* const op = aop;
+
+	#endif // R8B_FLOATFFT
+
+	#if R8B_IPP
+
+		ippsMulPerm_64f( (Ipp64f*) ip1, (Ipp64f*) ip2, (Ipp64f*) op, Len );
+
+	#else // R8B_IPP
+
+		op[ 0 ] = ip1[ 0 ] * ip2[ 0 ];
+		op[ 1 ] = ip1[ 1 ] * ip2[ 1 ];
+
+		int i = 2;
+
+		while( i < Len )
+		{
+			op[ i ] = ip1[ i ] * ip2[ i ] - ip1[ i + 1 ] * ip2[ i + 1 ];
+			op[ i + 1 ] = ip1[ i ] * ip2[ i + 1 ] + ip1[ i + 1 ] * ip2[ i ];
+			i += 2;
+		}
+
+	#endif // R8B_IPP
+	}
+
+	/**
+	 * Function multiplies two complex-valued data blocks in-place. Length of
+	 * both data blocks should be equal to *this object's block length. Blocks
+	 * should have been produced with the forward() function of *this object.
+	 *
+	 * @param aip Input data block 1.
+	 * @param[in,out] aop Output/input data block 2.
+	 */
+
+	void multiplyBlocks( const double* const aip, double* const aop ) const
+	{
+	#if R8B_FLOATFFT
+
+		const float* const ip = (const float*) aip;
+		float* const op = (float*) aop;
+		float t;
+
+	#else // R8B_FLOATFFT
+
+		const double* const ip = aip;
+		double* const op = aop;
+
+		#if !R8B_IPP
+		double t;
+		#endif // !R8B_IPP
+
+	#endif // R8B_FLOATFFT
+
+	#if R8B_IPP
+
+		ippsMulPerm_64f( (Ipp64f*) op, (Ipp64f*) ip, (Ipp64f*) op, Len );
+
+	#else // R8B_IPP
+
+		op[ 0 ] *= ip[ 0 ];
+		op[ 1 ] *= ip[ 1 ];
+
+		int i = 2;
+
+		while( i < Len )
+		{
+			t = op[ i ] * ip[ i ] - op[ i + 1 ] * ip[ i + 1 ];
+			op[ i + 1 ] = op[ i ] * ip[ i + 1 ] + op[ i + 1 ] * ip[ i ];
+			op[ i ] = t;
+			i += 2;
+		}
+
+	#endif // R8B_IPP
+	}
+
+	/**
+	 * Function multiplies two complex-valued data blocks in-place,
+	 * considering that the "ip" block contains "zero-phase" response. Length
+	 * of both data blocks should be equal to *this object's block length.
+	 * Blocks should have been produced with the forward() function of *this
+	 * object.
+	 *
+	 * @param aip Input data block 1, "zero-phase" response. This block should
+	 * be first transformed via the convertToZP() function.
+	 * @param[in,out] aop Output/input data block 2.
+	 */
+
+	void multiplyBlocksZP( const double* const aip, double* const aop ) const
+	{
+	#if R8B_FLOATFFT
+
+		const float* const ip = (const float*) aip;
+		float* const op = (float*) aop;
+
+	#else // R8B_FLOATFFT
+
+		const double* ip = aip;
+		double* op = aop;
+
+	#endif // R8B_FLOATFFT
+
+	// SIMD implementations assume that pointers are address-aligned.
+
+	#if !R8B_FLOATFFT && defined( R8B_SSE2 )
+
+		int c8 = Len >> 3;
+
+		while( c8 != 0 )
+		{
+			const __m128d iv1 = _mm_load_pd( ip );
+			const __m128d iv2 = _mm_load_pd( ip + 2 );
+			const __m128d ov1 = _mm_load_pd( op );
+			const __m128d ov2 = _mm_load_pd( op + 2 );
+			_mm_store_pd( op, _mm_mul_pd( iv1, ov1 ));
+			_mm_store_pd( op + 2, _mm_mul_pd( iv2, ov2 ));
+
+			const __m128d iv3 = _mm_load_pd( ip + 4 );
+			const __m128d ov3 = _mm_load_pd( op + 4 );
+			const __m128d iv4 = _mm_load_pd( ip + 6 );
+			const __m128d ov4 = _mm_load_pd( op + 6 );
+			_mm_store_pd( op + 4, _mm_mul_pd( iv3, ov3 ));
+			_mm_store_pd( op + 6, _mm_mul_pd( iv4, ov4 ));
+
+			ip += 8;
+			op += 8;
+			c8--;
+		}
+
+		int c = Len & 7;
+
+		while( c != 0 )
+		{
+			*op *= *ip;
+			ip++;
+			op++;
+			c--;
+		}
+
+	#elif !R8B_FLOATFFT && defined( R8B_NEON )
+
+		int c8 = Len >> 3;
+
+		while( c8 != 0 )
+		{
+			const float64x2_t iv1 = vld1q_f64( ip );
+			const float64x2_t iv2 = vld1q_f64( ip + 2 );
+			const float64x2_t ov1 = vld1q_f64( op );
+			const float64x2_t ov2 = vld1q_f64( op + 2 );
+			vst1q_f64( op, vmulq_f64( iv1, ov1 ));
+			vst1q_f64( op + 2, vmulq_f64( iv2, ov2 ));
+
+			const float64x2_t iv3 = vld1q_f64( ip + 4 );
+			const float64x2_t iv4 = vld1q_f64( ip + 6 );
+			const float64x2_t ov3 = vld1q_f64( op + 4 );
+			const float64x2_t ov4 = vld1q_f64( op + 6 );
+			vst1q_f64( op + 4, vmulq_f64( iv3, ov3 ));
+			vst1q_f64( op + 6, vmulq_f64( iv4, ov4 ));
+
+			ip += 8;
+			op += 8;
+			c8--;
+		}
+
+		int c = Len & 7;
+
+		while( c != 0 )
+		{
+			*op *= *ip;
+			ip++;
+			op++;
+			c--;
+		}
+
+	#else // SIMD
+
+		int i;
+
+		for( i = 0; i < Len; i++ )
+		{
+			op[ i ] *= ip[ i ];
+		}
+
+	#endif // SIMD
+	}
+
+	/**
+	 * Function converts the specified forward-transformed block into
+	 * "zero-phase" form suitable for use with the multiplyBlocksZ() function.
+	 *
+	 * @param[in,out] ap Block to transform.
+	 */
+
+	void convertToZP( double* const ap ) const
+	{
+	#if R8B_FLOATFFT
+
+		float* const p = (float*) ap;
+
+	#else // R8B_FLOATFFT
+
+		double* const p = ap;
+
+	#endif // R8B_FLOATFFT
+
+		int i = 2;
+
+		while( i < Len )
+		{
+			p[ i + 1 ] = p[ i ];
+			i += 2;
+		}
+	}
+
+private:
+	int LenBits; ///< Length of FFT block (expressed as Nth power of 2).
+		///<
+	int Len; ///< Length of FFT block (number of real values).
+		///<
+	double InvMulConst; ///< Inverse FFT multiply constant.
+		///<
+	CDSPRealFFT* Next; ///< Next object in a singly-linked list.
+		///<
+
+	#if R8B_IPP
+		IppsFFTSpec_R_64f* SPtr; ///< Pointer to initialized data buffer
+			///< to be passed to IPP's FFT functions.
+			///<
+		CFixedBuffer< unsigned char > SpecBuffer; ///< Working buffer.
+			///<
+		CFixedBuffer< unsigned char > WorkBuffer; ///< Working buffer.
+			///<
+	#elif R8B_PFFFT
+		PFFFT_Setup* setup; ///< PFFFT setup object.
+			///<
+		CFixedBuffer< float > work; ///< Working buffer.
+			///<
+	#elif R8B_PFFFT_DOUBLE
+		PFFFTD_Setup* setup; ///< PFFFTD setup object.
+			///<
+		CFixedBuffer< double > work; ///< Working buffer.
+			///<
+	#else // R8B_PFFFT_DOUBLE
+		CFixedBuffer< int > wi; ///< Working buffer (ints).
+			///<
+		CFixedBuffer< double > wd; ///< Working buffer (doubles).
+			///<
+	#endif // R8B_IPP
+
+	/**
+	 * A simple class that keeps the pointer to the object and deletes it
+	 * automatically.
+	 */
+
+	class CObjKeeper
+	{
+		R8BNOCTOR( CObjKeeper );
+
+	public:
+		CObjKeeper()
+			: Object( NULL )
+		{
+		}
+
+		~CObjKeeper()
+		{
+			delete Object;
+		}
+
+		CObjKeeper& operator = ( CDSPRealFFT* const aObject )
+		{
+			Object = aObject;
+			return( *this );
+		}
+
+		operator CDSPRealFFT* () const
+		{
+			return( Object );
+		}
+
+	private:
+		CDSPRealFFT* Object; ///< FFT object being kept.
+			///<
+	};
+
+	CDSPRealFFT()
+	{
+	}
+
+	/**
+	 * Constructor initializes FFT object.
+	 *
+	 * @param aLenBits The length of FFT block (Nth power of 2), specifies the
+	 * number of real values in a block. Values from 1 to 30 inclusive are
+	 * supported.
+	 */
+
+	CDSPRealFFT( const int aLenBits )
+		: LenBits( aLenBits )
+		, Len( 1 << aLenBits )
+	#if R8B_IPP
+		, InvMulConst( 1.0 / Len )
+	#elif R8B_PFFFT
+		, InvMulConst( 1.0 / Len )
+	#elif R8B_PFFFT_DOUBLE
+		, InvMulConst( 1.0 / Len )
+	#else // R8B_PFFFT_DOUBLE
+		, InvMulConst( 2.0 / Len )
+	#endif // R8B_IPP
+	{
+	#if R8B_IPP
+
+		int SpecSize;
+		int SpecBufferSize;
+		int BufferSize;
+
+		ippsFFTGetSize_R_64f( LenBits, IPP_FFT_NODIV_BY_ANY,
+			ippAlgHintFast, &SpecSize, &SpecBufferSize, &BufferSize );
+
+		CFixedBuffer< unsigned char > InitBuffer( SpecBufferSize );
+		SpecBuffer.alloc( SpecSize );
+		WorkBuffer.alloc( BufferSize );
+
+		ippsFFTInit_R_64f( &SPtr, LenBits, IPP_FFT_NODIV_BY_ANY,
+			ippAlgHintFast, SpecBuffer, InitBuffer );
+
+	#elif R8B_PFFFT
+
+		setup = pffft_new_setup( Len, PFFFT_REAL );
+		work.alloc( Len );
+
+	#elif R8B_PFFFT_DOUBLE
+
+		setup = pffftd_new_setup( Len, PFFFT_REAL );
+		work.alloc( Len );
+
+	#else // R8B_PFFFT_DOUBLE
+
+		wi.alloc( (int) ceil( 2.0 + sqrt( (double) ( Len >> 1 ))));
+		wi[ 0 ] = 0;
+		wd.alloc( Len >> 1 );
+
+	#endif // R8B_IPP
+	}
+
+	~CDSPRealFFT()
+	{
+		#if R8B_PFFFT
+			pffft_destroy_setup( setup );
+		#elif R8B_PFFFT_DOUBLE
+			pffftd_destroy_setup( setup );
+		#endif // R8B_PFFFT_DOUBLE
+
+		delete Next;
+	}
+};
+
+/**
+ * @brief A "keeper" class for real-valued FFT transform objects.
+ *
+ * Class implements "keeper" functionality for handling CDSPRealFFT objects.
+ * The allocated FFT objects are placed on the global static list of objects
+ * for future reuse instead of deallocation.
+ */
+
+class CDSPRealFFTKeeper : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPRealFFTKeeper );
+
+public:
+	CDSPRealFFTKeeper()
+		: Object( NULL )
+	{
+	}
+
+	/**
+	 * Function acquires FFT object with the specified block length.
+	 *
+	 * @param LenBits The length of FFT block (Nth power of 2), in the range
+	 * [1; 30] inclusive, specifies the number of real values in a FFT block.
+	 */
+
+	CDSPRealFFTKeeper( const int LenBits )
+	{
+		Object = acquire( LenBits );
+	}
+
+	~CDSPRealFFTKeeper()
+	{
+		if( Object != NULL )
+		{
+			release( Object );
+		}
+	}
+
+	/**
+	 * @return Pointer to the acquired FFT object.
+	 */
+
+	const CDSPRealFFT* operator -> () const
+	{
+		R8BASSERT( Object != NULL );
+
+		return( Object );
+	}
+
+	/**
+	 * Function acquires FFT object with the specified block length. This
+	 * function can be called any number of times.
+	 *
+	 * @param LenBits The length of FFT block (Nth power of 2), in the range
+	 * [1; 30] inclusive, specifies the number of real values in a FFT block.
+	 */
+
+	void init( const int LenBits )
+	{
+		if( Object != NULL )
+		{
+			if( Object -> LenBits == LenBits )
+			{
+				return;
+			}
+
+			release( Object );
+		}
+
+		Object = acquire( LenBits );
+	}
+
+	/**
+	 * Function releases a previously acquired FFT object.
+	 */
+
+	void reset()
+	{
+		if( Object != NULL )
+		{
+			release( Object );
+			Object = NULL;
+		}
+	}
+
+private:
+	CDSPRealFFT* Object; ///< FFT object.
+		///<
+
+	static CSyncObject StateSync; ///< FFTObjects synchronizer.
+		///<
+	static CDSPRealFFT :: CObjKeeper FFTObjects[]; ///< Pool of FFT objects of
+		///< various lengths.
+		///<
+
+	/**
+	 * Function acquires FFT object from the global pool.
+	 *
+	 * @param LenBits FFT block length (expressed as Nth power of 2).
+	 */
+
+	CDSPRealFFT* acquire( const int LenBits )
+	{
+		R8BASSERT( LenBits > 0 && LenBits <= 30 );
+
+		R8BSYNC( StateSync );
+
+		if( FFTObjects[ LenBits ] == NULL )
+		{
+			return( new CDSPRealFFT( LenBits ));
+		}
+
+		CDSPRealFFT* ffto = FFTObjects[ LenBits ];
+		FFTObjects[ LenBits ] = ffto -> Next;
+
+		return( ffto );
+	}
+
+	/**
+	 * Function releases a previously acquired FFT object.
+	 *
+	 * @param ffto FFT object to release.
+	 */
+
+	void release( CDSPRealFFT* const ffto )
+	{
+		R8BSYNC( StateSync );
+
+		ffto -> Next = FFTObjects[ ffto -> LenBits ];
+		FFTObjects[ ffto -> LenBits ] = ffto;
+	}
+};
+
+/**
+ * Function calculates the minimum-phase transform of the filter kernel, using
+ * a discrete Hilbert transform in cepstrum domain.
+ *
+ * For more details, see part III.B of
+ * http://www.hpl.hp.com/personal/Niranjan_Damera-Venkata/files/ComplexMinPhase.pdf
+ *
+ * @param[in,out] Kernel Filter kernel buffer.
+ * @param KernelLen Filter kernel's length, in samples.
+ * @param LenMult Kernel length multiplier. Used as a coefficient of the
+ * "oversampling" in the frequency domain. Such oversampling is needed to
+ * improve the precision of the minimum-phase transform. If the filter's
+ * attenuation is high, this multiplier should be increased or otherwise the
+ * required attenuation will not be reached due to "smoothing" effect of this
+ * transform.
+ * @param DoFinalMul "True" if the final multiplication after transform should
+ * be performed or not. Such multiplication returns the gain of the signal to
+ * its original value. This parameter can be set to "false" if normalization
+ * of the resulting filter kernel is planned to be used.
+ * @param[out] DCGroupDelay If not NULL, this variable receives group delay
+ * at DC offset, in samples (can be a non-integer value).
+ */
+
+inline void calcMinPhaseTransform( double* const Kernel, const int KernelLen,
+	const int LenMult = 2, const bool DoFinalMul = true,
+	double* const DCGroupDelay = NULL )
+{
+	R8BASSERT( KernelLen > 0 );
+	R8BASSERT( LenMult >= 2 );
+
+	const int LenBits = getBitOccupancy(( KernelLen * LenMult ) - 1 );
+	const int Len = 1 << LenBits;
+	const int Len2 = Len >> 1;
+	int i;
+
+	CFixedBuffer< double > ip( Len );
+	CFixedBuffer< double > ip2( Len2 + 1 );
+
+	memcpy( &ip[ 0 ], Kernel, KernelLen * sizeof( ip[ 0 ]));
+	memset( &ip[ KernelLen ], 0, ( Len - KernelLen ) * sizeof( ip[ 0 ]));
+
+	CDSPRealFFTKeeper ffto( LenBits );
+	ffto -> forward( ip );
+
+	// Create the "log |c|" spectrum while saving the original power spectrum
+	// in the "ip2" buffer.
+
+	#if R8B_FLOATFFT
+		float* const aip = (float*) &ip[ 0 ];
+		float* const aip2 = (float*) &ip2[ 0 ];
+		const float nzbias = 1e-35;
+	#else // R8B_FLOATFFT
+		double* const aip = &ip[ 0 ];
+		double* const aip2 = &ip2[ 0 ];
+		const double nzbias = 1e-300;
+	#endif // R8B_FLOATFFT
+
+	aip2[ 0 ] = aip[ 0 ];
+	aip[ 0 ] = log( fabs( aip[ 0 ]) + nzbias );
+	aip2[ Len2 ] = aip[ 1 ];
+	aip[ 1 ] = log( fabs( aip[ 1 ]) + nzbias );
+
+	for( i = 1; i < Len2; i++ )
+	{
+		aip2[ i ] = sqrt( aip[ i * 2 ] * aip[ i * 2 ] +
+			aip[ i * 2 + 1 ] * aip[ i * 2 + 1 ]);
+
+		aip[ i * 2 ] = log( aip2[ i ] + nzbias );
+		aip[ i * 2 + 1 ] = 0.0;
+	}
+
+	// Convert to cepstrum and apply discrete Hilbert transform.
+
+	ffto -> inverse( ip );
+
+	const double m1 = ffto -> getInvMulConst();
+	const double m2 = -m1;
+
+	ip[ 0 ] = 0.0;
+
+	for( i = 1; i < Len2; i++ )
+	{
+		ip[ i ] *= m1;
+	}
+
+	ip[ Len2 ] = 0.0;
+
+	for( i = Len2 + 1; i < Len; i++ )
+	{
+		ip[ i ] *= m2;
+	}
+
+	// Convert Hilbert-transformed cepstrum back to the "log |c|" spectrum and
+	// perform its exponentiation, multiplied by the power spectrum previously
+	// saved in the "ip2" buffer.
+
+	ffto -> forward( ip );
+
+	aip[ 0 ] = aip2[ 0 ];
+	aip[ 1 ] = aip2[ Len2 ];
+
+	for( i = 1; i < Len2; i++ )
+	{
+		aip[ i * 2 + 0 ] = cos( aip[ i * 2 + 1 ]) * aip2[ i ];
+		aip[ i * 2 + 1 ] = sin( aip[ i * 2 + 1 ]) * aip2[ i ];
+	}
+
+	ffto -> inverse( ip );
+
+	if( DoFinalMul )
+	{
+		for( i = 0; i < KernelLen; i++ )
+		{
+			Kernel[ i ] = ip[ i ] * m1;
+		}
+	}
+	else
+	{
+		memcpy( &Kernel[ 0 ], &ip[ 0 ], KernelLen * sizeof( Kernel[ 0 ]));
+	}
+
+	if( DCGroupDelay != NULL )
+	{
+		double tmp;
+
+		calcFIRFilterResponseAndGroupDelay( Kernel, KernelLen, 0.0,
+			tmp, tmp, *DCGroupDelay );
+	}
+}
+
+} // namespace r8b
+
+#endif // VOX_CDSPREALFFT_INCLUDED
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h
@ -0,0 +1,769 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPResampler.h
+ *
+ * @brief The master sample rate converter (resampler) class.
+ *
+ * This file includes the master sample rate converter (resampler) class that
+ * combines all elements of this library into a single front-end class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPRESAMPLER_INCLUDED
+#define R8B_CDSPRESAMPLER_INCLUDED
+
+#include "CDSPHBDownsampler.h"
+#include "CDSPHBUpsampler.h"
+#include "CDSPBlockConvolver.h"
+#include "CDSPFracInterpolator.h"
+
+namespace r8b {
+
+/**
+ * @brief The master sample rate converter (resampler) class.
+ *
+ * This class can be considered the "master" sample rate converter (resampler)
+ * class since it combines all functionality of this library into a single
+ * front-end class to perform sample rate conversion to/from any sample rate,
+ * including non-integer sample rates.
+ *
+ * Note that objects of this class can be constructed on the stack as it has a
+ * small member data size. The default template parameters of this class are
+ * suited for 27-bit fixed point resampling.
+ *
+ * Use the CDSPResampler16 class for 16-bit resampling.
+ *
+ * Use the CDSPResampler16IR class for 16-bit impulse response resampling.
+ *
+ * Use the CDSPResampler24 class for 24-bit resampling (including 32-bit
+ * floating point resampling).
+ */
+
+class CDSPResampler : public CDSPProcessor
+{
+public:
+	/**
+	 * Constructor initalizes the resampler object.
+	 *
+	 * Note that increasing the transition band and decreasing attenuation
+	 * reduces the filter length, this in turn reduces the "input before
+	 * output" delay. However, the filter length has only a minor influence on
+	 * the overall resampling speed.
+	 *
+	 * It should be noted that the ReqAtten specifies the minimal difference
+	 * between the loudest input signal component and the produced aliasing
+	 * artifacts during resampling. For example, if ReqAtten=100 was specified
+	 * when performing 2x upsampling, the analysis of the resulting signal may
+	 * display high-frequency components which are quieter than the loudest
+	 * part of the input signal by only 100 decibel meaning the high-frequency
+	 * part did not become "magically" completely silent after resampling. You
+	 * have to specify a higher ReqAtten value if you need a totally clean
+	 * high-frequency content. On the other hand, it may not be reasonable to
+	 * have a high-frequency content cleaner than the input signal itself: if
+	 * the input signal is 16-bit, setting ReqAtten to 180 will make its
+	 * high-frequency content 24-bit, but the original part of the signal will
+	 * remain 16-bit.
+	 *
+	 * @param SrcSampleRate Source signal sample rate. Both sample rates can
+	 * be specified as a ratio, e.g. SrcSampleRate = 1.0, DstSampleRate = 2.0.
+	 * @param DstSampleRate Destination signal sample rate. The "power of 2"
+	 * ratios between the source and destination sample rates force resampler
+	 * to use several fast "power of 2" resampling steps, without using
+	 * fractional interpolation at all.
+	 * @param aMaxInLen The maximal planned length of the input buffer (in
+	 * samples) that will be passed to the resampler. The resampler relies on
+	 * this value as it allocates intermediate buffers. Input buffers longer
+	 * than this value should never be supplied to the resampler. Note that
+	 * upsampling produces more samples than was provided on input, so at
+	 * higher upsampling ratios it is advisable to use smaller MaxInLen
+	 * values to reduce memory footprint. When downsampling, a larger MaxInLen
+	 * is suggested in order to increase downsampling performance.
+	 * @param ReqTransBand Required transition band, in percent of the
+	 * spectral space of the input signal (or the output signal if
+	 * downsampling is performed) between filter's -3 dB point and the Nyquist
+	 * frequency. The range is from CDSPFIRFilter::getLPMinTransBand() to
+	 * CDSPFIRFilter::getLPMaxTransBand(), inclusive. When upsampling 88200 or
+	 * 96000 audio to a higher sample rates the ReqTransBand can be
+	 * considerably increased, up to 30. The selection of ReqTransBand depends
+	 * on the level of desire to preserve the high-frequency content. While
+	 * values 0.5 to 2 are extremely "greedy" settings, not necessary in most
+	 * cases, values 2 to 3 can be used in most cases. Values 3 to 4 are
+	 * relaxed settings, but they still offer a flat frequency response up to
+	 * 21kHz with 44.1k source or destination sample rate.
+	 * @param ReqAtten Required stop-band attenuation in decibel, in the
+	 * range CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
+	 * inclusive. The actual attenuation may be 0.40-4.46 dB higher. The
+	 * general formula for selecting the ReqAtten is 6.02 * Bits + 40, where
+	 * "Bits" is the bit resolution (e.g. 16, 24), "40" is an added resolution
+	 * for dynamic signals; this value can be decreased to 20 to 10 if the
+	 * signal being resampled is non-dynamic (e.g., an impulse response or
+	 * filter, with a non-steep frequency response).
+	 * @param ReqPhase Required filter's phase response. Note that this
+	 * setting does not affect interpolator's phase response which is always
+	 * linear-phase. Also note that if the "power of 2" resampling was engaged
+	 * by the resampler together with the minimum-phase response, the audio
+	 * stream may become fractionally delayed, depending on the minimum-phase
+	 * filter's actual fractional delay. Linear-phase filters do not have
+	 * fractional delay.
+	 * @see CDSPFIRFilterCache::getLPFilter()
+	 */
+
+	CDSPResampler( const double SrcSampleRate, const double DstSampleRate,
+		const int aMaxInLen, const double ReqTransBand = 2.0,
+		const double ReqAtten = 206.91,
+		const EDSPFilterPhaseResponse ReqPhase = fprLinearPhase )
+		: StepCapacity( 0 )
+		, StepCount( 0 )
+		, MaxInLen( aMaxInLen )
+		, CurMaxOutLen( aMaxInLen )
+		, LatencyFrac( 0.0 )
+	{
+		R8BASSERT( SrcSampleRate > 0.0 );
+		R8BASSERT( DstSampleRate > 0.0 );
+		R8BASSERT( MaxInLen > 0 );
+
+		R8BCONSOLE( "* CDSPResampler: src=%.1f dst=%.1f len=%i tb=%.1f "
+			"att=%.2f ph=%i\n", SrcSampleRate, DstSampleRate, aMaxInLen,
+			ReqTransBand, ReqAtten, (int) ReqPhase );
+
+		if( SrcSampleRate == DstSampleRate )
+		{
+			return;
+		}
+
+		TmpBufCapacities[ 0 ] = 0;
+		TmpBufCapacities[ 1 ] = 0;
+		CurTmpBuf = 0;
+
+		// Try some common efficient ratios requiring only a single step.
+
+		const int CommonRatioCount = 5;
+		const int CommonRatios[ CommonRatioCount ][ 2 ] = {
+			{ 1, 2 },
+			{ 1, 3 },
+			{ 2, 3 },
+			{ 3, 2 },
+			{ 3, 4 }
+		};
+
+		int i;
+
+		for( i = 0; i < CommonRatioCount; i++ )
+		{
+			const int num = CommonRatios[ i ][ 0 ];
+			const int den = CommonRatios[ i ][ 1 ];
+
+			if( SrcSampleRate * num == DstSampleRate * den )
+			{
+				addProcessor( new CDSPBlockConvolver(
+					CDSPFIRFilterCache :: getLPFilter(
+					1.0 / ( num > den ? num : den ), ReqTransBand,
+					ReqAtten, ReqPhase, num ), num, den, LatencyFrac ));
+
+				createTmpBuffers();
+				return;
+			}
+		}
+
+		// Try whole-number power-of-2 or 3*power-of-2 upsampling.
+
+		for( i = 2; i <= 3; i++ )
+		{
+			bool WasFound = false;
+			int c = 0;
+
+			while( true )
+			{
+				const double NewSR = SrcSampleRate * ( i << c );
+
+				if( NewSR == DstSampleRate )
+				{
+					WasFound = true;
+					break;
+				}
+
+				if( NewSR > DstSampleRate )
+				{
+					break;
+				}
+
+				c++;
+			}
+
+			if( WasFound )
+			{
+				addProcessor( new CDSPBlockConvolver(
+					CDSPFIRFilterCache :: getLPFilter( 1.0 / i, ReqTransBand,
+					ReqAtten, ReqPhase, i ), i, 1, LatencyFrac ));
+
+				const bool IsThird = ( i == 3 );
+
+				for( i = 0; i < c; i++ )
+				{
+					addProcessor( new CDSPHBUpsampler( ReqAtten, i, IsThird,
+						LatencyFrac ));
+				}
+
+				createTmpBuffers();
+				return;
+			}
+		}
+
+		if( DstSampleRate * 2.0 > SrcSampleRate )
+		{
+			// Upsampling or fractional downsampling down to 2X.
+
+			const double NormFreq = ( DstSampleRate > SrcSampleRate ? 0.5 :
+				0.5 * DstSampleRate / SrcSampleRate );
+
+			addProcessor( new CDSPBlockConvolver(
+				CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
+				ReqAtten, ReqPhase, 2.0 ), 2, 1, LatencyFrac ));
+
+			// Try intermediate interpolated resampling with subsequent 2X
+			// or 3X upsampling.
+
+			const double tbw = 0.0175; // Intermediate filter's transition
+				// band extension coefficient.
+			const double ThreshSampleRate = SrcSampleRate /
+				( 1.0 - tbw * ReqTransBand ); // Make sure intermediate
+				// filter's transition band is not steeper than ReqTransBand
+				// (this keeps the latency under control).
+
+			int c = 0;
+			int div = 1;
+
+			while( true )
+			{
+				const int ndiv = div * 2;
+
+				if( DstSampleRate < ThreshSampleRate * ndiv )
+				{
+					break;
+				}
+
+				div = ndiv;
+				c++;
+			}
+
+			int c2 = 0;
+			int div2 = 1;
+
+			while( true )
+			{
+				const int ndiv = div * ( c2 == 0 ? 3 : 2 );
+
+				if( DstSampleRate < ThreshSampleRate * ndiv )
+				{
+					break;
+				}
+
+				div2 = ndiv;
+				c2++;
+			}
+
+			const double SrcSampleRate2 = SrcSampleRate * 2.0;
+			int tmp1;
+			int tmp2;
+
+			if( c == 1 && getWholeStepping( SrcSampleRate2, DstSampleRate,
+				tmp1, tmp2 ))
+			{
+				// Do not use intermediate interpolation if whole stepping is
+				// available as it performs very fast.
+
+				c = 0;
+			}
+
+			if( c > 0 )
+			{
+				// Add steps using intermediate interpolation.
+
+				int num;
+
+				if( c2 > 0 && div2 > div )
+				{
+					div = div2;
+					c = c2;
+					num = 3;
+				}
+				else
+				{
+					num = 2;
+				}
+
+				addProcessor( new CDSPFracInterpolator( SrcSampleRate2 * div,
+					DstSampleRate, ReqAtten, false, LatencyFrac ));
+
+				double tb = ( 1.0 - SrcSampleRate * div / DstSampleRate ) /
+					tbw; // Divide TransBand by a constant that assures a
+					// linear response in the pass-band.
+
+				if( tb > CDSPFIRFilter :: getLPMaxTransBand() )
+				{
+					tb = CDSPFIRFilter :: getLPMaxTransBand();
+				}
+
+				addProcessor( new CDSPBlockConvolver(
+					CDSPFIRFilterCache :: getLPFilter( 1.0 / num, tb,
+					ReqAtten, ReqPhase, num ), num, 1, LatencyFrac ));
+
+				const bool IsThird = ( num == 3 );
+
+				for( i = 1; i < c; i++ )
+				{
+					addProcessor( new CDSPHBUpsampler( ReqAtten, i - 1,
+						IsThird, LatencyFrac ));
+				}
+			}
+			else
+			{
+				addProcessor( new CDSPFracInterpolator( SrcSampleRate2,
+					DstSampleRate, ReqAtten, false, LatencyFrac ));
+			}
+
+			createTmpBuffers();
+			return;
+		}
+
+		// Use downsampling steps, including power-of-2 downsampling.
+
+		double CheckSR = DstSampleRate * 4.0;
+		int c = 0;
+		double FinGain = 1.0;
+
+		while( CheckSR <= SrcSampleRate )
+		{
+			c++;
+			CheckSR *= 2.0;
+			FinGain *= 0.5;
+		}
+
+		const int SrcSRDiv = ( 1 << c );
+		int downf;
+		double NormFreq = 0.5;
+		bool UseInterp = true;
+		bool IsThird = false;
+
+		for( downf = 2; downf <= 3; downf++ )
+		{
+			if( DstSampleRate * SrcSRDiv * downf == SrcSampleRate )
+			{
+				NormFreq = 1.0 / downf;
+				UseInterp = false;
+				IsThird = ( downf == 3 );
+				break;
+			}
+		}
+
+		if( UseInterp )
+		{
+			downf = 1;
+			NormFreq = DstSampleRate * SrcSRDiv / SrcSampleRate;
+			IsThird = ( NormFreq * 3.0 <= 1.0 );
+		}
+
+		for( i = 0; i < c; i++ )
+		{
+			// Use a fixed very relaxed 2X downsampling filters, that at
+			// the final stage only guarantees stop-band between 0.75 and
+			// pi. 0.5-0.75 range will be aliased to 0.25-0.5 range which
+			// will then be filtered out by the final filter.
+
+			addProcessor( new CDSPHBDownsampler( ReqAtten, c - 1 - i, IsThird,
+				LatencyFrac ));
+		}
+
+		addProcessor( new CDSPBlockConvolver(
+			CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
+			ReqAtten, ReqPhase, FinGain ), 1, downf, LatencyFrac ));
+
+		if( UseInterp )
+		{
+			addProcessor( new CDSPFracInterpolator( SrcSampleRate,
+				DstSampleRate * SrcSRDiv, ReqAtten, IsThird, LatencyFrac ));
+		}
+
+		createTmpBuffers();
+	}
+
+	virtual ~CDSPResampler()
+	{
+		int i;
+
+		for( i = 0; i < StepCount; i++ )
+		{
+			delete Steps[ i ];
+		}
+	}
+
+	virtual int getLatency() const
+	{
+		return( 0 );
+	}
+
+	virtual double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	/**
+	 * This function ignores the supplied parameter and returns the maximal
+	 * output buffer length that depends on the MaxInLen supplied to the
+	 * constructor.
+	 */
+
+	virtual int getMaxOutLen( const int/* MaxInLen */ ) const
+	{
+		return( CurMaxOutLen );
+	}
+
+	/**
+	 * Function clears (resets) the state of *this object and returns it to
+	 * the state after construction. All input data accumulated in the
+	 * internal buffer so far will be discarded.
+	 *
+	 * This function makes it possible to use *this object for converting
+	 * separate streams from the same source sample rate to the same
+	 * destination sample rate without reconstructing the object. It is more
+	 * efficient to clear the state of the resampler object than to destroy it
+	 * and create a new object.
+	 */
+
+	virtual void clear()
+	{
+		int i;
+
+		for( i = 0; i < StepCount; i++ )
+		{
+			Steps[ i ] -> clear();
+		}
+	}
+
+	/**
+	 * Function performs sample rate conversion.
+	 *
+	 * If the source and destination sample rates are equal, the resampler
+	 * will do nothing and will simply return the input buffer unchanged.
+	 *
+	 * You do not need to allocate an intermediate output buffer for use with
+	 * this function. If required, the resampler will allocate a suitable
+	 * intermediate output buffer itself.
+	 *
+	 * @param ip0 Input buffer. This buffer is never used as output buffer by
+	 * this function. This pointer may be returned in "op0" if no resampling
+	 * is happening (source sample rate equals destination sample rate).
+	 * @param l The number of samples available in the input buffer. Should
+	 * not exceed the MaxInLen supplied in the constructor.
+	 * @param[out] op0 This variable receives the pointer to the resampled
+	 * data. On function's return, this pointer points to *this object's
+	 * internal buffer. In real-time applications it is suggested to pass this
+	 * pointer to the next output audio block and consume any data left from
+	 * the previous output audio block first before calling the process()
+	 * function again. The buffer pointed to by the "op0" on return is owned
+	 * by the resampler, so it should not be freed by the caller.
+	 * @return The number of samples available in the "op0" output buffer. If
+	 * the data from the output buffer "op0" is going to be written to a
+	 * bigger output buffer, it is suggested to check the returned number of
+	 * samples so that no overflow of the bigger output buffer happens.
+	 */
+
+	virtual int process( double* ip0, int l, double*& op0 )
+	{
+		R8BASSERT( l >= 0 );
+
+		double* ip = ip0;
+		int i;
+
+		for( i = 0; i < StepCount; i++ )
+		{
+			double* op = TmpBufs[ i & 1 ];
+			l = Steps[ i ] -> process( ip, l, op );
+			ip = op;
+		}
+
+		op0 = ip;
+		return( l );
+	}
+
+	/**
+	 * Function performs resampling of an input sample buffer of the specified
+	 * length in the "one-shot" mode. This function can be useful when impulse
+	 * response resampling is required.
+	 *
+	 * @param ip Input buffer pointer.
+	 * @param iplen Length of the input buffer in samples.
+	 * @param[out] op Output buffer pointer.
+	 * @param oplen Length of the output buffer in samples.
+	 * @tparam Tin Input buffer's element type.
+	 * @tparam Tout Output buffer's element type.
+	 */
+
+	template< typename Tin, typename Tout >
+	void oneshot( const Tin* ip, int iplen, Tout* op, int oplen )
+	{
+		CFixedBuffer< double > Buf( MaxInLen );
+		bool IsZero = false;
+
+		while( oplen > 0 )
+		{
+			int rc;
+			double* p;
+			int i;
+
+			if( iplen == 0 )
+			{
+				rc = MaxInLen;
+				p = &Buf[ 0 ];
+
+				if( !IsZero )
+				{
+					IsZero = true;
+					memset( p, 0, MaxInLen * sizeof( p[ 0 ]));
+				}
+			}
+			else
+			{
+				rc = min( iplen, MaxInLen );
+
+				if( sizeof( Tin ) == sizeof( double ))
+				{
+					p = (double*) ip;
+				}
+				else
+				{
+					p = &Buf[ 0 ];
+
+					for( i = 0; i < rc; i++ )
+					{
+						p[ i ] = ip[ i ];
+					}
+				}
+
+				ip += rc;
+				iplen -= rc;
+			}
+
+			double* op0;
+			int wc = process( p, rc, op0 );
+			wc = min( oplen, wc );
+
+			for( i = 0; i < wc; i++ )
+			{
+				op[ i ] = (Tout) op0[ i ];
+			}
+
+			op += wc;
+			oplen -= wc;
+		}
+
+		clear();
+	}
+
+	/**
+	 * Function obtains overall input sample count required to produce first
+	 * output sample. Function works by iteratively passing 1 sample at a time
+	 * until output begins. This is a relatively CPU-consuming operation. This
+	 * function should be called after the clear() function call or after
+	 * object's construction. The function itself calls the clear() function
+	 * before return.
+	 *
+	 * Note that it is advisable to cache the value returned by this function,
+	 * for each SrcSampleRate/DstSampleRate pair, if it is called frequently.
+	 */
+
+	int getInLenBeforeOutStart()
+	{
+		int inc = 0;
+
+		while( true )
+		{
+			double ins = 0.0;
+			double* op;
+
+			if( process( &ins, 1, op ) > 0 )
+			{
+				clear();
+				return( inc );
+			}
+
+			inc++;
+		}
+	}
+
+private:
+	CFixedBuffer< CDSPProcessor* > Steps; ///< Array of processing steps.
+		///<
+	int StepCapacity; ///< The capacity of the Steps array.
+		///<
+	int StepCount; ///< The number of created processing steps.
+		///<
+	int MaxInLen; ///< Maximal input length.
+		///<
+	CFixedBuffer< double > TmpBufAll; ///< Buffer containing both temporary
+		///< buffers.
+		///<
+	double* TmpBufs[ 2 ]; ///< Temporary output buffers.
+		///<
+	int TmpBufCapacities[ 2 ]; ///< Capacities of temporary buffers, updated
+		///< during processing steps building.
+		///<
+	int CurTmpBuf; ///< Current temporary buffer.
+		///<
+	int CurMaxOutLen; ///< Current maximal output length.
+		///<
+	double LatencyFrac; ///< Current fractional latency. After object's
+		///< construction, equals to the remaining fractional latency in the
+		///< output.
+		///<
+
+	/**
+	 * Function adds processor, updates MaxOutLen variable and adjusts length
+	 * of temporary internal buffers.
+	 *
+	 * @param Proc Processor to add. This pointer is inherited and will be
+	 * destroyed on *this object's destruction.
+	 */
+
+	void addProcessor( CDSPProcessor* const Proc )
+	{
+		if( StepCount == StepCapacity )
+		{
+			// Reallocate and increase Steps array's capacity.
+
+			const int NewCapacity = StepCapacity + 8;
+			Steps.realloc( StepCapacity, NewCapacity );
+			StepCapacity = NewCapacity;
+		}
+
+		LatencyFrac = Proc -> getLatencyFrac();
+		CurMaxOutLen = Proc -> getMaxOutLen( CurMaxOutLen );
+
+		if( CurMaxOutLen > TmpBufCapacities[ CurTmpBuf ])
+		{
+			TmpBufCapacities[ CurTmpBuf ] = CurMaxOutLen;
+		}
+
+		CurTmpBuf ^= 1;
+
+		Steps[ StepCount ] = Proc;
+		StepCount++;
+	}
+
+	/**
+	 * Function creates temporary buffers.
+	 */
+
+	void createTmpBuffers()
+	{
+		const int ol = TmpBufCapacities[ 0 ] + TmpBufCapacities[ 1 ];
+
+		if( ol > 0 )
+		{
+			TmpBufAll.alloc( ol );
+			TmpBufs[ 0 ] = &TmpBufAll[ 0 ];
+			TmpBufs[ 1 ] = &TmpBufAll[ TmpBufCapacities[ 0 ]];
+		}
+
+		R8BCONSOLE( "* CDSPResampler: init done\n" );
+	}
+};
+
+/**
+ * @brief The resampler class for 16-bit resampling.
+ *
+ * This class defines resampling parameters suitable for 16-bit resampling,
+ * using linear-phase low-pass filter. See the r8b::CDSPResampler class for
+ * details.
+ */
+
+class CDSPResampler16 : public CDSPResampler
+{
+public:
+	/**
+	 * Constructor initializes the 16-bit resampler. See the
+	 * r8b::CDSPResampler class for details.
+	 *
+	 * @param SrcSampleRate Source signal sample rate.
+	 * @param DstSampleRate Destination signal sample rate.
+	 * @param aMaxInLen The maximal planned length of the input buffer (in
+	 * samples) that will be passed to the resampler.
+	 * @param ReqTransBand Required transition band, in percent.
+	 */
+
+	CDSPResampler16( const double SrcSampleRate, const double DstSampleRate,
+		const int aMaxInLen, const double ReqTransBand = 2.0 )
+		: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
+			136.45, fprLinearPhase )
+	{
+	}
+};
+
+/**
+ * @brief The resampler class for 16-bit impulse response resampling.
+ *
+ * This class defines resampling parameters suitable for 16-bit impulse
+ * response resampling, using linear-phase low-pass filter. Impulse responses
+ * are non-dynamic signals, and thus need resampler with a lesser SNR. See the
+ * r8b::CDSPResampler class for details.
+ */
+
+class CDSPResampler16IR : public CDSPResampler
+{
+public:
+	/**
+	 * Constructor initializes the 16-bit impulse response resampler. See the
+	 * r8b::CDSPResampler class for details.
+	 *
+	 * @param SrcSampleRate Source signal sample rate.
+	 * @param DstSampleRate Destination signal sample rate.
+	 * @param aMaxInLen The maximal planned length of the input buffer (in
+	 * samples) that will be passed to the resampler.
+	 * @param ReqTransBand Required transition band, in percent.
+	 */
+
+	CDSPResampler16IR( const double SrcSampleRate, const double DstSampleRate,
+		const int aMaxInLen, const double ReqTransBand = 2.0 )
+		: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
+			109.56, fprLinearPhase )
+	{
+	}
+};
+
+/**
+ * @brief The resampler class for 24-bit resampling.
+ *
+ * This class defines resampling parameters suitable for 24-bit resampling
+ * (including 32-bit floating point resampling), using linear-phase low-pass
+ * filter. See the r8b::CDSPResampler class for details.
+ */
+
+class CDSPResampler24 : public CDSPResampler
+{
+public:
+	/**
+	 * Constructor initializes the 24-bit resampler (including 32-bit floating
+	 * point). See the r8b::CDSPResampler class for details.
+	 *
+	 * @param SrcSampleRate Source signal sample rate.
+	 * @param DstSampleRate Destination signal sample rate.
+	 * @param aMaxInLen The maximal planned length of the input buffer (in
+	 * samples) that will be passed to the resampler.
+	 * @param ReqTransBand Required transition band, in percent.
+	 */
+
+	CDSPResampler24( const double SrcSampleRate, const double DstSampleRate,
+		const int aMaxInLen, const double ReqTransBand = 2.0 )
+		: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
+			180.15, fprLinearPhase )
+	{
+	}
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPRESAMPLER_INCLUDED
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h
@ -0,0 +1,687 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPSincFilterGen.h
+ *
+ * @brief Sinc function-based FIR filter generator class.
+ *
+ * This file includes the CDSPSincFilterGen class implementation that
+ * generates FIR filters.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPSINCFILTERGEN_INCLUDED
+#define R8B_CDSPSINCFILTERGEN_INCLUDED
+
+#include "r8bbase.h"
+
+namespace r8b {
+
+/**
+ * @brief Sinc function-based FIR filter generator class.
+ *
+ * Structure that holds state used to perform generation of sinc functions of
+ * various types, windowed by the Blackman window by default (but the window
+ * function can be changed if necessary).
+ */
+
+class CDSPSincFilterGen
+{
+public:
+	double Len2; ///< Required half filter kernel's length in samples (can be
+		///< a fractional value). Final physical kernel length will be
+		///< provided in the KernelLen variable. Len2 should be >= 2.
+		///<
+	int KernelLen; ///< Resulting length of the filter kernel, this variable
+		///< is set after the call to one of the "init" functions.
+		///<
+	int fl2; ///< Internal "half kernel length" value. This value can be used
+		///< as filter's latency in samples (taps), this variable is set after
+		///< the call to one of the "init" functions.
+		///<
+
+	union
+	{
+		struct
+		{
+			double Freq1; ///< Required corner circular frequency 1 [0; pi].
+				///< Used only in the generateBand() function.
+				///<
+			double Freq2; ///< Required corner circular frequency 2 [0; pi].
+				///< Used only in the generateBand() function. The range
+				///< [Freq1; Freq2] defines a pass band for the generateBand()
+				///< function.
+				///<
+		};
+
+		struct
+		{
+			double FracDelay; ///< Fractional delay in the range [0; 1], used
+				///< only in the generateFrac() function. Note that the
+				///< FracDelay parameter is actually inversed. At 0.0 value it
+				///< produces 1 sample delay (with the latency equal to fl2),
+				///< at 1.0 value it produces 0 sample delay (with the latency
+				///< equal to fl2 - 1).
+				///<
+		};
+	};
+
+	/**
+	 * Window function type.
+	 */
+
+	enum EWindowFunctionType
+	{
+		wftCosine, ///< Generalized cosine window function. No parameters
+			///< required. The "Power" parameter is optional.
+			///<
+		wftKaiser, ///< Kaiser window function. Requires the "Beta" parameter.
+			///< The "Power" parameter is optional.
+			///<
+		wftGaussian ///< Gaussian window function. Requires the "Sigma"
+			///< parameter. The "Power" parameter is optional.
+			///<
+	};
+
+	typedef double( CDSPSincFilterGen :: *CWindowFunc )(); ///< Window
+		///< calculation function pointer type.
+		///<
+
+	/**
+	 * Function initializes *this structure for generation of a window
+	 * function, odd-sized.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 */
+
+	void initWindow( const EWindowFunctionType WinType = wftCosine,
+		const double* const Params = NULL, const bool UsePower = false )
+	{
+		R8BASSERT( Len2 >= 2.0 );
+
+		fl2 = (int) floor( Len2 );
+		KernelLen = fl2 + fl2 + 1;
+
+		setWindow( WinType, Params, UsePower, true );
+	}
+
+	/**
+	 * Function initializes *this structure for generation of band-limited
+	 * sinc filter kernel. The generateBand() function should be used to
+	 * calculate the filter.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 */
+
+	void initBand( const EWindowFunctionType WinType = wftCosine,
+		const double* const Params = NULL, const bool UsePower = false )
+	{
+		R8BASSERT( Len2 >= 2.0 );
+
+		fl2 = (int) floor( Len2 );
+		KernelLen = fl2 + fl2 + 1;
+		f1.init( Freq1, 0.0 );
+		f2.init( Freq2, 0.0 );
+
+		setWindow( WinType, Params, UsePower, true );
+	}
+
+	/**
+	 * Function initializes *this structure for Hilbert transformation filter
+	 * calculation. Freq1 and Freq2 variables are not used.
+	 * The generateHilbert() function should be used to calculate the filter.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 */
+
+	void initHilbert( const EWindowFunctionType WinType = wftCosine,
+		const double* const Params = NULL, const bool UsePower = false )
+	{
+		R8BASSERT( Len2 >= 2.0 );
+
+		fl2 = (int) floor( Len2 );
+		KernelLen = fl2 + fl2 + 1;
+
+		setWindow( WinType, Params, UsePower, true );
+	}
+
+	/**
+	 * Function initializes *this structure for generation of full-bandwidth
+	 * fractional delay sinc filter kernel. Freq1 and Freq2 variables are not
+	 * used. The generateFrac() function should be used to calculate the
+	 * filter.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 */
+
+	void initFrac( const EWindowFunctionType WinType = wftCosine,
+		const double* const Params = NULL, const bool UsePower = false )
+	{
+		R8BASSERT( Len2 >= 2.0 );
+
+		fl2 = (int) ceil( Len2 );
+		KernelLen = fl2 + fl2;
+
+		setWindow( WinType, Params, UsePower, false, FracDelay );
+	}
+
+	/**
+	 * @return The next "Hann" window function coefficient.
+	 */
+
+	double calcWindowHann()
+	{
+		return( 0.5 + 0.5 * w1.generate() );
+	}
+
+	/**
+	 * @return The next "Hamming" window function coefficient.
+	 */
+
+	double calcWindowHamming()
+	{
+		return( 0.54 + 0.46 * w1.generate() );
+	}
+
+	/**
+	 * @return The next "Blackman" window function coefficient.
+	 */
+
+	double calcWindowBlackman()
+	{
+		return( 0.42 + 0.5 * w1.generate() + 0.08 * w2.generate() );
+	}
+
+	/**
+	 * @return The next "Nuttall" window function coefficient.
+	 */
+
+	double calcWindowNuttall()
+	{
+		return( 0.355768 + 0.487396 * w1.generate() +
+			0.144232 * w2.generate() + 0.012604 * w3.generate() );
+	}
+
+	/**
+	 * @return The next "Blackman-Nuttall" window function coefficient.
+	 */
+
+	double calcWindowBlackmanNuttall()
+	{
+		return( 0.3635819 + 0.4891775 * w1.generate() +
+			0.1365995 * w2.generate() + 0.0106411 * w3.generate() );
+	}
+
+	/**
+	 * @return The next "Kaiser" window function coefficient.
+	 */
+
+	double calcWindowKaiser()
+	{
+		const double n = 1.0 - sqr( wn / Len2 + KaiserLen2Frac );
+		wn++;
+
+		if( n < 0.0 )
+		{
+			return( 0.0 );
+		}
+
+		return( besselI0( KaiserBeta * sqrt( n )) / KaiserDiv );
+	}
+
+	/**
+	 * @return The next "Gaussian" window function coefficient.
+	 */
+
+	double calcWindowGaussian()
+	{
+		const double f = exp( -0.5 * sqr( wn / GaussianSigma +
+			GaussianSigmaFrac ));
+
+		wn++;
+
+		return( f );
+	}
+
+	/**
+	 * Function calculates window function only.
+	 *
+	 * @param[out] op Output buffer, length = KernelLen.
+	 * @param wfunc Window calculation function to use.
+	 */
+
+	void generateWindow( double* op,
+		CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
+	{
+		op += fl2;
+		double* op2 = op;
+
+		int l = fl2;
+
+		if( Power < 0.0 )
+		{
+			*op = ( *this.*wfunc )();
+
+			while( l > 0 )
+			{
+				const double v = ( *this.*wfunc )();
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = v;
+				l--;
+			}
+		}
+		else
+		{
+			*op = pows(( *this.*wfunc )(), Power );
+
+			while( l > 0 )
+			{
+				const double v = pows(( *this.*wfunc )(), Power );
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = v;
+				l--;
+			}
+		}
+	}
+
+	/**
+	 * Function calculates band-limited windowed sinc function-based filter
+	 * kernel.
+	 *
+	 * @param[out] op Output buffer, length = KernelLen.
+	 * @param wfunc Window calculation function to use.
+	 */
+
+	void generateBand( double* op,
+		CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
+	{
+		op += fl2;
+		double* op2 = op;
+		f1.generate();
+		f2.generate();
+		int t = 1;
+
+		if( Power < 0.0 )
+		{
+			*op = ( Freq2 - Freq1 ) * ( *this.*wfunc )() / R8B_PI;
+
+			while( t <= fl2 )
+			{
+				const double v = ( f2.generate() - f1.generate() ) *
+					( *this.*wfunc )() / ( t * R8B_PI );
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = v;
+				t++;
+			}
+		}
+		else
+		{
+			*op = ( Freq2 - Freq1 ) * pows(( *this.*wfunc )(), Power ) /
+				R8B_PI;
+
+			while( t <= fl2 )
+			{
+				const double v = ( f2.generate() - f1.generate() ) *
+					pows(( *this.*wfunc )(), Power ) / ( t * R8B_PI );
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = v;
+				t++;
+			}
+		}
+	}
+
+	/**
+	 * Function calculates windowed Hilbert transformer filter kernel.
+	 *
+	 * @param[out] op Output buffer, length = KernelLen.
+	 * @param wfunc Window calculation function to use.
+	 */
+
+	void generateHilbert( double* op,
+		CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
+	{
+		static const double fvalues[ 2 ] = { 0.0, 2.0 / R8B_PI };
+		op += fl2;
+		double* op2 = op;
+
+		( *this.*wfunc )();
+		*op = 0.0;
+
+		int t = 1;
+
+		if( Power < 0.0 )
+		{
+			while( t <= fl2 )
+			{
+				const double v = fvalues[ t & 1 ] * ( *this.*wfunc )() / t;
+				op++;
+				op2--;
+				*op = v;
+				*op2 = -v;
+				t++;
+			}
+		}
+		else
+		{
+			while( t <= fl2 )
+			{
+				const double v = fvalues[ t & 1 ] *
+					pows( ( *this.*wfunc )(), Power ) / t;
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = -v;
+				t++;
+			}
+		}
+	}
+
+	/**
+	 * Function calculates windowed fractional delay filter kernel.
+	 *
+	 * @param[out] op Output buffer, length = KernelLen.
+	 * @param wfunc Window calculation function to use.
+	 * @param opinc Output buffer increment, in "op" elements.
+	 */
+
+	void generateFrac( double* op,
+		CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman,
+		const int opinc = 1 )
+	{
+		R8BASSERT( opinc != 0 );
+
+		double f[ 2 ];
+		f[ 0 ] = sin( FracDelay * R8B_PI ) / R8B_PI;
+		f[ 1 ] = -f[ 0 ];
+
+		int t = -fl2;
+
+		if( t + FracDelay < -Len2 )
+		{
+			( *this.*wfunc )();
+			*op = 0.0;
+			op += opinc;
+			t++;
+		}
+
+		int IsZeroX = ( fabs( FracDelay - 1.0 ) < 0x1p-42 );
+		int mt = 0 - IsZeroX;
+		IsZeroX = ( IsZeroX || fabs( FracDelay ) < 0x1p-42 );
+
+		if( Power < 0.0 )
+		{
+			while( t < mt )
+			{
+				*op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
+				op += opinc;
+				t++;
+			}
+
+			if( IsZeroX ) // t+FracDelay==0
+			{
+				*op = ( *this.*wfunc )();
+			}
+			else
+			{
+				*op = f[ t & 1 ] * ( *this.*wfunc )() / FracDelay; // t==0
+			}
+
+			mt = fl2 - 2;
+
+			while( t < mt )
+			{
+				op += opinc;
+				t++;
+				*op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
+			}
+
+			op += opinc;
+			t++;
+			const double ut = t + FracDelay;
+			*op = ( ut > Len2 ? 0.0 : f[ t & 1 ] * ( *this.*wfunc )() / ut );
+		}
+		else
+		{
+			while( t < mt )
+			{
+				*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
+					( t + FracDelay );
+
+				op += opinc;
+				t++;
+			}
+
+			if( IsZeroX ) // t+FracDelay==0
+			{
+				*op = pows( ( *this.*wfunc )(), Power );
+			}
+			else
+			{
+				*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
+					FracDelay; // t==0
+			}
+
+			mt = fl2 - 2;
+
+			while( t < mt )
+			{
+				op += opinc;
+				t++;
+				*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
+					( t + FracDelay );
+			}
+
+			op += opinc;
+			t++;
+			const double ut = t + FracDelay;
+			*op = ( ut > Len2 ? 0.0 : f[ t & 1 ] *
+				pows( ( *this.*wfunc )(), Power ) / ut );
+		}
+	}
+
+private:
+	double Power; ///< The power factor used to raise the window function.
+		///< Equals a negative value if the power factor should not be used.
+		///<
+	CSineGen f1; ///< Sine function 1. Used in the generateBand() function.
+		///<
+	CSineGen f2; ///< Sine function 2. Used in the generateBand() function.
+		///<
+	int wn; ///< Window function integer position. 0 - center of the window
+		///< function. This variable may not be used by some window functions.
+		///<
+	CSineGen w1; ///< Cosine wave 1 for window function.
+		///<
+	CSineGen w2; ///< Cosine wave 2 for window function.
+		///<
+	CSineGen w3; ///< Cosine wave 3 for window function.
+		///<
+
+	union
+	{
+		struct
+		{
+			double KaiserBeta; ///< Kaiser window function's "Beta"
+				///< coefficient.
+				///<
+			double KaiserDiv; ///< Kaiser window function's divisor.
+				///<
+			double KaiserLen2Frac; ///< Equals FracDelay / Len2.
+				///<
+		};
+
+		struct
+		{
+			double GaussianSigma; ///< Gaussian window function's "Sigma"
+				///< coefficient.
+				///<
+			double GaussianSigmaFrac; ///< Equals FracDelay / GaussianSigma.
+				///<
+		};
+	};
+
+	/**
+	 * Function initializes Kaiser window function calculation. The FracDelay
+	 * variable should be initialized when using this window function.
+	 *
+	 * @param Params Function parameters. If NULL, the default values will be
+	 * used. If not NULL, the first parameter should specify the "Beta" value.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function.
+	 * @param IsCentered "True" if centered window should be used. This
+	 * parameter usually equals to "false" for fractional delay filters only.
+	 */
+
+	void setWindowKaiser( const double* Params, const bool UsePower,
+		const bool IsCentered )
+	{
+		wn = ( IsCentered ? 0 : -fl2 );
+
+		if( Params == NULL )
+		{
+			KaiserBeta = 9.5945013206755156;
+			Power = ( UsePower ? 1.9718457932433306 : -1.0 );
+		}
+		else
+		{
+			KaiserBeta = clampr( Params[ 0 ], 1.0, 350.0 );
+			Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
+		}
+
+		KaiserDiv = besselI0( KaiserBeta );
+		KaiserLen2Frac = FracDelay / Len2;
+	}
+
+	/**
+	 * Function initializes Gaussian window function calculation. The FracDelay
+	 * variable should be initialized when using this window function.
+	 *
+	 * @param Params Function parameters. If NULL, the table values will be
+	 * used. If not NULL, the first parameter should specify the "Sigma"
+	 * value.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function.
+	 * @param IsCentered "True" if centered window should be used. This
+	 * parameter usually equals to "false" for fractional delay filters only.
+	 */
+
+	void setWindowGaussian( const double* Params, const bool UsePower,
+		const bool IsCentered )
+	{
+		wn = ( IsCentered ? 0 : -fl2 );
+
+		if( Params == NULL )
+		{
+			GaussianSigma = 1.0;
+			Power = -1.0;
+		}
+		else
+		{
+			GaussianSigma = clampr( fabs( Params[ 0 ]), 1e-1, 100.0 );
+			Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
+		}
+
+		GaussianSigma *= Len2;
+		GaussianSigmaFrac = FracDelay / GaussianSigma;
+	}
+
+	/**
+	 * Function initializes calculation of window function of the specified
+	 * type.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 * @param IsCentered "True" if centered window should be used. This
+	 * parameter usually equals to "false" for fractional delay filters only.
+	 * @param UseFracDelay Fractional delay to use.
+	 */
+
+	void setWindow( const EWindowFunctionType WinType,
+		const double* const Params, const bool UsePower,
+		const bool IsCentered, const double UseFracDelay = 0.0 )
+	{
+		FracDelay = UseFracDelay;
+
+		if( WinType == wftCosine )
+		{
+			if( IsCentered )
+			{
+				w1.init( R8B_PI / Len2, R8B_PId2 );
+				w2.init( R8B_2PI / Len2, R8B_PId2 );
+				w3.init( R8B_3PI / Len2, R8B_PId2 );
+			}
+			else
+			{
+				const double step1 = R8B_PI / Len2;
+				w1.init( step1, R8B_PId2 - step1 * fl2 + step1 * FracDelay );
+
+				const double step2 = R8B_2PI / Len2;
+				w2.init( step2, R8B_PId2 - step2 * fl2 + step2 * FracDelay );
+
+				const double step3 = R8B_3PI / Len2;
+				w3.init( step3, R8B_PId2 - step3 * fl2 + step3 * FracDelay );
+			}
+
+			Power = ( UsePower && Params != NULL ? Params[ 0 ] : -1.0 );
+		}
+		else
+		if( WinType == wftKaiser )
+		{
+			setWindowKaiser( Params, UsePower, IsCentered );
+		}
+		else
+		if( WinType == wftGaussian )
+		{
+			setWindowGaussian( Params, UsePower, IsCentered );
+		}
+	}
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPSINCFILTERGEN_INCLUDED
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2013-2022 Aleksey Vaneev
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt
@ -0,0 +1,6 @@
+r8brain-free resampling library from https://github.com/avaneev/r8brain-free-src
+version 5.6.
+The example program, DLL and "other" folder, PFFFT sources and logo file have been removed.
+No further local changes have been made.
+For building, premake is used to generate Visual Studio project files.
+See ../build/premake/ for details.
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md
@ -0,0 +1,408 @@
+# r8brain-free-src - High-Quality Resampler #
+
+## Introduction ##
+
+Open source (under the MIT license) high-quality professional audio sample
+rate converter (SRC) / resampler C++ library.  Features routines for SRC,
+both up- and downsampling, to/from any sample rate, including non-integer
+sample rates: it can be also used for conversion to/from SACD/DSD sample
+rates, and even go beyond that.  SRC routines were implemented in a
+multi-platform C++ code, and have a high level of optimality. Also suitable
+for fast general-purpose 1D time-series resampling / interpolation (with
+relaxed filter parameters).
+
+The structure of this library's objects is such that they can be frequently
+created and destroyed in large applications with a minimal performance impact
+due to a high level of reusability of its most "initialization-expensive"
+objects: the fast Fourier transform and FIR filter objects.
+
+The SRC algorithm at first produces 2X oversampled (relative to the source
+sample rate, or the destination sample rate if the downsampling is performed)
+signal, then performs interpolation using a bank of short (8 to 30 taps,
+depending on the required precision) polynomial-interpolated sinc
+function-based fractional delay filters.  This puts the algorithm into the
+league of the fastest among the most precise SRC algorithms.  The more precise
+alternative being only the whole number-factored SRC, which can be slower.
+
+P.S. Please credit the creator of this library in your documentation in the
+following way: "Sample rate converter designed by Aleksey Vaneev of Voxengo".
+
+## Requirements ##
+
+C++ compiler and system with the "double" floating point type (53-bit
+mantissa) support.  No explicit code for the "float" type is present in this
+library, because as practice has shown the "float"-based code performs
+considerably slower on a modern processor, at least in this library.  This
+library does not have dependencies beside the standard C library, the
+"windows.h" on Windows and the "pthread.h" on macOS and Linux.
+
+## Links ##
+
+* [Documentation](https://www.voxengo.com/public/r8brain-free-src/Documentation/)
+* [Discussion](https://www.kvraudio.com/forum/viewtopic.php?t=389711)
+
+## Usage Information ##
+
+The sample rate converter (resampler) is represented by the
+**r8b::CDSPResampler** class, which is a single front-end class for the
+whole library.  You do not basically need to use nor understand any other
+classes beside this class.  Several derived classes that have varying levels
+of precision are also available (for full-resolution 16-bit and 24-bit
+resampling).
+
+The code of the library resides in the "r8b" C++ namespace, effectively
+isolating it from all other code.  The code is thread-safe.  A separate
+resampler object should be created for each audio channel or stream being
+processed concurrently.
+
+Note that you will need to compile the "r8bbase.cpp" source file and include
+the resulting object file into your application build.  This source file
+includes definitions of several global static objects used by the library.
+You may also need to include to your project: the "Kernel32" library (on
+Windows) and the "pthread" library on macOS and Linux.
+
+The library is able to process signal of any scale and loudness: it is not
+limited to just a "usual" -1.0 to 1.0 range.
+
+By defining the `R8B_IPP` configuration macro it is possible to enable Intel
+IPP back-end for FFT functions, instead of the default Ooura FFT.  IPP FFT
+makes sample rate conversion faster by 23% on average.
+
+    #define R8B_IPP 1
+
+If a larger initial processing delay and a very minor sample timing error are
+not an issue, for the most efficiency you can define these macros at
+the beginning of the `r8bconf.h` file, or during compilation:
+
+    #define R8B_IPP 1
+    #define R8B_FASTTIMING 1
+    #define R8B_EXTFFT 1
+
+If you do not have access to the Intel IPP then you may consider enabling the
+PFFFT which is only slightly slower than Intel IPP FFT in performance.  There
+are two macros available: `R8B_PFFFT` and `R8B_PFFFT_DOUBLE`.  The first macro
+enables PFFFT that works in single-precision resolution, thus limiting the
+overall resampler's precision to 24-bit sample rate conversions (for
+mission-critical professional audio applications, using the `R8B_PFFFT` macro
+is not recommended as its peak error is quite large).  The second macro
+enables PFFFT implementation that works in double-precision resolution, making
+use of SSE2, AVX, and NEON intrinsics, yielding precision that is equal to
+both Intel IPP and Ooura FFT implementations.
+
+To use the PFFFT, define the `R8B_PFFFT` or `R8B_PFFFT_DOUBLE` macro, compile
+and include the supplied `pffft.cpp` or `pffft_double/pffft_double.c` file to
+your project build.
+
+    #define R8B_PFFFT 1
+
+    or
+
+    #define R8B_PFFFT_DOUBLE 1
+
+The code of this library was commented in the [Doxygen](http://www.doxygen.org/)
+style.  To generate the documentation locally you may run the
+`doxygen ./other/r8bdoxy.txt` command from the library's folder.
+
+Preliminary tests show that the r8b::CDSPResampler24 resampler class achieves
+`31.8*n_cores` Mrops (`44*n_cores` for Intel IPP FFT) when converting 1
+channel of 24-bit audio from 44100 to 96000 sample rate (2% transition band),
+on a Ryzen 3700X processor-based 64-bit system.  This approximately translates
+to a real-time resampling of `720*n_cores` (`1000*n_cores`) audio streams, at
+100% CPU load.  Speed performance when converting to other sample rates may
+vary greatly.  When comparing performance of this resampler library to another
+library make sure that the competing library is also tuned to produce a fully
+linear-phase response, has similar stop-band characteristics and similar
+sample timing precision.
+
+## Dynamic Link Library ##
+
+The functions of this SRC library are also accessible in simplified form via
+the DLL file on Windows, requiring a processor with SSE2 support (Win64
+version includes AVX2 auto-dispatch code).  Delphi Pascal interface unit file
+for the DLL file is available.  DLL and C LIB files are distributed in the DLL
+folder on the project's homepage.  On non-Windows systems it is preferrable
+to use the C++ library directly.  Note that the DLL was compiled with the
+Intel IPP enabled.
+
+## Real-Time Applications ##
+
+The resampler class of this library was designed as an asynchronous processor:
+it may produce any number of output samples, depending on the input sample
+data length and the resampling parameters.  The resampler must be fed with the
+input sample data until enough output sample data were produced, with any
+excess output samples used before feeding the resampler with more input data.
+A "relief" factor here is that the resampler removes the initial processing
+latency automatically, and that after initial moments of processing the output
+becomes steady, with only minor output sample data length fluctuations.
+
+So, while for an off-line resampling a "push" method can be used,
+demonstrated in the `example.cpp` file, for a real-time resampling a "pull"
+method should be used which calls the resampling process until the output
+buffer is filled.
+
+## Notes ##
+
+When using the **r8b::CDSPResampler** class directly, you may select the
+transition band/steepness of the low-pass (reconstruction) filter, expressed
+as a percentage of the full spectral bandwidth of the input signal (or the
+output signal if the downsampling is performed), and the desired stop-band
+attenuation in decibel.
+
+The transition band is specified as the normalized spectral space of the input
+signal (or the output signal if the downsampling is performed) between the
+low-pass filter's -3 dB point and the Nyquist frequency, and ranges from 0.5%
+to 45%.  Stop-band attenuation can be specified in the range from 49 to 218
+decibel.  Both the transition band and stop-band attenuation affect
+resampler's overall speed performance and initial output delay.  For your
+information, transition frequency range spans 175% of the specified transition
+band, which means that for 2% transition band, frequency response below
+0.965\*Nyquist is linear.
+
+This SRC library also implements a much faster "power of 2" resampling (e.g.
+2X, 4X, 8X, 16X, 3X, 3\*2X, 3\*4X, 3\*8X, etc. upsampling and downsampling),
+which is engaged automatically if the resampling parameters permit.
+
+This library was tested for compatibility with [GNU C++](https://gcc.gnu.org/),
+[Microsoft Visual C++](https://visualstudio.microsoft.com/),
+[LLVM](https://llvm.org/) and [Intel C++](https://software.intel.com/en-us/c-compilers)
+compilers, on 32- and 64-bit Windows, macOS, and CentOS Linux.
+
+Most code is "inline", without the need to compile many source files. The
+memory footprint is quite modest.
+
+## Acknowledgements ##
+
+r8brain-free-src is bundled with the following code:
+
+* FFT routines Copyright (c) 1996-2001 Takuya OOURA.
+[Homepage](http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
+* PFFFT Copyright (c) 2013 Julien Pommier.
+[Homepage](https://bitbucket.org/jpommier/pffft)
+* PFFFT DOUBLE Copyright (c) 2020 Hayati Ayguen, Dario Mambro.
+[Homepage](https://github.com/unevens/pffft)
+
+## Users ##
+
+This library is used by:
+
+* [REAPER](https://reaper.fm/)
+* [AUDIRVANA](https://audirvana.com/)
+* [Red Dead Redemption 2](https://www.rockstargames.com/reddeadredemption2/credits)
+* [Mini Piano Lite](https://play.google.com/store/apps/details?id=umito.android.minipiano)
+* [OpenMPT](https://openmpt.org/)
+* [Boogex Guitar Amp audio plugin](https://www.voxengo.com/product/boogex/)
+* [r8brain free sample rate converter](https://www.voxengo.com/product/r8brain/)
+* [Voice Aloud Reader](https://play.google.com/store/apps/details?id=com.hyperionics.avarLic)
+* [Zynewave Podium](https://zynewave.com/podium/)
+* [Phonometrica](http://www.phonometrica-ling.org/index.html)
+* [Ripcord](https://cancel.fm/ripcord/)
+* [TensorVox](https://github.com/ZDisket/TensorVox)
+* [Curvessor](https://github.com/unevens/Curvessor)
+
+Please send me a note via aleksey.vaneev@gmail.com and I will include a link
+to your software product to this list of users. This list is important in
+maintaining confidence in this library among the interested parties. The
+inclusion into this list is not mandatory.
+
+## Change Log ##
+
+Version 5.6:
+
+* Added SSE and NEON implementations to `CDSPHBUpsampler` yielding 15%
+performance improvement of power-of-2 upsampling.
+* Added SSE and NEON implementations to the `CDSPRealFFT::multiplyBlocksZP`
+function, for 2-3% performance improvement.
+* Added intermediate interpolator's transition band limitation, for logical
+hardness (not practically needed).
+* Added the `aDoConsumeLatency` parameter to `CDSPHBUpsampler` constructor,
+for "inline" DSP uses of the class.
+* Made various minor changes across the codebase.
+
+Version 5.5:
+
+* Hardened positional logic of fractional filter calculation, removed
+redundant multiplications.
+* Removed unnecessary function templating from the `CDSPSincFilterGen` class.
+* Added the `__ARM_NEON` macro to NEON availability detection.
+
+Version 5.4:
+
+* Added compiler specializations to previously optimized inner loops.
+"Shuffled" SIMD interpolation code is not efficient on Apple M1. Intel C++
+Compiler vectorizes "whole stepping" interpolation as good as a
+manually-written SSE.
+* Reorganized SIMD instructions for a slightly better performance.
+* Changed internal buffer sizes of half-band resamplers (1-2% performance
+boost).
+* Fixed compiler warnings in PFFFT code.
+* Added several asserts to the code.
+
+Version 5.3:
+
+* Optimized inner loops of the fractional interpolator, added SSE2 and NEON
+intrinsics, resulting in a measurable (8-25%) performance gain.
+* Optimized filter calculation functions: changed some divisions by a constant
+to multiplications.
+* Renamed M_PI macros to R8B_PI, to avoid macro collisions.
+* Removed redundant code and macros.
+
+Version 5.2:
+
+* Modified `PFFFT` and `PFFFT DOUBLE` conditional pre-processor directives to
+always enable NEON on `aarch64`/`arm64` (this includes code built for
+Apple M1).
+
+Version 5.1:
+
+* Changed alignment in the `CFixedBuffer` class to 64 bytes. This improves AVX
+performance of the `PFFFT DOUBLE` implementation by a few percent.
+* Removed redundant files from the `pffft_double` folder, integrated the
+`pffft_common.c` file into the `pffft_double.c` file.
+
+Version 5.0:
+
+* Removed a long-outdated macros from the `r8bconf.h` file.
+* Changed a conditional pre-processor directive in the `pf_sse2_double.h` file
+as per PFFFT DOUBLE author's suggestion, to allow SSE2 intrinsics in most
+compilers.
+* Fixed "License.txt" misnaming in the source files to "LICENSE".
+
+Version 4.10:
+
+* Added the `PFFFT DOUBLE` implementation support. Now available via the
+`R8B_PFFFT_DOUBLE` definition macro.
+
+Version 4.9:
+
+* Reoptimized half-band and fractional interpolation filters with a stricter
+frequency response linearity constraints. This did not impact the average
+speed performance.
+
+Version 4.8:
+
+* Added a limit to the intermediate filter's transition band, to keep the
+latency under control at any resampling ratio.
+
+Version 4.7:
+
+* Added `#ifndef _USE_MATH_DEFINES` to `pffft.cpp`.
+* Moved `#include "pffft.h"` to `CDSPRealFFT.h`.
+
+Version 4.6:
+
+* Removed the `MaxInLen` parameter from the `oneshot()` function.
+* Decreased intermediate low-pass filter's transition band slightly, for more
+stable quality.
+
+Version 4.5:
+
+* Fixed VS2017 compiler warnings.
+
+Version 4.4:
+
+* Fixed the "Declaration hides a member" Intel C++ compiler warnings.
+
+Version 4.3:
+
+* Added //$ markers for internal debugging purposes.
+
+Version 4.2:
+
+* Backed-off max transition band to 45 and MinAtten to 49.
+* Implemented Wave64 and AIFF file input in the `r8bfreesrc` bench tool. The
+tool is now compiled with the `R8B_IPP 1` and `R8B_EXTFFT 1` macros to
+demonstrate the maximal achievable performance.
+
+Version 4.1:
+
+* Updated allowed ReqAtten range to 52-218, ReqTransBand 0.5-56. It is
+possible to specify filter parameters slightly beyond these values, but the
+resulting filter will be slightly out of specification as well.
+* Optimized static filter banks allocation.
+
+Version 4.0:
+
+* A major overhaul of interpolation classes: now templated parameters are not
+used, all required parameters are calculated at runtime. Static filter bank
+object is not used, it is created when necessary, and then cached.
+* Implemented one-third interpolation filters, however, this did not
+measurably increase resampler's speed.
+
+Version 3.7:
+
+* Used ippsMul_64f_I() in the CDSPRealFFT::multiplyBlockZ() function for a
+minor conversion speed increase in Intel IPP mode.
+
+Version 3.6:
+
+* Added memory alignment to allocated buffers which boosts performance by 1.5%
+when Intel IPP FFT is in use.
+* Implemented PFFFT support.
+
+Version 3.5:
+
+* Improved resampling speed very slightly.
+* Updated the `r8bfreesrc` benchmark tool to support RF64 WAV files.
+
+Version 3.4:
+
+* Added a more efficient half-band filters for >= 256 resampling ratios.
+
+Version 3.3:
+
+* Made minor fix to downsampling for some use cases of CDSPBlockConvolver,
+did not affect resampler.
+* Converted CDSPHBUpsampler and CDSPHBDownsampler's inner functions to
+static functions, which boosted high-ratio resampling performance measurably.
+
+Version 3.2:
+
+* Minor fix to the latency consumption mechanism.
+
+Version 3.1:
+
+* Reoptimized fractional delay filter's windowing function.
+
+Version 3.0:
+
+* Implemented a new variant of the getInLenBeforeOutStart() function.
+* Reimplemented oneshot() function to support `float` buffer types.
+* Considerably improved downsampling performance at high resampling ratios.
+* Implemented intermediate interpolation technique which boosted upsampling
+performance for most resampling ratios considerably.
+* Removed the ConvCount constant - now resampler supports virtually any
+resampling ratios.
+* Removed the UsePower2 parameter from the resampler constructor.
+* Now resampler's process() function always returns pointer to the internal
+buffer, input buffer is returned only if no resampling happens.
+* Resampler's getMaxOutLen() function can now be used to obtain the maximal
+output length that can be produced by the resampler in a single call.
+* Added a more efficient "one-third" filters to half-band upsampler and
+downsampler.
+
+Version 2.1:
+
+* Optimized 2X half-band downsampler.
+
+Version 2.0:
+
+* Optimized power-of-2 upsampling.
+
+Version 1.9:
+
+* Optimized half-band downsampling filter.
+* Implemented whole-number stepping resampling.
+* Added `R8B_EXTFFT` configuration option.
+* Fixed initial sub-sample offseting on downsampling.
+
+Version 1.8:
+
+* Added `R8B_FASTTIMING` configuration option.
+
+Version 1.7:
+
+* Improved sample timing precision.
+* Increased CDSPResampler :: ConvCountMax to 28 to support a lot wider
+resampling ratios.
+* Added `bench` tools.
+* Removed getInLenBeforeOutStart() due to incorrect calculation.
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp
@ -0,0 +1,38 @@
+/**
+ * @file r8bbase.cpp
+ *
+ * @brief C++ file that should be compiled and included into your application.
+ *
+ * This is a single library file that should be compiled and included into the
+ * project that uses the "r8brain-free-src" sample rate converter. This file
+ * defines several global static objects used by the library.
+ *
+ * You may also need to include to your project: the "Kernel32" library
+ * (on Windows) and the "pthread" library on macOS and Linux.
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#include "CDSPFIRFilter.h"
+#include "CDSPFracInterpolator.h"
+
+namespace r8b {
+
+#if R8B_FLTTEST
+	int InterpFilterFracs = -1;
+#endif // R8B_FLTTEST
+
+CSyncObject CDSPRealFFTKeeper :: StateSync;
+CDSPRealFFT :: CObjKeeper CDSPRealFFTKeeper :: FFTObjects[ 31 ];
+
+CSyncObject CDSPFIRFilterCache :: StateSync;
+CPtrKeeper< CDSPFIRFilter* > CDSPFIRFilterCache :: Objects;
+int CDSPFIRFilterCache :: ObjCount = 0;
+
+CSyncObject CDSPFracDelayFilterBankCache :: StateSync;
+CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: Objects;
+CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: StaticObjects;
+int CDSPFracDelayFilterBankCache :: ObjCount = 0;
+
+} // namespace r8b
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h
@ -0,0 +1,176 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file r8bconf.h
+ *
+ * @brief The "configuration" inclusion file you can modify.
+ *
+ * This is the "configuration" inclusion file for the "r8brain-free-src"
+ * sample rate converter. You may redefine the macros here as you see fit.
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8BCONF_INCLUDED
+#define R8BCONF_INCLUDED
+
+#if !defined( R8B_IPP )
+	/**
+	 * Set the R8B_IPP macro definition to 1 to enable the use of Intel IPP's
+	 * fast Fourier transform functions. Also uncomment and correct the IPP
+	 * header inclusion macros.
+	 *
+	 * Do not forget to call the ippInit() function at the start of the
+	 * application, before using this library's functions.
+	 */
+
+	#define R8B_IPP 0
+
+//	#include <ippcore.h>
+//	#include <ipps.h>
+#endif // !defined( R8B_IPP )
+
+#if !defined( R8BASSERT )
+	/**
+	 * Assertion macro used to check for certain run-time conditions. By
+	 * default no action is taken if assertion fails.
+	 *
+	 * @param e Expression to check.
+	 */
+
+	#define R8BASSERT( e )
+#endif // !defined( R8BASSERT )
+
+#if !defined( R8BCONSOLE )
+	/**
+	 * Console output macro, used to output various resampler status strings,
+	 * including filter design parameters, convolver parameters.
+	 *
+	 * @param e Expression to send to the console, usually consists of a
+	 * standard "printf" format string followed by several parameters
+	 * (__VA_ARGS__).
+	 */
+
+	#define R8BCONSOLE( ... )
+#endif // !defined( R8BCONSOLE )
+
+#if !defined( R8B_BASECLASS )
+	/**
+	 * Macro defines the name of the class from which all classes that are
+	 * designed to be created on heap are derived. The default
+	 * r8b::CStdClassAllocator class uses "stdlib" memory allocation
+	 * functions.
+	 *
+	 * The classes that are best placed on stack or as class members are not
+	 * derived from any class.
+	 */
+
+	#define R8B_BASECLASS :: r8b :: CStdClassAllocator
+#endif // !defined( R8B_BASECLASS )
+
+#if !defined( R8B_MEMALLOCCLASS )
+	/**
+	 * Macro defines the name of the class that implements raw memory
+	 * allocation functions, see the r8b::CStdMemAllocator class for details.
+	 */
+
+	#define R8B_MEMALLOCCLASS :: r8b :: CStdMemAllocator
+#endif // !defined( R8B_MEMALLOCCLASS )
+
+#if !defined( R8B_FILTER_CACHE_MAX )
+	/**
+	 * This macro specifies the number of filters kept in the cache at most.
+	 * The actual number can be higher if many different filters are in use at
+	 * the same time.
+	 */
+
+	#define R8B_FILTER_CACHE_MAX 96
+#endif // !defined( R8B_FILTER_CACHE_MAX )
+
+#if !defined( R8B_FRACBANK_CACHE_MAX )
+	/**
+	 * This macro specifies the number of whole-number stepping fractional
+	 * delay filter banks kept in the cache at most. The actual number can be
+	 * higher if many different filter banks are in use at the same time. As
+	 * filter banks are usually big objects, it is advisable to keep this
+	 * cache size small.
+	 */
+
+	#define R8B_FRACBANK_CACHE_MAX 12
+#endif // !defined( R8B_FRACBANK_CACHE_MAX )
+
+#if !defined( R8B_FLTTEST )
+	/**
+	 * This macro, when equal to 1, enables fractional delay filter bank
+	 * testing: in this mode the filter bank becomes a dynamic member of the
+	 * CDSPFracInterpolator object instead of being a global static object.
+	 */
+
+	#define R8B_FLTTEST 0
+#endif // !defined( R8B_FLTTEST )
+
+#if !defined( R8B_FASTTIMING )
+	/**
+	 * This macro, when equal to 1, enables a fast interpolation sample
+	 * timing technique. This technique improves interpolation performance
+	 * (by around 10%) at the expense of a minor sample timing drift which is
+	 * on the order of 1e-6 samples per 10 billion output samples. This
+	 * setting does not apply to whole-number stepping, if it is in use, as
+	 * this stepping provides zero timing error without performance impact.
+	 * Also does not apply to the cases when a whole-numbered (2X, 3X, etc.)
+	 * resampling is in the actual use.
+	 */
+
+	#define R8B_FASTTIMING 0
+#endif // !defined( R8B_FASTTIMING )
+
+#if !defined( R8B_EXTFFT )
+	/**
+	 * This macro, when equal to 1, extends length of low-pass filters' FFT
+	 * block by a factor of 2, by zero-padding it. This usually improves the
+	 * overall time performance of the resampler at the expense of a higher
+	 * overall latency (initial processing delay). If such delay is not an
+	 * issue, setting this macro to 1 is preferrable. This macro can only have
+	 * a value of 0 or 1.
+	 */
+
+	#define R8B_EXTFFT 0
+#endif // !defined( R8B_EXTFFT )
+
+#if !defined( R8B_PFFFT )
+	/**
+	 * When defined as 1, enables PFFFT routines which are fast, but which
+	 * are limited to 24-bit precision. May be a good choice for time-series
+	 * interpolation, when stop-band attenuation higher than 120 dB is not
+	 * required.
+	 */
+
+	#define R8B_PFFFT 0
+#endif // !defined( R8B_PFFFT )
+
+#if R8B_PFFFT
+	#define R8B_FLOATFFT 1
+#endif // R8B_PFFFT
+
+#if !defined( R8B_PFFFT_DOUBLE )
+	/**
+	 * When defined as 1, enables PFFFT "double" routines which are fast, and
+	 * which provide the highest precision.
+	 */
+
+	#define R8B_PFFFT_DOUBLE 0
+#endif // !defined( R8B_PFFFT_DOUBLE )
+
+#if !defined( R8B_FLOATFFT )
+	/**
+	 * The R8B_FLOATFFT definition enables double-to-float buffer conversions
+	 * for FFT operations, for algorithms that work with "float" values. This
+	 * macro should not be changed from the default "0" here.
+	 */
+
+	#define R8B_FLOATFFT 0
+#endif // !defined( R8B_FLOATFFT )
+
+#endif // R8BCONF_INCLUDED
--- a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h
@ -0,0 +1,306 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file r8butil.h
+ *
+ * @brief The inclusion file with several utility functions.
+ *
+ * This file includes several utility functions used by various utility
+ * programs like "calcErrorTable.cpp".
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8BUTIL_INCLUDED
+#define R8BUTIL_INCLUDED
+
+#include "r8bbase.h"
+
+namespace r8b {
+
+/**
+ * @param re Real part of the frequency response.
+ * @param im Imaginary part of the frequency response.
+ * @return A magnitude response value converted from the linear scale to the
+ * logarithmic scale.
+ */
+
+inline double convertResponseToLog( const double re, const double im )
+{
+	return( 4.34294481903251828 * log( re * re + im * im + 1e-100 ));
+}
+
+/**
+ * An utility function that performs frequency response scanning step update
+ * based on the current magnitude response's slope.
+ *
+ * @param[in,out] step The current scanning step. Will be updated on
+ * function's return. Must be a positive value.
+ * @param curg Squared magnitude response at the current frequency point.
+ * @param[in,out] prevg_log Previous magnitude response, log scale. Will be
+ * updated on function's return.
+ * @param prec Precision multiplier, affects the size of the step.
+ * @param maxstep The maximal allowed step.
+ * @param minstep The minimal allowed step.
+ */
+
+inline void updateScanStep( double& step, const double curg,
+	double& prevg_log, const double prec, const double maxstep,
+	const double minstep = 1e-11 )
+{
+	double curg_log = 4.34294481903251828 * log( curg + 1e-100 );
+	curg_log += ( prevg_log - curg_log ) * 0.7;
+
+	const double slope = fabs( curg_log - prevg_log );
+	prevg_log = curg_log;
+
+	if( slope > 0.0 )
+	{
+		step /= prec * slope;
+		step = max( min( step, maxstep ), minstep );
+	}
+}
+
+/**
+ * Function locates normalized frequency at which the minimum filter gain
+ * is reached. The scanning is performed from lower (left) to higher
+ * (right) frequencies, the whole range is scanned.
+ *
+ * Function expects that the magnitude response is always reducing from lower
+ * to high frequencies, starting at "minth".
+ *
+ * @param flt Filter response.
+ * @param fltlen Filter response's length in samples (taps).
+ * @param[out] ming The current minimal gain (squared). On function's return
+ * will contain the minimal gain value found (squared).
+ * @param[out] minth The normalized frequency where the minimal gain is
+ * currently at. On function's return will point to the normalized frequency
+ * where the new minimum was found.
+ * @param thend The ending frequency, inclusive.
+ */
+
+inline void findFIRFilterResponseMinLtoR( const double* const flt,
+	const int fltlen, double& ming, double& minth, const double thend )
+{
+	const double maxstep = minth * 2e-3;
+	double curth = minth;
+	double re;
+	double im;
+	calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+	double prevg_log = convertResponseToLog( re, im );
+	double step = 1e-11;
+
+	while( true )
+	{
+		curth += step;
+
+		if( curth > thend )
+		{
+			break;
+		}
+
+		calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+		const double curg = re * re + im * im;
+
+		if( curg > ming )
+		{
+			ming = curg;
+			minth = curth;
+			break;
+		}
+
+		ming = curg;
+		minth = curth;
+
+		updateScanStep( step, curg, prevg_log, 0.31, maxstep );
+	}
+}
+
+/**
+ * Function locates normalized frequency at which the maximal filter gain
+ * is reached. The scanning is performed from lower (left) to higher
+ * (right) frequencies, the whole range is scanned.
+ *
+ * Note: this function may "stall" in very rare cases if the magnitude
+ * response happens to be "saw-tooth" like, requiring a very small stepping to
+ * be used. If this happens, it may take dozens of seconds to complete.
+ *
+ * @param flt Filter response.
+ * @param fltlen Filter response's length in samples (taps).
+ * @param[out] maxg The current maximal gain (squared). On function's return
+ * will contain the maximal gain value (squared).
+ * @param[out] maxth The normalized frequency where the maximal gain is
+ * currently at. On function's return will point to the normalized frequency
+ * where the maximum was reached.
+ * @param thend The ending frequency, inclusive.
+ */
+
+inline void findFIRFilterResponseMaxLtoR( const double* const flt,
+	const int fltlen, double& maxg, double& maxth, const double thend )
+{
+	const double maxstep = maxth * 1e-4;
+	double premaxth = maxth;
+	double premaxg = maxg;
+	double postmaxth = maxth;
+	double postmaxg = maxg;
+
+	double prevth = maxth;
+	double prevg = maxg;
+	double curth = maxth;
+	double re;
+	double im;
+	calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+	double prevg_log = convertResponseToLog( re, im );
+	double step = 1e-11;
+
+	bool WasPeak = false;
+	int AfterPeakCount = 0;
+
+	while( true )
+	{
+		curth += step;
+
+		if( curth > thend )
+		{
+			break;
+		}
+
+		calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+		const double curg = re * re + im * im;
+
+		if( curg > maxg )
+		{
+			premaxth = prevth;
+			premaxg = prevg;
+			maxg = curg;
+			maxth = curth;
+			WasPeak = true;
+			AfterPeakCount = 0;
+		}
+		else
+		if( WasPeak )
+		{
+			if( AfterPeakCount == 0 )
+			{
+				postmaxth = curth;
+				postmaxg = curg;
+			}
+
+			if( AfterPeakCount == 5 )
+			{
+				// Perform 2 approximate binary searches.
+
+				int k;
+
+				for( k = 0; k < 2; k++ )
+				{
+					double l = ( k == 0 ? premaxth : maxth );
+					double curgl = ( k == 0 ? premaxg : maxg );
+					double r = ( k == 0 ? maxth : postmaxth );
+					double curgr = ( k == 0 ? maxg : postmaxg );
+
+					while( true )
+					{
+						const double c = ( l + r ) * 0.5;
+						calcFIRFilterResponse( flt, fltlen, R8B_PI * c,
+							re, im );
+
+						const double curg = re * re + im * im;
+
+						if( curgl > curgr )
+						{
+							r = c;
+							curgr = curg;
+						}
+						else
+						{
+							l = c;
+							curgl = curg;
+						}
+
+						if( r - l < 1e-11 )
+						{
+							if( curgl > curgr )
+							{
+								maxth = l;
+								maxg = curgl;
+							}
+							else
+							{
+								maxth = r;
+								maxg = curgr;
+							}
+
+							break;
+						}
+					}
+				}
+
+				break;
+			}
+
+			AfterPeakCount++;
+		}
+
+		prevth = curth;
+		prevg = curg;
+
+		updateScanStep( step, curg, prevg_log, 1.0, maxstep );
+	}
+}
+
+/**
+ * Function locates normalized frequency at which the specified maximum
+ * filter gain is reached. The scanning is performed from higher (right)
+ * to lower (left) frequencies, scanning stops when the required gain
+ * value was crossed. Function uses an extremely efficient binary search and
+ * thus expects that the magnitude response has the "main lobe" form produced
+ * by windowing, with a minimal pass-band ripple.
+ *
+ * @param flt Filter response.
+ * @param fltlen Filter response's length in samples (taps).
+ * @param maxg Maximal gain (squared).
+ * @param[out] th The current normalized frequency. On function's return will
+ * point to the normalized frequency where "maxg" is reached.
+ * @param thend The leftmost frequency to scan, inclusive.
+ */
+
+inline void findFIRFilterResponseLevelRtoL( const double* const flt,
+	const int fltlen, const double maxg, double& th, const double thend )
+{
+	// Perform exact binary search.
+
+	double l = thend;
+	double r = th;
+
+	while( true )
+	{
+		const double c = ( l + r ) * 0.5;
+
+		if( r - l < 1e-14 )
+		{
+			th = c;
+			break;
+		}
+
+		double re;
+		double im;
+		calcFIRFilterResponse( flt, fltlen, R8B_PI * c, re, im );
+		const double curg = re * re + im * im;
+
+		if( curg > maxg )
+		{
+			l = c;
+		}
+		else
+		{
+			r = c;
+		}
+	}
+}
+
+} // namespace r8b
+
+#endif // R8BUTIL_INCLUDED