Force 32bit alignment for vectorized operations, fixes 32bit build
Signed-off-by: falkTX <falktx@falktx.com>
This commit is contained in:
parent
6d9633b7fb
commit
2dc12fb1ca
8 changed files with 656 additions and 5 deletions
164
include/dsp/fir.hpp
Normal file
164
include/dsp/fir.hpp
Normal file
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* DISTRHO Cardinal Plugin
|
||||
* Copyright (C) 2021-2022 Filipe Coelho <falktx@falktx.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 3 of
|
||||
* the License, or any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* For a full copy of the GNU General Public License see the LICENSE file.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This file is an edited version of VCVRack's dsp/fir.hpp
|
||||
* Copyright (C) 2016-2021 VCV.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <pffft.h>
|
||||
|
||||
#include <dsp/common.hpp>
|
||||
|
||||
|
||||
namespace rack {
|
||||
namespace dsp {
|
||||
|
||||
|
||||
/** Performs a direct sum convolution */
|
||||
inline float convolveNaive(const float* in, const float* kernel, int len) {
|
||||
float y = 0.f;
|
||||
for (int i = 0; i < len; i++) {
|
||||
y += in[len - 1 - i] * kernel[i];
|
||||
}
|
||||
return y;
|
||||
}
|
||||
|
||||
/** Computes the impulse response of a boxcar lowpass filter */
|
||||
inline void boxcarLowpassIR(float* out, int len, float cutoff = 0.5f) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
float t = i - (len - 1) / 2.f;
|
||||
out[i] = 2 * cutoff * sinc(2 * cutoff * t);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct RealTimeConvolver {
|
||||
// `kernelBlocks` number of contiguous FFT blocks of size `blockSize`
|
||||
// indexed by [i * blockSize*2 + j]
|
||||
float* kernelFfts = NULL;
|
||||
float* inputFfts = NULL;
|
||||
float* outputTail = NULL;
|
||||
float* tmpBlock = NULL;
|
||||
size_t blockSize;
|
||||
size_t kernelBlocks = 0;
|
||||
size_t inputPos = 0;
|
||||
PFFFT_Setup* pffft;
|
||||
|
||||
/** `blockSize` is the size of each FFT block. It should be >=32 and a power of 2. */
|
||||
RealTimeConvolver(size_t blockSize) {
|
||||
this->blockSize = blockSize;
|
||||
pffft = pffft_new_setup(blockSize * 2, PFFFT_REAL);
|
||||
outputTail = (float*) pffft_aligned_malloc(sizeof(float) * blockSize);
|
||||
std::memset(outputTail, 0, blockSize * sizeof(float));
|
||||
tmpBlock = (float*) pffft_aligned_malloc(sizeof(float) * blockSize * 2);
|
||||
std::memset(tmpBlock, 0, blockSize * 2 * sizeof(float));
|
||||
}
|
||||
|
||||
~RealTimeConvolver() {
|
||||
setKernel(NULL, 0);
|
||||
pffft_aligned_free(outputTail);
|
||||
pffft_aligned_free(tmpBlock);
|
||||
pffft_destroy_setup(pffft);
|
||||
}
|
||||
|
||||
void setKernel(const float* kernel, size_t length) {
|
||||
// Clear existing kernel
|
||||
if (kernelFfts) {
|
||||
pffft_aligned_free(kernelFfts);
|
||||
kernelFfts = NULL;
|
||||
}
|
||||
if (inputFfts) {
|
||||
pffft_aligned_free(inputFfts);
|
||||
inputFfts = NULL;
|
||||
}
|
||||
kernelBlocks = 0;
|
||||
inputPos = 0;
|
||||
|
||||
if (kernel && length > 0) {
|
||||
// Round up to the nearest factor of `blockSize`
|
||||
kernelBlocks = (length - 1) / blockSize + 1;
|
||||
|
||||
// Allocate blocks
|
||||
kernelFfts = (float*) pffft_aligned_malloc(sizeof(float) * blockSize * 2 * kernelBlocks);
|
||||
inputFfts = (float*) pffft_aligned_malloc(sizeof(float) * blockSize * 2 * kernelBlocks);
|
||||
std::memset(inputFfts, 0, sizeof(float) * blockSize * 2 * kernelBlocks);
|
||||
|
||||
for (size_t i = 0; i < kernelBlocks; i++) {
|
||||
// Pad each block with zeros
|
||||
std::memset(tmpBlock, 0, sizeof(float) * blockSize * 2);
|
||||
size_t len = std::min((int) blockSize, (int)(length - i * blockSize));
|
||||
std::memcpy(tmpBlock, &kernel[i * blockSize], sizeof(float)*len);
|
||||
// Compute fft
|
||||
pffft_transform(pffft, tmpBlock, &kernelFfts[blockSize * 2 * i], NULL, PFFFT_FORWARD);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Applies reverb to input
|
||||
input and output must be of size `blockSize`
|
||||
*/
|
||||
void processBlock(const float* input, float* output) {
|
||||
if (kernelBlocks == 0) {
|
||||
std::memset(output, 0, sizeof(float) * blockSize);
|
||||
return;
|
||||
}
|
||||
|
||||
// Step input position
|
||||
inputPos = (inputPos + 1) % kernelBlocks;
|
||||
// Pad block with zeros
|
||||
std::memset(tmpBlock, 0, sizeof(float) * blockSize * 2);
|
||||
std::memcpy(tmpBlock, input, sizeof(float) * blockSize);
|
||||
// Compute input fft
|
||||
pffft_transform(pffft, tmpBlock, &inputFfts[blockSize * 2 * inputPos], NULL, PFFFT_FORWARD);
|
||||
// Create output fft
|
||||
std::memset(tmpBlock, 0, sizeof(float) * blockSize * 2);
|
||||
// convolve input fft by kernel fft
|
||||
// Note: This is the CPU bottleneck loop
|
||||
for (size_t i = 0; i < kernelBlocks; i++) {
|
||||
size_t pos = (inputPos - i + kernelBlocks) % kernelBlocks;
|
||||
pffft_zconvolve_accumulate(pffft, &kernelFfts[blockSize * 2 * i], &inputFfts[blockSize * 2 * pos], tmpBlock, 1.f);
|
||||
}
|
||||
// Compute output
|
||||
pffft_transform(pffft, tmpBlock, tmpBlock, NULL, PFFFT_BACKWARD);
|
||||
// Add block tail from last output block
|
||||
for (size_t i = 0; i < blockSize; i++) {
|
||||
tmpBlock[i] += outputTail[i];
|
||||
}
|
||||
// Copy output block to output
|
||||
float scale = 1.f / (blockSize * 2);
|
||||
for (size_t i = 0; i < blockSize; i++) {
|
||||
// Scale based on FFT
|
||||
output[i] = tmpBlock[i] * scale;
|
||||
}
|
||||
// Set tail
|
||||
for (size_t i = 0; i < blockSize; i++) {
|
||||
outputTail[i] = tmpBlock[i + blockSize];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
} // namespace dsp
|
||||
} // namespace rack
|
Loading…
Add table
Add a link
Reference in a new issue