| 1 | #include "./common.h" |
| 2 | |
| 3 | #ifndef SIGNALSMITH_DSP_SPECTRAL_H |
| 4 | #define SIGNALSMITH_DSP_SPECTRAL_H |
| 5 | |
| 6 | #include "./perf.h" |
| 7 | #include "./fft.h" |
| 8 | #include "./windows.h" |
| 9 | #include "./delay.h" |
| 10 | |
| 11 | #include <cmath> |
| 12 | |
| 13 | namespace signalsmith { |
| 14 | namespace spectral { |
| 15 | /** @defgroup Spectral Spectral Processing |
| 16 | @brief Tools for frequency-domain manipulation of audio signals |
| 17 | |
| 18 | @{ |
| 19 | @file |
| 20 | */ |
| 21 | |
| 22 | /** @brief An FFT with built-in windowing and round-trip scaling |
| 23 | |
| 24 | This uses a Modified Real FFT, which applies half-bin shift before the transform. The result therefore has `N/2` bins, centred at the frequencies: `(i + 0.5)/N`. |
| 25 | |
| 26 | This avoids the awkward (real-valued) bands for DC-offset and Nyquist. |
| 27 | */ |
| 28 | template<typename Sample> |
| 29 | class WindowedFFT { |
| 30 | using MRFFT = signalsmith::fft::ModifiedRealFFT<Sample>; |
| 31 | using Complex = std::complex<Sample>; |
| 32 | MRFFT mrfft{2}; |
| 33 | |
| 34 | std::vector<Sample> fftWindow; |
| 35 | std::vector<Sample> timeBuffer; |
| 36 | int offsetSamples = 0; |
| 37 | public: |
| 38 | /// Returns a fast FFT size <= `size` |
| 39 | static int fastSizeAbove(int size, int divisor=1) { |
| 40 | return MRFFT::fastSizeAbove(size/divisor)*divisor; |
| 41 | } |
| 42 | /// Returns a fast FFT size >= `size` |
| 43 | static int fastSizeBelow(int size, int divisor=1) { |
| 44 | return MRFFT::fastSizeBelow(1 + (size - 1)/divisor)*divisor; |
| 45 | } |
| 46 | |
| 47 | WindowedFFT() {} |
| 48 | WindowedFFT(int size, int rotateSamples=0) { |
| 49 | setSize(size, rotateSamples); |
| 50 | } |
| 51 | template<class WindowFn> |
| 52 | WindowedFFT(int size, WindowFn fn, Sample windowOffset=0.5, int rotateSamples=0) { |
| 53 | setSize(size, fn, windowOffset, rotateSamples); |
| 54 | } |
| 55 | |
| 56 | /// Sets the size, returning the window for modification (initially all 1s) |
| 57 | std::vector<Sample> & setSizeWindow(int size, int rotateSamples=0) { |
| 58 | mrfft.setSize(size); |
| 59 | fftWindow.assign(size, 1); |
| 60 | timeBuffer.resize(size); |
| 61 | offsetSamples = rotateSamples; |
| 62 | if (offsetSamples < 0) offsetSamples += size; // TODO: for a negative rotation, the other half of the result is inverted |
| 63 | return fftWindow; |
| 64 | } |
| 65 | /// Sets the FFT size, with a user-defined functor for the window |
| 66 | template<class WindowFn> |
| 67 | void setSize(int size, WindowFn fn, Sample windowOffset=0.5, int rotateSamples=0) { |
| 68 | setSizeWindow(size, rotateSamples); |
| 69 | |
| 70 | Sample invSize = 1/(Sample)size; |
| 71 | for (int i = 0; i < size; ++i) { |
| 72 | Sample r = (i + windowOffset)*invSize; |
| 73 | fftWindow[i] = fn(r); |
| 74 | } |
| 75 | } |
| 76 | /// Sets the size (using the default Blackman-Harris window) |
| 77 | void setSize(int size, int rotateSamples=0) { |
| 78 | setSize(size, [](double x) { |
| 79 | double phase = 2*M_PI*x; |
| 80 | // Blackman-Harris |
| 81 | return 0.35875 - 0.48829*std::cos(x: phase) + 0.14128*std::cos(x: phase*2) - 0.01168*std::cos(x: phase*3); |
| 82 | }, Sample(0.5), rotateSamples); |
| 83 | } |
| 84 | |
| 85 | const std::vector<Sample> & window() const { |
| 86 | return this->fftWindow; |
| 87 | } |
| 88 | int size() const { |
| 89 | return mrfft.size(); |
| 90 | } |
| 91 | |
| 92 | /// Performs an FFT (with windowing) |
| 93 | template<class Input, class Output> |
| 94 | void fft(Input &&input, Output &&output) { |
| 95 | int fftSize = size(); |
| 96 | for (int i = 0; i < offsetSamples; ++i) { |
| 97 | // Inverted polarity since we're using the MRFFT |
| 98 | timeBuffer[i + fftSize - offsetSamples] = -input[i]*fftWindow[i]; |
| 99 | } |
| 100 | for (int i = offsetSamples; i < fftSize; ++i) { |
| 101 | timeBuffer[i - offsetSamples] = input[i]*fftWindow[i]; |
| 102 | } |
| 103 | mrfft.fft(timeBuffer, output); |
| 104 | } |
| 105 | /// Performs an FFT (no windowing or rotation) |
| 106 | template<class Input, class Output> |
| 107 | void fftRaw(Input &&input, Output &&output) { |
| 108 | mrfft.fft(input, output); |
| 109 | } |
| 110 | |
| 111 | /// Inverse FFT, with windowing and 1/N scaling |
| 112 | template<class Input, class Output> |
| 113 | void ifft(Input &&input, Output &&output) { |
| 114 | mrfft.ifft(input, timeBuffer); |
| 115 | int fftSize = mrfft.size(); |
| 116 | Sample norm = 1/(Sample)fftSize; |
| 117 | |
| 118 | for (int i = 0; i < offsetSamples; ++i) { |
| 119 | // Inverted polarity since we're using the MRFFT |
| 120 | output[i] = -timeBuffer[i + fftSize - offsetSamples]*norm*fftWindow[i]; |
| 121 | } |
| 122 | for (int i = offsetSamples; i < fftSize; ++i) { |
| 123 | output[i] = timeBuffer[i - offsetSamples]*norm*fftWindow[i]; |
| 124 | } |
| 125 | } |
| 126 | /// Performs an IFFT (no windowing or rotation) |
| 127 | template<class Input, class Output> |
| 128 | void ifftRaw(Input &&input, Output &&output) { |
| 129 | mrfft.ifft(input, output); |
| 130 | } |
| 131 | }; |
| 132 | |
| 133 | /** STFT synthesis, built on a `MultiBuffer`. |
| 134 | |
| 135 | Any window length and block interval is supported, but the FFT size may be rounded up to a faster size (by zero-padding). It uses a heuristically-optimal Kaiser window modified for perfect-reconstruction. |
| 136 | |
| 137 | \diagram{stft-aliasing-simulated.svg,Simulated bad-case aliasing (random phase-shift for each band) for overlapping ratios} |
| 138 | |
| 139 | There is a "latest valid index", and you can read the output up to one `historyLength` behind this (see `.resize()`). You can read up to one window-length _ahead_ to get partially-summed future output. |
| 140 | |
| 141 | \diagram{stft-buffer-validity.svg} |
| 142 | |
| 143 | You move the valid index along using `.ensureValid()`, passing in a functor which provides spectra (using `.analyse()` and/or direct modification through `.spectrum[c]`): |
| 144 | |
| 145 | \code |
| 146 | void processSample(...) { |
| 147 | stft.ensureValid([&](int) { |
| 148 | // Here, we introduce (1 - windowSize) of latency |
| 149 | stft.analyse(inputBuffer.view(1 - windowSize)) |
| 150 | }); |
| 151 | // read as a MultiBuffer |
| 152 | auto result = stft.at(0); |
| 153 | ++stft; // also moves the latest valid index |
| 154 | } |
| 155 | |
| 156 | void processBlock(...) { |
| 157 | // assuming `historyLength` == blockSize |
| 158 | stft.ensureValid(blockSize, [&](int blockStartIndex) { |
| 159 | int inputStart = blockStartIndex + (1 - windowSize); |
| 160 | stft.analyse(inputBuffer.view(inputStart)); |
| 161 | }); |
| 162 | auto earliestValid = stft.at(0); |
| 163 | auto latestValid = stft.at(blockSize); |
| 164 | stft += blockSize; |
| 165 | } |
| 166 | \endcode |
| 167 | |
| 168 | The index passed to this functor will be greater than the previous valid index, and `<=` the index you pass in. Therefore, if you call `.ensureValid()` every sample, it can only ever be `0`. |
| 169 | */ |
| 170 | template<typename Sample> |
| 171 | class STFT : public signalsmith::delay::MultiBuffer<Sample> { |
| 172 | using Super = signalsmith::delay::MultiBuffer<Sample>; |
| 173 | using Complex = std::complex<Sample>; |
| 174 | |
| 175 | int channels = 0, _windowSize = 0, _fftSize = 0, _interval = 1; |
| 176 | int validUntilIndex = 0; |
| 177 | |
| 178 | class MultiSpectrum { |
| 179 | int channels, stride; |
| 180 | std::vector<Complex> buffer; |
| 181 | public: |
| 182 | MultiSpectrum() : MultiSpectrum(0, 0) {} |
| 183 | MultiSpectrum(int channels, int bands) : channels(channels), stride(bands), buffer(channels*bands, 0) {} |
| 184 | |
| 185 | void resize(int nChannels, int nBands) { |
| 186 | channels = nChannels; |
| 187 | stride = nBands; |
| 188 | buffer.assign(channels*stride, 0); |
| 189 | } |
| 190 | |
| 191 | void reset() { |
| 192 | buffer.assign(buffer.size(), 0); |
| 193 | } |
| 194 | |
| 195 | void swap(MultiSpectrum &other) { |
| 196 | using std::swap; |
| 197 | swap(buffer, other.buffer); |
| 198 | } |
| 199 | |
| 200 | Complex * operator [](int channel) { |
| 201 | return buffer.data() + channel*stride; |
| 202 | } |
| 203 | const Complex * operator [](int channel) const { |
| 204 | return buffer.data() + channel*stride; |
| 205 | } |
| 206 | }; |
| 207 | std::vector<Sample> timeBuffer; |
| 208 | |
| 209 | void resizeInternal(int newChannels, int windowSize, int newInterval, int historyLength, int zeroPadding) { |
| 210 | Super::resize(newChannels, |
| 211 | windowSize /* for output summing */ |
| 212 | + newInterval /* so we can read `windowSize` ahead (we'll be at most `interval-1` from the most recent block */ |
| 213 | + historyLength); |
| 214 | |
| 215 | int fftSize = fft.fastSizeAbove(windowSize + zeroPadding); |
| 216 | |
| 217 | this->channels = newChannels; |
| 218 | _windowSize = windowSize; |
| 219 | this->_fftSize = fftSize; |
| 220 | this->_interval = newInterval; |
| 221 | validUntilIndex = -1; |
| 222 | |
| 223 | setWindow(shape: windowShape); |
| 224 | |
| 225 | spectrum.resize(channels, fftSize/2); |
| 226 | timeBuffer.resize(fftSize); |
| 227 | } |
| 228 | public: |
| 229 | enum class Window {kaiser, acg}; |
| 230 | /// \deprecated use `.setWindow()` which actually updates the window when you change it |
| 231 | Window windowShape = Window::kaiser; |
| 232 | // for convenience |
| 233 | static constexpr Window kaiser = Window::kaiser; |
| 234 | static constexpr Window acg = Window::acg; |
| 235 | |
| 236 | /** Swaps between the default (Kaiser) shape and Approximate Confined Gaussian (ACG). |
| 237 | \diagram{stft-windows.svg,Default (Kaiser) windows and partial cumulative sum} |
| 238 | The ACG has better rolloff since its edges go to 0: |
| 239 | \diagram{stft-windows-acg.svg,ACG windows and partial cumulative sum} |
| 240 | However, it generally has worse performance in terms of total sidelobe energy, affecting worst-case aliasing levels for (most) higher overlap ratios: |
| 241 | \diagram{stft-aliasing-simulated-acg.svg,Simulated bad-case aliasing for ACG windows - compare with above}*/ |
| 242 | // TODO: these should both be set before resize() |
| 243 | void setWindow(Window shape, bool rotateToZero=false) { |
| 244 | windowShape = shape; |
| 245 | |
| 246 | auto &window = fft.setSizeWindow(_fftSize, rotateToZero ? _windowSize/2 : 0); |
| 247 | if (windowShape == Window::kaiser) { |
| 248 | using Kaiser = ::signalsmith::windows::Kaiser; |
| 249 | /// Roughly optimal Kaiser for STFT analysis (forced to perfect reconstruction) |
| 250 | auto kaiser = Kaiser::withBandwidth(bandwidth: _windowSize/double(_interval), heuristicOptimal: true); |
| 251 | kaiser.fill(window, _windowSize); |
| 252 | } else { |
| 253 | using Confined = ::signalsmith::windows::ApproximateConfinedGaussian; |
| 254 | auto confined = Confined::withBandwidth(bandwidth: _windowSize/double(_interval)); |
| 255 | confined.fill(window, _windowSize); |
| 256 | } |
| 257 | ::signalsmith::windows::forcePerfectReconstruction(window, _windowSize, _interval); |
| 258 | |
| 259 | // TODO: fill extra bits of an input buffer with NaN/Infinity, to break this, and then fix by adding zero-padding to WindowedFFT (as opposed to zero-valued window sections) |
| 260 | for (int i = _windowSize; i < _fftSize; ++i) { |
| 261 | window[i] = 0; |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | using Spectrum = MultiSpectrum; |
| 266 | Spectrum spectrum; |
| 267 | WindowedFFT<Sample> fft; |
| 268 | |
| 269 | STFT() {} |
| 270 | /// Parameters passed straight to `.resize()` |
| 271 | STFT(int channels, int windowSize, int interval, int historyLength=0, int zeroPadding=0) { |
| 272 | resize(nChannels: channels, windowSize, interval, historyLength, zeroPadding); |
| 273 | } |
| 274 | |
| 275 | /// Sets the channel-count, FFT size and interval. |
| 276 | void resize(int nChannels, int windowSize, int interval, int historyLength=0, int zeroPadding=0) { |
| 277 | resizeInternal(newChannels: nChannels, windowSize, newInterval: interval, historyLength, zeroPadding); |
| 278 | } |
| 279 | |
| 280 | int windowSize() const { |
| 281 | return _windowSize; |
| 282 | } |
| 283 | int fftSize() const { |
| 284 | return _fftSize; |
| 285 | } |
| 286 | int interval() const { |
| 287 | return _interval; |
| 288 | } |
| 289 | /// Returns the (analysis and synthesis) window |
| 290 | decltype(fft.window()) window() const { |
| 291 | return fft.window(); |
| 292 | } |
| 293 | /// Calculates the effective window for the partially-summed future output (relative to the most recent block) |
| 294 | std::vector<Sample> partialSumWindow(bool includeLatestBlock=true) const { |
| 295 | const auto &w = window(); |
| 296 | std::vector<Sample> result(_windowSize, 0); |
| 297 | int firstOffset = (includeLatestBlock ? 0 : _interval); |
| 298 | for (int offset = firstOffset; offset < _windowSize; offset += _interval) { |
| 299 | for (int i = 0; i < _windowSize - offset; ++i) { |
| 300 | Sample value = w[i + offset]; |
| 301 | result[i] += value*value; |
| 302 | } |
| 303 | } |
| 304 | return result; |
| 305 | } |
| 306 | |
| 307 | /// Resets everything - since we clear the output sum, it will take `windowSize` samples to get proper output. |
| 308 | void reset() { |
| 309 | Super::reset(); |
| 310 | spectrum.reset(); |
| 311 | validUntilIndex = -1; |
| 312 | } |
| 313 | |
| 314 | /** Generates valid output up to the specified index (or 0), using the callback as many times as needed. |
| 315 | |
| 316 | The callback should be a functor accepting a single integer argument, which is the index for which a spectrum is required. |
| 317 | |
| 318 | The block created from these spectra will start at this index in the output, plus `.latency()`. |
| 319 | */ |
| 320 | template<class AnalysisFn> |
| 321 | void ensureValid(int i, AnalysisFn fn) { |
| 322 | while (validUntilIndex < i) { |
| 323 | int blockIndex = validUntilIndex + 1; |
| 324 | fn(blockIndex); |
| 325 | |
| 326 | auto output = this->view(blockIndex); |
| 327 | for (int c = 0; c < channels; ++c) { |
| 328 | auto channel = output[c]; |
| 329 | |
| 330 | // Clear out the future sum, a window-length and an interval ahead |
| 331 | for (int wi = _windowSize; wi < _windowSize + _interval; ++wi) { |
| 332 | channel[wi] = 0; |
| 333 | } |
| 334 | |
| 335 | // Add in the IFFT'd result |
| 336 | fft.ifft(spectrum[c], timeBuffer); |
| 337 | for (int wi = 0; wi < _windowSize; ++wi) { |
| 338 | channel[wi] += timeBuffer[wi]; |
| 339 | } |
| 340 | } |
| 341 | validUntilIndex += _interval; |
| 342 | } |
| 343 | } |
| 344 | /// The same as above, assuming index 0 |
| 345 | template<class AnalysisFn> |
| 346 | void ensureValid(AnalysisFn fn) { |
| 347 | return ensureValid(0, fn); |
| 348 | } |
| 349 | /// Returns the next invalid index (a.k.a. the index of the next block) |
| 350 | int nextInvalid() const { |
| 351 | return validUntilIndex + 1; |
| 352 | } |
| 353 | |
| 354 | /** Analyse a multi-channel input, for any type where `data[channel][index]` returns samples |
| 355 | |
| 356 | Results can be read/edited using `.spectrum`. */ |
| 357 | template<class Data> |
| 358 | void analyse(Data &&data) { |
| 359 | for (int c = 0; c < channels; ++c) { |
| 360 | fft.fft(data[c], spectrum[c]); |
| 361 | } |
| 362 | } |
| 363 | template<class Data> |
| 364 | void analyse(int c, Data &&data) { |
| 365 | fft.fft(data, spectrum[c]); |
| 366 | } |
| 367 | /// Analyse without windowing or zero-rotation |
| 368 | template<class Data> |
| 369 | void analyseRaw(Data &&data) { |
| 370 | for (int c = 0; c < channels; ++c) { |
| 371 | fft.fftRaw(data[c], spectrum[c]); |
| 372 | } |
| 373 | } |
| 374 | template<class Data> |
| 375 | void analyseRaw(int c, Data &&data) { |
| 376 | fft.fftRaw(data, spectrum[c]); |
| 377 | } |
| 378 | |
| 379 | int bands() const { |
| 380 | return _fftSize/2; |
| 381 | } |
| 382 | |
| 383 | /** Internal latency (between the block-index requested in `.ensureValid()` and its position in the output) |
| 384 | |
| 385 | Currently unused, but it's in here to allow for a future implementation which spreads the FFT calculations out across each interval.*/ |
| 386 | int latency() { |
| 387 | return 0; |
| 388 | } |
| 389 | |
| 390 | // @name Shift the underlying buffer (moving the "valid" index accordingly) |
| 391 | // @{ |
| 392 | STFT & operator ++() { |
| 393 | Super::operator ++(); |
| 394 | validUntilIndex--; |
| 395 | return *this; |
| 396 | } |
| 397 | STFT & operator +=(int i) { |
| 398 | Super::operator +=(i); |
| 399 | validUntilIndex -= i; |
| 400 | return *this; |
| 401 | } |
| 402 | STFT & operator --() { |
| 403 | Super::operator --(); |
| 404 | validUntilIndex++; |
| 405 | return *this; |
| 406 | } |
| 407 | STFT & operator -=(int i) { |
| 408 | Super::operator -=(i); |
| 409 | validUntilIndex += i; |
| 410 | return *this; |
| 411 | } |
| 412 | // @} |
| 413 | |
| 414 | typename Super::MutableView operator ++(int postIncrement) { |
| 415 | auto result = Super::operator ++(postIncrement); |
| 416 | validUntilIndex--; |
| 417 | return result; |
| 418 | } |
| 419 | typename Super::MutableView operator --(int postIncrement) { |
| 420 | auto result = Super::operator --(postIncrement); |
| 421 | validUntilIndex++; |
| 422 | return result; |
| 423 | } |
| 424 | }; |
| 425 | |
| 426 | /** STFT processing, with input/output. |
| 427 | Before calling `.ensureValid(index)`, you should make sure the input is filled up to `index`. |
| 428 | */ |
| 429 | template<typename Sample> |
| 430 | class ProcessSTFT : public STFT<Sample> { |
| 431 | using Super = STFT<Sample>; |
| 432 | public: |
| 433 | signalsmith::delay::MultiBuffer<Sample> input; |
| 434 | |
| 435 | ProcessSTFT(int inChannels, int outChannels, int windowSize, int interval, int historyLength=0) { |
| 436 | resize(inChannels, outChannels, windowSize, interval, historyLength); |
| 437 | } |
| 438 | |
| 439 | /** Alter the spectrum, using input up to this point, for the output block starting from this point. |
| 440 | Sub-classes should replace this with whatever processing is desired. */ |
| 441 | virtual void processSpectrum(int /*blockIndex*/) {} |
| 442 | |
| 443 | /// Sets the input/output channels, FFT size and interval. |
| 444 | void resize(int inChannels, int outChannels, int windowSize, int interval, int historyLength=0) { |
| 445 | Super::resize(outChannels, windowSize, interval, historyLength); |
| 446 | input.resize(inChannels, windowSize + interval + historyLength); |
| 447 | } |
| 448 | void reset(Sample value=Sample()) { |
| 449 | Super::reset(value); |
| 450 | input.reset(value); |
| 451 | } |
| 452 | |
| 453 | /// Internal latency, including buffering samples for analysis. |
| 454 | int latency() { |
| 455 | return Super::latency() + (this->windowSize() - 1); |
| 456 | } |
| 457 | |
| 458 | void ensureValid(int i=0) { |
| 459 | Super::ensureValid(i, [&](int blockIndex) { |
| 460 | this->analyse(input.view(blockIndex - this->windowSize() + 1)); |
| 461 | this->processSpectrum(blockIndex); |
| 462 | }); |
| 463 | } |
| 464 | |
| 465 | // @name Shift the output, input, and valid index. |
| 466 | // @{ |
| 467 | ProcessSTFT & operator ++() { |
| 468 | Super::operator ++(); |
| 469 | ++input; |
| 470 | return *this; |
| 471 | } |
| 472 | ProcessSTFT & operator +=(int i) { |
| 473 | Super::operator +=(i); |
| 474 | input += i; |
| 475 | return *this; |
| 476 | } |
| 477 | ProcessSTFT & operator --() { |
| 478 | Super::operator --(); |
| 479 | --input; |
| 480 | return *this; |
| 481 | } |
| 482 | ProcessSTFT & operator -=(int i) { |
| 483 | Super::operator -=(i); |
| 484 | input -= i; |
| 485 | return *this; |
| 486 | } |
| 487 | // @} |
| 488 | }; |
| 489 | |
| 490 | /** @} */ |
| 491 | }} // signalsmith::spectral:: |
| 492 | #endif // include guard |
| 493 | |