| 1 | /* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com ) | 
| 2 |  | 
| 3 |    Based on original fortran 77 code from FFTPACKv4 from NETLIB, | 
| 4 |    authored by Dr Paul Swarztrauber of NCAR, in 1985. | 
| 5 |  | 
| 6 |    As confirmed by the NCAR fftpack software curators, the following | 
| 7 |    FFTPACKv5 license applies to FFTPACKv4 sources. My changes are | 
| 8 |    released under the same terms. | 
| 9 |  | 
| 10 |    FFTPACK license: | 
| 11 |  | 
| 12 |    http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html | 
| 13 |  | 
| 14 |    Copyright (c) 2004 the University Corporation for Atmospheric | 
| 15 |    Research ("UCAR"). All rights reserved. Developed by NCAR's | 
| 16 |    Computational and Information Systems Laboratory, UCAR, | 
| 17 |    www.cisl.ucar.edu. | 
| 18 |  | 
| 19 |    Redistribution and use of the Software in source and binary forms, | 
| 20 |    with or without modification, is permitted provided that the | 
| 21 |    following conditions are met: | 
| 22 |  | 
| 23 |    - Neither the names of NCAR's Computational and Information Systems | 
| 24 |    Laboratory, the University Corporation for Atmospheric Research, | 
| 25 |    nor the names of its sponsors or contributors may be used to | 
| 26 |    endorse or promote products derived from this Software without | 
| 27 |    specific prior written permission. | 
| 28 |  | 
| 29 |    - Redistributions of source code must retain the above copyright | 
| 30 |    notices, this list of conditions, and the disclaimer below. | 
| 31 |  | 
| 32 |    - Redistributions in binary form must reproduce the above copyright | 
| 33 |    notice, this list of conditions, and the disclaimer below in the | 
| 34 |    documentation and/or other materials provided with the | 
| 35 |    distribution. | 
| 36 |  | 
| 37 |    THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | 
| 38 |    EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF | 
| 39 |    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | 
| 40 |    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT | 
| 41 |    HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, | 
| 42 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN | 
| 43 |    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | 
| 44 |    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE | 
| 45 |    SOFTWARE. | 
| 46 | */ | 
| 47 |  | 
| 48 | /* | 
| 49 |   PFFFT : a Pretty Fast FFT. | 
| 50 |  | 
| 51 |   This is basically an adaptation of the single precision fftpack | 
| 52 |   (v4) as found on netlib taking advantage of SIMD instruction found | 
| 53 |   on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON). | 
| 54 |  | 
| 55 |   For architectures where no SIMD instruction is available, the code | 
| 56 |   falls back to a scalar version. | 
| 57 |  | 
| 58 |   Restrictions: | 
| 59 |  | 
| 60 |   - 1D transforms only, with 32-bit single precision. | 
| 61 |  | 
| 62 |   - supports only transforms for inputs of length N of the form | 
| 63 |   N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128, | 
| 64 |   144, 160, etc are all acceptable lengths). Performance is best for | 
| 65 |   128<=N<=8192. | 
| 66 |  | 
| 67 |   - all (float*) pointers in the functions below are expected to | 
| 68 |   have an "simd-compatible" alignment, that is 16 bytes on x86 and | 
| 69 |   powerpc CPUs. | 
| 70 |  | 
| 71 |   You can allocate such buffers with the functions | 
| 72 |   pffft_aligned_malloc / pffft_aligned_free (or with stuff like | 
| 73 |   posix_memalign..) | 
| 74 |  | 
| 75 | */ | 
| 76 |  | 
| 77 | #ifndef PFFFT_H | 
| 78 | #define PFFFT_H | 
| 79 |  | 
| 80 | #include <stddef.h> // for size_t | 
| 81 |  | 
| 82 | #ifdef __cplusplus | 
| 83 | extern "C"  { | 
| 84 | #endif | 
| 85 |  | 
| 86 |   /** | 
| 87 |      Opaque struct holding internal stuff (precomputed twiddle factors) | 
| 88 |      this struct can be shared by many threads as it contains only | 
| 89 |      read-only data. | 
| 90 |   */ | 
| 91 |   typedef struct PFFFT_Setup PFFFT_Setup; | 
| 92 |  | 
| 93 |   /** Direction of the transform */ | 
| 94 |   typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; | 
| 95 |  | 
| 96 |   /** Type of transform */ | 
| 97 |   typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; | 
| 98 |  | 
| 99 |   /** | 
| 100 |     Prepare for performing transforms of size N -- the returned | 
| 101 |     PFFFT_Setup structure is read-only so it can safely be shared by | 
| 102 |     multiple concurrent threads. | 
| 103 |  | 
| 104 |     Will return NULL if N is not suitable (too large / no decomposable with simple integer | 
| 105 |     factors..) | 
| 106 |   */ | 
| 107 |   PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform); | 
| 108 |   void pffft_destroy_setup(PFFFT_Setup *); | 
| 109 |   /** | 
| 110 |     Perform a Fourier transform , The z-domain data is stored in the | 
| 111 |     most efficient order for transforming it back, or using it for | 
| 112 |     convolution. If you need to have its content sorted in the | 
| 113 |     "usual" way, that is as an array of interleaved complex numbers, | 
| 114 |     either use pffft_transform_ordered , or call pffft_zreorder after | 
| 115 |     the forward fft, and before the backward fft. | 
| 116 |  | 
| 117 |     Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. | 
| 118 |     Typically you will want to scale the backward transform by 1/N. | 
| 119 |  | 
| 120 |     The 'work' pointer should point to an area of N (2*N for complex | 
| 121 |     fft) floats, properly aligned. If 'work' is NULL, then stack will | 
| 122 |     be used instead (this is probably the best strategy for small | 
| 123 |     FFTs, say for N < 16384). | 
| 124 |  | 
| 125 |     input and output may alias. | 
| 126 |   */ | 
| 127 |   void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); | 
| 128 |  | 
| 129 |   /** | 
| 130 |     Similar to pffft_transform, but makes sure that the output is | 
| 131 |     ordered as expected (interleaved complex numbers).  This is | 
| 132 |     similar to calling pffft_transform and then pffft_zreorder. | 
| 133 |  | 
| 134 |     input and output may alias. | 
| 135 |   */ | 
| 136 |   void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); | 
| 137 |  | 
| 138 |   /** | 
| 139 |     call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., | 
| 140 |     PFFFT_FORWARD) if you want to have the frequency components in | 
| 141 |     the correct "canonical" order, as interleaved complex numbers. | 
| 142 |  | 
| 143 |     (for real transforms, both 0-frequency and half frequency | 
| 144 |     components, which are real, are assembled in the first entry as | 
| 145 |     F(0)+i*F(n/2+1). Note that the original fftpack did place | 
| 146 |     F(n/2+1) at the end of the arrays). | 
| 147 |  | 
| 148 |     input and output should not alias. | 
| 149 |   */ | 
| 150 |   void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); | 
| 151 |  | 
| 152 |   /** | 
| 153 |     Perform a multiplication of the frequency components of dft_a and | 
| 154 |     dft_b and accumulate them into dft_ab. The arrays should have | 
| 155 |     been obtained with pffft_transform(.., PFFFT_FORWARD) and should | 
| 156 |     *not* have been reordered with pffft_zreorder (otherwise just | 
| 157 |     perform the operation yourself as the dft coefs are stored as | 
| 158 |     interleaved complex numbers). | 
| 159 |  | 
| 160 |     the operation performed is: dft_ab += (dft_a * fdt_b)*scaling | 
| 161 |  | 
| 162 |     The dft_a, dft_b and dft_ab pointers may alias. | 
| 163 |   */ | 
| 164 |   void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); | 
| 165 |  | 
| 166 |   /** | 
| 167 |     the float buffers must have the correct alignment (16-byte boundary | 
| 168 |     on intel and powerpc). This function may be used to obtain such | 
| 169 |     correctly aligned buffers. | 
| 170 |   */ | 
| 171 |   void *pffft_aligned_malloc(size_t nb_bytes); | 
| 172 |   void pffft_aligned_free(void *); | 
| 173 |  | 
| 174 |   /** return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */ | 
| 175 |   int pffft_simd_size(void); | 
| 176 |  | 
| 177 | #ifdef __cplusplus | 
| 178 | } | 
| 179 | #endif | 
| 180 |  | 
| 181 | #endif // PFFFT_H | 
| 182 |  |