1 | /* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) |
2 | |
3 | Based on original fortran 77 code from FFTPACKv4 from NETLIB, |
4 | authored by Dr Paul Swarztrauber of NCAR, in 1985. |
5 | |
6 | As confirmed by the NCAR fftpack software curators, the following |
7 | FFTPACKv5 license applies to FFTPACKv4 sources. My changes are |
8 | released under the same terms. |
9 | |
10 | FFTPACK license: |
11 | |
12 | http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html |
13 | |
14 | Copyright (c) 2004 the University Corporation for Atmospheric |
15 | Research ("UCAR"). All rights reserved. Developed by NCAR's |
16 | Computational and Information Systems Laboratory, UCAR, |
17 | www.cisl.ucar.edu. |
18 | |
19 | Redistribution and use of the Software in source and binary forms, |
20 | with or without modification, is permitted provided that the |
21 | following conditions are met: |
22 | |
23 | - Neither the names of NCAR's Computational and Information Systems |
24 | Laboratory, the University Corporation for Atmospheric Research, |
25 | nor the names of its sponsors or contributors may be used to |
26 | endorse or promote products derived from this Software without |
27 | specific prior written permission. |
28 | |
29 | - Redistributions of source code must retain the above copyright |
30 | notices, this list of conditions, and the disclaimer below. |
31 | |
32 | - Redistributions in binary form must reproduce the above copyright |
33 | notice, this list of conditions, and the disclaimer below in the |
34 | documentation and/or other materials provided with the |
35 | distribution. |
36 | |
37 | THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
38 | EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF |
39 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
40 | NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT |
41 | HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, |
42 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
43 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
44 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE |
45 | SOFTWARE. |
46 | */ |
47 | |
48 | /* |
49 | PFFFT : a Pretty Fast FFT. |
50 | |
51 | This is basically an adaptation of the single precision fftpack |
52 | (v4) as found on netlib taking advantage of SIMD instruction found |
53 | on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON). |
54 | |
55 | For architectures where no SIMD instruction is available, the code |
56 | falls back to a scalar version. |
57 | |
58 | Restrictions: |
59 | |
60 | - 1D transforms only, with 32-bit single precision. |
61 | |
62 | - supports only transforms for inputs of length N of the form |
63 | N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128, |
64 | 144, 160, etc are all acceptable lengths). Performance is best for |
65 | 128<=N<=8192. |
66 | |
67 | - all (float*) pointers in the functions below are expected to |
68 | have an "simd-compatible" alignment, that is 16 bytes on x86 and |
69 | powerpc CPUs. |
70 | |
71 | You can allocate such buffers with the functions |
72 | pffft_aligned_malloc / pffft_aligned_free (or with stuff like |
73 | posix_memalign..) |
74 | |
75 | */ |
76 | |
77 | #ifndef PFFFT_H |
78 | #define PFFFT_H |
79 | |
80 | #include <stddef.h> // for size_t |
81 | |
82 | #ifdef __cplusplus |
83 | extern "C" { |
84 | #endif |
85 | |
86 | /* opaque struct holding internal stuff (precomputed twiddle factors) |
87 | this struct can be shared by many threads as it contains only |
88 | read-only data. |
89 | */ |
90 | typedef struct PFFFT_Setup PFFFT_Setup; |
91 | |
92 | /* direction of the transform */ |
93 | typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; |
94 | |
95 | /* type of transform */ |
96 | typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; |
97 | |
98 | /* |
99 | prepare for performing transforms of size N -- the returned |
100 | PFFFT_Setup structure is read-only so it can safely be shared by |
101 | multiple concurrent threads. |
102 | */ |
103 | PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform); |
104 | void pffft_destroy_setup(PFFFT_Setup *); |
105 | /* |
106 | Perform a Fourier transform , The z-domain data is stored in the |
107 | most efficient order for transforming it back, or using it for |
108 | convolution. If you need to have its content sorted in the |
109 | "usual" way, that is as an array of interleaved complex numbers, |
110 | either use pffft_transform_ordered , or call pffft_zreorder after |
111 | the forward fft, and before the backward fft. |
112 | |
113 | Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. |
114 | Typically you will want to scale the backward transform by 1/N. |
115 | |
116 | The 'work' pointer should point to an area of N (2*N for complex |
117 | fft) floats, properly aligned. If 'work' is NULL, then stack will |
118 | be used instead (this is probably the best strategy for small |
119 | FFTs, say for N < 16384). |
120 | |
121 | input and output may alias. |
122 | */ |
123 | void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); |
124 | |
125 | /* |
126 | Similar to pffft_transform, but makes sure that the output is |
127 | ordered as expected (interleaved complex numbers). This is |
128 | similar to calling pffft_transform and then pffft_zreorder. |
129 | |
130 | input and output may alias. |
131 | */ |
132 | void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); |
133 | |
134 | /* |
135 | call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., |
136 | PFFFT_FORWARD) if you want to have the frequency components in |
137 | the correct "canonical" order, as interleaved complex numbers. |
138 | |
139 | (for real transforms, both 0-frequency and half frequency |
140 | components, which are real, are assembled in the first entry as |
141 | F(0)+i*F(n/2+1). Note that the original fftpack did place |
142 | F(n/2+1) at the end of the arrays). |
143 | |
144 | input and output should not alias. |
145 | */ |
146 | void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); |
147 | |
148 | /* |
149 | Perform a multiplication of the frequency components of dft_a and |
150 | dft_b and accumulate them into dft_ab. The arrays should have |
151 | been obtained with pffft_transform(.., PFFFT_FORWARD) and should |
152 | *not* have been reordered with pffft_zreorder (otherwise just |
153 | perform the operation yourself as the dft coefs are stored as |
154 | interleaved complex numbers). |
155 | |
156 | the operation performed is: dft_ab += (dft_a * fdt_b)*scaling |
157 | |
158 | The dft_a, dft_b and dft_ab pointers may alias. |
159 | */ |
160 | void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); |
161 | |
162 | /* |
163 | the float buffers must have the correct alignment (16-byte boundary |
164 | on intel and powerpc). This function may be used to obtain such |
165 | correctly aligned buffers. |
166 | */ |
167 | void *pffft_aligned_malloc(size_t nb_bytes); |
168 | void pffft_aligned_free(void *); |
169 | |
170 | /* return 4 or 1 wether support SSE/Altivec instructions was enable when building pffft.c */ |
171 | int pffft_simd_size(void); |
172 | |
173 | #ifdef __cplusplus |
174 | } |
175 | #endif |
176 | |
177 | #endif // PFFFT_H |
178 | |