Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_fft_generic_int32.cpp
1/*
2 * Copyright 2015 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/* license of Kiss FFT */
29/*
30Copyright (c) 2003-2010, Mark Borgerding
31
32All rights reserved.
33
34Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35
36 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39
40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41*/
42
43/*
44 * NE10 Library : dsp/NE10_fft_generic_int32.cpp
45 */
46
47#include "NE10_types.h"
48#include "NE10_macros.h"
49#include "NE10_fft.h"
50#include "NE10_fft_generic_int32.h"
51
68template<int RADIX, bool is_first_stage, bool is_inverse, bool is_scaled>
69inline void ne10_radix_butterfly_int32_c (ne10_fft_cpx_int32_t *Fout,
70 const ne10_fft_cpx_int32_t *Fin,
71 const ne10_fft_cpx_int32_t *twiddles,
72 const ne10_int32_t fstride,
73 const ne10_int32_t out_step,
74 const ne10_int32_t nfft)
75{
76 const ne10_int32_t in_step = nfft / RADIX;
77 ne10_int32_t f_count;
78 ne10_int32_t m_count;
79
80 for (f_count = fstride; f_count > 0; f_count--)
81 {
82 for (m_count = out_step; m_count > 0; m_count--)
83 {
84 ne10_fft_cpx_int32_t scratch_in[RADIX];
85 ne10_fft_cpx_int32_t scratch_out[RADIX];
86
87 // Load from input buffer.
88 NE10_LOAD_BY_STEP <RADIX> (scratch_in, Fin, in_step);
89
90 if (is_inverse)
91 {
92 // Conjugate all elements in scratch_in.
93 NE10_CONJ<RADIX> (scratch_in);
94 }
95
96 if (is_scaled)
97 {
98 // All elements in scratch_in are divided by radix of this stage.
99 NE10_SCALED<RADIX> (scratch_in, RADIX);
100 }
101
102 if (!is_first_stage)
103 {
104 // Multiply twiddles for all stages but the first one.
105 ne10_fft_cpx_int32_t scratch_tw[RADIX - 1];
106 ne10_fft_cpx_int32_t scratch[RADIX];
107
108 // Load twiddles from twiddles buffer.
109 NE10_LOAD_BY_STEP<RADIX - 1> (scratch_tw, twiddles, out_step);
110
111 FFT_MUL_TW<RADIX> (scratch, scratch_in, scratch_tw);
112
113 // Copy from temp buff scratch to scratch_in.
114 NE10_LOAD_BY_STEP<RADIX> (scratch_in, scratch, 1);
115 }
116
117 // Radix -2, -3, -4 or -5 butterfly
118 // From scratch_in to scratch_out.
119 FFT_FCU<RADIX> (scratch_out, scratch_in);
120
121 if (is_inverse)
122 {
123 // Conjugate all elements in scratch_out.
124 NE10_CONJ<RADIX> (scratch_out);
125 }
126
127 // Store to output buffer.
128 NE10_STORE_BY_STEP<RADIX> (Fout, scratch_out, out_step);
129
130 // Update input, output and twiddles pointers.
131 Fin++;
132 if (!is_first_stage)
133 {
134 Fout++;
135 twiddles++;
136 }
137 else
138 {
139 Fout += RADIX;
140 }
141 }
142 if (!is_first_stage)
143 {
144 // Roll back twiddles.
145 twiddles -= out_step;
146 // Next output groups.
147 Fout += (RADIX - 1) * out_step;
148 }
149 }
150}
151
164template<bool is_inverse, bool is_scaled>
165static inline void ne10_radix_generic_butterfly_int32_c (ne10_fft_cpx_int32_t *Fout,
166 const ne10_fft_cpx_int32_t *Fin,
167 const ne10_fft_cpx_int32_t *twiddles,
168 const ne10_int32_t radix,
169 const ne10_int32_t in_step,
170 const ne10_int32_t out_step)
171{
172 ne10_int32_t q, q1;
173 ne10_int32_t f_count = in_step;
174
176 ne10_fft_cpx_int32_t *scratch;
177 scratch = (ne10_fft_cpx_int32_t *) NE10_MALLOC (radix *
178 sizeof (ne10_fft_cpx_int32_t));
179
180 for (; f_count > 0; f_count--)
181 {
182 // load
183 for (q1 = 0; q1 < radix; q1++)
184 {
185 scratch[q1] = Fin[in_step * q1];
186 if (is_inverse)
187 {
188 scratch[q1].i = -scratch[q1].i;
189 }
190 if (is_scaled)
191 {
192 NE10_F2I32_FIXDIV (scratch[q1], radix);
193 }
194 } // q1
195
196 // compute Fout[q1 * out_step] from definition
197 for (q1 = 0; q1 < radix; q1++)
198 {
199 ne10_int32_t twidx = 0;
200 Fout[q1 * out_step] = scratch[0];
201 for (q = 1; q < radix; q++)
202 {
203 twidx += 1 * q1;
204 if (twidx >= radix)
205 {
206 twidx -= radix;
207 }
208 NE10_CPX_MUL_F32 (tmp, scratch[q], twiddles[twidx]);
209 NE10_CPX_ADDTO (Fout[q1 * out_step], tmp);
210 } // q
211 if (is_inverse)
212 {
213 Fout[q1 * out_step].i = -Fout[q1 * out_step].i;
214 }
215 } // q1
216
217 Fout += radix;
218 Fin++;
219 }
220
221 NE10_FREE (scratch);
222}
223
234template<bool is_inverse, bool is_scaled>
235inline void ne10_mixed_radix_generic_butterfly_int32_impl_c (ne10_fft_cpx_int32_t *Fout,
236 const ne10_fft_cpx_int32_t *Fin,
237 const ne10_int32_t *factors,
238 const ne10_fft_cpx_int32_t *twiddles,
239 ne10_fft_cpx_int32_t *buffer)
240{
241 ne10_int32_t fstride, mstride, radix;
242 ne10_int32_t stage_count;
243 ne10_int32_t nfft;
244
245 // init fstride, mstride, radix, nfft
246 stage_count = factors[0];
247 fstride = factors[1];
248 mstride = 1;
249 radix = factors[stage_count << 1]; // radix of first stage
250 nfft = fstride * radix;
251
252 if (stage_count % 2 == 0)
253 {
254 ne10_swap_ptr (buffer, Fout);
255 }
256
257 // first stage
258 switch (radix)
259 {
260 case 2:
261 ne10_radix_butterfly_int32_c<2, true, is_inverse, is_scaled> (Fout, Fin,
262 NULL, // Twiddles are not used for first stage.
263 fstride, 1, nfft);
264 break;
265 case 4:
266 ne10_radix_butterfly_int32_c<4, true, is_inverse, is_scaled> (Fout, Fin,
267 NULL, // Same as above.
268 fstride, 1, nfft);
269 break;
270 case 3:
271 ne10_radix_butterfly_int32_c<3, true, is_inverse, is_scaled> (Fout, Fin,
272 NULL, // Same as above.
273 fstride, 1, nfft);
274 break;
275 case 5:
276 ne10_radix_butterfly_int32_c<5, true, is_inverse, is_scaled> (Fout, Fin,
277 NULL, // Same as above.
278 fstride, 1, nfft);
279 break;
280 default:
281 ne10_radix_generic_butterfly_int32_c<is_inverse, is_scaled> (Fout, Fin,
282 twiddles, // Twiddles for butterfly.
283 radix, fstride, 1);
284 break;
285 }
286
287 stage_count--;
288 if (!stage_count) // finish
289 {
290 return;
291 }
292
293 if (radix % 2)
294 {
295 twiddles += radix;
296 }
297
298 // other stages
299 while (stage_count > 0)
300 {
301 ne10_swap_ptr (buffer, Fout);
302 mstride *= radix;
303
304 // update radix
305 radix = factors[stage_count << 1];
306 assert ((radix > 1) && (radix < 6));
307
308 fstride /= radix;
309 switch (radix)
310 {
311 case 2:
312 ne10_radix_butterfly_int32_c<2, false, is_inverse, is_scaled> (Fout,
313 buffer, twiddles, fstride, mstride, nfft);
314 break;
315 case 3:
316 ne10_radix_butterfly_int32_c<3, false, is_inverse, is_scaled> (Fout,
317 buffer, twiddles, fstride, mstride, nfft);
318 break;
319 case 4:
320 ne10_radix_butterfly_int32_c<4, false, is_inverse, is_scaled> (Fout,
321 buffer, twiddles, fstride, mstride, nfft);
322 break;
323 case 5:
324 ne10_radix_butterfly_int32_c<5, false, is_inverse, is_scaled> (Fout,
325 buffer, twiddles, fstride, mstride, nfft);
326 break;
327 } // switch (radix)
328
329 twiddles += mstride * (radix - 1);
330 stage_count--;
331 } // while (stage_count)
332}
333
343void ne10_mixed_radix_generic_butterfly_int32_c (ne10_fft_cpx_int32_t *Fout,
344 const ne10_fft_cpx_int32_t *Fin,
345 const ne10_int32_t *factors,
346 const ne10_fft_cpx_int32_t *twiddles,
347 ne10_fft_cpx_int32_t *buffer,
348 const ne10_int32_t is_scaled)
349{
350 const bool is_inverse = false;
351 if (is_scaled)
352 {
353 const bool is_scaled_flag = true;
354 ne10_mixed_radix_generic_butterfly_int32_impl_c<is_inverse,
355 is_scaled_flag> (Fout, Fin, factors, twiddles, buffer);
356 }
357 else
358 {
359 const bool is_scaled_flag = false;
360 ne10_mixed_radix_generic_butterfly_int32_impl_c<is_inverse,
361 is_scaled_flag> (Fout, Fin, factors, twiddles, buffer);
362 }
363}
364
374void ne10_mixed_radix_generic_butterfly_inverse_int32_c (
376 const ne10_fft_cpx_int32_t *Fin,
377 const ne10_int32_t *factors,
378 const ne10_fft_cpx_int32_t *twiddles,
379 ne10_fft_cpx_int32_t *buffer,
380 const ne10_int32_t is_scaled)
381{
382 const bool is_inverse = true;
383 if (is_scaled)
384 {
385 const bool is_scaled_flag = true;
386 ne10_mixed_radix_generic_butterfly_int32_impl_c<is_inverse,
387 is_scaled_flag> (Fout, Fin, factors, twiddles, buffer);
388 }
389 else
390 {
391 const bool is_scaled_flag = false;
392 ne10_mixed_radix_generic_butterfly_int32_impl_c<is_inverse,
393 is_scaled_flag> (Fout, Fin, factors, twiddles, buffer);
394 }
395}
structure for the 32 bits fixed point FFT function.
Definition NE10_types.h:329