начало
This commit is contained in:
213
Drivers/CMSIS/DSP/Source/TransformFunctions/CMakeLists.txt
Normal file
213
Drivers/CMSIS/DSP/Source/TransformFunctions/CMakeLists.txt
Normal file
@@ -0,0 +1,213 @@
|
||||
cmake_minimum_required (VERSION 3.14)

project(CMSISDSPTransform)

# Project helper modules; presumably they define configLib() and configDsp()
# used below and are found through CMAKE_MODULE_PATH.
include(configLib)
include(configDsp)

add_library(CMSISDSPTransform STATIC)

configLib(CMSISDSPTransform ${ROOT})
configDsp(CMSISDSPTransform ${ROOT})

include(fft)
fft(CMSISDSPTransform)

if (CONFIGTABLE AND ALLFFT)
  target_compile_definitions(CMSISDSPTransform PUBLIC ARM_ALL_FFT_TABLES)
endif()

# Source selection.
# - Without CONFIGTABLE, or with ALLFFT, every group below is built.
# - With CONFIGTABLE, a group is built only when one of its size options is set.
# - Float16 sources are skipped for ARMAC5 and when DISABLEFLOAT16 is set.
# The same file may be listed under several conditions.

target_sources(CMSISDSPTransform PRIVATE
  arm_bitreversal.c
  arm_bitreversal2.c)

if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
  target_sources(CMSISDSPTransform PRIVATE arm_bitreversal_f16.c)
endif()

# Complex FFT, float32
if (NOT CONFIGTABLE OR ALLFFT OR CFFT_F32_16 OR CFFT_F32_32 OR CFFT_F32_64 OR CFFT_F32_128 OR CFFT_F32_256 OR CFFT_F32_512
    OR CFFT_F32_1024 OR CFFT_F32_2048 OR CFFT_F32_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_cfft_radix2_f32.c
    arm_cfft_radix4_f32.c
    arm_cfft_radix8_f32.c
    arm_cfft_f32.c
    arm_cfft_init_f32.c)
endif()

# Complex FFT, float16
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
  if (NOT CONFIGTABLE OR ALLFFT OR CFFT_F16_16 OR CFFT_F16_32 OR CFFT_F16_64 OR CFFT_F16_128 OR CFFT_F16_256 OR CFFT_F16_512
      OR CFFT_F16_1024 OR CFFT_F16_2048 OR CFFT_F16_4096)
    target_sources(CMSISDSPTransform PRIVATE
      arm_cfft_radix2_f16.c
      arm_cfft_radix4_f16.c
      arm_cfft_f16.c
      arm_cfft_init_f16.c)
  endif()
endif()

# Radix-4 CFFT pieces selected by the RFFT float16 options
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
  if (NOT CONFIGTABLE OR ALLFFT OR RFFT_F16_128 OR RFFT_F16_512 OR RFFT_F16_2048 OR RFFT_F16_8192)
    target_sources(CMSISDSPTransform PRIVATE
      arm_cfft_radix4_init_f16.c
      arm_cfft_radix4_f16.c)
  endif()
endif()

# Complex FFT, float64
if (NOT CONFIGTABLE OR ALLFFT OR CFFT_F64_16 OR CFFT_F64_32 OR CFFT_F64_64 OR CFFT_F64_128 OR CFFT_F64_256 OR CFFT_F64_512
    OR CFFT_F64_1024 OR CFFT_F64_2048 OR CFFT_F64_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_cfft_f64.c
    arm_cfft_init_f64.c)
endif()

# Complex FFT, Q15
if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q15_16 OR CFFT_Q15_32 OR CFFT_Q15_64 OR CFFT_Q15_128 OR CFFT_Q15_256 OR CFFT_Q15_512
    OR CFFT_Q15_1024 OR CFFT_Q15_2048 OR CFFT_Q15_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_cfft_radix2_q15.c
    arm_cfft_radix4_q15.c
    arm_cfft_q15.c
    arm_cfft_init_q15.c)
endif()

# Complex FFT, Q31
if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q31_16 OR CFFT_Q31_32 OR CFFT_Q31_64 OR CFFT_Q31_128 OR CFFT_Q31_256 OR CFFT_Q31_512
    OR CFFT_Q31_1024 OR CFFT_Q31_2048 OR CFFT_Q31_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_cfft_radix2_q31.c
    arm_cfft_radix4_q31.c
    arm_cfft_q31.c
    arm_cfft_init_q31.c)
endif()

# DCT4, float32
if (NOT CONFIGTABLE OR ALLFFT OR DCT4_F32_128 OR DCT4_F32_512 OR DCT4_F32_2048 OR DCT4_F32_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_dct4_f32.c
    arm_dct4_init_f32.c

    arm_rfft_init_f32.c
    arm_rfft_f32.c
    arm_cfft_radix4_init_f32.c
    arm_cfft_radix4_f32.c)
endif()

# DCT4, Q31
if (NOT CONFIGTABLE OR ALLFFT OR DCT4_Q31_128 OR DCT4_Q31_512 OR DCT4_Q31_2048 OR DCT4_Q31_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_dct4_q31.c
    arm_dct4_init_q31.c

    arm_rfft_init_q31.c
    arm_rfft_q31.c
    arm_cfft_q31.c
    arm_cfft_init_q31.c
    arm_cfft_radix4_init_q31.c
    arm_cfft_radix4_q31.c)
endif()

# DCT4, Q15
if (NOT CONFIGTABLE OR ALLFFT OR DCT4_Q15_128 OR DCT4_Q15_512 OR DCT4_Q15_2048 OR DCT4_Q15_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_dct4_init_q15.c
    arm_dct4_q15.c

    arm_rfft_init_q15.c
    arm_rfft_q15.c
    arm_cfft_q15.c
    arm_cfft_init_q15.c
    arm_cfft_radix4_init_q15.c
    arm_cfft_radix4_q15.c)
endif()

# Fast real FFT, float32
if (NOT CONFIGTABLE OR ALLFFT OR RFFT_FAST_F32_32 OR RFFT_FAST_F32_64 OR RFFT_FAST_F32_128
    OR RFFT_FAST_F32_256 OR RFFT_FAST_F32_512 OR RFFT_FAST_F32_1024 OR RFFT_FAST_F32_2048
    OR RFFT_FAST_F32_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_fast_f32.c
    arm_rfft_fast_init_f32.c
    arm_cfft_f32.c
    arm_cfft_init_f32.c
    arm_cfft_radix8_f32.c)
endif()

# Fast real FFT, float64
if (NOT CONFIGTABLE OR ALLFFT OR RFFT_FAST_F64_32 OR RFFT_FAST_F64_64 OR RFFT_FAST_F64_128
    OR RFFT_FAST_F64_256 OR RFFT_FAST_F64_512 OR RFFT_FAST_F64_1024 OR RFFT_FAST_F64_2048
    OR RFFT_FAST_F64_4096)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_fast_f64.c
    arm_rfft_fast_init_f64.c)
endif()

# Fast real FFT, float16 (same ARMAC5 / DISABLEFLOAT16 guard as the other
# float16 groups in this file)
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
  if (NOT CONFIGTABLE OR ALLFFT OR RFFT_FAST_F16_32 OR RFFT_FAST_F16_64 OR RFFT_FAST_F16_128
      OR RFFT_FAST_F16_256 OR RFFT_FAST_F16_512 OR RFFT_FAST_F16_1024 OR RFFT_FAST_F16_2048
      OR RFFT_FAST_F16_4096)
    target_sources(CMSISDSPTransform PRIVATE
      arm_rfft_fast_f16.c
      arm_rfft_fast_init_f16.c
      arm_cfft_f16.c
      arm_cfft_init_f16.c
      arm_cfft_radix8_f16.c)
  endif()
endif()

# Real FFT, float32
if (NOT CONFIGTABLE OR ALLFFT OR RFFT_F32_128 OR RFFT_F32_512 OR RFFT_F32_2048 OR RFFT_F32_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_init_f32.c
    arm_rfft_f32.c
    arm_cfft_radix4_init_f32.c
    arm_cfft_radix4_f32.c)
endif()

# Real FFT, Q15
if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q15_32 OR RFFT_Q15_64 OR RFFT_Q15_128 OR RFFT_Q15_256
    OR RFFT_Q15_512 OR RFFT_Q15_1024 OR RFFT_Q15_2048 OR RFFT_Q15_4096 OR RFFT_Q15_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_init_q15.c
    arm_rfft_q15.c
    arm_cfft_q15.c
    arm_cfft_init_q15.c
    arm_cfft_radix4_q15.c)
endif()

# Real FFT, Q31
if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q31_32 OR RFFT_Q31_64 OR RFFT_Q31_128 OR RFFT_Q31_256
    OR RFFT_Q31_512 OR RFFT_Q31_1024 OR RFFT_Q31_2048 OR RFFT_Q31_4096 OR RFFT_Q31_8192)
  target_sources(CMSISDSPTransform PRIVATE
    arm_rfft_init_q31.c
    arm_rfft_q31.c
    arm_cfft_q31.c
    arm_cfft_init_q31.c
    arm_cfft_radix4_q31.c)
endif()

# Radix-2 / radix-4 init functions, fixed point
if (WRAPPER OR ARM_CFFT_RADIX2_Q15)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_q15.c)
endif()

if (NOT CONFIGTABLE OR ALLFFT OR ARM_CFFT_RADIX4_Q15)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q15.c)
endif()

if (WRAPPER OR ARM_CFFT_RADIX2_Q31)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_q31.c)
endif()

if (NOT CONFIGTABLE OR ALLFFT OR ARM_CFFT_RADIX4_Q31)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q31.c)
endif()

# For scipy or wrappers or benchmarks
if (WRAPPER)
  target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_f32.c)
  if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
    target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_f16.c)
  endif()

  target_compile_definitions(CMSISDSPTransform PUBLIC
    ARM_TABLE_BITREV_1024
    ARM_TABLE_TWIDDLECOEF_F32_4096
    ARM_TABLE_TWIDDLECOEF_Q31_4096
    ARM_TABLE_TWIDDLECOEF_Q15_4096)
  if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
    target_compile_definitions(CMSISDSPTransform PUBLIC ARM_TABLE_TWIDDLECOEF_F16_4096)
  endif()
endif()

# MFCC is built unconditionally (float16 variant guarded as above)
target_sources(CMSISDSPTransform PRIVATE
  arm_mfcc_init_f32.c
  arm_mfcc_f32.c

  arm_mfcc_init_q31.c
  arm_mfcc_q31.c

  arm_mfcc_init_q15.c
  arm_mfcc_q15.c)

if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
  target_sources(CMSISDSPTransform PRIVATE
    arm_mfcc_init_f16.c
    arm_mfcc_f16.c)
endif()

### Includes
target_include_directories(CMSISDSPTransform PUBLIC "${DSP}/Include")
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: TransformFunctions.c
|
||||
* Description: Combination of all transform function source files.
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.0.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_bitreversal.c"
|
||||
#include "arm_bitreversal2.c"
|
||||
#include "arm_cfft_f32.c"
|
||||
#include "arm_cfft_f64.c"
|
||||
#include "arm_cfft_q15.c"
|
||||
#include "arm_cfft_q31.c"
|
||||
#include "arm_cfft_init_f32.c"
|
||||
#include "arm_cfft_init_f64.c"
|
||||
#include "arm_cfft_init_q15.c"
|
||||
#include "arm_cfft_init_q31.c"
|
||||
#include "arm_cfft_radix2_f32.c"
|
||||
#include "arm_cfft_radix2_q15.c"
|
||||
#include "arm_cfft_radix2_q31.c"
|
||||
#include "arm_cfft_radix4_f32.c"
|
||||
#include "arm_cfft_radix4_q15.c"
|
||||
#include "arm_cfft_radix4_q31.c"
|
||||
#include "arm_cfft_radix8_f32.c"
|
||||
#include "arm_rfft_fast_f32.c"
|
||||
#include "arm_rfft_fast_f64.c"
|
||||
#include "arm_rfft_fast_init_f32.c"
|
||||
#include "arm_rfft_fast_init_f64.c"
|
||||
|
||||
#include "arm_mfcc_init_f32.c"
|
||||
#include "arm_mfcc_f32.c"
|
||||
|
||||
#include "arm_mfcc_init_q31.c"
|
||||
#include "arm_mfcc_q31.c"
|
||||
|
||||
#include "arm_mfcc_init_q15.c"
|
||||
#include "arm_mfcc_q15.c"
|
||||
|
||||
/* Deprecated */
|
||||
|
||||
#include "arm_dct4_f32.c"
|
||||
#include "arm_dct4_init_f32.c"
|
||||
#include "arm_dct4_init_q15.c"
|
||||
#include "arm_dct4_init_q31.c"
|
||||
#include "arm_dct4_q15.c"
|
||||
#include "arm_dct4_q31.c"
|
||||
|
||||
#include "arm_rfft_f32.c"
|
||||
#include "arm_rfft_q15.c"
|
||||
#include "arm_rfft_q31.c"
|
||||
|
||||
#include "arm_rfft_init_f32.c"
|
||||
#include "arm_rfft_init_q15.c"
|
||||
#include "arm_rfft_init_q31.c"
|
||||
|
||||
#include "arm_cfft_radix4_init_f32.c"
|
||||
#include "arm_cfft_radix4_init_q15.c"
|
||||
#include "arm_cfft_radix4_init_q31.c"
|
||||
|
||||
#include "arm_cfft_radix2_init_f32.c"
|
||||
#include "arm_cfft_radix2_init_q15.c"
|
||||
#include "arm_cfft_radix2_init_q31.c"
|
||||
@@ -0,0 +1,44 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: TransformFunctionsF16.c
|
||||
* Description: Combination of all transform function f16 source files.
|
||||
*
|
||||
* $Date: 20. April 2020
|
||||
* $Revision: V1.0.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_cfft_f16.c"
|
||||
#include "arm_cfft_init_f16.c"
|
||||
#include "arm_cfft_radix2_f16.c"
|
||||
#include "arm_cfft_radix4_f16.c"
|
||||
#include "arm_rfft_fast_init_f16.c"
|
||||
#include "arm_rfft_fast_f16.c"
|
||||
#include "arm_cfft_radix8_f16.c"
|
||||
|
||||
#include "arm_bitreversal_f16.c"
|
||||
|
||||
#include "arm_mfcc_init_f16.c"
|
||||
#include "arm_mfcc_f16.c"
|
||||
|
||||
/* Deprecated */
|
||||
#include "arm_cfft_radix2_init_f16.c"
|
||||
#include "arm_cfft_radix4_init_f16.c"
|
||||
230
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c
Normal file
230
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal.c
Normal file
@@ -0,0 +1,230 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_bitreversal.c
|
||||
* Description: Bitreversal functions
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
|
||||
/**
|
||||
@brief In-place floating-point bit reversal function.
|
||||
@param[in,out] pSrc points to in-place floating-point data buffer
|
||||
@param[in] fftSize length of FFT
|
||||
@param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
|
||||
@param[in] pBitRevTab points to bit reversal table
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_bitreversal_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftSize,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab)
|
||||
{
|
||||
uint16_t fftLenBy2, fftLenBy2p1;
|
||||
uint16_t i, j;
|
||||
float32_t in;
|
||||
|
||||
/* Initializations */
|
||||
j = 0U;
|
||||
fftLenBy2 = fftSize >> 1U;
|
||||
fftLenBy2p1 = (fftSize >> 1U) + 1U;
|
||||
|
||||
/* Bit Reversal Implementation */
|
||||
for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
|
||||
{
|
||||
if (i < j)
|
||||
{
|
||||
/* pSrc[i] <-> pSrc[j]; */
|
||||
in = pSrc[2U * i];
|
||||
pSrc[2U * i] = pSrc[2U * j];
|
||||
pSrc[2U * j] = in;
|
||||
|
||||
/* pSrc[i+1U] <-> pSrc[j+1U] */
|
||||
in = pSrc[(2U * i) + 1U];
|
||||
pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U];
|
||||
pSrc[(2U * j) + 1U] = in;
|
||||
|
||||
/* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
|
||||
in = pSrc[2U * (i + fftLenBy2p1)];
|
||||
pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)];
|
||||
pSrc[2U * (j + fftLenBy2p1)] = in;
|
||||
|
||||
/* pSrc[i+fftLenBy2p1+1U] <-> pSrc[j+fftLenBy2p1+1U] */
|
||||
in = pSrc[(2U * (i + fftLenBy2p1)) + 1U];
|
||||
pSrc[(2U * (i + fftLenBy2p1)) + 1U] =
|
||||
pSrc[(2U * (j + fftLenBy2p1)) + 1U];
|
||||
pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in;
|
||||
|
||||
}
|
||||
|
||||
/* pSrc[i+1U] <-> pSrc[j+1U] */
|
||||
in = pSrc[2U * (i + 1U)];
|
||||
pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)];
|
||||
pSrc[2U * (j + fftLenBy2)] = in;
|
||||
|
||||
/* pSrc[i+2U] <-> pSrc[j+2U] */
|
||||
in = pSrc[(2U * (i + 1U)) + 1U];
|
||||
pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U];
|
||||
pSrc[(2U * (j + fftLenBy2)) + 1U] = in;
|
||||
|
||||
/* Reading the index for the bit reversal */
|
||||
j = *pBitRevTab;
|
||||
|
||||
/* Updating the bit reversal index depending on the fft length */
|
||||
pBitRevTab += bitRevFactor;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief In-place Q31 bit reversal function.
|
||||
@param[in,out] pSrc points to in-place Q31 data buffer.
|
||||
@param[in] fftLen length of FFT.
|
||||
@param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
|
||||
@param[in] pBitRevTab points to bit reversal table
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_bitreversal_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab)
|
||||
{
|
||||
uint32_t fftLenBy2, fftLenBy2p1, i, j;
|
||||
q31_t in;
|
||||
|
||||
/* Initializations */
|
||||
j = 0U;
|
||||
fftLenBy2 = fftLen / 2U;
|
||||
fftLenBy2p1 = (fftLen / 2U) + 1U;
|
||||
|
||||
/* Bit Reversal Implementation */
|
||||
for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
|
||||
{
|
||||
if (i < j)
|
||||
{
|
||||
/* pSrc[i] <-> pSrc[j]; */
|
||||
in = pSrc[2U * i];
|
||||
pSrc[2U * i] = pSrc[2U * j];
|
||||
pSrc[2U * j] = in;
|
||||
|
||||
/* pSrc[i+1U] <-> pSrc[j+1U] */
|
||||
in = pSrc[(2U * i) + 1U];
|
||||
pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U];
|
||||
pSrc[(2U * j) + 1U] = in;
|
||||
|
||||
/* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
|
||||
in = pSrc[2U * (i + fftLenBy2p1)];
|
||||
pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)];
|
||||
pSrc[2U * (j + fftLenBy2p1)] = in;
|
||||
|
||||
/* pSrc[i+fftLenBy2p1+1U] <-> pSrc[j+fftLenBy2p1+1U] */
|
||||
in = pSrc[(2U * (i + fftLenBy2p1)) + 1U];
|
||||
pSrc[(2U * (i + fftLenBy2p1)) + 1U] =
|
||||
pSrc[(2U * (j + fftLenBy2p1)) + 1U];
|
||||
pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in;
|
||||
|
||||
}
|
||||
|
||||
/* pSrc[i+1U] <-> pSrc[j+1U] */
|
||||
in = pSrc[2U * (i + 1U)];
|
||||
pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)];
|
||||
pSrc[2U * (j + fftLenBy2)] = in;
|
||||
|
||||
/* pSrc[i+2U] <-> pSrc[j+2U] */
|
||||
in = pSrc[(2U * (i + 1U)) + 1U];
|
||||
pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U];
|
||||
pSrc[(2U * (j + fftLenBy2)) + 1U] = in;
|
||||
|
||||
/* Reading the index for the bit reversal */
|
||||
j = *pBitRevTab;
|
||||
|
||||
/* Updating the bit reversal index depending on the fft length */
|
||||
pBitRevTab += bitRevFactor;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@brief In-place Q15 bit reversal function.
|
||||
@param[in,out] pSrc16 points to in-place Q15 data buffer
|
||||
@param[in] fftLen length of FFT
|
||||
@param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
|
||||
@param[in] pBitRevTab points to bit reversal table
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_bitreversal_q15(
|
||||
q15_t * pSrc16,
|
||||
uint32_t fftLen,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab)
|
||||
{
|
||||
q31_t *pSrc = (q31_t *) pSrc16;
|
||||
q31_t in;
|
||||
uint32_t fftLenBy2, fftLenBy2p1;
|
||||
uint32_t i, j;
|
||||
|
||||
/* Initializations */
|
||||
j = 0U;
|
||||
fftLenBy2 = fftLen / 2U;
|
||||
fftLenBy2p1 = (fftLen / 2U) + 1U;
|
||||
|
||||
/* Bit Reversal Implementation */
|
||||
for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
|
||||
{
|
||||
if (i < j)
|
||||
{
|
||||
/* pSrc[i] <-> pSrc[j]; */
|
||||
/* pSrc[i+1U] <-> pSrc[j+1U] */
|
||||
in = pSrc[i];
|
||||
pSrc[i] = pSrc[j];
|
||||
pSrc[j] = in;
|
||||
|
||||
/* pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1]; */
|
||||
/* pSrc[i + fftLenBy2p1+1U] <-> pSrc[j + fftLenBy2p1+1U] */
|
||||
in = pSrc[i + fftLenBy2p1];
|
||||
pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1];
|
||||
pSrc[j + fftLenBy2p1] = in;
|
||||
}
|
||||
|
||||
/* pSrc[i+1U] <-> pSrc[j+fftLenBy2]; */
|
||||
/* pSrc[i+2] <-> pSrc[j+fftLenBy2+1U] */
|
||||
in = pSrc[i + 1U];
|
||||
pSrc[i + 1U] = pSrc[j + fftLenBy2];
|
||||
pSrc[j + fftLenBy2] = in;
|
||||
|
||||
/* Reading the index for the bit reversal */
|
||||
j = *pBitRevTab;
|
||||
|
||||
/* Updating the bit reversal index depending on the fft length */
|
||||
pBitRevTab += bitRevFactor;
|
||||
}
|
||||
}
|
||||
216
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.S
Normal file
216
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.S
Normal file
@@ -0,0 +1,216 @@
|
||||
;/* ----------------------------------------------------------------------
|
||||
; * Project: CMSIS DSP Library
|
||||
; * Title: arm_bitreversal2.S
|
||||
; * Description: arm_bitreversal_32 function done in assembly for maximum speed.
|
||||
; * Called after doing an fft to reorder the output.
|
||||
; * The function is loop unrolled by 2. arm_bitreversal_16 as well.
|
||||
; *
|
||||
; * $Date: 18. March 2019
|
||||
; * $Revision: V1.5.2
|
||||
; *
|
||||
; * Target Processor: Cortex-M cores
|
||||
; * -------------------------------------------------------------------- */
|
||||
;/*
|
||||
; * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
; *
|
||||
; * SPDX-License-Identifier: Apache-2.0
|
||||
; *
|
||||
; * Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
; * not use this file except in compliance with the License.
|
||||
; * You may obtain a copy of the License at
|
||||
; *
|
||||
; * www.apache.org/licenses/LICENSE-2.0
|
||||
; *
|
||||
; * Unless required by applicable law or agreed to in writing, software
|
||||
; * distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
; * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
; * See the License for the specific language governing permissions and
|
||||
; * limitations under the License.
|
||||
; */
|
||||
|
||||
#if defined ( __CC_ARM ) /* Keil */
|
||||
#define CODESECT AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
#define LABEL
|
||||
#elif defined ( __IASMARM__ ) /* IAR */
|
||||
#define CODESECT SECTION `.text`:CODE
|
||||
#define PROC
|
||||
#define LABEL
|
||||
#define ENDP
|
||||
#define EXPORT PUBLIC
|
||||
#elif defined ( __CSMC__ ) /* Cosmic */
|
||||
#define CODESECT switch .text
|
||||
#define THUMB
|
||||
#define EXPORT xdef
|
||||
#define PROC :
|
||||
#define LABEL :
|
||||
#define ENDP
|
||||
#define arm_bitreversal_32 _arm_bitreversal_32
|
||||
#elif defined ( __TI_ARM__ ) /* TI ARM */
|
||||
#define THUMB .thumb
|
||||
#define CODESECT .text
|
||||
#define EXPORT .global
|
||||
#define PROC : .asmfunc
|
||||
#define LABEL :
|
||||
#define ENDP .endasmfunc
|
||||
#define END
|
||||
#elif defined ( __GNUC__ ) /* GCC */
|
||||
#define THUMB .thumb
|
||||
#define CODESECT .section .text
|
||||
#define EXPORT .global
|
||||
#define PROC :
|
||||
#define LABEL :
|
||||
#define ENDP
|
||||
#define END
|
||||
|
||||
.syntax unified
|
||||
#endif
|
||||
|
||||
CODESECT
|
||||
THUMB
|
||||
|
||||
;/**
|
||||
; @brief In-place bit reversal function.
|
||||
; @param[in,out] pSrc points to the in-place buffer of unknown 32-bit data type
|
||||
; @param[in] bitRevLen bit reversal table length
|
||||
; @param[in] pBitRevTab points to bit reversal table
|
||||
; @return none
|
||||
; */
|
||||
EXPORT arm_bitreversal_32
|
||||
EXPORT arm_bitreversal_16
|
||||
|
||||
#if defined ( __CC_ARM ) /* Keil */
|
||||
#elif defined ( __IASMARM__ ) /* IAR */
|
||||
#elif defined ( __CSMC__ ) /* Cosmic */
|
||||
#elif defined ( __TI_ARM__ ) /* TI ARM */
|
||||
#elif defined ( __GNUC__ ) /* GCC */
|
||||
.type arm_bitreversal_16, %function
|
||||
.type arm_bitreversal_32, %function
|
||||
#endif
|
||||
|
||||
#if defined (ARM_MATH_CM0_FAMILY)
|
||||
|
||||
;/*
; * arm_bitreversal_32, variant built when ARM_MATH_CM0_FAMILY is defined.
; * Registers on entry, following the AAPCS argument order of the C prototype:
; *   r0 = pSrc, r1 = bitRevLen, r2 = pBitRevTab.
; * r3 is the loop counter, (bitRevLen + 1) >> 1, i.e. one pass per pair of
; * table entries. Each pass loads two 16-bit entries, adds them to r0 as byte
; * offsets and swaps the two 32-bit words stored at each resulting address.
; * NOTE(review): there is no early exit in this variant, so bitRevLen == 0
; * enters the loop with r3 == 0 and the SUBS/BNE pair wraps around;
; * presumably callers never pass an empty table - confirm.
; */
arm_bitreversal_32 PROC
    ADDS     r3,r1,#1
    PUSH     {r4-r6}
    ADDS     r1,r2,#0
    LSRS     r3,r3,#1
arm_bitreversal_32_0 LABEL
    LDRH     r2,[r1,#2]
    LDRH     r6,[r1,#0]
    ADD      r2,r0,r2
    ADD      r6,r0,r6
    LDR      r5,[r2,#0]
    LDR      r4,[r6,#0]
    STR      r5,[r6,#0]
    STR      r4,[r2,#0]
    LDR      r5,[r2,#4]
    LDR      r4,[r6,#4]
    STR      r5,[r6,#4]
    STR      r4,[r2,#4]
    ADDS     r1,r1,#4
    SUBS     r3,r3,#1
    BNE      arm_bitreversal_32_0
    POP      {r4-r6}
    BX       lr
    ENDP
|
||||
|
||||
;/*
; * arm_bitreversal_16, variant built when ARM_MATH_CM0_FAMILY is defined.
; * Registers on entry, following the AAPCS argument order of the C prototype:
; *   r0 = pSrc, r1 = bitRevLen, r2 = pBitRevTab.
; * Same loop as the 32-bit routine, except that every table entry is shifted
; * right by one before being added to r0, and a single 32-bit word is swapped
; * per pair of entries.
; * NOTE(review): as in the 32-bit CM0 routine there is no early exit for
; * bitRevLen == 0 - confirm callers never pass an empty table.
; */
arm_bitreversal_16 PROC
    ADDS     r3,r1,#1
    PUSH     {r4-r6}
    ADDS     r1,r2,#0
    LSRS     r3,r3,#1
arm_bitreversal_16_0 LABEL
    LDRH     r2,[r1,#2]
    LDRH     r6,[r1,#0]
    LSRS     r2,r2,#1
    LSRS     r6,r6,#1
    ADD      r2,r0,r2
    ADD      r6,r0,r6
    LDR      r5,[r2,#0]
    LDR      r4,[r6,#0]
    STR      r5,[r6,#0]
    STR      r4,[r2,#0]
    ADDS     r1,r1,#4
    SUBS     r3,r3,#1
    BNE      arm_bitreversal_16_0
    POP      {r4-r6}
    BX       lr
    ENDP
|
||||
|
||||
#else
|
||||
|
||||
;/*
; * arm_bitreversal_32, variant built when ARM_MATH_CM0_FAMILY is not defined.
; * Registers on entry, following the AAPCS argument order of the C prototype:
; *   r0 = pSrc, r1 = bitRevLen, r2 = pBitRevTab.
; * Returns immediately when bitRevLen + 1 <= 1 (unsigned compare).
; * The loop is unrolled by 2: r3 = (bitRevLen + 1) >> 2 passes, each pass
; * reading four 16-bit table entries (at r1-2, r1, r1+2, r1+4 with
; * r1 = pBitRevTab + 2), using them as byte offsets from r0 and swapping two
; * 32-bit words per address pair.
; * NOTE(review): because of the >> 2, the table length is presumably expected
; * to be a multiple of 4 - confirm against the table definitions.
; */
arm_bitreversal_32 PROC
    ADDS     r3,r1,#1
    CMP      r3,#1
    IT       LS
    BXLS     lr
    PUSH     {r4-r9}
    ADDS     r1,r2,#2
    LSRS     r3,r3,#2
arm_bitreversal_32_0 LABEL       ;/* loop unrolled by 2 */
    LDRH     r8,[r1,#4]
    LDRH     r9,[r1,#2]
    LDRH     r2,[r1,#0]
    LDRH     r12,[r1,#-2]
    ADD      r8,r0,r8
    ADD      r9,r0,r9
    ADD      r2,r0,r2
    ADD      r12,r0,r12
    LDR      r7,[r9,#0]
    LDR      r6,[r8,#0]
    LDR      r5,[r2,#0]
    LDR      r4,[r12,#0]
    STR      r6,[r9,#0]
    STR      r7,[r8,#0]
    STR      r5,[r12,#0]
    STR      r4,[r2,#0]
    LDR      r7,[r9,#4]
    LDR      r6,[r8,#4]
    LDR      r5,[r2,#4]
    LDR      r4,[r12,#4]
    STR      r6,[r9,#4]
    STR      r7,[r8,#4]
    STR      r5,[r12,#4]
    STR      r4,[r2,#4]
    ADDS     r1,r1,#8
    SUBS     r3,r3,#1
    BNE      arm_bitreversal_32_0
    POP      {r4-r9}
    BX       lr
    ENDP
|
||||
|
||||
;/*
; * arm_bitreversal_16, variant built when ARM_MATH_CM0_FAMILY is not defined.
; * Registers on entry, following the AAPCS argument order of the C prototype:
; *   r0 = pSrc, r1 = bitRevLen, r2 = pBitRevTab.
; * Returns immediately when bitRevLen + 1 <= 1 (unsigned compare).
; * Same unrolled-by-2 loop as the 32-bit routine, except that every table
; * entry is shifted right by one when added to r0 and a single 32-bit word is
; * swapped per address pair.
; * NOTE(review): because of the >> 2 on the pass count, the table length is
; * presumably expected to be a multiple of 4 - confirm.
; */
arm_bitreversal_16 PROC
    ADDS     r3,r1,#1
    CMP      r3,#1
    IT       LS
    BXLS     lr
    PUSH     {r4-r9}
    ADDS     r1,r2,#2
    LSRS     r3,r3,#2
arm_bitreversal_16_0 LABEL       ;/* loop unrolled by 2 */
    LDRH     r8,[r1,#4]
    LDRH     r9,[r1,#2]
    LDRH     r2,[r1,#0]
    LDRH     r12,[r1,#-2]
    ADD      r8,r0,r8,LSR #1
    ADD      r9,r0,r9,LSR #1
    ADD      r2,r0,r2,LSR #1
    ADD      r12,r0,r12,LSR #1
    LDR      r7,[r9,#0]
    LDR      r6,[r8,#0]
    LDR      r5,[r2,#0]
    LDR      r4,[r12,#0]
    STR      r6,[r9,#0]
    STR      r7,[r8,#0]
    STR      r5,[r12,#0]
    STR      r4,[r2,#0]
    ADDS     r1,r1,#8
    SUBS     r3,r3,#1
    BNE      arm_bitreversal_16_0
    POP      {r4-r9}
    BX       lr
    ENDP
|
||||
|
||||
#endif
|
||||
|
||||
END
|
||||
134
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c
Normal file
134
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_bitreversal2.c
Normal file
@@ -0,0 +1,134 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_bitreversal2.c
|
||||
* Description: Bitreversal functions
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
|
||||
/**
  @brief         In-place 64 bit reversal function.
  @param[in,out] pSrc        points to in-place buffer of unknown 64-bit data type
  @param[in]     bitRevLen   bit reversal table length
  @param[in]     pBitRevTab  points to bit reversal table
  @return        none

  @par           Table format
                   The table is consumed as consecutive pairs of entries. Each
                   entry is shifted right by 2 to obtain an index into pSrc;
                   the value at that index and the one following it are
                   swapped with their counterparts. A trailing unpaired entry
                   (odd bitRevLen) is ignored.
*/

void arm_bitreversal_64(
        uint64_t *pSrc,
  const uint16_t bitRevLen,
  const uint16_t *pBitRevTab)
{
  uint64_t a, b, i, tmp;

  /* Require a complete (a, b) pair so pBitRevTab[i + 1] stays inside the table */
  for (i = 0; (i + 1U) < bitRevLen; i += 2)
  {
     a = pBitRevTab[i    ] >> 2;
     b = pBitRevTab[i + 1] >> 2;

     /* first value of the pair (real part) */
     tmp = pSrc[a];
     pSrc[a] = pSrc[b];
     pSrc[b] = tmp;

     /* second value of the pair (imaginary part) */
     tmp = pSrc[a+1];
     pSrc[a+1] = pSrc[b+1];
     pSrc[b+1] = tmp;
  }
}
|
||||
|
||||
/**
  @brief         In-place 32 bit reversal function.
  @param[in,out] pSrc        points to in-place buffer of unknown 32-bit data type
  @param[in]     bitRevLen   bit reversal table length
  @param[in]     pBitRevTab  points to bit reversal table
  @return        none

  @par           Table format
                   The table is consumed as consecutive pairs of entries. Each
                   entry is shifted right by 2 to obtain an index into pSrc;
                   the value at that index and the one following it are
                   swapped with their counterparts. A trailing unpaired entry
                   (odd bitRevLen) is ignored.
*/

void arm_bitreversal_32(
        uint32_t *pSrc,
  const uint16_t bitRevLen,
  const uint16_t *pBitRevTab)
{
  uint32_t a, b, i, tmp;

  /* Require a complete (a, b) pair so pBitRevTab[i + 1] stays inside the table */
  for (i = 0; (i + 1U) < bitRevLen; i += 2)
  {
     a = pBitRevTab[i    ] >> 2;
     b = pBitRevTab[i + 1] >> 2;

     /* first value of the pair (real part) */
     tmp = pSrc[a];
     pSrc[a] = pSrc[b];
     pSrc[b] = tmp;

     /* second value of the pair (imaginary part) */
     tmp = pSrc[a+1];
     pSrc[a+1] = pSrc[b+1];
     pSrc[b+1] = tmp;
  }
}
|
||||
|
||||
|
||||
/**
  @brief         In-place 16 bit reversal function.
  @param[in,out] pSrc        points to in-place buffer of unknown 16-bit data type
  @param[in]     bitRevLen   bit reversal table length
  @param[in]     pBitRevTab  points to bit reversal table
  @return        none

  @par
                   The table is consumed as consecutive (a, b) entry pairs. Each
                   entry is shifted right by two to obtain an element index; the
                   two consecutive elements starting at index a are then exchanged
                   with the two starting at index b. A trailing entry that has no
                   partner (odd bitRevLen) is ignored.
 */

void arm_bitreversal_16(
        uint16_t *pSrc,
  const uint16_t bitRevLen,
  const uint16_t *pBitRevTab)
{
  /* 32-bit index: a 16-bit one would wrap to 0 after 65534 and never terminate
     when bitRevLen is 65535. */
  uint32_t i;

  /* Stop before an unpaired last entry so pBitRevTab[i + 1] stays inside the table. */
  for (i = 0U; (i + 1U) < bitRevLen; i += 2U)
  {
    const uint32_t a = (uint32_t)pBitRevTab[i     ] >> 2;
    const uint32_t b = (uint32_t)pBitRevTab[i + 1U] >> 2;
    uint16_t tmp;

    /* first element of the pair (real part) */
    tmp = pSrc[a];
    pSrc[a] = pSrc[b];
    pSrc[b] = tmp;

    /* second element of the pair (imaginary part) */
    tmp = pSrc[a + 1U];
    pSrc[a + 1U] = pSrc[b + 1U];
    pSrc[b + 1U] = tmp;
  }
}
|
||||
@@ -0,0 +1,102 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_bitreversal_f16.c
|
||||
* Description: Bitreversal functions
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
|
||||
/*
 * @brief  In-place bit reversal function.
 * @param[in, out] *pSrc        points to the in-place buffer of half-precision floating-point data,
 *                              accessed as interleaved (real, imaginary) pairs.
 * @param[in]      fftSize      length of the FFT.
 * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
 * @param[in]      *pBitRevTab  points to the bit reversal table.
 * @return none.
 *
 * Lengths below 4 return immediately: the loop bound (fftLenBy2 - 2U) would
 * otherwise wrap around in unsigned arithmetic and the loop would not terminate.
 */

#if defined(ARM_FLOAT16_SUPPORTED)

void arm_bitreversal_f16(
        float16_t * pSrc,
        uint16_t fftSize,
        uint16_t bitRevFactor,
  const uint16_t * pBitRevTab)
{
   uint16_t fftLenBy2, fftLenBy2p1;
   uint16_t i, j;
   float16_t in;

   /* Guard against unsigned wrap-around of the loop bound below */
   if (fftSize < 4U)
   {
     return;
   }

   /* Initializations */
   j = 0U;
   fftLenBy2 = fftSize >> 1U;
   fftLenBy2p1 = (fftSize >> 1U) + 1U;

   /* Bit Reversal Implementation */
   for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
   {
     if (i < j)
     {
       /* sample i <-> sample j, real part */
       in = pSrc[2U * i];
       pSrc[2U * i] = pSrc[2U * j];
       pSrc[2U * j] = in;

       /* sample i <-> sample j, imaginary part */
       in = pSrc[(2U * i) + 1U];
       pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U];
       pSrc[(2U * j) + 1U] = in;

       /* sample (i + fftLenBy2p1) <-> sample (j + fftLenBy2p1), real part */
       in = pSrc[2U * (i + fftLenBy2p1)];
       pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)];
       pSrc[2U * (j + fftLenBy2p1)] = in;

       /* sample (i + fftLenBy2p1) <-> sample (j + fftLenBy2p1), imaginary part */
       in = pSrc[(2U * (i + fftLenBy2p1)) + 1U];
       pSrc[(2U * (i + fftLenBy2p1)) + 1U] =
       pSrc[(2U * (j + fftLenBy2p1)) + 1U];
       pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in;

     }

     /* sample (i + 1) <-> sample (j + fftLenBy2), real part (done on every iteration) */
     in = pSrc[2U * (i + 1U)];
     pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)];
     pSrc[2U * (j + fftLenBy2)] = in;

     /* sample (i + 1) <-> sample (j + fftLenBy2), imaginary part */
     in = pSrc[(2U * (i + 1U)) + 1U];
     pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U];
     pSrc[(2U * (j + fftLenBy2)) + 1U] = in;

     /* Reading the index for the bit reversal */
     j = *pBitRevTab;

     /* Updating the bit reversal index depending on the fft length */
     pBitRevTab += bitRevFactor;
   }
}
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
842
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c
Normal file
842
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f16.c
Normal file
@@ -0,0 +1,842 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_f16.c
|
||||
* Description: Combined Radix Decimation in Frequency CFFT Floating point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
#include "arm_common_tables_f16.h"
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
#include "arm_vec_fft.h"
|
||||
#include "arm_mve_tables_f16.h"
|
||||
|
||||
|
||||
static float16_t arm_inverse_fft_length_f16(uint16_t fftLen)
|
||||
{
|
||||
float16_t retValue=1.0;
|
||||
|
||||
switch (fftLen)
|
||||
{
|
||||
|
||||
case 4096U:
|
||||
retValue = (float16_t)0.000244140625f;
|
||||
break;
|
||||
|
||||
case 2048U:
|
||||
retValue = (float16_t)0.00048828125f;
|
||||
break;
|
||||
|
||||
case 1024U:
|
||||
retValue = (float16_t)0.0009765625f;
|
||||
break;
|
||||
|
||||
case 512U:
|
||||
retValue = (float16_t)0.001953125f;
|
||||
break;
|
||||
|
||||
case 256U:
|
||||
retValue = (float16_t)0.00390625f;
|
||||
break;
|
||||
|
||||
case 128U:
|
||||
retValue = (float16_t)0.0078125f;
|
||||
break;
|
||||
|
||||
case 64U:
|
||||
retValue = (float16_t)0.015625f;
|
||||
break;
|
||||
|
||||
case 32U:
|
||||
retValue = (float16_t)0.03125f;
|
||||
break;
|
||||
|
||||
case 16U:
|
||||
retValue = (float16_t)0.0625f;
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return(retValue);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Forward radix-4 butterflies on interleaved complex float16 data, in place
 * (MVE implementation).
 *
 *   S       instance supplying the rearranged twiddle tables and the
 *           per-stage offsets into them
 *   pSrc    complex buffer, processed in place
 *   fftLen  number of complex points handled by this call
 *
 * All stages but the last run in the stage loop; there three of the four
 * butterfly outputs are multiplied by twiddles through the conjugating
 * complex-multiply helper. The last stage uses no twiddles and is done with
 * gather loads / scatter stores.
 */
static void _arm_radix4_butterfly_f16_mve(const arm_cfft_instance_f16 * S,float16_t * pSrc, uint32_t fftLen)
{
    f16x8_t   vecA, vecB, vecC, vecD;
    f16x8_t   sumAC, diffAC, sumBD, diffBD;
    f16x8_t   vecOut, vecRot;
    uint32_t  loopCnt;
    uint32_t  spanLen, quarterLen;
    uint32_t  stageIdx = 0;
    int32_t   groupCnt = 1;
    /*
     * Per-lane byte offsets for the last-stage gather/scatter.
     * NOTE(review): the scale factor is the size of a pointer, which presumably
     * relies on 4-byte pointers on the MVE targets - confirm.
     */
    static const int32_t laneOffsets[4] =
       { ( 0 - 16) * (int32_t)sizeof(float16_t *)
       , ( 4 - 16) * (int32_t)sizeof(float16_t *)
       , ( 8 - 16) * (int32_t)sizeof(float16_t *)
       , (12 - 16) * (int32_t)sizeof(float16_t *)};

    spanLen    = fftLen;
    quarterLen = fftLen >> 2u;

    for (int k = fftLen / 4u; k > 1; k >>= 2)
    {
        /* twiddle tables for this stage */
        float16_t const *stageW1 =
            &S->rearranged_twiddle_stride1[
            S->rearranged_twiddle_tab_stride1_arr[stageIdx]];
        float16_t const *stageW2 =
            &S->rearranged_twiddle_stride2[
            S->rearranged_twiddle_tab_stride2_arr[stageIdx]];
        float16_t const *stageW3 =
            &S->rearranged_twiddle_stride3[
            S->rearranged_twiddle_tab_stride3_arr[stageIdx]];
        float16_t *pGroup = pSrc;

        for (int g = 0; g < groupCnt; g++)
        {
            /* the four quarter-length sections of the current group */
            float16_t *inA = pGroup;
            float16_t *inB = inA + quarterLen * CMPLX_DIM;
            float16_t *inC = inB + quarterLen * CMPLX_DIM;
            float16_t *inD = inC + quarterLen * CMPLX_DIM;
            float16_t const *pW1 = stageW1;
            float16_t const *pW2 = stageW2;
            float16_t const *pW3 = stageW3;
            f16x8_t vecW;

            /* A and C are always loaded one iteration ahead */
            vecA = vldrhq_f16(inA);
            vecC = vldrhq_f16(inC);

            for (loopCnt = quarterLen / 4; loopCnt > 0U; loopCnt--)
            {
                vecB = vldrhq_f16(inB);
                vecD = vldrhq_f16(inD);

                sumAC  = vecA + vecC;  /* sumAC  = vaddq(vecA, vecC) */
                diffAC = vecA - vecC;  /* diffAC = vsubq(vecA, vecC) */

                sumBD  = vecB + vecD;
                diffBD = vecB - vecD;

                /* [ 1 1 1 1 ] * [ A B C D ]' .* 1 */
                vecOut = sumAC + sumBD;
                vst1q(inA, vecOut);
                inA += 8;

                /* [ 1 -1 1 -1 ] * [ A B C D ]' .* W2 */
                vecOut = sumAC - sumBD;
                vecW = vld1q(pW2);
                pW2 += 8;
                vecRot = MVE_CMPLX_MULT_FLT_Conj_AxB(vecW, vecOut);
                vst1q(inB, vecRot);
                inB += 8;

                /* [ 1 -i -1 +i ] * [ A B C D ]' .* W1 */
                vecOut = MVE_CMPLX_SUB_A_ixB(diffAC, diffBD);
                vecW = vld1q(pW1);
                pW1 += 8;
                vecRot = MVE_CMPLX_MULT_FLT_Conj_AxB(vecW, vecOut);
                vst1q(inC, vecRot);
                inC += 8;

                /* [ 1 +i -1 -i ] * [ A B C D ]' .* W3 */
                vecOut = MVE_CMPLX_ADD_A_ixB(diffAC, diffBD);
                vecW = vld1q(pW3);
                pW3 += 8;
                vecRot = MVE_CMPLX_MULT_FLT_Conj_AxB(vecW, vecOut);
                vst1q(inD, vecRot);
                inD += 8;

                /* pre-load for the next iteration */
                vecA = vldrhq_f16(inA);
                vecC = vldrhq_f16(inC);
            }
            pGroup += CMPLX_DIM * spanLen;
        }

        /* next stage: four times as many groups, each a quarter of the size */
        spanLen = quarterLen;
        quarterLen >>= 2u;
        groupCnt = groupCnt << 2;
        stageIdx++;
    }

    /*
     * Last stage (no twiddles)
     */
    uint32x4_t vecAddr = vld1q_u32((uint32_t*)laneOffsets);
    vecAddr = vecAddr + (uint32_t) pSrc;

    /* load scheduling: the write-back gather also advances the lane addresses */
    vecA = (f16x8_t)vldrwq_gather_base_wb_f32(&vecAddr, 64);
    vecC = (f16x8_t)vldrwq_gather_base_f32(vecAddr, 8);

    for (loopCnt = (fftLen >> 4); loopCnt > 0U; loopCnt--)
    {
        sumAC  = vecA + vecC;  /* sumAC  = vaddq(vecA, vecC) */
        diffAC = vecA - vecC;  /* diffAC = vsubq(vecA, vecC) */

        vecB = (f16x8_t)vldrwq_gather_base_f32(vecAddr, 4);
        vecD = (f16x8_t)vldrwq_gather_base_f32(vecAddr, 12);

        sumBD  = vecB + vecD;
        diffBD = vecB - vecD;

        /* pre-load for next iteration; the stores below therefore use -64 offsets */
        vecA = (f16x8_t)vldrwq_gather_base_wb_f32(&vecAddr, 64);
        vecC = (f16x8_t)vldrwq_gather_base_f32(vecAddr, 8);

        vecOut = sumAC + sumBD;
        vstrwq_scatter_base_f32(vecAddr, -64, (f32x4_t)vecOut);

        vecOut = sumAC - sumBD;
        vstrwq_scatter_base_f32(vecAddr, -64 + 4, (f32x4_t)vecOut);

        vecOut = MVE_CMPLX_SUB_A_ixB(diffAC, diffBD);
        vstrwq_scatter_base_f32(vecAddr, -64 + 8, (f32x4_t)vecOut);

        vecOut = MVE_CMPLX_ADD_A_ixB(diffAC, diffBD);
        vstrwq_scatter_base_f32(vecAddr, -64 + 12, (f32x4_t)vecOut);
    }

    /*
     * End of last stage
     */
}
|
||||
|
||||
/*
 * Forward CFFT for the lengths handled as one radix-2 stage followed by
 * radix-4 butterflies (MVE, float16), in place.
 *
 *   S       instance supplying pTwiddle and the radix-4 twiddle tables
 *   pSrc    complex buffer (interleaved), processed in place
 *   fftLen  number of complex points
 *
 * The radix-2 stage combines the lower and upper half of the buffer:
 * the sums go to the lower half, the differences multiplied by the twiddles
 * (conjugating complex multiply) go to the upper half. Each half is then
 * transformed independently by the radix-4 routine.
 */
static void arm_cfft_radix4by2_f16_mve(const arm_cfft_instance_f16 * S, float16_t *pSrc, uint32_t fftLen)
{
    float16_t const *pCoefVec;
    float16_t const *pCoef = S->pTwiddle;
    float16_t *pIn0, *pIn1;
    uint32_t   n2;
    uint32_t   blkCnt;
    f16x8_t    vecIn0, vecIn1, vecSum, vecDiff;
    f16x8_t    vecCmplxTmp, vecTw;

    n2 = fftLen >> 1;
    pIn0 = pSrc;
    pIn1 = pSrc + fftLen;   /* fftLen float16 values further on, i.e. the upper half */
    pCoefVec = pCoef;

    /* every iteration handles 8 float16 values (4 complex samples) of each half */
    blkCnt = n2 / 4;
    while (blkCnt > 0U)
    {
        vecIn0 = *(f16x8_t *) pIn0;
        vecIn1 = *(f16x8_t *) pIn1;
        vecTw = vld1q(pCoefVec);
        pCoefVec += 8;

        vecSum = vaddq(vecIn0, vecIn1);
        vecDiff = vsubq(vecIn0, vecIn1);

        vecCmplxTmp = MVE_CMPLX_MULT_FLT_Conj_AxB(vecTw, vecDiff);

        vst1q(pIn0, vecSum);
        pIn0 += 8;
        vst1q(pIn1, vecCmplxTmp);
        pIn1 += 8;

        blkCnt--;
    }

    /* radix-4 transform of each half */
    _arm_radix4_butterfly_f16_mve(S, pSrc, n2);

    _arm_radix4_butterfly_f16_mve(S, pSrc + fftLen, n2);
}
|
||||
|
||||
/*
 * Inverse radix-4 butterflies on interleaved complex float16 data, in place
 * (MVE implementation).
 *
 *   S            instance supplying the rearranged twiddle tables and the
 *                per-stage offsets into them
 *   pSrc         complex buffer, processed in place
 *   fftLen       number of complex points handled by this call
 *   onebyfftLen  scale factor multiplied into every output of the last stage
 *
 * Same structure as the forward routine, with the non-conjugating complex
 * multiply and with the +i / -i combinations exchanged.
 */
static void _arm_radix4_butterfly_inverse_f16_mve(const arm_cfft_instance_f16 * S,float16_t * pSrc, uint32_t fftLen, float16_t onebyfftLen)
{
    f16x8_t   vecA, vecB, vecC, vecD;
    f16x8_t   sumAC, diffAC, sumBD, diffBD;
    f16x8_t   vecOut, vecRot;
    uint32_t  loopCnt;
    uint32_t  spanLen, quarterLen;
    uint32_t  stageIdx = 0;
    int32_t   groupCnt = 1;
    /*
     * Per-lane byte offsets for the last-stage gather/scatter.
     * NOTE(review): the scale factor is the size of a pointer, which presumably
     * relies on 4-byte pointers on the MVE targets - confirm.
     */
    static const int32_t laneOffsets[4] = {
        ( 0 - 16) * (int32_t)sizeof(q31_t *),
        ( 4 - 16) * (int32_t)sizeof(q31_t *),
        ( 8 - 16) * (int32_t)sizeof(q31_t *),
        (12 - 16) * (int32_t)sizeof(q31_t *)
    };

    spanLen    = fftLen;
    quarterLen = fftLen >> 2u;

    for (int k = fftLen / 4; k > 1; k >>= 2)
    {
        /* twiddle tables for this stage */
        float16_t const *stageW1 =
            &S->rearranged_twiddle_stride1[
            S->rearranged_twiddle_tab_stride1_arr[stageIdx]];
        float16_t const *stageW2 =
            &S->rearranged_twiddle_stride2[
            S->rearranged_twiddle_tab_stride2_arr[stageIdx]];
        float16_t const *stageW3 =
            &S->rearranged_twiddle_stride3[
            S->rearranged_twiddle_tab_stride3_arr[stageIdx]];

        float16_t *pGroup = pSrc;

        for (int g = 0; g < groupCnt; g++)
        {
            /* the four quarter-length sections of the current group */
            float16_t *inA = pGroup;
            float16_t *inB = inA + quarterLen * CMPLX_DIM;
            float16_t *inC = inB + quarterLen * CMPLX_DIM;
            float16_t *inD = inC + quarterLen * CMPLX_DIM;
            float16_t const *pW1 = stageW1;
            float16_t const *pW2 = stageW2;
            float16_t const *pW3 = stageW3;
            f16x8_t vecW;

            /* A and C are always loaded one iteration ahead */
            vecA = vldrhq_f16(inA);
            vecC = vldrhq_f16(inC);

            for (loopCnt = quarterLen / 4; loopCnt > 0U; loopCnt--)
            {
                vecB = vldrhq_f16(inB);
                vecD = vldrhq_f16(inD);

                sumAC  = vecA + vecC;  /* sumAC  = vaddq(vecA, vecC) */
                diffAC = vecA - vecC;  /* diffAC = vsubq(vecA, vecC) */

                sumBD  = vecB + vecD;
                diffBD = vecB - vecD;

                /* [ 1 1 1 1 ] * [ A B C D ]' .* 1 */
                vecOut = sumAC + sumBD;
                vst1q(inA, vecOut);
                inA += 8;

                /* [ 1 -1 1 -1 ] * [ A B C D ]' .* W2 */
                vecOut = sumAC - sumBD;
                vecW = vld1q(pW2);
                pW2 += 8;
                vecRot = MVE_CMPLX_MULT_FLT_AxB(vecW, vecOut);
                vst1q(inB, vecRot);
                inB += 8;

                /* [ 1 +i -1 -i ] * [ A B C D ]' .* W1 */
                vecOut = MVE_CMPLX_ADD_A_ixB(diffAC, diffBD);
                vecW = vld1q(pW1);
                pW1 += 8;
                vecRot = MVE_CMPLX_MULT_FLT_AxB(vecW, vecOut);
                vst1q(inC, vecRot);
                inC += 8;

                /* [ 1 -i -1 +i ] * [ A B C D ]' .* W3 */
                vecOut = MVE_CMPLX_SUB_A_ixB(diffAC, diffBD);
                vecW = vld1q(pW3);
                pW3 += 8;
                vecRot = MVE_CMPLX_MULT_FLT_AxB(vecW, vecOut);
                vst1q(inD, vecRot);
                inD += 8;

                /* pre-load for the next iteration */
                vecA = vldrhq_f16(inA);
                vecC = vldrhq_f16(inC);
            }
            pGroup += CMPLX_DIM * spanLen;
        }

        /* next stage: four times as many groups, each a quarter of the size */
        spanLen = quarterLen;
        quarterLen >>= 2u;
        groupCnt = groupCnt << 2;
        stageIdx++;
    }

    /*
     * Last stage (no twiddles, outputs scaled by onebyfftLen)
     */
    uint32x4_t vecAddr = vld1q_u32((uint32_t*)laneOffsets);
    vecAddr = vecAddr + (uint32_t) pSrc;

    /* load scheduling: the write-back gather also advances the lane addresses */
    vecA = (f16x8_t)vldrwq_gather_base_wb_f32(&vecAddr, 64);
    vecC = (f16x8_t)vldrwq_gather_base_f32(vecAddr, 8);

    for (loopCnt = (fftLen >> 4); loopCnt > 0U; loopCnt--)
    {
        sumAC  = vecA + vecC;  /* sumAC  = vaddq(vecA, vecC) */
        diffAC = vecA - vecC;  /* diffAC = vsubq(vecA, vecC) */

        vecB = (f16x8_t)vldrwq_gather_base_f32(vecAddr, 4);
        vecD = (f16x8_t)vldrwq_gather_base_f32(vecAddr, 12);

        sumBD  = vecB + vecD;
        diffBD = vecB - vecD;

        /* pre-load for next iteration; the stores below therefore use -64 offsets */
        vecA = (f16x8_t)vldrwq_gather_base_wb_f32(&vecAddr, 64);
        vecC = (f16x8_t)vldrwq_gather_base_f32(vecAddr, 8);

        vecOut = sumAC + sumBD;
        vecOut = vecOut * onebyfftLen;
        vstrwq_scatter_base_f32(vecAddr, -64, (f32x4_t)vecOut);

        vecOut = sumAC - sumBD;
        vecOut = vecOut * onebyfftLen;
        vstrwq_scatter_base_f32(vecAddr, -64 + 4, (f32x4_t)vecOut);

        vecOut = MVE_CMPLX_ADD_A_ixB(diffAC, diffBD);
        vecOut = vecOut * onebyfftLen;
        vstrwq_scatter_base_f32(vecAddr, -64 + 8, (f32x4_t)vecOut);

        vecOut = MVE_CMPLX_SUB_A_ixB(diffAC, diffBD);
        vecOut = vecOut * onebyfftLen;
        vstrwq_scatter_base_f32(vecAddr, -64 + 12, (f32x4_t)vecOut);
    }

    /*
     * End of last stage
     */
}
|
||||
|
||||
/*
 * Inverse CFFT for the lengths handled as one radix-2 stage followed by
 * radix-4 butterflies (MVE, float16), in place.
 *
 *   S       instance supplying pTwiddle and the radix-4 twiddle tables
 *   pSrc    complex buffer (interleaved), processed in place
 *   fftLen  number of complex points
 *
 * The 1/fftLen factor of the full transform is looked up here and handed to
 * both half-length radix-4 calls, which apply it in their last stage.
 */
static void arm_cfft_radix4by2_inverse_f16_mve(const arm_cfft_instance_f16 * S,float16_t *pSrc, uint32_t fftLen)
{
    const uint32_t   halfLen = fftLen >> 1;
    const float16_t  onebyfftLen = arm_inverse_fft_length_f16(fftLen);
    float16_t const *pTw = S->pTwiddle;
    float16_t       *pLower = pSrc;
    float16_t       *pUpper = pSrc + fftLen;  /* fftLen float16 values further on, i.e. the upper half */
    uint32_t         loopCnt;
    f16x8_t          vecLo, vecHi, vecSum, vecDiff;
    f16x8_t          vecProd, vecTw;

    /* radix-2 stage: 8 float16 values (4 complex samples) of each half per pass */
    for (loopCnt = halfLen / 4; loopCnt > 0U; loopCnt--)
    {
        vecLo = *(f16x8_t *) pLower;
        vecHi = *(f16x8_t *) pUpper;
        vecTw = vld1q(pTw);
        pTw += 8;

        vecSum = vaddq(vecLo, vecHi);
        vecDiff = vsubq(vecLo, vecHi);

        vecProd = MVE_CMPLX_MULT_FLT_AxB(vecTw, vecDiff);

        vst1q(pLower, vecSum);
        pLower += 8;
        vst1q(pUpper, vecProd);
        pUpper += 8;
    }

    /* radix-4 transform of each half */
    _arm_radix4_butterfly_inverse_f16_mve(S, pSrc, halfLen, onebyfftLen);

    _arm_radix4_butterfly_inverse_f16_mve(S, pSrc + fftLen, halfLen, onebyfftLen);
}
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the floating-point complex FFT.
|
||||
@param[in] S points to an instance of the floating-point CFFT structure
|
||||
@param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return none
|
||||
*/
|
||||
|
||||
|
||||
void arm_cfft_f16(
|
||||
const arm_cfft_instance_f16 * S,
|
||||
float16_t * pSrc,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
uint32_t fftLen = S->fftLen;
|
||||
|
||||
if (ifftFlag == 1U) {
|
||||
|
||||
switch (fftLen) {
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
_arm_radix4_butterfly_inverse_f16_mve(S, pSrc, fftLen, arm_inverse_fft_length_f16(S->fftLen));
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_inverse_f16_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (fftLen) {
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
_arm_radix4_butterfly_f16_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_f16_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (bitReverseFlag)
|
||||
{
|
||||
|
||||
arm_bitreversal_16_inpl_mve((uint16_t*)pSrc, S->bitRevLength, S->pBitRevTable);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
extern void arm_bitreversal_16(
|
||||
uint16_t * pSrc,
|
||||
const uint16_t bitRevLen,
|
||||
const uint16_t * pBitRevTable);
|
||||
|
||||
|
||||
extern void arm_cfft_radix4by2_f16(
|
||||
float16_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float16_t * pCoef);
|
||||
|
||||
extern void arm_radix4_butterfly_f16(
|
||||
float16_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup ComplexFFT Complex FFT Functions
|
||||
|
||||
@par
|
||||
The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
|
||||
Discrete Fourier Transform (DFT). The FFT can be orders of magnitude faster
|
||||
than the DFT, especially for long lengths.
|
||||
The algorithms described in this section
|
||||
operate on complex data. A separate set of functions is devoted to handling
|
||||
of real sequences.
|
||||
@par
|
||||
There are separate algorithms for handling floating-point, Q15, and Q31 data
|
||||
types. The algorithms available for each data type are described next.
|
||||
@par
|
||||
The FFT functions operate in-place. That is, the array holding the input data
|
||||
will also be used to hold the corresponding result. The input data is complex
|
||||
and contains <code>2*fftLen</code> interleaved values as shown below.
|
||||
<pre>{real[0], imag[0], real[1], imag[1], ...} </pre>
|
||||
The FFT result will be contained in the same array and the frequency domain
|
||||
values will have the same interleaving.
|
||||
|
||||
@par Floating-point
|
||||
The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8
|
||||
stages are performed along with a single radix-2 or radix-4 stage, as needed.
|
||||
The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
|
||||
a different twiddle factor table.
|
||||
@par
|
||||
The function uses the standard FFT definition and output values may grow by a
|
||||
factor of <code>fftLen</code> when computing the forward transform. The
|
||||
inverse transform includes a scale of <code>1/fftLen</code> as part of the
|
||||
calculation and this matches the textbook definition of the inverse FFT.
|
||||
@par
|
||||
For the MVE version, the new arm_cfft_init_f32 initialization function is
|
||||
<b>mandatory</b>. <b>Compilation flags are available to include only the required tables for the
|
||||
needed FFTs.</b> Other FFT versions can continue to be initialized as
|
||||
explained below.
|
||||
@par
|
||||
For non-MVE versions, pre-initialized data structures containing twiddle factors
|
||||
and bit reversal tables are provided and defined in <code>arm_const_structs.h</code>. Include
|
||||
this header in your function and then pass one of the constant structures as
|
||||
an argument to arm_cfft_f32. For example:
|
||||
@par
<code>arm_cfft_f32(&arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
|
||||
@par
|
||||
computes a 64-point inverse complex FFT including bit reversal.
|
||||
The data structures are treated as constant data and not modified during the
|
||||
calculation. The same data structure can be reused for multiple transforms
|
||||
including mixing forward and inverse transforms.
|
||||
@par
|
||||
Earlier releases of the library provided separate radix-2 and radix-4
|
||||
algorithms that operated on floating-point data. These functions are still
|
||||
provided but are deprecated. The older functions are slower and less general
|
||||
than the new functions.
|
||||
@par
|
||||
An example of initialization of the constants for the arm_cfft_f32 function follows:
|
||||
@code
|
||||
const static arm_cfft_instance_f32 *S;
|
||||
...
|
||||
switch (length) {
|
||||
case 16:
|
||||
S = &arm_cfft_sR_f32_len16;
|
||||
break;
|
||||
case 32:
|
||||
S = &arm_cfft_sR_f32_len32;
|
||||
break;
|
||||
case 64:
|
||||
S = &arm_cfft_sR_f32_len64;
|
||||
break;
|
||||
case 128:
|
||||
S = &arm_cfft_sR_f32_len128;
|
||||
break;
|
||||
case 256:
|
||||
S = &arm_cfft_sR_f32_len256;
|
||||
break;
|
||||
case 512:
|
||||
S = &arm_cfft_sR_f32_len512;
|
||||
break;
|
||||
case 1024:
|
||||
S = &arm_cfft_sR_f32_len1024;
|
||||
break;
|
||||
case 2048:
|
||||
S = &arm_cfft_sR_f32_len2048;
|
||||
break;
|
||||
case 4096:
|
||||
S = &arm_cfft_sR_f32_len4096;
|
||||
break;
|
||||
}
|
||||
@endcode
|
||||
@par
|
||||
The new arm_cfft_init_f32 can also be used.
|
||||
@par Q15 and Q31
|
||||
The fixed-point complex FFT uses a mixed-radix algorithm. Multiple radix-4
|
||||
stages are performed along with a single radix-2 stage, as needed.
|
||||
The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
|
||||
a different twiddle factor table.
|
||||
@par
|
||||
The function uses the standard FFT definition and output values may grow by a
|
||||
factor of <code>fftLen</code> when computing the forward transform. The
|
||||
inverse transform includes a scale of <code>1/fftLen</code> as part of the
|
||||
calculation and this matches the textbook definition of the inverse FFT.
|
||||
@par
|
||||
Pre-initialized data structures containing twiddle factors and bit reversal
|
||||
tables are provided and defined in <code>arm_const_structs.h</code>. Include
|
||||
this header in your function and then pass one of the constant structures as
|
||||
an argument to arm_cfft_q31. For example:
|
||||
@par
<code>arm_cfft_q31(&arm_cfft_sR_q31_len64, pSrc, 1, 1)</code>
|
||||
@par
|
||||
computes a 64-point inverse complex FFT including bit reversal.
|
||||
The data structures are treated as constant data and not modified during the
|
||||
calculation. The same data structure can be reused for multiple transforms
|
||||
including mixing forward and inverse transforms.
|
||||
@par
|
||||
Earlier releases of the library provided separate radix-2 and radix-4
|
||||
algorithms that operated on fixed-point data. These functions are still
|
||||
provided but are deprecated. The older functions are slower and less general
|
||||
than the new functions.
|
||||
@par
|
||||
An example of initialization of the constants for the arm_cfft_q31 function follows:
|
||||
@code
|
||||
const static arm_cfft_instance_q31 *S;
|
||||
...
|
||||
switch (length) {
|
||||
case 16:
|
||||
S = &arm_cfft_sR_q31_len16;
|
||||
break;
|
||||
case 32:
|
||||
S = &arm_cfft_sR_q31_len32;
|
||||
break;
|
||||
case 64:
|
||||
S = &arm_cfft_sR_q31_len64;
|
||||
break;
|
||||
case 128:
|
||||
S = &arm_cfft_sR_q31_len128;
|
||||
break;
|
||||
case 256:
|
||||
S = &arm_cfft_sR_q31_len256;
|
||||
break;
|
||||
case 512:
|
||||
S = &arm_cfft_sR_q31_len512;
|
||||
break;
|
||||
case 1024:
|
||||
S = &arm_cfft_sR_q31_len1024;
|
||||
break;
|
||||
case 2048:
|
||||
S = &arm_cfft_sR_q31_len2048;
|
||||
break;
|
||||
case 4096:
|
||||
S = &arm_cfft_sR_q31_len4096;
|
||||
break;
|
||||
}
|
||||
@endcode
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the floating-point complex FFT.
|
||||
@param[in] S points to an instance of the floating-point CFFT structure
|
||||
@param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_f16(
|
||||
const arm_cfft_instance_f16 * S,
|
||||
float16_t * p1,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
uint32_t L = S->fftLen, l;
|
||||
float16_t invL, * pSrc;
|
||||
|
||||
if (ifftFlag == 1U)
|
||||
{
|
||||
/* Conjugate input data */
|
||||
pSrc = p1 + 1;
|
||||
for(l=0; l<L; l++)
|
||||
{
|
||||
*pSrc = -(_Float16)*pSrc;
|
||||
pSrc += 2;
|
||||
}
|
||||
}
|
||||
|
||||
switch (L)
|
||||
{
|
||||
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
arm_radix4_butterfly_f16 (p1, L, (float16_t*)S->pTwiddle, 1U);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_f16 ( p1, L, (float16_t*)S->pTwiddle);
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
if ( bitReverseFlag )
|
||||
arm_bitreversal_16((uint16_t*)p1, S->bitRevLength,(uint16_t*)S->pBitRevTable);
|
||||
|
||||
if (ifftFlag == 1U)
|
||||
{
|
||||
invL = 1.0f16/(_Float16)L;
|
||||
/* Conjugate and scale output data */
|
||||
pSrc = p1;
|
||||
for(l=0; l<L; l++)
|
||||
{
|
||||
*pSrc++ *= (_Float16)invL ;
|
||||
*pSrc = -(_Float16)(*pSrc) * (_Float16)invL;
|
||||
pSrc++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
1192
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c
Normal file
1192
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f32.c
Normal file
@@ -0,0 +1,1192 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_f32.c
|
||||
* Description: Combined Radix Decimation in Frequency CFFT Floating point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
#include "arm_vec_fft.h"
|
||||
#include "arm_mve_tables.h"
|
||||
|
||||
|
||||
/**
  @brief      Look up the reciprocal of a supported FFT length.
  @param[in]  fftLen  FFT length in complex points
  @return     1/fftLen for the powers of two from 16 to 4096, 1.0 for any other value
 */
static float32_t arm_inverse_fft_length_f32(uint16_t fftLen)
{
    /* Every listed reciprocal is an exact power of two, so the constants are
       exactly representable in single precision. */
    switch (fftLen)
    {
        case 16U:
            return 0.0625f;
        case 32U:
            return 0.03125f;
        case 64U:
            return 0.015625f;
        case 128U:
            return 0.0078125f;
        case 256U:
            return 0.00390625f;
        case 512U:
            return 0.001953125f;
        case 1024U:
            return 0.0009765625f;
        case 2048U:
            return 0.00048828125f;
        case 4096U:
            return 0.000244140625f;
        default:
            /* Unsupported length: no scaling. */
            return 1.0f;
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  @brief         In-place radix-4 forward butterfly stages for the Helium (MVE) CFFT.
  @param[in]     S       CFFT instance; supplies the three rearranged twiddle tables
                         and the per-stage start offsets into them
  @param[in,out] pSrc    interleaved complex buffer {re, im, ...}, overwritten with the result
  @param[in]     fftLen  number of complex points to process (callers in this file
                         pass a power of 4 between 16 and 4096)
  @return        none

  @par
  Every stage except the last uses contiguous vector loads/stores and a twiddle
  multiplication. The last stage has no twiddle multiplication and works through
  gather/scatter accesses. No bit reversal is performed here.
 */
static void _arm_radix4_butterfly_f32_mve(const arm_cfft_instance_f32 * S,float32_t * pSrc, uint32_t fftLen)
{
    f32x4_t vecTmp0, vecTmp1;
    f32x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
    f32x4_t vecA, vecB, vecC, vecD;
    uint32_t  blkCnt;
    uint32_t  n1, n2;
    uint32_t  stage = 0;
    int32_t  iter = 1;
    /*
     * Per-lane byte offsets used by the last stage: floats 0, 1, 8 and 9 of a
     * 16-float group, i.e. {re, im} of complex samples 0 and 4. They are
     * biased by -16 floats (-64 bytes) because each group is entered through
     * a gather that first adds 64 to the base address and writes it back.
     * The offsets are scaled by the size of one float32_t element.
     */
    static const int32_t strides[4] = {
        (0 - 16) * (int32_t)sizeof(float32_t),
        (1 - 16) * (int32_t)sizeof(float32_t),
        (8 - 16) * (int32_t)sizeof(float32_t),
        (9 - 16) * (int32_t)sizeof(float32_t)
    };

    n2 = fftLen;
    n1 = n2;
    n2 >>= 2u;

    /* One pass per stage, leaving the final stage for the code below. */
    for (int k = fftLen / 4u; k > 1; k >>= 2)
    {
        /* Start of this stage's coefficients in each rearranged twiddle table. */
        float32_t const *p_rearranged_twiddle_tab_stride1 =
            &S->rearranged_twiddle_stride1[
            S->rearranged_twiddle_tab_stride1_arr[stage]];
        float32_t const *p_rearranged_twiddle_tab_stride2 =
            &S->rearranged_twiddle_stride2[
            S->rearranged_twiddle_tab_stride2_arr[stage]];
        float32_t const *p_rearranged_twiddle_tab_stride3 =
            &S->rearranged_twiddle_stride3[
            S->rearranged_twiddle_tab_stride3_arr[stage]];

        float32_t * pBase = pSrc;

        /* 'iter' independent sub-blocks of n1 complex points each. */
        for (int i = 0; i < iter; i++)
        {
            /* Four quarters of the sub-block, n2 complex points apart. */
            float32_t    *inA = pBase;
            float32_t    *inB = inA + n2 * CMPLX_DIM;
            float32_t    *inC = inB + n2 * CMPLX_DIM;
            float32_t    *inD = inC + n2 * CMPLX_DIM;
            float32_t const *pW1 = p_rearranged_twiddle_tab_stride1;
            float32_t const *pW2 = p_rearranged_twiddle_tab_stride2;
            float32_t const *pW3 = p_rearranged_twiddle_tab_stride3;
            f32x4_t       vecW;

            /* Each vector holds two complex values. */
            blkCnt = n2 / 2;
            /*
             * load 2 f32 complex pair
             * (A and C are pre-loaded one iteration ahead of their use)
             */
            vecA = vldrwq_f32(inA);
            vecC = vldrwq_f32(inC);
            while (blkCnt > 0U)
            {
                vecB = vldrwq_f32(inB);
                vecD = vldrwq_f32(inD);

                vecSum0 = vecA + vecC;  /* vecSum0 = vaddq(vecA, vecC) */
                vecDiff0 = vecA - vecC; /* vecDiff0 = vsubq(vecA, vecC) */

                vecSum1 = vecB + vecD;
                vecDiff1 = vecB - vecD;
                /*
                 * [ 1 1 1 1 ] * [ A B C D ]' .* 1
                 */
                vecTmp0 = vecSum0 + vecSum1;
                vst1q(inA, vecTmp0);
                inA += 4;

                /*
                 * [ 1 -1 1 -1 ] * [ A B C D ]'
                 */
                vecTmp0 = vecSum0 - vecSum1;
                /*
                 * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
                 */
                vecW = vld1q(pW2);
                pW2 += 4;
                vecTmp1 = MVE_CMPLX_MULT_FLT_Conj_AxB(vecW, vecTmp0);
                vst1q(inB, vecTmp1);
                inB += 4;

                /*
                 * [ 1 -i -1 +i ] * [ A B C D ]'
                 */
                vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1);
                /*
                 * [ 1 -i -1 +i ] * [ A B C D ]'.* W1
                 */
                vecW = vld1q(pW1);
                pW1 += 4;
                vecTmp1 = MVE_CMPLX_MULT_FLT_Conj_AxB(vecW, vecTmp0);
                vst1q(inC, vecTmp1);
                inC += 4;

                /*
                 * [ 1 +i -1 -i ] * [ A B C D ]'
                 */
                vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1);
                /*
                 * [ 1 +i -1 -i ] * [ A B C D ]'.* W3
                 */
                vecW = vld1q(pW3);
                pW3 += 4;
                vecTmp1 = MVE_CMPLX_MULT_FLT_Conj_AxB(vecW, vecTmp0);
                vst1q(inD, vecTmp1);
                inD += 4;

                /* pre-load for next iteration */
                vecA = vldrwq_f32(inA);
                vecC = vldrwq_f32(inC);

                blkCnt--;
            }
            pBase +=  CMPLX_DIM * n1;
        }
        n1 = n2;
        n2 >>= 2u;
        iter = iter << 2;
        stage++;
    }

    /*
     * start of Last stage process
     *
     * Each pass handles 16 consecutive floats, i.e. two groups of four
     * complex samples. Relative to the written-back base address, byte
     * offsets 0 / 8 / 16 / 24 select samples A / B / C / D of both groups.
     */
    uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides);
    vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;

    /* load scheduling */
    vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64);
    vecC = vldrwq_gather_base_f32(vecScGathAddr, 16);

    blkCnt = (fftLen >> 3);
    while (blkCnt > 0U)
    {
        vecSum0 = vecA + vecC;  /* vecSum0 = vaddq(vecA, vecC) */
        vecDiff0 = vecA - vecC; /* vecDiff0 = vsubq(vecA, vecC) */

        vecB = vldrwq_gather_base_f32(vecScGathAddr, 8);
        vecD = vldrwq_gather_base_f32(vecScGathAddr, 24);

        vecSum1 = vecB + vecD;
        vecDiff1 = vecB - vecD;

        /*
         * pre-load for next iteration; the base now points at the next
         * group, so the stores below use negative offsets.
         * NOTE(review): on the final pass this gathers from the 64 bytes that
         * follow the processed block and the loaded values are never used -
         * confirm the buffer placement tolerates that read.
         */
        vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64);
        vecC = vldrwq_gather_base_f32(vecScGathAddr, 16);

        /* [ 1 1 1 1 ] * [ A B C D ]' */
        vecTmp0 = vecSum0 + vecSum1;
        vstrwq_scatter_base_f32(vecScGathAddr, -64, vecTmp0);

        /* [ 1 -1 1 -1 ] * [ A B C D ]' */
        vecTmp0 = vecSum0 - vecSum1;
        vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, vecTmp0);

        /* [ 1 -i -1 +i ] * [ A B C D ]' */
        vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1);
        vstrwq_scatter_base_f32(vecScGathAddr, -64 + 16, vecTmp0);

        /* [ 1 +i -1 -i ] * [ A B C D ]' */
        vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1);
        vstrwq_scatter_base_f32(vecScGathAddr, -64 + 24, vecTmp0);

        blkCnt--;
    }

    /*
     * End of last stage process
     */
}
|
||||
|
||||
/**
  @brief         Forward CFFT for lengths handled as one radix-2 stage followed by
                 two half-length radix-4 transforms (Helium/MVE).
  @param[in]     S       CFFT instance; pTwiddle supplies the radix-2 stage coefficients
  @param[in,out] pSrc    interleaved complex buffer of fftLen points, transformed in place
  @param[in]     fftLen  number of complex points (the caller in this file passes
                         32, 128, 512 or 2048)
  @return        none
 */
static void arm_cfft_radix4by2_f32_mve(const arm_cfft_instance_f32 * S, float32_t *pSrc, uint32_t fftLen)
{
    float32_t const *pCoefVec = S->pTwiddle;
    float32_t       *pIn0 = pSrc;             /* first half of the buffer  */
    float32_t       *pIn1 = pSrc + fftLen;    /* second half (fftLen floats = fftLen/2 complex points) */
    uint32_t         n2 = fftLen >> 1;
    uint32_t         blkCnt;
    f32x4_t          vecIn0, vecIn1, vecSum, vecDiff;
    f32x4_t          vecCmplxTmp, vecTw;

    /* Radix-2 stage: two complex points from each half per iteration. */
    blkCnt = n2 / 2;
    while (blkCnt > 0U)
    {
        /* Use the load intrinsic rather than dereferencing a casted vector
           pointer, matching the loads used by the radix-4 butterflies. */
        vecIn0 = vldrwq_f32(pIn0);
        vecIn1 = vldrwq_f32(pIn1);
        vecTw = vld1q(pCoefVec);
        pCoefVec += 4;

        vecSum = vecIn0 + vecIn1;
        vecDiff = vecIn0 - vecIn1;

        /* Only the difference branch is multiplied by the twiddle factor. */
        vecCmplxTmp = MVE_CMPLX_MULT_FLT_Conj_AxB(vecTw, vecDiff);

        vst1q(pIn0, vecSum);
        pIn0 += 4;
        vst1q(pIn1, vecCmplxTmp);
        pIn1 += 4;

        blkCnt--;
    }

    /* Each half is now an independent transform of n2 points. */
    _arm_radix4_butterfly_f32_mve(S, pSrc, n2);

    _arm_radix4_butterfly_f32_mve(S, pSrc + fftLen, n2);
}
|
||||
|
||||
/**
  @brief         In-place radix-4 inverse butterfly stages for the Helium (MVE) CFFT.
  @param[in]     S            CFFT instance; supplies the three rearranged twiddle tables
                              and the per-stage start offsets into them
  @param[in,out] pSrc         interleaved complex buffer {re, im, ...}, overwritten with the result
  @param[in]     fftLen       number of complex points to process (callers in this file
                              pass a power of 4 between 16 and 4096)
  @param[in]     onebyfftLen  scale factor multiplied into every output of the last stage;
                              callers in this file pass the reciprocal of the overall
                              transform length
  @return        none

  @par
  Same structure as the forward butterfly, with non-conjugated twiddle
  multiplication and the +i / -i rows exchanged. No bit reversal is performed here.
 */
static void _arm_radix4_butterfly_inverse_f32_mve(const arm_cfft_instance_f32 * S,float32_t * pSrc, uint32_t fftLen, float32_t onebyfftLen)
{
    f32x4_t vecTmp0, vecTmp1;
    f32x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
    f32x4_t vecA, vecB, vecC, vecD;
    uint32_t  blkCnt;
    uint32_t  n1, n2;
    uint32_t  stage = 0;
    int32_t  iter = 1;
    /*
     * Per-lane byte offsets used by the last stage: floats 0, 1, 8 and 9 of a
     * 16-float group, i.e. {re, im} of complex samples 0 and 4. They are
     * biased by -16 floats (-64 bytes) because each group is entered through
     * a gather that first adds 64 to the base address and writes it back.
     * The offsets are scaled by the size of one float32_t element.
     */
    static const int32_t strides[4] = {
        (0 - 16) * (int32_t)sizeof(float32_t),
        (1 - 16) * (int32_t)sizeof(float32_t),
        (8 - 16) * (int32_t)sizeof(float32_t),
        (9 - 16) * (int32_t)sizeof(float32_t)
    };

    n2 = fftLen;
    n1 = n2;
    n2 >>= 2u;

    /* One pass per stage, leaving the final stage for the code below. */
    for (int k = fftLen / 4; k > 1; k >>= 2)
    {
        /* Start of this stage's coefficients in each rearranged twiddle table. */
        float32_t const *p_rearranged_twiddle_tab_stride1 =
            &S->rearranged_twiddle_stride1[
            S->rearranged_twiddle_tab_stride1_arr[stage]];
        float32_t const *p_rearranged_twiddle_tab_stride2 =
            &S->rearranged_twiddle_stride2[
            S->rearranged_twiddle_tab_stride2_arr[stage]];
        float32_t const *p_rearranged_twiddle_tab_stride3 =
            &S->rearranged_twiddle_stride3[
            S->rearranged_twiddle_tab_stride3_arr[stage]];

        float32_t * pBase = pSrc;

        /* 'iter' independent sub-blocks of n1 complex points each. */
        for (int i = 0; i < iter; i++)
        {
            /* Four quarters of the sub-block, n2 complex points apart. */
            float32_t    *inA = pBase;
            float32_t    *inB = inA + n2 * CMPLX_DIM;
            float32_t    *inC = inB + n2 * CMPLX_DIM;
            float32_t    *inD = inC + n2 * CMPLX_DIM;
            float32_t const *pW1 = p_rearranged_twiddle_tab_stride1;
            float32_t const *pW2 = p_rearranged_twiddle_tab_stride2;
            float32_t const *pW3 = p_rearranged_twiddle_tab_stride3;
            f32x4_t       vecW;

            /* Each vector holds two complex values. */
            blkCnt = n2 / 2;
            /*
             * load 2 f32 complex pair
             * (A and C are pre-loaded one iteration ahead of their use)
             */
            vecA = vldrwq_f32(inA);
            vecC = vldrwq_f32(inC);
            while (blkCnt > 0U)
            {
                vecB = vldrwq_f32(inB);
                vecD = vldrwq_f32(inD);

                vecSum0 = vecA + vecC;  /* vecSum0 = vaddq(vecA, vecC) */
                vecDiff0 = vecA - vecC; /* vecDiff0 = vsubq(vecA, vecC) */

                vecSum1 = vecB + vecD;
                vecDiff1 = vecB - vecD;
                /*
                 * [ 1 1 1 1 ] * [ A B C D ]' .* 1
                 */
                vecTmp0 = vecSum0 + vecSum1;
                vst1q(inA, vecTmp0);
                inA += 4;
                /*
                 * [ 1 -1 1 -1 ] * [ A B C D ]'
                 */
                vecTmp0 = vecSum0 - vecSum1;
                /*
                 * [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
                 */
                vecW = vld1q(pW2);
                pW2 += 4;
                vecTmp1 = MVE_CMPLX_MULT_FLT_AxB(vecW, vecTmp0);
                vst1q(inB, vecTmp1);
                inB += 4;

                /*
                 * [ 1 +i -1 -i ] * [ A B C D ]'
                 */
                vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1);
                /*
                 * [ 1 +i -1 -i ] * [ A B C D ]'.* W1
                 */
                vecW = vld1q(pW1);
                pW1 += 4;
                vecTmp1 = MVE_CMPLX_MULT_FLT_AxB(vecW, vecTmp0);
                vst1q(inC, vecTmp1);
                inC += 4;

                /*
                 * [ 1 -i -1 +i ] * [ A B C D ]'
                 */
                vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1);
                /*
                 * [ 1 -i -1 +i ] * [ A B C D ]'.* W3
                 */
                vecW = vld1q(pW3);
                pW3 += 4;
                vecTmp1 = MVE_CMPLX_MULT_FLT_AxB(vecW, vecTmp0);
                vst1q(inD, vecTmp1);
                inD += 4;

                /* pre-load for next iteration */
                vecA = vldrwq_f32(inA);
                vecC = vldrwq_f32(inC);

                blkCnt--;
            }
            pBase +=  CMPLX_DIM * n1;
        }
        n1 = n2;
        n2 >>= 2u;
        iter = iter << 2;
        stage++;
    }

    /*
     * start of Last stage process
     *
     * Each pass handles 16 consecutive floats, i.e. two groups of four
     * complex samples. Relative to the written-back base address, byte
     * offsets 0 / 8 / 16 / 24 select samples A / B / C / D of both groups.
     */
    uint32x4_t vecScGathAddr = vld1q_u32 ((uint32_t*)strides);
    vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;

    /*
     * load scheduling
     */
    vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64);
    vecC = vldrwq_gather_base_f32(vecScGathAddr, 16);

    blkCnt = (fftLen >> 3);
    while (blkCnt > 0U)
    {
        vecSum0 = vecA + vecC;  /* vecSum0 = vaddq(vecA, vecC) */
        vecDiff0 = vecA - vecC; /* vecDiff0 = vsubq(vecA, vecC) */

        vecB = vldrwq_gather_base_f32(vecScGathAddr, 8);
        vecD = vldrwq_gather_base_f32(vecScGathAddr, 24);

        vecSum1 = vecB + vecD;
        vecDiff1 = vecB - vecD;

        /*
         * pre-load for next iteration; the base now points at the next
         * group, so the stores below use negative offsets.
         * NOTE(review): on the final pass this gathers from the 64 bytes that
         * follow the processed block and the loaded values are never used -
         * confirm the buffer placement tolerates that read.
         */
        vecA = vldrwq_gather_base_wb_f32(&vecScGathAddr, 64);
        vecC = vldrwq_gather_base_f32(vecScGathAddr, 16);

        /* [ 1 1 1 1 ] * [ A B C D ]', scaled */
        vecTmp0 = vecSum0 + vecSum1;
        vecTmp0 = vecTmp0 * onebyfftLen;
        vstrwq_scatter_base_f32(vecScGathAddr, -64, vecTmp0);

        /* [ 1 -1 1 -1 ] * [ A B C D ]', scaled */
        vecTmp0 = vecSum0 - vecSum1;
        vecTmp0 = vecTmp0 * onebyfftLen;
        vstrwq_scatter_base_f32(vecScGathAddr, -64 + 8, vecTmp0);

        /* [ 1 +i -1 -i ] * [ A B C D ]', scaled */
        vecTmp0 = MVE_CMPLX_ADD_A_ixB(vecDiff0, vecDiff1);
        vecTmp0 = vecTmp0 * onebyfftLen;
        vstrwq_scatter_base_f32(vecScGathAddr, -64 + 16, vecTmp0);

        /* [ 1 -i -1 +i ] * [ A B C D ]', scaled */
        vecTmp0 = MVE_CMPLX_SUB_A_ixB(vecDiff0, vecDiff1);
        vecTmp0 = vecTmp0 * onebyfftLen;
        vstrwq_scatter_base_f32(vecScGathAddr, -64 + 24, vecTmp0);

        blkCnt--;
    }

    /*
     * End of last stage process
     */
}
|
||||
|
||||
/**
  @brief         Inverse CFFT for lengths handled as one radix-2 stage followed by
                 two half-length radix-4 inverse transforms (Helium/MVE).
  @param[in]     S       CFFT instance; pTwiddle supplies the radix-2 stage coefficients
  @param[in,out] pSrc    interleaved complex buffer of fftLen points, transformed in place
  @param[in]     fftLen  number of complex points (the caller in this file passes
                         32, 128, 512 or 2048)
  @return        none
 */
static void arm_cfft_radix4by2_inverse_f32_mve(const arm_cfft_instance_f32 * S,float32_t *pSrc, uint32_t fftLen)
{
    float32_t const *pCoefVec = S->pTwiddle;
    float32_t       *pIn0 = pSrc;             /* first half of the buffer  */
    float32_t       *pIn1 = pSrc + fftLen;    /* second half (fftLen floats = fftLen/2 complex points) */
    uint32_t         n2 = fftLen >> 1;
    /* The scale is for the full length even though the radix-4 stages below
       run on n2 points; they apply it in their last stage. */
    float32_t        onebyfftLen = arm_inverse_fft_length_f32(fftLen);
    uint32_t         blkCnt;
    f32x4_t          vecIn0, vecIn1, vecSum, vecDiff;
    f32x4_t          vecCmplxTmp, vecTw;

    /* Radix-2 stage: two complex points from each half per iteration. */
    blkCnt = n2 / 2;
    while (blkCnt > 0U)
    {
        /* Use the load intrinsic rather than dereferencing a casted vector
           pointer, matching the loads used by the radix-4 butterflies. */
        vecIn0 = vldrwq_f32(pIn0);
        vecIn1 = vldrwq_f32(pIn1);
        vecTw = vld1q(pCoefVec);
        pCoefVec += 4;

        vecSum = vecIn0 + vecIn1;
        vecDiff = vecIn0 - vecIn1;

        /* Only the difference branch is multiplied by the twiddle factor. */
        vecCmplxTmp = MVE_CMPLX_MULT_FLT_AxB(vecTw, vecDiff);

        vst1q(pIn0, vecSum);
        pIn0 += 4;
        vst1q(pIn1, vecCmplxTmp);
        pIn1 += 4;

        blkCnt--;
    }

    /* Each half is now an independent inverse transform of n2 points. */
    _arm_radix4_butterfly_inverse_f32_mve(S, pSrc, n2, onebyfftLen);

    _arm_radix4_butterfly_inverse_f32_mve(S, pSrc + fftLen, n2, onebyfftLen);
}
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the floating-point complex FFT.
|
||||
@param[in] S points to an instance of the floating-point CFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return none
|
||||
*/
|
||||
|
||||
|
||||
void arm_cfft_f32(
|
||||
const arm_cfft_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
uint32_t fftLen = S->fftLen;
|
||||
|
||||
if (ifftFlag == 1U) {
|
||||
|
||||
switch (fftLen) {
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
_arm_radix4_butterfly_inverse_f32_mve(S, pSrc, fftLen, arm_inverse_fft_length_f32(S->fftLen));
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_inverse_f32_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (fftLen) {
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
_arm_radix4_butterfly_f32_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_f32_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (bitReverseFlag)
|
||||
{
|
||||
|
||||
arm_bitreversal_32_inpl_mve((uint32_t*)pSrc, S->bitRevLength, S->pBitRevTable);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
extern void arm_radix8_butterfly_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
extern void arm_bitreversal_32(
|
||||
uint32_t * pSrc,
|
||||
const uint16_t bitRevLen,
|
||||
const uint16_t * pBitRevTable);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup ComplexFFT Complex FFT Functions
|
||||
|
||||
@par
|
||||
The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
|
||||
Discrete Fourier Transform (DFT). The FFT can be orders of magnitude faster
|
||||
than the DFT, especially for long lengths.
|
||||
The algorithms described in this section
|
||||
operate on complex data. A separate set of functions is devoted to handling
|
||||
of real sequences.
|
||||
@par
|
||||
There are separate algorithms for handling floating-point, Q15, and Q31 data
|
||||
types. The algorithms available for each data type are described next.
|
||||
@par
|
||||
The FFT functions operate in-place. That is, the array holding the input data
|
||||
will also be used to hold the corresponding result. The input data is complex
|
||||
and contains <code>2*fftLen</code> interleaved values as shown below.
|
||||
<pre>{real[0], imag[0], real[1], imag[1], ...} </pre>
|
||||
The FFT result will be contained in the same array and the frequency domain
|
||||
values will have the same interleaving.
|
||||
|
||||
@par Floating-point
|
||||
The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8
|
||||
stages are performed along with a single radix-2 or radix-4 stage, as needed.
|
||||
The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
|
||||
a different twiddle factor table.
|
||||
@par
|
||||
The function uses the standard FFT definition and output values may grow by a
|
||||
factor of <code>fftLen</code> when computing the forward transform. The
|
||||
inverse transform includes a scale of <code>1/fftLen</code> as part of the
|
||||
calculation and this matches the textbook definition of the inverse FFT.
|
||||
@par
|
||||
For the MVE version, the new arm_cfft_init_f32 initialization function is
|
||||
<b>mandatory</b>. <b>Compilation flags are available to include only the required tables for the
|
||||
needed FFTs.</b> Other FFT versions can continue to be initialized as
|
||||
explained below.
|
||||
@par
|
||||
For non-MVE versions, pre-initialized data structures containing twiddle factors
|
||||
and bit reversal tables are provided and defined in <code>arm_const_structs.h</code>. Include
|
||||
this header in your function and then pass one of the constant structures as
|
||||
an argument to arm_cfft_f32. For example:
|
||||
@par
|
||||
<code>arm_cfft_f32(&arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
|
||||
@par
|
||||
computes a 64-point inverse complex FFT including bit reversal.
|
||||
The data structures are treated as constant data and not modified during the
|
||||
calculation. The same data structure can be reused for multiple transforms
|
||||
including mixing forward and inverse transforms.
|
||||
@par
|
||||
Earlier releases of the library provided separate radix-2 and radix-4
|
||||
algorithms that operated on floating-point data. These functions are still
|
||||
provided but are deprecated. The older functions are slower and less general
|
||||
than the new functions.
|
||||
@par
|
||||
An example of initialization of the constants for the arm_cfft_f32 function follows:
|
||||
@code
|
||||
const static arm_cfft_instance_f32 *S;
|
||||
...
|
||||
switch (length) {
|
||||
case 16:
|
||||
S = &arm_cfft_sR_f32_len16;
|
||||
break;
|
||||
case 32:
|
||||
S = &arm_cfft_sR_f32_len32;
|
||||
break;
|
||||
case 64:
|
||||
S = &arm_cfft_sR_f32_len64;
|
||||
break;
|
||||
case 128:
|
||||
S = &arm_cfft_sR_f32_len128;
|
||||
break;
|
||||
case 256:
|
||||
S = &arm_cfft_sR_f32_len256;
|
||||
break;
|
||||
case 512:
|
||||
S = &arm_cfft_sR_f32_len512;
|
||||
break;
|
||||
case 1024:
|
||||
S = &arm_cfft_sR_f32_len1024;
|
||||
break;
|
||||
case 2048:
|
||||
S = &arm_cfft_sR_f32_len2048;
|
||||
break;
|
||||
case 4096:
|
||||
S = &arm_cfft_sR_f32_len4096;
|
||||
break;
|
||||
}
|
||||
@endcode
|
||||
@par
|
||||
The new arm_cfft_init_f32 can also be used.
|
||||
@par Q15 and Q31
|
||||
The fixed-point complex FFT uses a mixed-radix algorithm. Multiple radix-4
|
||||
stages are performed along with a single radix-2 stage, as needed.
|
||||
The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
|
||||
a different twiddle factor table.
|
||||
@par
|
||||
The function uses the standard FFT definition and output values may grow by a
|
||||
factor of <code>fftLen</code> when computing the forward transform. The
|
||||
inverse transform includes a scale of <code>1/fftLen</code> as part of the
|
||||
calculation and this matches the textbook definition of the inverse FFT.
|
||||
@par
|
||||
Pre-initialized data structures containing twiddle factors and bit reversal
|
||||
tables are provided and defined in <code>arm_const_structs.h</code>. Include
|
||||
this header in your function and then pass one of the constant structures as
|
||||
an argument to arm_cfft_q31. For example:
|
||||
@par
|
||||
<code>arm_cfft_q31(&arm_cfft_sR_q31_len64, pSrc, 1, 1)</code>
|
||||
@par
|
||||
computes a 64-point inverse complex FFT including bit reversal.
|
||||
The data structures are treated as constant data and not modified during the
|
||||
calculation. The same data structure can be reused for multiple transforms
|
||||
including mixing forward and inverse transforms.
|
||||
@par
|
||||
Earlier releases of the library provided separate radix-2 and radix-4
|
||||
algorithms that operated on fixed-point data. These functions are still
|
||||
provided but are deprecated. The older functions are slower and less general
|
||||
than the new functions.
|
||||
@par
|
||||
An example of initialization of the constants for the arm_cfft_q31 function follows:
|
||||
@code
|
||||
const static arm_cfft_instance_q31 *S;
|
||||
...
|
||||
switch (length) {
|
||||
case 16:
|
||||
S = &arm_cfft_sR_q31_len16;
|
||||
break;
|
||||
case 32:
|
||||
S = &arm_cfft_sR_q31_len32;
|
||||
break;
|
||||
case 64:
|
||||
S = &arm_cfft_sR_q31_len64;
|
||||
break;
|
||||
case 128:
|
||||
S = &arm_cfft_sR_q31_len128;
|
||||
break;
|
||||
case 256:
|
||||
S = &arm_cfft_sR_q31_len256;
|
||||
break;
|
||||
case 512:
|
||||
S = &arm_cfft_sR_q31_len512;
|
||||
break;
|
||||
case 1024:
|
||||
S = &arm_cfft_sR_q31_len1024;
|
||||
break;
|
||||
case 2048:
|
||||
S = &arm_cfft_sR_q31_len2048;
|
||||
break;
|
||||
case 4096:
|
||||
S = &arm_cfft_sR_q31_len4096;
|
||||
break;
|
||||
}
|
||||
@endcode
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Radix-2 split followed by two radix-8 transforms.
 *
 * The 2*fftLen-float buffer is treated as two halves (columns). One radix-2
 * pass writes the sums into the first column and the twiddled differences
 * into the second; each column is then handed to arm_radix8_butterfly_f32
 * with a twiddle modifier of 2.
 *
 * S   CFFT instance (fftLen and pTwiddle are read)
 * p1  interleaved complex buffer, processed in place
 */
void arm_cfft_radix8by2_f32 (arm_cfft_instance_f32 * S, float32_t * p1)
{
    const uint32_t    fullLen = S->fftLen;
    const uint32_t    halfLen = fullLen >> 1;           /* complex points per column */
    const float32_t * pTw     = (float32_t *) S->pTwiddle;

    float32_t * const pColA = p1;                       /* first column  */
    float32_t * const pColB = p1 + fullLen;             /* second column */

    /* Each column is walked from its start and from its midpoint at once. */
    float32_t * pTopA = pColA;
    float32_t * pTopB = pColB;
    float32_t * pMidA = pColA + halfLen;
    float32_t * pMidB = pColB + halfLen;

    float32_t topA[4], topB[4], midA[4], midB[4];
    float32_t twR, twI;
    float32_t m0, m1, m2, m3;
    uint32_t  blk, i, pair;

    /* Two complex points from each of the four cursors per iteration. */
    for (blk = halfLen >> 2; blk > 0U; blk--)
    {
        /* Snapshot all inputs before anything is overwritten. */
        for (i = 0U; i < 4U; i++)
        {
            topA[i] = pTopA[i];
            topB[i] = pTopB[i];
            midA[i] = pMidA[i];
            midB[i] = pMidB[i];
        }

        for (i = 0U; i < 4U; i++)
        {
            pTopA[i] = topA[i] + topB[i];    /* column 1 output            */
            topB[i]  = topA[i] - topB[i];    /* difference kept for col 2  */
            pMidA[i] = midA[i] + midB[i];    /* column 1 output            */
            midB[i]  = midB[i] - midA[i];    /* note the operand order     */
        }

        /* One twiddle per complex point; the same coefficient serves the
           top and the mid point with real and imaginary parts swapped. */
        for (pair = 0U; pair < 4U; pair += 2U)
        {
            twR = *pTw++;
            twI = *pTw++;

            /* re' = re*twR + im*twI, im' = im*twR - re*twI */
            m0 = topB[pair] * twR;
            m1 = topB[pair + 1U] * twI;
            m2 = topB[pair + 1U] * twR;
            m3 = topB[pair] * twI;

            pTopB[pair]      = m0 + m1;
            pTopB[pair + 1U] = m2 - m3;

            /* re' = re*twI - im*twR, im' = im*twI + re*twR */
            m0 = midB[pair] * twI;
            m1 = midB[pair + 1U] * twR;
            m2 = midB[pair + 1U] * twI;
            m3 = midB[pair] * twR;

            pMidB[pair]      = m0 - m1;
            pMidB[pair + 1U] = m2 + m3;
        }

        pTopA += 4;
        pTopB += 4;
        pMidA += 4;
        pMidB += 4;
    }

    /* first col */
    arm_radix8_butterfly_f32 (pColA, halfLen, (float32_t *) S->pTwiddle, 2U);

    /* second col */
    arm_radix8_butterfly_f32 (pColB, halfLen, (float32_t *) S->pTwiddle, 2U);
}
|
||||
|
||||
/*
 * Radix-4 butterfly on one complex point taken from the same position in
 * each of the four columns. The column-1 result is written back to c1; the
 * untwiddled results for columns 2..4 are returned in o2, o3 and o4
 * ({re, im} each).
 */
static void arm_radix8by4_top_f32(
        float32_t * c1,
  const float32_t * c2,
  const float32_t * c3,
  const float32_t * c4,
        float32_t * o2,
        float32_t * o3,
        float32_t * o4)
{
    const float32_t sumRe  = c1[0] + c3[0];
    const float32_t diffRe = c1[0] - c3[0];
    const float32_t sumIm  = c1[1] + c3[1];
    const float32_t diffIm = c1[1] - c3[1];

    /* col 2 */
    o2[0] = diffRe + c2[1] - c4[1];
    o2[1] = diffIm - c2[0] + c4[0];
    /* col 3 */
    o3[0] = sumRe - c2[0] - c4[0];
    o3[1] = sumIm - c2[1] - c4[1];
    /* col 4 */
    o4[0] = diffRe - c2[1] + c4[1];
    o4[1] = diffIm + c2[0] - c4[0];
    /* col 1 */
    c1[0] = sumRe + c2[0] + c4[0];
    c1[1] = sumIm + c2[1] + c4[1];
}

/*
 * Store v multiplied by the twiddle (twR, twI) as used for the top-half
 * points: re' = re*twR + im*twI, im' = im*twR - re*twI.
 */
static void arm_radix8by4_twiddle_f32(
        float32_t * dst,
  const float32_t * v,
        float32_t twR,
        float32_t twI)
{
    const float32_t m0 = v[0] * twR;
    const float32_t m1 = v[1] * twI;
    const float32_t m2 = v[1] * twR;
    const float32_t m3 = v[0] * twI;

    dst[0] = m0 + m1;
    dst[1] = m2 - m3;
}

/*
 * Radix-4 split followed by four radix-8 transforms.
 *
 * The buffer is treated as four columns of fftLen/4 complex points. A radix-4
 * pass with twiddles is run over the columns, walking each one from both ends
 * towards its middle so that one twiddle read serves a top and a bottom
 * point. Each column is then handed to arm_radix8_butterfly_f32 with a
 * twiddle modifier of 4.
 *
 * S   CFFT instance (fftLen and pTwiddle are read)
 * p1  interleaved complex buffer, processed in place
 */
void arm_cfft_radix8by4_f32 (arm_cfft_instance_f32 * S, float32_t * p1)
{
    const uint32_t    colFloats = S->fftLen >> 1;       /* floats per column         */
    const uint32_t    colLen    = colFloats >> 1;       /* complex points per column */
    const uint32_t    pairs     = (colLen - 2) >> 1;    /* top/bottom point pairs    */
    const float32_t * twiddle   = (float32_t *) S->pTwiddle;

    float32_t * const pCol1 = p1;
    float32_t * const pCol2 = pCol1 + colFloats;
    float32_t * const pCol3 = pCol2 + colFloats;
    float32_t * const pCol4 = pCol3 + colFloats;

    /* Forward cursors point at the real part of the current top point. */
    float32_t * top1 = pCol1;
    float32_t * top2 = pCol2;
    float32_t * top3 = pCol3;
    float32_t * top4 = pCol4;

    /* Backward cursors point at the imaginary part of the current bottom point. */
    float32_t * bot1 = pCol2 - 1;
    float32_t * bot2 = pCol3 - 1;
    float32_t * bot3 = pCol4 - 1;
    float32_t * bot4 = bot3 + colFloats;

    /* Entries [0..1] hold the top point, [2..3] the bottom point. */
    float32_t t2[4], t3[4], t4[4];
    float32_t twR, twI;
    float32_t sumRe, diffRe, sumIm, diffIm;
    float32_t m0, m1, m2, m3;
    uint32_t  k;

    /* Point 0: the twiddle factors are ones, so the results are stored as is. */
    arm_radix8by4_top_f32(top1, top2, top3, top4, &t2[0], &t3[0], &t4[0]);
    top1 += 2;

    *top2++ = t2[0];
    *top2++ = t2[1];
    *top3++ = t3[0];
    *top3++ = t3[1];
    *top4++ = t4[0];
    *top4++ = t4[1];

    /* Point k uses the twiddle table at float offsets 2k, 4k and 6k for
       columns 2, 3 and 4 respectively. */
    for (k = 1U; k <= pairs; k++)
    {
        /* TOP */
        arm_radix8by4_top_f32(top1, top2, top3, top4, &t2[0], &t3[0], &t4[0]);
        top1 += 2;

        /* BOTTOM (index -1 is the real part, index 0 the imaginary part) */
        sumRe  = bot1[-1] + bot3[-1];
        diffRe = bot1[-1] - bot3[-1];
        sumIm  = bot1[ 0] + bot3[ 0];
        diffIm = bot1[ 0] - bot3[ 0];
        /* col 2 */
        t2[2] = bot2[0] - bot4[0] + diffRe;
        t2[3] = bot1[0] - bot3[0] - bot2[-1] + bot4[-1];
        /* col 3 */
        t3[2] = sumRe - bot2[-1] - bot4[-1];
        t3[3] = sumIm - bot2[ 0] - bot4[ 0];
        /* col 4 */
        t4[2] = bot2[ 0] - bot4[ 0] - diffRe;
        t4[3] = bot4[-1] - bot2[-1] - diffIm;
        /* col 1 - bottom */
        bot1[ 0] = sumIm + bot2[ 0] + bot4[ 0];
        bot1[-1] = sumRe + bot2[-1] + bot4[-1];
        bot1 -= 2;

        /* COL 2 */
        twR = twiddle[2U * k];
        twI = twiddle[2U * k + 1U];

        arm_radix8by4_twiddle_f32(top2, &t2[0], twR, twI);
        top2 += 2;

        /* bottom point reuses the coefficient through vertical symmetry */
        m0 = t2[3] * twI;
        m1 = t2[2] * twR;
        m2 = t2[2] * twI;
        m3 = t2[3] * twR;

        bot2[ 0] = m0 - m1;
        bot2[-1] = m2 + m3;
        bot2 -= 2;

        /* COL 3 */
        twR = twiddle[4U * k];
        twI = twiddle[4U * k + 1U];

        arm_radix8by4_twiddle_f32(top3, &t3[0], twR, twI);
        top3 += 2;

        m0 = -t3[3] * twR;
        m1 = t3[2] * twI;
        m2 = t3[2] * twR;
        m3 = t3[3] * twI;

        bot3[ 0] = m0 - m1;
        bot3[-1] = m3 - m2;
        bot3 -= 2;

        /* COL 4 */
        twR = twiddle[6U * k];
        twI = twiddle[6U * k + 1U];

        arm_radix8by4_twiddle_f32(top4, &t4[0], twR, twI);
        top4 += 2;

        m0 = t4[3] * twI;
        m1 = t4[2] * twR;
        m2 = t4[2] * twI;
        m3 = t4[3] * twR;

        bot4[ 0] = m0 - m1;
        bot4[-1] = m2 + m3;
        bot4 -= 2;
    }

    /* MIDDLE: the one remaining point of each column, which has no bottom
       partner. k now indexes the twiddle that follows the last loop pass. */
    arm_radix8by4_top_f32(top1, top2, top3, top4, &t2[0], &t3[0], &t4[0]);

    /* COL 2 */
    arm_radix8by4_twiddle_f32(top2, &t2[0], twiddle[2U * k], twiddle[2U * k + 1U]);
    /* COL 3 */
    arm_radix8by4_twiddle_f32(top3, &t3[0], twiddle[4U * k], twiddle[4U * k + 1U]);
    /* COL 4 */
    arm_radix8by4_twiddle_f32(top4, &t4[0], twiddle[6U * k], twiddle[6U * k + 1U]);

    /* first col */
    arm_radix8_butterfly_f32 (pCol1, colLen, (float32_t *) S->pTwiddle, 4U);

    /* second col */
    arm_radix8_butterfly_f32 (pCol2, colLen, (float32_t *) S->pTwiddle, 4U);

    /* third col */
    arm_radix8_butterfly_f32 (pCol3, colLen, (float32_t *) S->pTwiddle, 4U);

    /* fourth col */
    arm_radix8_butterfly_f32 (pCol4, colLen, (float32_t *) S->pTwiddle, 4U);
}
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the floating-point complex FFT.
|
||||
@param[in] S points to an instance of the floating-point CFFT structure
|
||||
@param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_f32(
|
||||
const arm_cfft_instance_f32 * S,
|
||||
float32_t * p1,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
uint32_t L = S->fftLen, l;
|
||||
float32_t invL, * pSrc;
|
||||
|
||||
if (ifftFlag == 1U)
|
||||
{
|
||||
/* Conjugate input data */
|
||||
pSrc = p1 + 1;
|
||||
for (l = 0; l < L; l++)
|
||||
{
|
||||
*pSrc = -*pSrc;
|
||||
pSrc += 2;
|
||||
}
|
||||
}
|
||||
|
||||
switch (L)
|
||||
{
|
||||
case 16:
|
||||
case 128:
|
||||
case 1024:
|
||||
arm_cfft_radix8by2_f32 ( (arm_cfft_instance_f32 *) S, p1);
|
||||
break;
|
||||
case 32:
|
||||
case 256:
|
||||
case 2048:
|
||||
arm_cfft_radix8by4_f32 ( (arm_cfft_instance_f32 *) S, p1);
|
||||
break;
|
||||
case 64:
|
||||
case 512:
|
||||
case 4096:
|
||||
arm_radix8_butterfly_f32 ( p1, L, (float32_t *) S->pTwiddle, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
if ( bitReverseFlag )
|
||||
arm_bitreversal_32 ((uint32_t*) p1, S->bitRevLength, S->pBitRevTable);
|
||||
|
||||
if (ifftFlag == 1U)
|
||||
{
|
||||
invL = 1.0f / (float32_t)L;
|
||||
|
||||
/* Conjugate and scale output data */
|
||||
pSrc = p1;
|
||||
for (l= 0; l < L; l++)
|
||||
{
|
||||
*pSrc++ *= invL ;
|
||||
*pSrc = -(*pSrc) * invL;
|
||||
pSrc++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
318
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f64.c
Normal file
318
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_f64.c
Normal file
@@ -0,0 +1,318 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_f64.c
|
||||
* Description: Combined Radix Decimation in Frequency CFFT Double Precision Floating point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
|
||||
extern void arm_radix4_butterfly_f64(
|
||||
float64_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float64_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
extern void arm_bitreversal_64(
|
||||
uint64_t * pSrc,
|
||||
const uint16_t bitRevLen,
|
||||
const uint16_t * pBitRevTable);
|
||||
|
||||
/**
|
||||
* @} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* Internal helper function used by the FFTs
|
||||
* ---------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* @brief Core function for the Double Precision floating-point CFFT butterfly process.
|
||||
* @param[in, out] *pSrc points to the in-place buffer of F64 data type.
|
||||
* @param[in] fftLen length of the FFT.
|
||||
* @param[in] *pCoef points to the twiddle coefficient buffer.
|
||||
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_radix4_butterfly_f64(
|
||||
float64_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float64_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
|
||||
float64_t co1, co2, co3, si1, si2, si3;
|
||||
uint32_t ia1, ia2, ia3;
|
||||
uint32_t i0, i1, i2, i3;
|
||||
uint32_t n1, n2, j, k;
|
||||
|
||||
float64_t t1, t2, r1, r2, s1, s2;
|
||||
|
||||
|
||||
/* Initializations for the fft calculation */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
for (k = fftLen; k > 1U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the fft calculation */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* FFT Calculation */
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
i0 = j;
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* xa + xc */
|
||||
r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
|
||||
|
||||
/* xa - xc */
|
||||
r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
|
||||
|
||||
/* ya + yc */
|
||||
s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* ya - yc */
|
||||
s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* xb + xd */
|
||||
t1 = pSrc[2U * i1] + pSrc[2U * i3];
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = r1 + t1;
|
||||
|
||||
/* xa + xc -(xb + xd) */
|
||||
r1 = r1 - t1;
|
||||
|
||||
/* yb + yd */
|
||||
t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = s1 + t2;
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = s1 - t2;
|
||||
|
||||
/* (yb - yd) */
|
||||
t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* (xb - xd) */
|
||||
t2 = pSrc[2U * i1] - pSrc[2U * i3];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
r1 = r2 + t1;
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
r2 = r2 - t1;
|
||||
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
s1 = s2 - t2;
|
||||
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
s2 = s2 + t2;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
|
||||
|
||||
i0 += n1;
|
||||
} while ( i0 < fftLen);
|
||||
j++;
|
||||
} while (j <= (n2 - 1U));
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* @brief Core function for the Double Precision floating-point CFFT butterfly process.
|
||||
* @param[in, out] *pSrc points to the in-place buffer of F64 data type.
|
||||
* @param[in] fftLen length of the FFT.
|
||||
* @param[in] *pCoef points to the twiddle coefficient buffer.
|
||||
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_cfft_radix4by2_f64(
|
||||
float64_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float64_t * pCoef)
|
||||
{
|
||||
uint32_t i, l;
|
||||
uint32_t n2, ia;
|
||||
float64_t xt, yt, cosVal, sinVal;
|
||||
float64_t p0, p1,p2,p3,a0,a1;
|
||||
|
||||
n2 = fftLen >> 1;
|
||||
ia = 0;
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[2*ia];
|
||||
sinVal = pCoef[2*ia + 1];
|
||||
ia++;
|
||||
|
||||
l = i + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
a0 = pSrc[2 * i] + pSrc[2 * l];
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
|
||||
|
||||
p0 = xt * cosVal;
|
||||
p1 = yt * sinVal;
|
||||
p2 = yt * cosVal;
|
||||
p3 = xt * sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = p0 + p1;
|
||||
pSrc[2 * l + 1] = p2 - p3;
|
||||
|
||||
}
|
||||
|
||||
// first col
|
||||
arm_radix4_butterfly_f64( pSrc, n2, (float64_t*)pCoef, 2U);
|
||||
// second col
|
||||
arm_radix4_butterfly_f64( pSrc + fftLen, n2, (float64_t*)pCoef, 2U);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the Double Precision floating-point complex FFT.
|
||||
@param[in] S points to an instance of the Double Precision floating-point CFFT structure
|
||||
@param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_f64(
|
||||
const arm_cfft_instance_f64 * S,
|
||||
float64_t * p1,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
uint32_t L = S->fftLen, l;
|
||||
float64_t invL, * pSrc;
|
||||
|
||||
if (ifftFlag == 1U)
|
||||
{
|
||||
/* Conjugate input data */
|
||||
pSrc = p1 + 1;
|
||||
for(l=0; l<L; l++)
|
||||
{
|
||||
*pSrc = -*pSrc;
|
||||
pSrc += 2;
|
||||
}
|
||||
}
|
||||
|
||||
switch (L)
|
||||
{
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
arm_radix4_butterfly_f64 (p1, L, (float64_t*)S->pTwiddle, 1U);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_f64 ( p1, L, (float64_t*)S->pTwiddle);
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
if ( bitReverseFlag )
|
||||
arm_bitreversal_64((uint64_t*)p1, S->bitRevLength,S->pBitRevTable);
|
||||
|
||||
if (ifftFlag == 1U)
|
||||
{
|
||||
invL = 1.0 / (float64_t)L;
|
||||
/* Conjugate and scale output data */
|
||||
pSrc = p1;
|
||||
for(l=0; l<L; l++)
|
||||
{
|
||||
*pSrc++ *= invL ;
|
||||
*pSrc = -(*pSrc) * invL;
|
||||
pSrc++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
363
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f16.c
Normal file
363
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f16.c
Normal file
@@ -0,0 +1,363 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_init_f16.c
|
||||
* Description: Initialization function for cfft f16 instance
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define FFTINIT(EXT,SIZE) \
|
||||
S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength; \
|
||||
S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable; \
|
||||
S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle;
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the cfft f16 function
|
||||
@param[in,out] S points to an instance of the floating-point CFFT structure
|
||||
@param[in] fftLen fft length (number of complex samples)
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
|
||||
|
||||
@par Use of this function is mandatory only for the MVE version of the FFT.
|
||||
Other versions can still initialize directly the data structure using
|
||||
variables declared in arm_const_structs.h
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
#include "arm_common_tables_f16.h"
|
||||
#include "arm_const_structs_f16.h"
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_vec_fft.h"
|
||||
#include "arm_mve_tables_f16.h"
|
||||
|
||||
arm_status arm_cfft_radix4by2_rearrange_twiddles_f16(arm_cfft_instance_f16 *S, int twidCoefModifier)
{
/* Points the three stride tables of S, and their companion index arrays,
   at the rearranged_twiddle_*_<LEN>_f16 symbols. */
#define CFFT_F16_USE_REARRANGED(LEN)                                                                \
    do {                                                                                            \
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_##LEN##_f16;     \
        S->rearranged_twiddle_stride1         = rearranged_twiddle_stride1_##LEN##_f16;             \
                                                                                                    \
        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_##LEN##_f16;     \
        S->rearranged_twiddle_stride2         = rearranged_twiddle_stride2_##LEN##_f16;             \
                                                                                                    \
        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_##LEN##_f16;     \
        S->rearranged_twiddle_stride3         = rearranged_twiddle_stride3_##LEN##_f16;             \
    } while (0)

    arm_status result = ARM_MATH_SUCCESS;

    /* The table is chosen from fftLen shifted right by (twidCoefModifier - 1):
       a modifier of 1 uses fftLen itself, a modifier of 2 uses fftLen / 2. */
    switch (S->fftLen >> (twidCoefModifier - 1)) {

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
    || defined(ARM_TABLE_TWIDDLECOEF_F16_4096)
    case 4096U:
        CFFT_F16_USE_REARRANGED(4096);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
    || defined(ARM_TABLE_TWIDDLECOEF_F16_1024) || defined(ARM_TABLE_TWIDDLECOEF_F16_2048)
    case 1024U:
        CFFT_F16_USE_REARRANGED(1024);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
    || defined(ARM_TABLE_TWIDDLECOEF_F16_256) || defined(ARM_TABLE_TWIDDLECOEF_F16_512)
    case 256U:
        CFFT_F16_USE_REARRANGED(256);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
    || defined(ARM_TABLE_TWIDDLECOEF_F16_64) || defined(ARM_TABLE_TWIDDLECOEF_F16_128)
    case 64U:
        CFFT_F16_USE_REARRANGED(64);
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) \
    || defined(ARM_TABLE_TWIDDLECOEF_F16_16) || defined(ARM_TABLE_TWIDDLECOEF_F16_32)
    case 16U:
        CFFT_F16_USE_REARRANGED(16);
        break;
#endif

    default:
        /* No table compiled in for this size: report it and leave S unchanged */
        result = ARM_MATH_ARGUMENT_ERROR;
        break;
    }

#undef CFFT_F16_USE_REARRANGED

    return (result);
}
|
||||
|
||||
arm_status arm_cfft_init_f16(
|
||||
arm_cfft_instance_f16 * S,
|
||||
uint16_t fftLen)
|
||||
{
|
||||
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = NULL;
|
||||
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen) {
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_4096) && defined(ARM_TABLE_TWIDDLECOEF_F16_4096))
|
||||
case 4096U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096;
|
||||
S->pTwiddle = (float16_t *)twiddleCoefF16_4096;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_2048) && defined(ARM_TABLE_TWIDDLECOEF_F16_2048))
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048;
|
||||
S->pTwiddle = (float16_t *)twiddleCoefF16_2048;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_1024) && defined(ARM_TABLE_TWIDDLECOEF_F16_1024))
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024;
|
||||
S->pTwiddle = (float16_t *)twiddleCoefF16_1024;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_512) && defined(ARM_TABLE_TWIDDLECOEF_F16_512))
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512;
|
||||
S->pTwiddle = (float16_t *)twiddleCoefF16_512;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_256) && defined(ARM_TABLE_TWIDDLECOEF_F16_256))
|
||||
case 256U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256;
|
||||
S->pTwiddle = (float16_t *)twiddleCoefF16_256;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_128) && defined(ARM_TABLE_TWIDDLECOEF_F16_128))
|
||||
case 128U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128;
|
||||
S->pTwiddle = (float16_t *)twiddleCoefF16_128;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_64) && defined(ARM_TABLE_TWIDDLECOEF_F16_64))
|
||||
case 64U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64;
|
||||
S->pTwiddle = (float16_t *)twiddleCoefF16_64;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_32) && defined(ARM_TABLE_TWIDDLECOEF_F16_32))
|
||||
case 32U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32;
|
||||
S->pTwiddle = (float16_t *)twiddleCoefF16_32;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_16) && defined(ARM_TABLE_TWIDDLECOEF_F16_16))
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_16;
|
||||
S->pTwiddle = (float16_t *)twiddleCoefF16_16;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f16(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return (status);
|
||||
}
|
||||
#else
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
arm_status arm_cfft_init_f16(
|
||||
arm_cfft_instance_f16 * S,
|
||||
uint16_t fftLen)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = NULL;
|
||||
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen) {
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
case 4096U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f16,4096);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f16,2048);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f16,1024);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f16,512);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
|
||||
case 256U:
|
||||
FFTINIT(f16,256);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
|
||||
case 128U:
|
||||
FFTINIT(f16,128);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
|
||||
case 64U:
|
||||
FFTINIT(f16,64);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
|
||||
case 32U:
|
||||
FFTINIT(f16,32);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F16_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
FFTINIT(f16,16);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return (status);
|
||||
}
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
358
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f32.c
Normal file
358
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f32.c
Normal file
@@ -0,0 +1,358 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_init_f32.c
|
||||
* Description: Initialization function for cfft f32 instance
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#define FFTINIT(EXT,SIZE) \
|
||||
S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength; \
|
||||
S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable; \
|
||||
S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle;
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the cfft f32 function
|
||||
@param[in,out] S points to an instance of the floating-point CFFT structure
|
||||
@param[in] fftLen fft length (number of complex samples)
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
|
||||
|
||||
@par Use of this function is mandatory only for the MVE version of the FFT.
|
||||
Other versions can still initialize directly the data structure using
|
||||
variables declared in arm_const_structs.h
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
#include "arm_const_structs.h"
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_vec_fft.h"
|
||||
#include "arm_mve_tables.h"
|
||||
|
||||
/*
 * Install the pre-rearranged f32 twiddle tables (stride 1, 2 and 3) for the
 * MVE CFFT into the instance.
 *
 * S                 instance to update; S->fftLen is read and the six
 *                   rearranged_twiddle_* fields are written.
 * twidCoefModifier  the tables are selected for the length
 *                   S->fftLen >> (twidCoefModifier - 1); the callers in this
 *                   file pass 1 or 2.
 *
 * Returns ARM_MATH_SUCCESS when a table set was installed, and
 * ARM_MATH_ARGUMENT_ERROR when twidCoefModifier is below 1 or when no case
 * for the resulting length is compiled in.
 */
arm_status arm_cfft_radix4by2_rearrange_twiddles_f32(arm_cfft_instance_f32 *S, int twidCoefModifier)
{
    /* Shifting by a negative count is undefined behaviour in C, so reject
       modifiers below 1 instead of evaluating the shift. */
    if (twidCoefModifier < 1)
    {
        return (ARM_MATH_ARGUMENT_ERROR);
    }

    switch (S->fftLen >> (twidCoefModifier - 1)) {

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096)
    case 4096U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_4096_f32;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_4096_f32;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_4096_f32;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_4096_f32;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_4096_f32;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_4096_f32;
        break;
#endif

    /* The 1024 tables are also the ones reached from a 2048-point FFT
       (modifier 2), hence the second table macro in the guard. The same
       pairing applies to 256/512, 64/128 and 16/32 below. */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_1024) || defined(ARM_TABLE_TWIDDLECOEF_F32_2048)
    case 1024U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_1024_f32;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_1024_f32;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_1024_f32;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_1024_f32;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_1024_f32;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_1024_f32;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_256) || defined(ARM_TABLE_TWIDDLECOEF_F32_512)
    case 256U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_256_f32;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_256_f32;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_256_f32;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_256_f32;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_256_f32;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_256_f32;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_64) || defined(ARM_TABLE_TWIDDLECOEF_F32_128)
    case 64U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_64_f32;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_64_f32;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_64_f32;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_64_f32;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_64_f32;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_64_f32;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_16) || defined(ARM_TABLE_TWIDDLECOEF_F32_32)
    case 16U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_16_f32;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_16_f32;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_16_f32;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_16_f32;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_16_f32;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_16_f32;
        break;
#endif

    default:
        /* No case matches this length: it is not one of the handled sizes,
           or its case was removed by the table configuration macros. */
        return (ARM_MATH_ARGUMENT_ERROR);
    }

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
arm_status arm_cfft_init_f32(
|
||||
arm_cfft_instance_f32 * S,
|
||||
uint16_t fftLen)
|
||||
{
|
||||
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = NULL;
|
||||
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen) {
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_4096) && defined(ARM_TABLE_TWIDDLECOEF_F32_4096))
|
||||
case 4096U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096;
|
||||
S->pTwiddle = (float32_t *)twiddleCoef_4096;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f32(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_2048) && defined(ARM_TABLE_TWIDDLECOEF_F32_2048))
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048;
|
||||
S->pTwiddle = (float32_t *)twiddleCoef_2048;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f32(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_1024) && defined(ARM_TABLE_TWIDDLECOEF_F32_1024))
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024;
|
||||
S->pTwiddle = (float32_t *)twiddleCoef_1024;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f32(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_512) && defined(ARM_TABLE_TWIDDLECOEF_F32_512))
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512;
|
||||
S->pTwiddle = (float32_t *)twiddleCoef_512;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f32(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_256) && defined(ARM_TABLE_TWIDDLECOEF_F32_256))
|
||||
case 256U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256;
|
||||
S->pTwiddle = (float32_t *)twiddleCoef_256;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f32(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_128) && defined(ARM_TABLE_TWIDDLECOEF_F32_128))
|
||||
case 128U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128;
|
||||
S->pTwiddle = (float32_t *)twiddleCoef_128;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f32(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_64) && defined(ARM_TABLE_TWIDDLECOEF_F32_64))
|
||||
case 64U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64;
|
||||
S->pTwiddle = (float32_t *)twiddleCoef_64;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f32(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_32) && defined(ARM_TABLE_TWIDDLECOEF_F32_32))
|
||||
case 32U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32;
|
||||
S->pTwiddle = (float32_t *)twiddleCoef_32;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f32(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_16) && defined(ARM_TABLE_TWIDDLECOEF_F32_16))
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_16;
|
||||
S->pTwiddle = (float32_t *)twiddleCoef_16;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_f32(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return (status);
|
||||
}
|
||||
#else
|
||||
arm_status arm_cfft_init_f32(
|
||||
arm_cfft_instance_f32 * S,
|
||||
uint16_t fftLen)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = NULL;
|
||||
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen) {
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
case 4096U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f32,4096);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f32,2048);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f32,1024);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f32,512);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
|
||||
case 256U:
|
||||
FFTINIT(f32,256);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
|
||||
case 128U:
|
||||
FFTINIT(f32,128);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
|
||||
case 64U:
|
||||
FFTINIT(f32,64);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
|
||||
case 32U:
|
||||
FFTINIT(f32,32);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
FFTINIT(f32,16);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return (status);
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
150
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f64.c
Normal file
150
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_f64.c
Normal file
@@ -0,0 +1,150 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_init_f64.c
|
||||
* Description: Initialization function for cfft f64 instance
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
 * FFTINIT(EXT, SIZE): copy bitRevLength, pBitRevTable and pTwiddle from the
 * constant instance arm_cfft_sR_<EXT>_len<SIZE> into the instance pointed to
 * by S. A pointer named S must be in scope at the point of use.
 *
 * The three assignments are wrapped in do { } while (0) so the expansion is
 * exactly one statement: "FFTINIT(f64,16);" then behaves correctly even as
 * the body of an unbraced if/else, and the caller supplies the semicolon.
 */
#define FFTINIT(EXT,SIZE)                                             \
    do {                                                              \
        S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength; \
        S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable; \
        S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle;         \
    } while (0)
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the cfft f64 function
|
||||
@param[in,out] S points to an instance of the floating-point CFFT structure
|
||||
@param[in] fftLen fft length (number of complex samples)
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
|
||||
|
||||
@par Use of this function is mandatory only for the MVE version of the FFT.
|
||||
Other versions can still initialize directly the data structure using
|
||||
variables declared in arm_const_structs.h
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
#include "arm_const_structs.h"
|
||||
|
||||
|
||||
arm_status arm_cfft_init_f64(
|
||||
arm_cfft_instance_f64 * S,
|
||||
uint16_t fftLen)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = NULL;
|
||||
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen) {
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_4096) && defined(ARM_TABLE_BITREVIDX_FLT_4096))
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
case 4096U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f64,4096);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048))
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f64,2048);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024))
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f64,1024);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_512) && defined(ARM_TABLE_BITREVIDX_FLT_512))
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(f64,512);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_256) && defined(ARM_TABLE_BITREVIDX_FLT_256))
|
||||
case 256U:
|
||||
FFTINIT(f64,256);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_128) && defined(ARM_TABLE_BITREVIDX_FLT_128))
|
||||
case 128U:
|
||||
FFTINIT(f64,128);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_64) && defined(ARM_TABLE_BITREVIDX_FLT_64))
|
||||
case 64U:
|
||||
FFTINIT(f64,64);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_32) && defined(ARM_TABLE_BITREVIDX_FLT_32))
|
||||
case 32U:
|
||||
FFTINIT(f64,32);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F64_16) && defined(ARM_TABLE_BITREVIDX_FLT_16))
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
FFTINIT(f64,16);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
356
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q15.c
Normal file
356
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q15.c
Normal file
@@ -0,0 +1,356 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_init_q15.c
|
||||
* Description: Initialization function for cfft q15 instance
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
 * FFTINIT(EXT, SIZE): copy bitRevLength, pBitRevTable and pTwiddle from the
 * constant instance arm_cfft_sR_<EXT>_len<SIZE> into the instance pointed to
 * by S. A pointer named S must be in scope at the point of use.
 *
 * The three assignments are wrapped in do { } while (0) so the expansion is
 * exactly one statement: "FFTINIT(q15,16);" then behaves correctly even as
 * the body of an unbraced if/else, and the caller supplies the semicolon.
 */
#define FFTINIT(EXT,SIZE)                                             \
    do {                                                              \
        S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength; \
        S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable; \
        S->pTwiddle = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle;         \
    } while (0)
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the cfft q15 function
|
||||
@param[in,out] S points to an instance of the Q15 CFFT structure
|
||||
@param[in] fftLen fft length (number of complex samples)
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
|
||||
|
||||
@par Use of this function is mandatory only for the MVE version of the FFT.
|
||||
Other versions can still initialize directly the data structure using
|
||||
variables declared in arm_const_structs.h
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
#include "arm_const_structs.h"
|
||||
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_vec_fft.h"
|
||||
#include "arm_mve_tables.h"
|
||||
|
||||
|
||||
/*
 * Install the pre-rearranged q15 twiddle tables (stride 1, 2 and 3) for the
 * MVE CFFT into the instance.
 *
 * S                 instance to update; S->fftLen is read and the six
 *                   rearranged_twiddle_* fields are written.
 * twidCoefModifier  the tables are selected for the length
 *                   S->fftLen >> (twidCoefModifier - 1); the callers in this
 *                   file pass 1 or 2.
 *
 * Returns ARM_MATH_SUCCESS when a table set was installed, and
 * ARM_MATH_ARGUMENT_ERROR when twidCoefModifier is below 1 or when no case
 * for the resulting length is compiled in.
 */
arm_status arm_cfft_radix4by2_rearrange_twiddles_q15(arm_cfft_instance_q15 *S, int twidCoefModifier)
{
    /* Shifting by a negative count is undefined behaviour in C, so reject
       modifiers below 1 instead of evaluating the shift. */
    if (twidCoefModifier < 1)
    {
        return (ARM_MATH_ARGUMENT_ERROR);
    }

    switch (S->fftLen >> (twidCoefModifier - 1)) {

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_4096) && defined(ARM_TABLE_TWIDDLECOEF_Q15_4096))
    case 4096U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_4096_q15;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_4096_q15;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_4096_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_4096_q15;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_4096_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_4096_q15;
        break;
#endif

    /* The 1024 tables are also the ones reached from a 2048-point FFT
       (modifier 2), hence the second table pair in the guard. The same
       pairing applies to 256/512, 64/128 and 16/32 below. */
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_1024) && defined(ARM_TABLE_TWIDDLECOEF_Q15_1024)) || (defined(ARM_TABLE_BITREVIDX_FXT_2048) && defined(ARM_TABLE_TWIDDLECOEF_Q15_2048))
    case 1024U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_1024_q15;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_1024_q15;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_1024_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_1024_q15;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_1024_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_1024_q15;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_256) && defined(ARM_TABLE_TWIDDLECOEF_Q15_256)) || (defined(ARM_TABLE_BITREVIDX_FXT_512) && defined(ARM_TABLE_TWIDDLECOEF_Q15_512))
    case 256U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_256_q15;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_256_q15;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_256_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_256_q15;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_256_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_256_q15;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_64) && defined(ARM_TABLE_TWIDDLECOEF_Q15_64)) || (defined(ARM_TABLE_BITREVIDX_FXT_128) && defined(ARM_TABLE_TWIDDLECOEF_Q15_128))
    case 64U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_64_q15;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_64_q15;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_64_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_64_q15;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_64_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_64_q15;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_16) && defined(ARM_TABLE_TWIDDLECOEF_Q15_16)) || (defined(ARM_TABLE_BITREVIDX_FXT_32) && defined(ARM_TABLE_TWIDDLECOEF_Q15_32))
    case 16U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_16_q15;
        S->rearranged_twiddle_stride1 = rearranged_twiddle_stride1_16_q15;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_16_q15;
        S->rearranged_twiddle_stride2 = rearranged_twiddle_stride2_16_q15;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_16_q15;
        S->rearranged_twiddle_stride3 = rearranged_twiddle_stride3_16_q15;
        break;
#endif

    default:
        /* No case matches this length: it is not one of the handled sizes,
           or its case was removed by the table configuration macros. */
        return (ARM_MATH_ARGUMENT_ERROR);
    }

    return (ARM_MATH_SUCCESS);
}
|
||||
|
||||
|
||||
|
||||
arm_status arm_cfft_init_q15(
|
||||
arm_cfft_instance_q15 * S,
|
||||
uint16_t fftLen)
|
||||
{
|
||||
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = NULL;
|
||||
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen) {
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_4096) && defined(ARM_TABLE_TWIDDLECOEF_Q15_4096))
|
||||
case 4096U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096;
|
||||
S->pTwiddle = (q15_t *)twiddleCoef_4096_q15;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_2048) && defined(ARM_TABLE_TWIDDLECOEF_Q15_2048))
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048;
|
||||
S->pTwiddle = (q15_t *)twiddleCoef_2048_q15;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_1024) && defined(ARM_TABLE_TWIDDLECOEF_Q15_1024))
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024;
|
||||
S->pTwiddle = (q15_t *)twiddleCoef_1024_q15;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_512) && defined(ARM_TABLE_TWIDDLECOEF_Q15_512))
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512;
|
||||
S->pTwiddle = (q15_t *)twiddleCoef_512_q15;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_256) && defined(ARM_TABLE_TWIDDLECOEF_Q15_256))
|
||||
case 256U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256;
|
||||
S->pTwiddle = (q15_t *)twiddleCoef_256_q15;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_128) && defined(ARM_TABLE_TWIDDLECOEF_Q15_128))
|
||||
case 128U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128;
|
||||
S->pTwiddle = (q15_t *)twiddleCoef_128_q15;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_64) && defined(ARM_TABLE_TWIDDLECOEF_Q15_64))
|
||||
case 64U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64;
|
||||
S->pTwiddle = (q15_t *)twiddleCoef_64_q15;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_32) && defined(ARM_TABLE_TWIDDLECOEF_Q15_32))
|
||||
case 32U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32;
|
||||
S->pTwiddle = (q15_t *)twiddleCoef_32_q15;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_16) && defined(ARM_TABLE_TWIDDLECOEF_Q15_16))
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_16;
|
||||
S->pTwiddle = (q15_t *)twiddleCoef_16_q15;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q15(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return (status);
|
||||
}
|
||||
#else
|
||||
arm_status arm_cfft_init_q15(
|
||||
arm_cfft_instance_q15 * S,
|
||||
uint16_t fftLen)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = NULL;
|
||||
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen) {
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
case 4096U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(q15,4096);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(q15,2048);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(q15,1024);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(q15,512);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
|
||||
case 256U:
|
||||
FFTINIT(q15,256);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
|
||||
case 128U:
|
||||
FFTINIT(q15,128);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
|
||||
case 64U:
|
||||
FFTINIT(q15,64);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
|
||||
case 32U:
|
||||
FFTINIT(q15,32);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q15_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
FFTINIT(q15,16);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return (status);
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
356
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q31.c
Normal file
356
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_init_q31.c
Normal file
@@ -0,0 +1,356 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_init_q31.c
|
||||
* Description: Initialization function for cfft q31 instance
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
 * FFTINIT(EXT, SIZE)
 *
 * Copies the bit-reversal length, the bit-reversal table pointer and the
 * twiddle pointer from the constant instance arm_cfft_sR_<EXT>_len<SIZE>
 * into the instance addressed by the local pointer `S` of the calling
 * function.
 *
 * The three assignments are wrapped in do { } while (0) so that the macro
 * expands to exactly one statement and stays correct when used as the body
 * of an unbraced if / else. Invoke it with a trailing semicolon.
 */
#define FFTINIT(EXT,SIZE)                                           \
  do {                                                              \
    S->bitRevLength = arm_cfft_sR_##EXT##_len##SIZE.bitRevLength;   \
    S->pBitRevTable = arm_cfft_sR_##EXT##_len##SIZE.pBitRevTable;   \
    S->pTwiddle     = arm_cfft_sR_##EXT##_len##SIZE.pTwiddle;       \
  } while (0)
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the cfft q31 function
|
||||
@param[in,out] S points to an instance of the Q31 CFFT structure
|
||||
@param[in] fftLen fft length (number of complex samples)
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : an error is detected
|
||||
|
||||
@par Use of this function is mandatory only for the MVE version of the FFT.
|
||||
Other versions can still initialize directly the data structure using
|
||||
variables declared in arm_const_structs.h
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
#include "arm_const_structs.h"
|
||||
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_vec_fft.h"
|
||||
#include "arm_mve_tables.h"
|
||||
|
||||
|
||||
/**
  @brief         Attach the rearranged Q31 twiddle tables (strides 1, 2 and 3) to a CFFT instance.
  @param[in,out] S                 points to the Q31 CFFT instance; S->fftLen must already be set
  @param[in]     twidCoefModifier  table selector: the tables are chosen for the length
                                   S->fftLen >> (twidCoefModifier - 1), i.e. 1 selects the
                                   tables for S->fftLen and 2 those for S->fftLen / 2
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : the six table fields of S were assigned
                   - \ref ARM_MATH_ARGUMENT_ERROR : twidCoefModifier is smaller than 1, or no
                                                    table set is compiled in for the resulting length
 */
arm_status arm_cfft_radix4by2_rearrange_twiddles_q31(arm_cfft_instance_q31 *S, int twidCoefModifier)
{
    /* A negative shift count is undefined behaviour, so reject it up front. */
    if (twidCoefModifier < 1)
    {
        return(ARM_MATH_ARGUMENT_ERROR);
    }

    switch (S->fftLen >> (twidCoefModifier - 1)) {

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_4096) && defined(ARM_TABLE_TWIDDLECOEF_Q31_4096))
    case 4096U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_4096_q31;
        S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_4096_q31;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_4096_q31;
        S->rearranged_twiddle_stride2  =  rearranged_twiddle_stride2_4096_q31;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_4096_q31;
        S->rearranged_twiddle_stride3  =  rearranged_twiddle_stride3_4096_q31;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_1024) && defined(ARM_TABLE_TWIDDLECOEF_Q31_1024)) || (defined(ARM_TABLE_BITREVIDX_FXT_2048) && defined(ARM_TABLE_TWIDDLECOEF_Q31_2048))
    case 1024U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_1024_q31;
        S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_1024_q31;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_1024_q31;
        S->rearranged_twiddle_stride2  =  rearranged_twiddle_stride2_1024_q31;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_1024_q31;
        S->rearranged_twiddle_stride3  =  rearranged_twiddle_stride3_1024_q31;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_256) && defined(ARM_TABLE_TWIDDLECOEF_Q31_256)) || (defined(ARM_TABLE_BITREVIDX_FXT_512) && defined(ARM_TABLE_TWIDDLECOEF_Q31_512))
    case 256U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_256_q31;
        S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_256_q31;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_256_q31;
        S->rearranged_twiddle_stride2  =  rearranged_twiddle_stride2_256_q31;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_256_q31;
        S->rearranged_twiddle_stride3  =  rearranged_twiddle_stride3_256_q31;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_64) && defined(ARM_TABLE_TWIDDLECOEF_Q31_64)) || (defined(ARM_TABLE_BITREVIDX_FXT_128) && defined(ARM_TABLE_TWIDDLECOEF_Q31_128))
    case 64U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_64_q31;
        S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_64_q31;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_64_q31;
        S->rearranged_twiddle_stride2  =  rearranged_twiddle_stride2_64_q31;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_64_q31;
        S->rearranged_twiddle_stride3  =  rearranged_twiddle_stride3_64_q31;
        break;
#endif

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_16) && defined(ARM_TABLE_TWIDDLECOEF_Q31_16)) || (defined(ARM_TABLE_BITREVIDX_FXT_32) && defined(ARM_TABLE_TWIDDLECOEF_Q31_32))
    case 16U:
        S->rearranged_twiddle_tab_stride1_arr = rearranged_twiddle_tab_stride1_arr_16_q31;
        S->rearranged_twiddle_stride1  =  rearranged_twiddle_stride1_16_q31;

        S->rearranged_twiddle_tab_stride2_arr = rearranged_twiddle_tab_stride2_arr_16_q31;
        S->rearranged_twiddle_stride2  =  rearranged_twiddle_stride2_16_q31;

        S->rearranged_twiddle_tab_stride3_arr = rearranged_twiddle_tab_stride3_arr_16_q31;
        S->rearranged_twiddle_stride3  =  rearranged_twiddle_stride3_16_q31;
        break;
#endif

    default:
        /* No table set is available for this length. */
        return(ARM_MATH_ARGUMENT_ERROR);
    }

    return(ARM_MATH_SUCCESS);
}
|
||||
|
||||
|
||||
|
||||
arm_status arm_cfft_init_q31(
|
||||
arm_cfft_instance_q31 * S,
|
||||
uint16_t fftLen)
|
||||
{
|
||||
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = NULL;
|
||||
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen) {
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_4096) && defined(ARM_TABLE_TWIDDLECOEF_Q31_4096))
|
||||
case 4096U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_4096;
|
||||
S->pTwiddle = (q31_t *)twiddleCoef_4096_q31;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_2048) && defined(ARM_TABLE_TWIDDLECOEF_Q31_2048))
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_2048;
|
||||
S->pTwiddle = (q31_t *)twiddleCoef_2048_q31;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q31(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_1024) && defined(ARM_TABLE_TWIDDLECOEF_Q31_1024))
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_1024;
|
||||
S->pTwiddle = (q31_t *)twiddleCoef_1024_q31;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_512) && defined(ARM_TABLE_TWIDDLECOEF_Q31_512))
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_512;
|
||||
S->pTwiddle = (q31_t *)twiddleCoef_512_q31;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q31(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_256) && defined(ARM_TABLE_TWIDDLECOEF_Q31_256))
|
||||
case 256U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_256;
|
||||
S->pTwiddle = (q31_t *)twiddleCoef_256_q31;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_128) && defined(ARM_TABLE_TWIDDLECOEF_Q31_128))
|
||||
case 128U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_128;
|
||||
S->pTwiddle = (q31_t *)twiddleCoef_128_q31;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q31(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_64) && defined(ARM_TABLE_TWIDDLECOEF_Q31_64))
|
||||
case 64U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_64;
|
||||
S->pTwiddle = (q31_t *)twiddleCoef_64_q31;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_32) && defined(ARM_TABLE_TWIDDLECOEF_Q31_32))
|
||||
case 32U:
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_32;
|
||||
S->pTwiddle = (q31_t *)twiddleCoef_32_q31;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q31(S, 2);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_BITREVIDX_FXT_16) && defined(ARM_TABLE_TWIDDLECOEF_Q31_16))
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->bitRevLength = ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH;
|
||||
S->pBitRevTable = (uint16_t *)armBitRevIndexTable_fixed_16;
|
||||
S->pTwiddle = (q31_t *)twiddleCoef_16_q31;
|
||||
status=arm_cfft_radix4by2_rearrange_twiddles_q31(S, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return (status);
|
||||
}
|
||||
#else
|
||||
arm_status arm_cfft_init_q31(
|
||||
arm_cfft_instance_q31 * S,
|
||||
uint16_t fftLen)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = NULL;
|
||||
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen) {
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_4096) && defined(ARM_TABLE_BITREVIDX_FXT_4096))
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
case 4096U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(q31,4096);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_2048) && defined(ARM_TABLE_BITREVIDX_FXT_2048))
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(q31,2048);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_1024) && defined(ARM_TABLE_BITREVIDX_FXT_1024))
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(q31,1024);
|
||||
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_512) && defined(ARM_TABLE_BITREVIDX_FXT_512))
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the bit reversal table modifier */
|
||||
FFTINIT(q31,512);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_256) && defined(ARM_TABLE_BITREVIDX_FXT_256))
|
||||
case 256U:
|
||||
FFTINIT(q31,256);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_128) && defined(ARM_TABLE_BITREVIDX_FXT_128))
|
||||
case 128U:
|
||||
FFTINIT(q31,128);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_64) && defined(ARM_TABLE_BITREVIDX_FXT_64))
|
||||
case 64U:
|
||||
FFTINIT(q31,64);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_32) && defined(ARM_TABLE_BITREVIDX_FXT_32))
|
||||
case 32U:
|
||||
FFTINIT(q31,32);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_Q31_16) && defined(ARM_TABLE_BITREVIDX_FXT_16))
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
FFTINIT(q31,16);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return (status);
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
893
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c
Normal file
893
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q15.c
Normal file
@@ -0,0 +1,893 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_q15.c
|
||||
* Description: Combined Radix Decimation in Q15 Frequency CFFT processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_vec_fft.h"
|
||||
|
||||
|
||||
static void _arm_radix4_butterfly_q15_mve(
|
||||
const arm_cfft_instance_q15 * S,
|
||||
q15_t *pSrc,
|
||||
uint32_t fftLen)
|
||||
{
|
||||
q15x8_t vecTmp0, vecTmp1;
|
||||
q15x8_t vecSum0, vecDiff0, vecSum1, vecDiff1;
|
||||
q15x8_t vecA, vecB, vecC, vecD;
|
||||
uint32_t blkCnt;
|
||||
uint32_t n1, n2;
|
||||
uint32_t stage = 0;
|
||||
int32_t iter = 1;
|
||||
static const int32_t strides[4] = {
|
||||
(0 - 16) * (int32_t)sizeof(q15_t *), (4 - 16) * (int32_t)sizeof(q15_t *),
|
||||
(8 - 16) * (int32_t)sizeof(q15_t *), (12 - 16) * (int32_t)sizeof(q15_t *)
|
||||
};
|
||||
|
||||
/*
|
||||
* Process first stages
|
||||
* Each stage in middle stages provides two down scaling of the input
|
||||
*/
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
n2 >>= 2u;
|
||||
|
||||
for (int k = fftLen / 4u; k > 1; k >>= 2u)
|
||||
{
|
||||
q15_t const *p_rearranged_twiddle_tab_stride2 =
|
||||
&S->rearranged_twiddle_stride2[
|
||||
S->rearranged_twiddle_tab_stride2_arr[stage]];
|
||||
q15_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
|
||||
S->rearranged_twiddle_tab_stride3_arr[stage]];
|
||||
q15_t const *p_rearranged_twiddle_tab_stride1 =
|
||||
&S->rearranged_twiddle_stride1[
|
||||
S->rearranged_twiddle_tab_stride1_arr[stage]];
|
||||
|
||||
q15_t * pBase = pSrc;
|
||||
for (int i = 0; i < iter; i++)
|
||||
{
|
||||
q15_t *inA = pBase;
|
||||
q15_t *inB = inA + n2 * CMPLX_DIM;
|
||||
q15_t *inC = inB + n2 * CMPLX_DIM;
|
||||
q15_t *inD = inC + n2 * CMPLX_DIM;
|
||||
q15_t const *pW1 = p_rearranged_twiddle_tab_stride1;
|
||||
q15_t const *pW2 = p_rearranged_twiddle_tab_stride2;
|
||||
q15_t const *pW3 = p_rearranged_twiddle_tab_stride3;
|
||||
q15x8_t vecW;
|
||||
|
||||
blkCnt = n2 / 4;
|
||||
/*
|
||||
* load 4 x q15 complex pair
|
||||
*/
|
||||
vecA = vldrhq_s16(inA);
|
||||
vecC = vldrhq_s16(inC);
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecB = vldrhq_s16(inB);
|
||||
vecD = vldrhq_s16(inD);
|
||||
|
||||
vecSum0 = vhaddq(vecA, vecC);
|
||||
vecDiff0 = vhsubq(vecA, vecC);
|
||||
|
||||
vecSum1 = vhaddq(vecB, vecD);
|
||||
vecDiff1 = vhsubq(vecB, vecD);
|
||||
/*
|
||||
* [ 1 1 1 1 ] * [ A B C D ]' .* 1
|
||||
*/
|
||||
vecTmp0 = vhaddq(vecSum0, vecSum1);
|
||||
vst1q(inA, vecTmp0);
|
||||
inA += 8;
|
||||
/*
|
||||
* [ 1 -1 1 -1 ] * [ A B C D ]'
|
||||
*/
|
||||
vecTmp0 = vhsubq(vecSum0, vecSum1);
|
||||
/*
|
||||
* [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
|
||||
*/
|
||||
vecW = vld1q(pW2);
|
||||
pW2 += 8;
|
||||
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
|
||||
|
||||
vst1q(inB, vecTmp1);
|
||||
inB += 8;
|
||||
/*
|
||||
* [ 1 -i -1 +i ] * [ A B C D ]'
|
||||
*/
|
||||
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
/*
|
||||
* [ 1 -i -1 +i ] * [ A B C D ]'.* W1
|
||||
*/
|
||||
vecW = vld1q(pW1);
|
||||
pW1 += 8;
|
||||
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
|
||||
vst1q(inC, vecTmp1);
|
||||
inC += 8;
|
||||
|
||||
/*
|
||||
* [ 1 +i -1 -i ] * [ A B C D ]'
|
||||
*/
|
||||
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
/*
|
||||
* [ 1 +i -1 -i ] * [ A B C D ]'.* W3
|
||||
*/
|
||||
vecW = vld1q(pW3);
|
||||
pW3 += 8;
|
||||
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q15x8_t);
|
||||
vst1q(inD, vecTmp1);
|
||||
inD += 8;
|
||||
|
||||
vecA = vldrhq_s16(inA);
|
||||
vecC = vldrhq_s16(inC);
|
||||
|
||||
blkCnt--;
|
||||
}
|
||||
pBase += CMPLX_DIM * n1;
|
||||
}
|
||||
n1 = n2;
|
||||
n2 >>= 2u;
|
||||
iter = iter << 2;
|
||||
stage++;
|
||||
}
|
||||
|
||||
/*
|
||||
* start of Last stage process
|
||||
*/
|
||||
uint32x4_t vecScGathAddr = vld1q_u32 ((uint32_t*)strides);
|
||||
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
|
||||
|
||||
/*
|
||||
* load scheduling
|
||||
*/
|
||||
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
|
||||
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
|
||||
|
||||
blkCnt = (fftLen >> 4);
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecSum0 = vhaddq(vecA, vecC);
|
||||
vecDiff0 = vhsubq(vecA, vecC);
|
||||
|
||||
vecB = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 4);
|
||||
vecD = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 12);
|
||||
|
||||
vecSum1 = vhaddq(vecB, vecD);
|
||||
vecDiff1 = vhsubq(vecB, vecD);
|
||||
/*
|
||||
* pre-load for next iteration
|
||||
*/
|
||||
vecA = (q15x8_t) vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
|
||||
vecC = (q15x8_t) vldrwq_gather_base_s32(vecScGathAddr, 8);
|
||||
|
||||
vecTmp0 = vhaddq(vecSum0, vecSum1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64, (int32x4_t) vecTmp0);
|
||||
|
||||
vecTmp0 = vhsubq(vecSum0, vecSum1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 4, (int32x4_t) vecTmp0);
|
||||
|
||||
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, (int32x4_t) vecTmp0);
|
||||
|
||||
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 12, (int32x4_t) vecTmp0);
|
||||
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
 * Forward Q15 CFFT built from one radix-2 pass followed by two radix-4
 * transforms of half the length, in place, using MVE intrinsics.
 *
 *   S       instance supplying pTwiddle for the radix-2 pass and the tables
 *           used by _arm_radix4_butterfly_q15_mve()
 *   pSrc    complex Q15 buffer (2 * fftLen values) that is overwritten
 *   fftLen  number of complex points
 */
static void arm_cfft_radix4by2_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen)
{
    const uint32_t  half = fftLen >> 1;
    q15_t          *pLo  = pSrc;
    q15_t          *pHi  = pSrc + fftLen;     /* second half of the buffer */
    q15_t const    *pTw  = S->pTwiddle;
    uint32_t        vecLeft;
    q15x8_t         lo, hi, sum, dif;
    q15x8_t         rotated, tw;

    /* Radix-2 pass: 4 complex values from each half per iteration. */
    for (vecLeft = half / 4; vecLeft > 0U; vecLeft--)
    {
        lo = *(q15x8_t *) pLo;
        hi = *(q15x8_t *) pHi;

        /* One bit of extra headroom before the halving add / sub. */
        lo = lo >> 1;
        hi = hi >> 1;
        sum = vhaddq(lo, hi);
        vst1q(pLo, sum);
        pLo += 8;

        tw = vld1q(pTw);
        pTw += 8;

        dif = vhsubq(lo, hi);
        rotated = MVE_CMPLX_MULT_FX_AxConjB(dif, tw, q15x8_t);
        vst1q(pHi, rotated);
        pHi += 8;
    }

    /* Two independent radix-4 transforms, one per half. */
    _arm_radix4_butterfly_q15_mve(S, pSrc, half);

    _arm_radix4_butterfly_q15_mve(S, pSrc + fftLen, half);

    /* Shift every value of the buffer left by one bit, 8 values at a time. */
    pLo = pSrc;
    for (vecLeft = (fftLen << 1) >> 3; vecLeft > 0U; vecLeft--)
    {
        lo = *(q15x8_t *) pLo;
        lo = lo << 1;
        vst1q(pLo, lo);
        pLo += 8;
    }

    /* Remaining values (fewer than 8), stored under a lane predicate. */
    vecLeft = (fftLen << 1) & 7;
    if (vecLeft > 0U)
    {
        mve_pred16_t tailPred = vctp16q(vecLeft);

        lo = *(q15x8_t *) pLo;
        lo = lo << 1;
        vstrhq_p(pLo, lo, tailPred);
    }
}
|
||||
|
||||
/*
 * Inverse radix-4 butterflies on interleaved complex Q15 data, in place,
 * written with MVE intrinsics.
 *
 *   S       instance providing the rearranged twiddle tables (stride 1, 2, 3)
 *           and the per-stage start offsets into them
 *   pSrc    complex Q15 buffer that is read and overwritten
 *   fftLen  number of complex points processed by this call
 *
 * Compared with the forward routine, the +i / -i combinations are swapped
 * and the twiddle products use MVE_CMPLX_MULT_FX_AxConjB.
 */
static void _arm_radix4_butterfly_inverse_q15_mve(const arm_cfft_instance_q15 *S,q15_t *pSrc, uint32_t fftLen)
{
    q15x8_t   xA, xB, xC, xD;
    q15x8_t   sumAC, difAC, sumBD, difBD;
    q15x8_t   acc, accTw;
    uint32_t  vecLeft;
    uint32_t  span, quarter;
    uint32_t  stageIdx = 0;
    int32_t   groups = 1;
    /*
     * Byte offsets of the four inputs of the last stage, biased by -16
     * units because the write-back gather below adds 64 before it loads.
     * NOTE(review): the unit is spelled sizeof(q15_t *); that matches one
     * complex Q15 pair only where pointers are 4 bytes wide - confirm.
     */
    static const int32_t lastStageOffs[4] = {
        (0 - 16) * (int32_t)sizeof(q15_t *), (4 - 16) * (int32_t)sizeof(q15_t *),
        (8 - 16) * (int32_t)sizeof(q15_t *), (12 - 16) * (int32_t)sizeof(q15_t *)
    };

    /* span = points per group, quarter = distance between butterfly legs */
    span    = fftLen;
    quarter = fftLen >> 2u;

    /* All stages except the last one. */
    for (int left = fftLen / 4u; left > 1; left >>= 2u)
    {
        q15_t const *tw1Base =
            &S->rearranged_twiddle_stride1[
            S->rearranged_twiddle_tab_stride1_arr[stageIdx]];
        q15_t const *tw2Base =
            &S->rearranged_twiddle_stride2[
            S->rearranged_twiddle_tab_stride2_arr[stageIdx]];
        q15_t const *tw3Base =
            &S->rearranged_twiddle_stride3[
            S->rearranged_twiddle_tab_stride3_arr[stageIdx]];

        q15_t *grp = pSrc;

        for (int g = 0; g < groups; g++)
        {
            q15_t       *pA  = grp;
            q15_t       *pB  = pA + quarter * CMPLX_DIM;
            q15_t       *pC  = pB + quarter * CMPLX_DIM;
            q15_t       *pD  = pC + quarter * CMPLX_DIM;
            q15_t const *tw1 = tw1Base;
            q15_t const *tw2 = tw2Base;
            q15_t const *tw3 = tw3Base;
            q15x8_t      twVec;

            /* Pre-load legs A and C (4 complex Q15 values per vector). */
            xA = vldrhq_s16(pA);
            xC = vldrhq_s16(pC);

            for (vecLeft = quarter / 4; vecLeft > 0U; vecLeft--)
            {
                xB = vldrhq_s16(pB);
                xD = vldrhq_s16(pD);

                sumAC = vhaddq(xA, xC);
                difAC = vhsubq(xA, xC);

                sumBD = vhaddq(xB, xD);
                difBD = vhsubq(xB, xD);

                /* A' = (A + C) + (B + D), no twiddle */
                acc = vhaddq(sumAC, sumBD);
                vst1q(pA, acc);
                pA += 8;

                /* B' = ((A + C) - (B + D)) combined with W2 */
                acc = vhsubq(sumAC, sumBD);
                twVec = vld1q(tw2);
                tw2 += 8;
                accTw = MVE_CMPLX_MULT_FX_AxConjB(acc, twVec, q15x8_t);

                vst1q(pB, accTw);
                pB += 8;

                /* C' = ((A - C) + i(B - D)) combined with W1 */
                acc = MVE_CMPLX_ADD_FX_A_ixB(difAC, difBD);
                twVec = vld1q(tw1);
                tw1 += 8;
                accTw = MVE_CMPLX_MULT_FX_AxConjB(acc, twVec, q15x8_t);
                vst1q(pC, accTw);
                pC += 8;

                /* D' = ((A - C) - i(B - D)) combined with W3 */
                acc = MVE_CMPLX_SUB_FX_A_ixB(difAC, difBD);
                twVec = vld1q(tw3);
                tw3 += 8;
                accTw = MVE_CMPLX_MULT_FX_AxConjB(acc, twVec, q15x8_t);
                vst1q(pD, accTw);
                pD += 8;

                /* Legs A and C of the next vector. */
                xA = vldrhq_s16(pA);
                xC = vldrhq_s16(pC);
            }
            grp += CMPLX_DIM * span;
        }
        span = quarter;
        quarter >>= 2u;
        groups <<= 2;
        stageIdx++;
    }

    /*
     * Last stage: twiddle-free butterflies, accessed through gather loads
     * and scatter stores relative to a vector of base addresses.
     */
    uint32x4_t gatherBase = vld1q_u32((uint32_t*)lastStageOffs);
    gatherBase = gatherBase + (uint32_t) pSrc;

    /* Pre-load A and C of the first block; the base advances by 64. */
    xA = (q15x8_t) vldrwq_gather_base_wb_s32(&gatherBase, 64);
    xC = (q15x8_t) vldrwq_gather_base_s32(gatherBase, 8);

    for (vecLeft = (fftLen >> 4); vecLeft > 0U; vecLeft--)
    {
        sumAC = vhaddq(xA, xC);
        difAC = vhsubq(xA, xC);

        xB = (q15x8_t) vldrwq_gather_base_s32(gatherBase, 4);
        xD = (q15x8_t) vldrwq_gather_base_s32(gatherBase, 12);

        sumBD = vhaddq(xB, xD);
        difBD = vhsubq(xB, xD);

        /* Fetch A and C of the next block before storing this one. */
        xA = (q15x8_t) vldrwq_gather_base_wb_s32(&gatherBase, 64);
        xC = (q15x8_t) vldrwq_gather_base_s32(gatherBase, 8);

        /* The base already points at the next block, hence the -64 bias. */
        acc = vhaddq(sumAC, sumBD);
        vstrwq_scatter_base_s32(gatherBase, -64, (int32x4_t) acc);

        acc = vhsubq(sumAC, sumBD);
        vstrwq_scatter_base_s32(gatherBase, -64 + 4, (int32x4_t) acc);

        acc = MVE_CMPLX_ADD_FX_A_ixB(difAC, difBD);
        vstrwq_scatter_base_s32(gatherBase, -64 + 8, (int32x4_t) acc);

        acc = MVE_CMPLX_SUB_FX_A_ixB(difAC, difBD);
        vstrwq_scatter_base_s32(gatherBase, -64 + 12, (int32x4_t) acc);
    }
}
|
||||
|
||||
/*
 * Inverse Q15 CFFT built from one radix-2 pass followed by two inverse
 * radix-4 transforms of half the length, in place, using MVE intrinsics.
 *
 *   S       instance supplying pTwiddle for the radix-2 pass and the tables
 *           used by _arm_radix4_butterfly_inverse_q15_mve()
 *   pSrc    complex Q15 buffer (2 * fftLen values) that is overwritten
 *   fftLen  number of complex points
 */
static void arm_cfft_radix4by2_inverse_q15_mve(const arm_cfft_instance_q15 *S, q15_t *pSrc, uint32_t fftLen)
{
    uint32_t n2;
    q15_t *pIn0;
    q15_t *pIn1;
    const q15_t *pCoef = S->pTwiddle;

    uint32_t     blkCnt;
    q15x8_t    vecIn0, vecIn1, vecSum, vecDiff;
    q15x8_t    vecCmplxTmp, vecTw;
    q15_t  const *pCoefVec;

    n2 = fftLen >> 1;

    pIn0 = pSrc;
    pIn1 = pSrc + fftLen;       /* second half of the buffer */
    pCoefVec = pCoef;

    /* Radix-2 pass: 4 complex values from each half per iteration. */
    blkCnt = n2 / 4;

    while (blkCnt > 0U)
    {
        vecIn0 = *(q15x8_t *) pIn0;
        vecIn1 = *(q15x8_t *) pIn1;

        /* One bit of extra headroom before the halving add / sub. */
        vecIn0 = vecIn0 >> 1;
        vecIn1 = vecIn1 >> 1;
        vecSum = vhaddq(vecIn0, vecIn1);
        vst1q(pIn0, vecSum);
        pIn0 += 8;

        vecTw = vld1q(pCoefVec);
        pCoefVec += 8;

        /* Difference combined with the twiddles in two accumulate steps. */
        vecDiff = vhsubq(vecIn0, vecIn1);
        vecCmplxTmp = vqrdmlsdhq(vuninitializedq_s16() , vecDiff, vecTw);
        vecCmplxTmp = vqrdmladhxq(vecCmplxTmp, vecDiff, vecTw);
        vst1q(pIn1, vecCmplxTmp);
        pIn1 += 8;

        blkCnt--;
    }

    /* Two independent inverse radix-4 transforms, one per half. */
    _arm_radix4_butterfly_inverse_q15_mve(S, pSrc, n2);

    _arm_radix4_butterfly_inverse_q15_mve(S, pSrc + fftLen, n2);

    /* Shift every value of the buffer left by one bit, 8 values at a time. */
    pIn0 = pSrc;
    blkCnt = (fftLen << 1) >> 3;
    while (blkCnt > 0U)
    {
        vecIn0 = *(q15x8_t *) pIn0;
        vecIn0 = vecIn0 << 1;
        vst1q(pIn0, vecIn0);
        pIn0 += 8;
        blkCnt--;
    }

    /*
     * Tail: remaining values (fewer than 8), stored under a lane predicate.
     * This must be a single conditional pass: blkCnt is not modified in the
     * body, so a loop here would never terminate for a non-zero remainder.
     */
    blkCnt = (fftLen << 1) & 7;
    if (blkCnt > 0U)
    {
        mve_pred16_t p0 = vctp16q(blkCnt);

        vecIn0 = *(q15x8_t *) pIn0;
        vecIn0 = vecIn0 << 1;
        vstrhq_p(pIn0, vecIn0, p0);
    }
}
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for Q15 complex FFT.
|
||||
@param[in] S points to an instance of Q15 CFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return none
|
||||
*/
|
||||
void arm_cfft_q15(
|
||||
const arm_cfft_instance_q15 * S,
|
||||
q15_t * pSrc,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
uint32_t fftLen = S->fftLen;
|
||||
|
||||
if (ifftFlag == 1U) {
|
||||
|
||||
switch (fftLen) {
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
_arm_radix4_butterfly_inverse_q15_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_inverse_q15_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (fftLen) {
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
_arm_radix4_butterfly_q15_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_q15_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (bitReverseFlag)
|
||||
{
|
||||
|
||||
arm_bitreversal_16_inpl_mve((uint16_t*)pSrc, S->bitRevLength, S->pBitRevTable);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
extern void arm_radix4_butterfly_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef,
|
||||
uint32_t twidCoefModifier);
|
||||
|
||||
extern void arm_radix4_butterfly_inverse_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef,
|
||||
uint32_t twidCoefModifier);
|
||||
|
||||
extern void arm_bitreversal_16(
|
||||
uint16_t * pSrc,
|
||||
const uint16_t bitRevLen,
|
||||
const uint16_t * pBitRevTable);
|
||||
|
||||
void arm_cfft_radix4by2_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef);
|
||||
|
||||
void arm_cfft_radix4by2_inverse_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for Q15 complex FFT.
|
||||
@param[in] S points to an instance of Q15 CFFT structure
|
||||
@param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_q15(
|
||||
const arm_cfft_instance_q15 * S,
|
||||
q15_t * p1,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
uint32_t L = S->fftLen;
|
||||
|
||||
if (ifftFlag == 1U)
|
||||
{
|
||||
switch (L)
|
||||
{
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
arm_radix4_butterfly_inverse_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_inverse_q15 ( p1, L, S->pTwiddle );
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (L)
|
||||
{
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
arm_radix4_butterfly_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_q15 ( p1, L, S->pTwiddle );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( bitReverseFlag )
|
||||
arm_bitreversal_16 ((uint16_t*) p1, S->bitRevLength, S->pBitRevTable);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
void arm_cfft_radix4by2_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t n2;
|
||||
q15_t p0, p1, p2, p3;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t T, S, R;
|
||||
q31_t coeff, out1, out2;
|
||||
const q15_t *pC = pCoef;
|
||||
q15_t *pSi = pSrc;
|
||||
q15_t *pSl = pSrc + fftLen;
|
||||
#else
|
||||
uint32_t l;
|
||||
q15_t xt, yt, cosVal, sinVal;
|
||||
#endif
|
||||
|
||||
n2 = fftLen >> 1U;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
for (i = n2; i > 0; i--)
|
||||
{
|
||||
coeff = read_q15x2_ia (&pC);
|
||||
|
||||
T = read_q15x2 (pSi);
|
||||
T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
|
||||
|
||||
S = read_q15x2 (pSl);
|
||||
S = __SHADD16(S, 0); /* this is just a SIMD arithmetic shift right by 1 */
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2_ia (&pSi, __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUAD(coeff, R) >> 16U;
|
||||
out2 = __SMUSDX(coeff, R);
|
||||
#else
|
||||
out1 = __SMUSDX(R, coeff) >> 16U;
|
||||
out2 = __SMUAD(coeff, R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2_ia (&pSl, (q31_t)__PKHBT( out1, out2, 0 ) );
|
||||
}
|
||||
|
||||
#else /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[2 * i];
|
||||
sinVal = pCoef[2 * i + 1];
|
||||
|
||||
l = i + n2;
|
||||
|
||||
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
|
||||
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
|
||||
|
||||
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
|
||||
pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
|
||||
|
||||
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) +
|
||||
((int16_t) (((q31_t) yt * sinVal) >> 16U)) );
|
||||
|
||||
pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) -
|
||||
((int16_t) (((q31_t) xt * sinVal) >> 16U)) );
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
/* first col */
|
||||
arm_radix4_butterfly_q15( pSrc, n2, (q15_t*)pCoef, 2U);
|
||||
|
||||
/* second col */
|
||||
arm_radix4_butterfly_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);
|
||||
|
||||
n2 = fftLen >> 1U;
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
p0 = pSrc[4 * i + 0];
|
||||
p1 = pSrc[4 * i + 1];
|
||||
p2 = pSrc[4 * i + 2];
|
||||
p3 = pSrc[4 * i + 3];
|
||||
|
||||
p0 <<= 1U;
|
||||
p1 <<= 1U;
|
||||
p2 <<= 1U;
|
||||
p3 <<= 1U;
|
||||
|
||||
pSrc[4 * i + 0] = p0;
|
||||
pSrc[4 * i + 1] = p1;
|
||||
pSrc[4 * i + 2] = p2;
|
||||
pSrc[4 * i + 3] = p3;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void arm_cfft_radix4by2_inverse_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t n2;
|
||||
q15_t p0, p1, p2, p3;
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t T, S, R;
|
||||
q31_t coeff, out1, out2;
|
||||
const q15_t *pC = pCoef;
|
||||
q15_t *pSi = pSrc;
|
||||
q15_t *pSl = pSrc + fftLen;
|
||||
#else
|
||||
uint32_t l;
|
||||
q15_t xt, yt, cosVal, sinVal;
|
||||
#endif
|
||||
|
||||
n2 = fftLen >> 1U;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
for (i = n2; i > 0; i--)
|
||||
{
|
||||
coeff = read_q15x2_ia (&pC);
|
||||
|
||||
T = read_q15x2 (pSi);
|
||||
T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
|
||||
|
||||
S = read_q15x2 (pSl);
|
||||
S = __SHADD16(S, 0); /* this is just a SIMD arithmetic shift right by 1 */
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2_ia (&pSi, __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUSD(coeff, R) >> 16U;
|
||||
out2 = __SMUADX(coeff, R);
|
||||
#else
|
||||
out1 = __SMUADX(R, coeff) >> 16U;
|
||||
out2 = __SMUSD(__QSUB(0, coeff), R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2_ia (&pSl, (q31_t)__PKHBT( out1, out2, 0 ));
|
||||
}
|
||||
|
||||
#else /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[2 * i];
|
||||
sinVal = pCoef[2 * i + 1];
|
||||
|
||||
l = i + n2;
|
||||
|
||||
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
|
||||
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
|
||||
|
||||
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
|
||||
pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
|
||||
|
||||
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) -
|
||||
((int16_t) (((q31_t) yt * sinVal) >> 16U)) );
|
||||
|
||||
pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) +
|
||||
((int16_t) (((q31_t) xt * sinVal) >> 16U)) );
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
/* first col */
|
||||
arm_radix4_butterfly_inverse_q15( pSrc, n2, (q15_t*)pCoef, 2U);
|
||||
|
||||
/* second col */
|
||||
arm_radix4_butterfly_inverse_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);
|
||||
|
||||
n2 = fftLen >> 1U;
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
p0 = pSrc[4 * i + 0];
|
||||
p1 = pSrc[4 * i + 1];
|
||||
p2 = pSrc[4 * i + 2];
|
||||
p3 = pSrc[4 * i + 3];
|
||||
|
||||
p0 <<= 1U;
|
||||
p1 <<= 1U;
|
||||
p2 <<= 1U;
|
||||
p3 <<= 1U;
|
||||
|
||||
pSrc[4 * i + 0] = p0;
|
||||
pSrc[4 * i + 1] = p1;
|
||||
pSrc[4 * i + 2] = p2;
|
||||
pSrc[4 * i + 3] = p3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
847
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c
Normal file
847
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_q31.c
Normal file
@@ -0,0 +1,847 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_q31.c
|
||||
* Description: Combined Radix Decimation in Frequency CFFT fixed point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
|
||||
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_vec_fft.h"
|
||||
|
||||
|
||||
static void _arm_radix4_butterfly_q31_mve(
|
||||
const arm_cfft_instance_q31 * S,
|
||||
q31_t *pSrc,
|
||||
uint32_t fftLen)
|
||||
{
|
||||
q31x4_t vecTmp0, vecTmp1;
|
||||
q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
|
||||
q31x4_t vecA, vecB, vecC, vecD;
|
||||
uint32_t blkCnt;
|
||||
uint32_t n1, n2;
|
||||
uint32_t stage = 0;
|
||||
int32_t iter = 1;
|
||||
static const int32_t strides[4] = {
|
||||
(0 - 16) * (int32_t)sizeof(q31_t *), (1 - 16) * (int32_t)sizeof(q31_t *),
|
||||
(8 - 16) * (int32_t)sizeof(q31_t *), (9 - 16) * (int32_t)sizeof(q31_t *)
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Process first stages
|
||||
* Each stage in middle stages provides two down scaling of the input
|
||||
*/
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
n2 >>= 2u;
|
||||
|
||||
for (int k = fftLen / 4u; k > 1; k >>= 2u)
|
||||
{
|
||||
q31_t const *p_rearranged_twiddle_tab_stride2 =
|
||||
&S->rearranged_twiddle_stride2[
|
||||
S->rearranged_twiddle_tab_stride2_arr[stage]];
|
||||
q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
|
||||
S->rearranged_twiddle_tab_stride3_arr[stage]];
|
||||
q31_t const *p_rearranged_twiddle_tab_stride1 =
|
||||
&S->rearranged_twiddle_stride1[
|
||||
S->rearranged_twiddle_tab_stride1_arr[stage]];
|
||||
|
||||
q31_t * pBase = pSrc;
|
||||
for (int i = 0; i < iter; i++)
|
||||
{
|
||||
q31_t *inA = pBase;
|
||||
q31_t *inB = inA + n2 * CMPLX_DIM;
|
||||
q31_t *inC = inB + n2 * CMPLX_DIM;
|
||||
q31_t *inD = inC + n2 * CMPLX_DIM;
|
||||
q31_t const *pW1 = p_rearranged_twiddle_tab_stride1;
|
||||
q31_t const *pW2 = p_rearranged_twiddle_tab_stride2;
|
||||
q31_t const *pW3 = p_rearranged_twiddle_tab_stride3;
|
||||
q31x4_t vecW;
|
||||
|
||||
|
||||
blkCnt = n2 / 2;
|
||||
/*
|
||||
* load 2 x q31 complex pair
|
||||
*/
|
||||
vecA = vldrwq_s32(inA);
|
||||
vecC = vldrwq_s32(inC);
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecB = vldrwq_s32(inB);
|
||||
vecD = vldrwq_s32(inD);
|
||||
|
||||
vecSum0 = vhaddq(vecA, vecC);
|
||||
vecDiff0 = vhsubq(vecA, vecC);
|
||||
|
||||
vecSum1 = vhaddq(vecB, vecD);
|
||||
vecDiff1 = vhsubq(vecB, vecD);
|
||||
/*
|
||||
* [ 1 1 1 1 ] * [ A B C D ]' .* 1
|
||||
*/
|
||||
vecTmp0 = vhaddq(vecSum0, vecSum1);
|
||||
vst1q(inA, vecTmp0);
|
||||
inA += 4;
|
||||
/*
|
||||
* [ 1 -1 1 -1 ] * [ A B C D ]'
|
||||
*/
|
||||
vecTmp0 = vhsubq(vecSum0, vecSum1);
|
||||
/*
|
||||
* [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
|
||||
*/
|
||||
vecW = vld1q(pW2);
|
||||
pW2 += 4;
|
||||
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
|
||||
|
||||
vst1q(inB, vecTmp1);
|
||||
inB += 4;
|
||||
/*
|
||||
* [ 1 -i -1 +i ] * [ A B C D ]'
|
||||
*/
|
||||
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
/*
|
||||
* [ 1 -i -1 +i ] * [ A B C D ]'.* W1
|
||||
*/
|
||||
vecW = vld1q(pW1);
|
||||
pW1 += 4;
|
||||
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
|
||||
vst1q(inC, vecTmp1);
|
||||
inC += 4;
|
||||
/*
|
||||
* [ 1 +i -1 -i ] * [ A B C D ]'
|
||||
*/
|
||||
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
/*
|
||||
* [ 1 +i -1 -i ] * [ A B C D ]'.* W3
|
||||
*/
|
||||
vecW = vld1q(pW3);
|
||||
pW3 += 4;
|
||||
vecTmp1 = MVE_CMPLX_MULT_FX_AxB(vecW, vecTmp0, q31x4_t);
|
||||
vst1q(inD, vecTmp1);
|
||||
inD += 4;
|
||||
|
||||
vecA = vldrwq_s32(inA);
|
||||
vecC = vldrwq_s32(inC);
|
||||
|
||||
blkCnt--;
|
||||
}
|
||||
pBase += CMPLX_DIM * n1;
|
||||
}
|
||||
n1 = n2;
|
||||
n2 >>= 2u;
|
||||
iter = iter << 2;
|
||||
stage++;
|
||||
}
|
||||
|
||||
/*
|
||||
* End of 1st stages process
|
||||
* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages
|
||||
* data is in 9.23(q23) format for the 256 point as there are 2 middle stages
|
||||
* data is in 7.25(q25) format for the 64 point as there are 1 middle stage
|
||||
* data is in 5.27(q27) format for the 16 point as there are no middle stages
|
||||
*/
|
||||
|
||||
/*
|
||||
* start of Last stage process
|
||||
*/
|
||||
uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides);
|
||||
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
|
||||
|
||||
/*
|
||||
* load scheduling
|
||||
*/
|
||||
vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
|
||||
vecC = vldrwq_gather_base_s32(vecScGathAddr, 16);
|
||||
|
||||
blkCnt = (fftLen >> 3);
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecSum0 = vhaddq(vecA, vecC);
|
||||
vecDiff0 = vhsubq(vecA, vecC);
|
||||
|
||||
vecB = vldrwq_gather_base_s32(vecScGathAddr, 8);
|
||||
vecD = vldrwq_gather_base_s32(vecScGathAddr, 24);
|
||||
|
||||
vecSum1 = vhaddq(vecB, vecD);
|
||||
vecDiff1 = vhsubq(vecB, vecD);
|
||||
/*
|
||||
* pre-load for next iteration
|
||||
*/
|
||||
vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
|
||||
vecC = vldrwq_gather_base_s32(vecScGathAddr, 16);
|
||||
|
||||
vecTmp0 = vhaddq(vecSum0, vecSum1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64, vecTmp0);
|
||||
|
||||
vecTmp0 = vhsubq(vecSum0, vecSum1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, vecTmp0);
|
||||
|
||||
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 16, vecTmp0);
|
||||
|
||||
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 24, vecTmp0);
|
||||
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/*
|
||||
* output is in 11.21(q21) format for the 1024 point
|
||||
* output is in 9.23(q23) format for the 256 point
|
||||
* output is in 7.25(q25) format for the 64 point
|
||||
* output is in 5.27(q27) format for the 16 point
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
/*
 * Forward Q31 CFFT for lengths of the form 2 * 4^k (MVE implementation):
 * one radix-2 stage, a radix-4 butterfly on each half, then a left shift of
 * the whole buffer by one bit.
 */
static void arm_cfft_radix4by2_q31_mve(const arm_cfft_instance_q31 *S, q31_t *pSrc, uint32_t fftLen)
{
    const uint32_t halfLen = fftLen >> 1;
    const q31_t   *pTw  = S->pTwiddle;
    q31_t         *pTop = pSrc;            /* first half of the buffer  */
    q31_t         *pBot = pSrc + fftLen;   /* second half of the buffer */
    uint32_t       cnt;
    q31x4_t        vecTop, vecBot, vecSum, vecDiff;
    q31x4_t        vecProd, vecTw;

    /* radix-2 stage: 4 q31 values (2 complex samples) per iteration */
    for (cnt = halfLen / 2; cnt > 0U; cnt--)
    {
        vecTop = vld1q_s32(pTop);
        vecBot = vld1q_s32(pBot);

        /* pre-scale both inputs by 1/2 before the halving add/sub */
        vecTop = vecTop >> 1;
        vecBot = vecBot >> 1;
        vecSum = vhaddq(vecTop, vecBot);
        vst1q(pTop, vecSum);
        pTop += 4;

        vecTw = vld1q_s32(pTw);
        pTw += 4;
        vecDiff = vhsubq(vecTop, vecBot);

        vecProd = MVE_CMPLX_MULT_FX_AxConjB(vecDiff, vecTw, q31x4_t);
        vst1q(pBot, vecProd);
        pBot += 4;
    }

    /* radix-4 butterflies on each half */
    _arm_radix4_butterfly_q31_mve(S, pSrc, halfLen);

    _arm_radix4_butterfly_q31_mve(S, pSrc + fftLen, halfLen);

    /* final scaling: shift all 2*fftLen q31 values left by one bit */
    pTop = pSrc;
    for (cnt = (fftLen << 1) >> 2; cnt > 0U; cnt--)
    {
        vecTop = vld1q_s32(pTop);
        vecTop = vecTop << 1;
        vst1q(pTop, vecTop);
        pTop += 4;
    }

    /*
     * tail
     * (will be merged thru tail predication)
     */
    cnt = (fftLen << 1) & 3;
    if (cnt > 0U)
    {
        mve_pred16_t pred = vctp32q(cnt);

        vecTop = vld1q_s32(pTop);
        vecTop = vecTop << 1;
        vstrwq_p(pTop, vecTop, pred);
    }
}
|
||||
|
||||
static void _arm_radix4_butterfly_inverse_q31_mve(
|
||||
const arm_cfft_instance_q31 *S,
|
||||
q31_t *pSrc,
|
||||
uint32_t fftLen)
|
||||
{
|
||||
q31x4_t vecTmp0, vecTmp1;
|
||||
q31x4_t vecSum0, vecDiff0, vecSum1, vecDiff1;
|
||||
q31x4_t vecA, vecB, vecC, vecD;
|
||||
uint32_t blkCnt;
|
||||
uint32_t n1, n2;
|
||||
uint32_t stage = 0;
|
||||
int32_t iter = 1;
|
||||
static const int32_t strides[4] = {
|
||||
(0 - 16) * (int32_t)sizeof(q31_t *), (1 - 16) * (int32_t)sizeof(q31_t *),
|
||||
(8 - 16) * (int32_t)sizeof(q31_t *), (9 - 16) * (int32_t)sizeof(q31_t *)
|
||||
};
|
||||
|
||||
/*
|
||||
* Process first stages
|
||||
* Each stage in middle stages provides two down scaling of the input
|
||||
*/
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
n2 >>= 2u;
|
||||
|
||||
for (int k = fftLen / 4u; k > 1; k >>= 2u)
|
||||
{
|
||||
q31_t const *p_rearranged_twiddle_tab_stride2 =
|
||||
&S->rearranged_twiddle_stride2[
|
||||
S->rearranged_twiddle_tab_stride2_arr[stage]];
|
||||
q31_t const *p_rearranged_twiddle_tab_stride3 = &S->rearranged_twiddle_stride3[
|
||||
S->rearranged_twiddle_tab_stride3_arr[stage]];
|
||||
q31_t const *p_rearranged_twiddle_tab_stride1 =
|
||||
&S->rearranged_twiddle_stride1[
|
||||
S->rearranged_twiddle_tab_stride1_arr[stage]];
|
||||
|
||||
q31_t * pBase = pSrc;
|
||||
for (int i = 0; i < iter; i++)
|
||||
{
|
||||
q31_t *inA = pBase;
|
||||
q31_t *inB = inA + n2 * CMPLX_DIM;
|
||||
q31_t *inC = inB + n2 * CMPLX_DIM;
|
||||
q31_t *inD = inC + n2 * CMPLX_DIM;
|
||||
q31_t const *pW1 = p_rearranged_twiddle_tab_stride1;
|
||||
q31_t const *pW2 = p_rearranged_twiddle_tab_stride2;
|
||||
q31_t const *pW3 = p_rearranged_twiddle_tab_stride3;
|
||||
q31x4_t vecW;
|
||||
|
||||
blkCnt = n2 / 2;
|
||||
/*
|
||||
* load 2 x q31 complex pair
|
||||
*/
|
||||
vecA = vldrwq_s32(inA);
|
||||
vecC = vldrwq_s32(inC);
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecB = vldrwq_s32(inB);
|
||||
vecD = vldrwq_s32(inD);
|
||||
|
||||
vecSum0 = vhaddq(vecA, vecC);
|
||||
vecDiff0 = vhsubq(vecA, vecC);
|
||||
|
||||
vecSum1 = vhaddq(vecB, vecD);
|
||||
vecDiff1 = vhsubq(vecB, vecD);
|
||||
/*
|
||||
* [ 1 1 1 1 ] * [ A B C D ]' .* 1
|
||||
*/
|
||||
vecTmp0 = vhaddq(vecSum0, vecSum1);
|
||||
vst1q(inA, vecTmp0);
|
||||
inA += 4;
|
||||
/*
|
||||
* [ 1 -1 1 -1 ] * [ A B C D ]'
|
||||
*/
|
||||
vecTmp0 = vhsubq(vecSum0, vecSum1);
|
||||
/*
|
||||
* [ 1 -1 1 -1 ] * [ A B C D ]'.* W2
|
||||
*/
|
||||
vecW = vld1q(pW2);
|
||||
pW2 += 4;
|
||||
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
|
||||
|
||||
vst1q(inB, vecTmp1);
|
||||
inB += 4;
|
||||
/*
|
||||
* [ 1 -i -1 +i ] * [ A B C D ]'
|
||||
*/
|
||||
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
/*
|
||||
* [ 1 -i -1 +i ] * [ A B C D ]'.* W1
|
||||
*/
|
||||
vecW = vld1q(pW1);
|
||||
pW1 += 4;
|
||||
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
|
||||
vst1q(inC, vecTmp1);
|
||||
inC += 4;
|
||||
/*
|
||||
* [ 1 +i -1 -i ] * [ A B C D ]'
|
||||
*/
|
||||
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
/*
|
||||
* [ 1 +i -1 -i ] * [ A B C D ]'.* W3
|
||||
*/
|
||||
vecW = vld1q(pW3);
|
||||
pW3 += 4;
|
||||
vecTmp1 = MVE_CMPLX_MULT_FX_AxConjB(vecTmp0, vecW, q31x4_t);
|
||||
vst1q(inD, vecTmp1);
|
||||
inD += 4;
|
||||
|
||||
vecA = vldrwq_s32(inA);
|
||||
vecC = vldrwq_s32(inC);
|
||||
|
||||
blkCnt--;
|
||||
}
|
||||
pBase += CMPLX_DIM * n1;
|
||||
}
|
||||
n1 = n2;
|
||||
n2 >>= 2u;
|
||||
iter = iter << 2;
|
||||
stage++;
|
||||
}
|
||||
|
||||
/*
|
||||
* End of 1st stages process
|
||||
* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages
|
||||
* data is in 9.23(q23) format for the 256 point as there are 2 middle stages
|
||||
* data is in 7.25(q25) format for the 64 point as there are 1 middle stage
|
||||
* data is in 5.27(q27) format for the 16 point as there are no middle stages
|
||||
*/
|
||||
|
||||
/*
|
||||
* start of Last stage process
|
||||
*/
|
||||
uint32x4_t vecScGathAddr = vld1q_u32((uint32_t*)strides);
|
||||
vecScGathAddr = vecScGathAddr + (uint32_t) pSrc;
|
||||
|
||||
/*
|
||||
* load scheduling
|
||||
*/
|
||||
vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
|
||||
vecC = vldrwq_gather_base_s32(vecScGathAddr, 16);
|
||||
|
||||
blkCnt = (fftLen >> 3);
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vecSum0 = vhaddq(vecA, vecC);
|
||||
vecDiff0 = vhsubq(vecA, vecC);
|
||||
|
||||
vecB = vldrwq_gather_base_s32(vecScGathAddr, 8);
|
||||
vecD = vldrwq_gather_base_s32(vecScGathAddr, 24);
|
||||
|
||||
vecSum1 = vhaddq(vecB, vecD);
|
||||
vecDiff1 = vhsubq(vecB, vecD);
|
||||
/*
|
||||
* pre-load for next iteration
|
||||
*/
|
||||
vecA = vldrwq_gather_base_wb_s32(&vecScGathAddr, 64);
|
||||
vecC = vldrwq_gather_base_s32(vecScGathAddr, 16);
|
||||
|
||||
vecTmp0 = vhaddq(vecSum0, vecSum1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64, vecTmp0);
|
||||
|
||||
vecTmp0 = vhsubq(vecSum0, vecSum1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 8, vecTmp0);
|
||||
|
||||
vecTmp0 = MVE_CMPLX_ADD_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 16, vecTmp0);
|
||||
|
||||
vecTmp0 = MVE_CMPLX_SUB_FX_A_ixB(vecDiff0, vecDiff1);
|
||||
vstrwq_scatter_base_s32(vecScGathAddr, -64 + 24, vecTmp0);
|
||||
|
||||
blkCnt--;
|
||||
}
|
||||
/*
|
||||
* output is in 11.21(q21) format for the 1024 point
|
||||
* output is in 9.23(q23) format for the 256 point
|
||||
* output is in 7.25(q25) format for the 64 point
|
||||
* output is in 5.27(q27) format for the 16 point
|
||||
*/
|
||||
}
|
||||
|
||||
/*
 * Inverse Q31 CFFT for lengths of the form 2 * 4^k (MVE implementation):
 * one radix-2 stage, a radix-4 inverse butterfly on each half, then a left
 * shift of the whole buffer by one bit.
 */
static void arm_cfft_radix4by2_inverse_q31_mve(const arm_cfft_instance_q31 *S, q31_t *pSrc, uint32_t fftLen)
{
    const uint32_t halfLen = fftLen >> 1;
    const q31_t   *pTw  = S->pTwiddle;
    q31_t         *pTop = pSrc;            /* first half of the buffer  */
    q31_t         *pBot = pSrc + fftLen;   /* second half of the buffer */
    uint32_t       cnt;
    q31x4_t        vecTop, vecBot, vecSum, vecDiff;
    q31x4_t        vecProd, vecTw;

    /* radix-2 stage: 4 q31 values (2 complex samples) per iteration */
    for (cnt = halfLen / 2; cnt > 0U; cnt--)
    {
        vecTop = vld1q_s32(pTop);
        vecBot = vld1q_s32(pBot);

        /* pre-scale both inputs by 1/2 before the halving add/sub */
        vecTop = vecTop >> 1;
        vecBot = vecBot >> 1;
        vecSum = vhaddq(vecTop, vecBot);
        vst1q(pTop, vecSum);
        pTop += 4;

        /* twiddles are read contiguously */
        vecTw = vld1q_s32(pTw);
        pTw += 4;
        vecDiff = vhsubq(vecTop, vecBot);

        vecProd = MVE_CMPLX_MULT_FX_AxB(vecDiff, vecTw, q31x4_t);
        vst1q(pBot, vecProd);
        pBot += 4;
    }

    /* radix-4 inverse butterflies on each half */
    _arm_radix4_butterfly_inverse_q31_mve(S, pSrc, halfLen);

    _arm_radix4_butterfly_inverse_q31_mve(S, pSrc + fftLen, halfLen);

    /* final scaling: shift all 2*fftLen q31 values left by one bit */
    pTop = pSrc;
    for (cnt = (fftLen << 1) >> 2; cnt > 0U; cnt--)
    {
        vecTop = vld1q_s32(pTop);
        vecTop = vecTop << 1;
        vst1q(pTop, vecTop);
        pTop += 4;
    }

    /*
     * tail
     * (will be merged thru tail predication)
     */
    cnt = (fftLen << 1) & 3;
    if (cnt > 0U)
    {
        mve_pred16_t pred = vctp32q(cnt);

        vecTop = vld1q_s32(pTop);
        vecTop = vecTop << 1;
        vstrwq_p(pTop, vecTop, pred);
    }
}
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the Q31 complex FFT.
|
||||
@param[in] S points to an instance of the fixed-point CFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return none
|
||||
*/
|
||||
void arm_cfft_q31(
|
||||
const arm_cfft_instance_q31 * S,
|
||||
q31_t * pSrc,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
uint32_t fftLen = S->fftLen;
|
||||
|
||||
if (ifftFlag == 1U) {
|
||||
|
||||
switch (fftLen) {
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
_arm_radix4_butterfly_inverse_q31_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_inverse_q31_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (fftLen) {
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
_arm_radix4_butterfly_q31_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_q31_mve(S, pSrc, fftLen);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (bitReverseFlag)
|
||||
{
|
||||
|
||||
arm_bitreversal_32_inpl_mve((uint32_t*)pSrc, S->bitRevLength, S->pBitRevTable);
|
||||
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
extern void arm_radix4_butterfly_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint32_t twidCoefModifier);
|
||||
|
||||
extern void arm_radix4_butterfly_inverse_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint32_t twidCoefModifier);
|
||||
|
||||
extern void arm_bitreversal_32(
|
||||
uint32_t * pSrc,
|
||||
const uint16_t bitRevLen,
|
||||
const uint16_t * pBitRevTable);
|
||||
|
||||
void arm_cfft_radix4by2_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef);
|
||||
|
||||
void arm_cfft_radix4by2_inverse_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef);
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the Q31 complex FFT.
|
||||
@param[in] S points to an instance of the fixed-point CFFT structure
|
||||
@param[in,out] p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return none
|
||||
*/
|
||||
void arm_cfft_q31(
|
||||
const arm_cfft_instance_q31 * S,
|
||||
q31_t * p1,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
uint32_t L = S->fftLen;
|
||||
|
||||
if (ifftFlag == 1U)
|
||||
{
|
||||
switch (L)
|
||||
{
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
arm_radix4_butterfly_inverse_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_inverse_q31 ( p1, L, S->pTwiddle );
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (L)
|
||||
{
|
||||
case 16:
|
||||
case 64:
|
||||
case 256:
|
||||
case 1024:
|
||||
case 4096:
|
||||
arm_radix4_butterfly_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
|
||||
break;
|
||||
|
||||
case 32:
|
||||
case 128:
|
||||
case 512:
|
||||
case 2048:
|
||||
arm_cfft_radix4by2_q31 ( p1, L, S->pTwiddle );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( bitReverseFlag )
|
||||
arm_bitreversal_32 ((uint32_t*) p1, S->bitRevLength, S->pBitRevTable);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
void arm_cfft_radix4by2_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef)
|
||||
{
|
||||
uint32_t i, l;
|
||||
uint32_t n2;
|
||||
q31_t xt, yt, cosVal, sinVal;
|
||||
q31_t p0, p1;
|
||||
|
||||
n2 = fftLen >> 1U;
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[2 * i];
|
||||
sinVal = pCoef[2 * i + 1];
|
||||
|
||||
l = i + n2;
|
||||
|
||||
xt = (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U);
|
||||
pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U);
|
||||
|
||||
yt = (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U);
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U);
|
||||
|
||||
mult_32x32_keep32_R(p0, xt, cosVal);
|
||||
mult_32x32_keep32_R(p1, yt, cosVal);
|
||||
multAcc_32x32_keep32_R(p0, yt, sinVal);
|
||||
multSub_32x32_keep32_R(p1, xt, sinVal);
|
||||
|
||||
pSrc[2 * l] = p0 << 1;
|
||||
pSrc[2 * l + 1] = p1 << 1;
|
||||
}
|
||||
|
||||
|
||||
/* first col */
|
||||
arm_radix4_butterfly_q31 (pSrc, n2, (q31_t*)pCoef, 2U);
|
||||
|
||||
/* second col */
|
||||
arm_radix4_butterfly_q31 (pSrc + fftLen, n2, (q31_t*)pCoef, 2U);
|
||||
|
||||
n2 = fftLen >> 1U;
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
p0 = pSrc[4 * i + 0];
|
||||
p1 = pSrc[4 * i + 1];
|
||||
xt = pSrc[4 * i + 2];
|
||||
yt = pSrc[4 * i + 3];
|
||||
|
||||
p0 <<= 1U;
|
||||
p1 <<= 1U;
|
||||
xt <<= 1U;
|
||||
yt <<= 1U;
|
||||
|
||||
pSrc[4 * i + 0] = p0;
|
||||
pSrc[4 * i + 1] = p1;
|
||||
pSrc[4 * i + 2] = xt;
|
||||
pSrc[4 * i + 3] = yt;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void arm_cfft_radix4by2_inverse_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef)
|
||||
{
|
||||
uint32_t i, l;
|
||||
uint32_t n2;
|
||||
q31_t xt, yt, cosVal, sinVal;
|
||||
q31_t p0, p1;
|
||||
|
||||
n2 = fftLen >> 1U;
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[2 * i];
|
||||
sinVal = pCoef[2 * i + 1];
|
||||
|
||||
l = i + n2;
|
||||
|
||||
xt = (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U);
|
||||
pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U);
|
||||
|
||||
yt = (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U);
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U);
|
||||
|
||||
mult_32x32_keep32_R(p0, xt, cosVal);
|
||||
mult_32x32_keep32_R(p1, yt, cosVal);
|
||||
multSub_32x32_keep32_R(p0, yt, sinVal);
|
||||
multAcc_32x32_keep32_R(p1, xt, sinVal);
|
||||
|
||||
pSrc[2 * l] = p0 << 1U;
|
||||
pSrc[2 * l + 1] = p1 << 1U;
|
||||
}
|
||||
|
||||
/* first col */
|
||||
arm_radix4_butterfly_inverse_q31( pSrc, n2, (q31_t*)pCoef, 2U);
|
||||
|
||||
/* second col */
|
||||
arm_radix4_butterfly_inverse_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2U);
|
||||
|
||||
n2 = fftLen >> 1U;
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
p0 = pSrc[4 * i + 0];
|
||||
p1 = pSrc[4 * i + 1];
|
||||
xt = pSrc[4 * i + 2];
|
||||
yt = pSrc[4 * i + 3];
|
||||
|
||||
p0 <<= 1U;
|
||||
p1 <<= 1U;
|
||||
xt <<= 1U;
|
||||
yt <<= 1U;
|
||||
|
||||
pSrc[4 * i + 0] = p0;
|
||||
pSrc[4 * i + 1] = p1;
|
||||
pSrc[4 * i + 2] = xt;
|
||||
pSrc[4 * i + 3] = yt;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
@@ -0,0 +1,475 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix2_f16.c
|
||||
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
void arm_radix2_butterfly_f16(
|
||||
float16_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
void arm_radix2_butterfly_inverse_f16(
|
||||
float16_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier,
|
||||
float16_t onebyfftLen);
|
||||
|
||||
extern void arm_bitreversal_f16(
|
||||
float16_t * pSrc,
|
||||
uint16_t fftSize,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Radix-2 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future
|
||||
@param[in] S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_radix2_f16(
|
||||
const arm_cfft_radix2_instance_f16 * S,
|
||||
float16_t * pSrc)
|
||||
{
|
||||
|
||||
if (S->ifftFlag == 1U)
|
||||
{
|
||||
/* Complex IFFT radix-2 */
|
||||
arm_radix2_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle,
|
||||
S->twidCoefModifier, S->onebyfftLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Complex FFT radix-2 */
|
||||
arm_radix2_butterfly_f16(pSrc, S->fftLen, S->pTwiddle,
|
||||
S->twidCoefModifier);
|
||||
}
|
||||
|
||||
if (S->bitReverseFlag == 1U)
|
||||
{
|
||||
/* Bit Reversal */
|
||||
arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
** Internal helper function used by the FFTs
|
||||
** ------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* @brief Core function for the floating-point CFFT butterfly process.
|
||||
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
|
||||
* @param[in] fftLen length of the FFT.
|
||||
* @param[in] *pCoef points to the twiddle coefficient buffer.
|
||||
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_radix2_butterfly_f16(
|
||||
float16_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
|
||||
uint32_t i, j, k, l;
|
||||
uint32_t n1, n2, ia;
|
||||
float16_t xt, yt, cosVal, sinVal;
|
||||
float16_t p0, p1, p2, p3;
|
||||
float16_t a0, a1;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen >> 1;
|
||||
ia = 0;
|
||||
i = 0;
|
||||
|
||||
// loop for groups
|
||||
for (k = n2; k > 0; k--)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia += twidCoefModifier;
|
||||
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i + 0], pSrc[i + fftLen/1] */
|
||||
l = i + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
|
||||
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
|
||||
a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
|
||||
|
||||
p0 = (_Float16)xt * (_Float16)cosVal;
|
||||
p1 = (_Float16)yt * (_Float16)sinVal;
|
||||
p2 = (_Float16)yt * (_Float16)cosVal;
|
||||
p3 = (_Float16)xt * (_Float16)sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
|
||||
pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
|
||||
|
||||
i++;
|
||||
} // groups loop end
|
||||
|
||||
twidCoefModifier <<= 1U;
|
||||
|
||||
// loop for stage
|
||||
for (k = n2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia += twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
i = j;
|
||||
do
|
||||
{
|
||||
l = i + n2;
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
|
||||
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
|
||||
a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
|
||||
|
||||
p0 = (_Float16)xt * (_Float16)cosVal;
|
||||
p1 = (_Float16)yt * (_Float16)sinVal;
|
||||
p2 = (_Float16)yt * (_Float16)cosVal;
|
||||
p3 = (_Float16)xt * (_Float16)sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
|
||||
pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
|
||||
|
||||
i += n1;
|
||||
} while ( i < fftLen ); // butterfly loop end
|
||||
j++;
|
||||
} while ( j < n2); // groups loop end
|
||||
twidCoefModifier <<= 1U;
|
||||
} // stages loop end
|
||||
|
||||
// loop for butterfly
|
||||
for (i = 0; i < fftLen; i += 2)
|
||||
{
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
|
||||
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
|
||||
a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
pSrc[2 * i + 2] = xt;
|
||||
pSrc[2 * i + 3] = yt;
|
||||
} // groups loop end
|
||||
|
||||
#else
|
||||
|
||||
n2 = fftLen;
|
||||
|
||||
// loop for stage
|
||||
for (k = fftLen; k > 1; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia += twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
i = j;
|
||||
do
|
||||
{
|
||||
l = i + n2;
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
|
||||
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
|
||||
a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
|
||||
|
||||
p0 = (_Float16)xt * (_Float16)cosVal;
|
||||
p1 = (_Float16)yt * (_Float16)sinVal;
|
||||
p2 = (_Float16)yt * (_Float16)cosVal;
|
||||
p3 = (_Float16)xt * (_Float16)sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
|
||||
pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
|
||||
|
||||
i += n1;
|
||||
} while (i < fftLen);
|
||||
j++;
|
||||
} while (j < n2);
|
||||
twidCoefModifier <<= 1U;
|
||||
}
|
||||
|
||||
#endif // #if defined (ARM_MATH_DSP)
|
||||
|
||||
}
|
||||
|
||||
|
||||
void arm_radix2_butterfly_inverse_f16(
|
||||
float16_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier,
|
||||
float16_t onebyfftLen)
|
||||
{
|
||||
|
||||
uint32_t i, j, k, l;
|
||||
uint32_t n1, n2, ia;
|
||||
float16_t xt, yt, cosVal, sinVal;
|
||||
float16_t p0, p1, p2, p3;
|
||||
float16_t a0, a1;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
n2 = fftLen >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia += twidCoefModifier;
|
||||
|
||||
l = i + n2;
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
|
||||
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
|
||||
a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
|
||||
|
||||
p0 = (_Float16)xt * (_Float16)cosVal;
|
||||
p1 = (_Float16)yt * (_Float16)sinVal;
|
||||
p2 = (_Float16)yt * (_Float16)cosVal;
|
||||
p3 = (_Float16)xt * (_Float16)sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = (_Float16)p0 - (_Float16)p1;
|
||||
pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
|
||||
} // groups loop end
|
||||
|
||||
twidCoefModifier <<= 1U;
|
||||
|
||||
// loop for stage
|
||||
for (k = fftLen / 2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia += twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
i = j;
|
||||
do
|
||||
{
|
||||
l = i + n2;
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
|
||||
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
|
||||
a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
|
||||
|
||||
p0 = (_Float16)xt * (_Float16)cosVal;
|
||||
p1 = (_Float16)yt * (_Float16)sinVal;
|
||||
p2 = (_Float16)yt * (_Float16)cosVal;
|
||||
p3 = (_Float16)xt * (_Float16)sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = (_Float16)p0 - (_Float16)p1;
|
||||
pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
|
||||
|
||||
i += n1;
|
||||
} while ( i < fftLen ); // butterfly loop end
|
||||
j++;
|
||||
} while (j < n2); // groups loop end
|
||||
|
||||
twidCoefModifier <<= 1U;
|
||||
} // stages loop end
|
||||
|
||||
// loop for butterfly
|
||||
for (i = 0; i < fftLen; i += 2)
|
||||
{
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * i + 2];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * i + 2];
|
||||
|
||||
a1 = (_Float16)pSrc[2 * i + 3] + (_Float16)pSrc[2 * i + 1];
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * i + 3];
|
||||
|
||||
p0 = (_Float16)a0 * (_Float16)onebyfftLen;
|
||||
p2 = (_Float16)xt * (_Float16)onebyfftLen;
|
||||
p1 = (_Float16)a1 * (_Float16)onebyfftLen;
|
||||
p3 = (_Float16)yt * (_Float16)onebyfftLen;
|
||||
|
||||
pSrc[2 * i] = p0;
|
||||
pSrc[2 * i + 1] = p1;
|
||||
pSrc[2 * i + 2] = p2;
|
||||
pSrc[2 * i + 3] = p3;
|
||||
} // butterfly loop end
|
||||
|
||||
#else
|
||||
|
||||
n2 = fftLen;
|
||||
|
||||
// loop for stage
|
||||
for (k = fftLen; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
i = j;
|
||||
do
|
||||
{
|
||||
l = i + n2;
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
|
||||
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
|
||||
a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
|
||||
|
||||
p0 = (_Float16)xt * (_Float16)cosVal;
|
||||
p1 = (_Float16)yt * (_Float16)sinVal;
|
||||
p2 = (_Float16)yt * (_Float16)cosVal;
|
||||
p3 = (_Float16)xt * (_Float16)sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = (_Float16)p0 - (_Float16)p1;
|
||||
pSrc[2 * l + 1] = (_Float16)p2 + (_Float16)p3;
|
||||
|
||||
i += n1;
|
||||
} while ( i < fftLen ); // butterfly loop end
|
||||
j++;
|
||||
} while ( j < n2 ); // groups loop end
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
} // stages loop end
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
|
||||
// loop for butterfly
|
||||
for (i = 0; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
|
||||
|
||||
a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
|
||||
|
||||
p0 = (_Float16)a0 * (_Float16)onebyfftLen;
|
||||
p2 = (_Float16)xt * (_Float16)onebyfftLen;
|
||||
p1 = (_Float16)a1 * (_Float16)onebyfftLen;
|
||||
p3 = (_Float16)yt * (_Float16)onebyfftLen;
|
||||
|
||||
pSrc[2 * i] = p0;
|
||||
pSrc[2U * l] = p2;
|
||||
|
||||
pSrc[2 * i + 1] = p1;
|
||||
pSrc[2U * l + 1U] = p3;
|
||||
} // butterfly loop end
|
||||
|
||||
#endif // #if defined (ARM_MATH_DSP)
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
@@ -0,0 +1,470 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix2_f32.c
|
||||
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
void arm_radix2_butterfly_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
void arm_radix2_butterfly_inverse_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier,
|
||||
float32_t onebyfftLen);
|
||||
|
||||
extern void arm_bitreversal_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftSize,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Radix-2 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future
|
||||
@param[in] S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_radix2_f32(
|
||||
const arm_cfft_radix2_instance_f32 * S,
|
||||
float32_t * pSrc)
|
||||
{
|
||||
|
||||
if (S->ifftFlag == 1U)
|
||||
{
|
||||
/* Complex IFFT radix-2 */
|
||||
arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
|
||||
S->twidCoefModifier, S->onebyfftLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Complex FFT radix-2 */
|
||||
arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
|
||||
S->twidCoefModifier);
|
||||
}
|
||||
|
||||
if (S->bitReverseFlag == 1U)
|
||||
{
|
||||
/* Bit Reversal */
|
||||
arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
** Internal helper function used by the FFTs
|
||||
** ------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
brief Core function for the floating-point CFFT butterfly process.
|
||||
param[in,out] pSrc points to in-place buffer of floating-point data type
|
||||
param[in] fftLen length of the FFT
|
||||
param[in] pCoef points to twiddle coefficient buffer
|
||||
param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
|
||||
return none
|
||||
*/
|
||||
|
||||
void arm_radix2_butterfly_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
|
||||
uint32_t i, j, k, l;
|
||||
uint32_t n1, n2, ia;
|
||||
float32_t xt, yt, cosVal, sinVal;
|
||||
float32_t p0, p1, p2, p3;
|
||||
float32_t a0, a1;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen >> 1;
|
||||
ia = 0;
|
||||
i = 0;
|
||||
|
||||
// loop for groups
|
||||
for (k = n2; k > 0; k--)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia += twidCoefModifier;
|
||||
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i + 0], pSrc[i + fftLen/1] */
|
||||
l = i + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
a0 = pSrc[2 * i] + pSrc[2 * l];
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
|
||||
|
||||
p0 = xt * cosVal;
|
||||
p1 = yt * sinVal;
|
||||
p2 = yt * cosVal;
|
||||
p3 = xt * sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = p0 + p1;
|
||||
pSrc[2 * l + 1] = p2 - p3;
|
||||
|
||||
i++;
|
||||
} // groups loop end
|
||||
|
||||
twidCoefModifier <<= 1U;
|
||||
|
||||
// loop for stage
|
||||
for (k = n2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia += twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
i = j;
|
||||
do
|
||||
{
|
||||
l = i + n2;
|
||||
a0 = pSrc[2 * i] + pSrc[2 * l];
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
|
||||
|
||||
p0 = xt * cosVal;
|
||||
p1 = yt * sinVal;
|
||||
p2 = yt * cosVal;
|
||||
p3 = xt * sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = p0 + p1;
|
||||
pSrc[2 * l + 1] = p2 - p3;
|
||||
|
||||
i += n1;
|
||||
} while ( i < fftLen ); // butterfly loop end
|
||||
j++;
|
||||
} while ( j < n2); // groups loop end
|
||||
twidCoefModifier <<= 1U;
|
||||
} // stages loop end
|
||||
|
||||
// loop for butterfly
|
||||
for (i = 0; i < fftLen; i += 2)
|
||||
{
|
||||
a0 = pSrc[2 * i] + pSrc[2 * i + 2];
|
||||
xt = pSrc[2 * i] - pSrc[2 * i + 2];
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
|
||||
a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
pSrc[2 * i + 2] = xt;
|
||||
pSrc[2 * i + 3] = yt;
|
||||
} // groups loop end
|
||||
|
||||
#else /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
n2 = fftLen;
|
||||
|
||||
// loop for stage
|
||||
for (k = fftLen; k > 1; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia += twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
i = j;
|
||||
do
|
||||
{
|
||||
l = i + n2;
|
||||
a0 = pSrc[2 * i] + pSrc[2 * l];
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
|
||||
|
||||
p0 = xt * cosVal;
|
||||
p1 = yt * sinVal;
|
||||
p2 = yt * cosVal;
|
||||
p3 = xt * sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = p0 + p1;
|
||||
pSrc[2 * l + 1] = p2 - p3;
|
||||
|
||||
i += n1;
|
||||
} while (i < fftLen);
|
||||
j++;
|
||||
} while (j < n2);
|
||||
twidCoefModifier <<= 1U;
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
}
|
||||
|
||||
|
||||
void arm_radix2_butterfly_inverse_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier,
|
||||
float32_t onebyfftLen)
|
||||
{
|
||||
|
||||
uint32_t i, j, k, l;
|
||||
uint32_t n1, n2, ia;
|
||||
float32_t xt, yt, cosVal, sinVal;
|
||||
float32_t p0, p1, p2, p3;
|
||||
float32_t a0, a1;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
n2 = fftLen >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia += twidCoefModifier;
|
||||
|
||||
l = i + n2;
|
||||
a0 = pSrc[2 * i] + pSrc[2 * l];
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
|
||||
|
||||
p0 = xt * cosVal;
|
||||
p1 = yt * sinVal;
|
||||
p2 = yt * cosVal;
|
||||
p3 = xt * sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = p0 - p1;
|
||||
pSrc[2 * l + 1] = p2 + p3;
|
||||
} // groups loop end
|
||||
|
||||
twidCoefModifier <<= 1U;
|
||||
|
||||
// loop for stage
|
||||
for (k = fftLen / 2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia += twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
i = j;
|
||||
do
|
||||
{
|
||||
l = i + n2;
|
||||
a0 = pSrc[2 * i] + pSrc[2 * l];
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
|
||||
|
||||
p0 = xt * cosVal;
|
||||
p1 = yt * sinVal;
|
||||
p2 = yt * cosVal;
|
||||
p3 = xt * sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = p0 - p1;
|
||||
pSrc[2 * l + 1] = p2 + p3;
|
||||
|
||||
i += n1;
|
||||
} while ( i < fftLen ); // butterfly loop end
|
||||
j++;
|
||||
} while (j < n2); // groups loop end
|
||||
|
||||
twidCoefModifier <<= 1U;
|
||||
} // stages loop end
|
||||
|
||||
// loop for butterfly
|
||||
for (i = 0; i < fftLen; i += 2)
|
||||
{
|
||||
a0 = pSrc[2 * i] + pSrc[2 * i + 2];
|
||||
xt = pSrc[2 * i] - pSrc[2 * i + 2];
|
||||
|
||||
a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
|
||||
|
||||
p0 = a0 * onebyfftLen;
|
||||
p2 = xt * onebyfftLen;
|
||||
p1 = a1 * onebyfftLen;
|
||||
p3 = yt * onebyfftLen;
|
||||
|
||||
pSrc[2 * i] = p0;
|
||||
pSrc[2 * i + 1] = p1;
|
||||
pSrc[2 * i + 2] = p2;
|
||||
pSrc[2 * i + 3] = p3;
|
||||
} // butterfly loop end
|
||||
|
||||
#else /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
n2 = fftLen;
|
||||
|
||||
// loop for stage
|
||||
for (k = fftLen; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
i = j;
|
||||
do
|
||||
{
|
||||
l = i + n2;
|
||||
a0 = pSrc[2 * i] + pSrc[2 * l];
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
|
||||
|
||||
p0 = xt * cosVal;
|
||||
p1 = yt * sinVal;
|
||||
p2 = yt * cosVal;
|
||||
p3 = xt * sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = p0 - p1;
|
||||
pSrc[2 * l + 1] = p2 + p3;
|
||||
|
||||
i += n1;
|
||||
} while ( i < fftLen ); // butterfly loop end
|
||||
j++;
|
||||
} while ( j < n2 ); // groups loop end
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
} // stages loop end
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
|
||||
// loop for butterfly
|
||||
for (i = 0; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
|
||||
a0 = pSrc[2 * i] + pSrc[2 * l];
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
|
||||
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
|
||||
p0 = a0 * onebyfftLen;
|
||||
p2 = xt * onebyfftLen;
|
||||
p1 = a1 * onebyfftLen;
|
||||
p3 = yt * onebyfftLen;
|
||||
|
||||
pSrc[2 * i] = p0;
|
||||
pSrc[2 * l] = p2;
|
||||
|
||||
pSrc[2 * i + 1] = p1;
|
||||
pSrc[2 * l + 1] = p3;
|
||||
} // butterfly loop end
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
}
|
||||
@@ -0,0 +1,214 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix2_init_f16.c
|
||||
* Description: Radix-2 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
#include "arm_common_tables.h"
|
||||
#include "arm_common_tables_f16.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the floating-point CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future.
|
||||
@param[in,out] S points to an instance of the floating-point CFFT/CIFFT structure
|
||||
@param[in] fftLen length of the FFT
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
|
||||
|
||||
@par Details
|
||||
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
|
||||
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
|
||||
@par
|
||||
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
|
||||
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
|
||||
@par
|
||||
The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 32, 64, 128, 256, 512, 1024, 2048 and 4096.
|
||||
@par
|
||||
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
|
||||
*/
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
arm_status arm_cfft_radix2_init_f16(
|
||||
arm_cfft_radix2_instance_f16 * S,
|
||||
uint16_t fftLen,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_ARGUMENT_ERROR;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_4096)
|
||||
|
||||
|
||||
/* Initialise the default arm status */
|
||||
status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = (float16_t *) twiddleCoefF16_4096;
|
||||
|
||||
/* Initialise the Flag for selection of CFFT or CIFFT */
|
||||
S->ifftFlag = ifftFlag;
|
||||
|
||||
/* Initialise the Flag for calculation Bit reversal or not */
|
||||
S->bitReverseFlag = bitReverseFlag;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
|
||||
|
||||
/* Initializations of structure parameters depending on the FFT length */
|
||||
switch (S->fftLen)
|
||||
{
|
||||
|
||||
case 4096U:
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 1U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 1U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) armBitRevTable;
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.000244140625;
|
||||
break;
|
||||
|
||||
case 2048U:
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 2U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 2U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.00048828125;
|
||||
break;
|
||||
|
||||
case 1024U:
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 4U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 4U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.0009765625f;
|
||||
break;
|
||||
|
||||
case 512U:
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 8U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 8U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.001953125;
|
||||
break;
|
||||
|
||||
case 256U:
|
||||
/* Initializations of structure parameters for 256 point FFT */
|
||||
S->twidCoefModifier = 16U;
|
||||
S->bitRevFactor = 16U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
|
||||
S->onebyfftLen = 0.00390625f;
|
||||
break;
|
||||
|
||||
case 128U:
|
||||
/* Initializations of structure parameters for 128 point FFT */
|
||||
S->twidCoefModifier = 32U;
|
||||
S->bitRevFactor = 32U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
|
||||
S->onebyfftLen = 0.0078125;
|
||||
break;
|
||||
|
||||
case 64U:
|
||||
/* Initializations of structure parameters for 64 point FFT */
|
||||
S->twidCoefModifier = 64U;
|
||||
S->bitRevFactor = 64U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
|
||||
S->onebyfftLen = 0.015625f;
|
||||
break;
|
||||
|
||||
case 32U:
|
||||
/* Initializations of structure parameters for 64 point FFT */
|
||||
S->twidCoefModifier = 128U;
|
||||
S->bitRevFactor = 128U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
|
||||
S->onebyfftLen = 0.03125;
|
||||
break;
|
||||
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->twidCoefModifier = 256U;
|
||||
S->bitRevFactor = 256U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
|
||||
S->onebyfftLen = 0.0625f;
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
return (status);
|
||||
}
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
@@ -0,0 +1,209 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix2_init_f32.c
|
||||
* Description: Radix-2 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the floating-point CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
|
||||
@param[in,out] S points to an instance of the floating-point CFFT/CIFFT structure
|
||||
@param[in] fftLen length of the FFT
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
|
||||
|
||||
@par Details
|
||||
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
|
||||
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
|
||||
@par
|
||||
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
|
||||
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
|
||||
@par
|
||||
The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 32, 64, 128, 256, 512, 1024, 2048 and 4096.
|
||||
@par
|
||||
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
|
||||
*/
|
||||
|
||||
arm_status arm_cfft_radix2_init_f32(
|
||||
arm_cfft_radix2_instance_f32 * S,
|
||||
uint16_t fftLen,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_ARGUMENT_ERROR;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096)
|
||||
|
||||
/* Initialise the default arm status */
|
||||
status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = (float32_t *) twiddleCoef;
|
||||
|
||||
/* Initialise the Flag for selection of CFFT or CIFFT */
|
||||
S->ifftFlag = ifftFlag;
|
||||
|
||||
/* Initialise the Flag for calculation Bit reversal or not */
|
||||
S->bitReverseFlag = bitReverseFlag;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096)
|
||||
|
||||
/* Initializations of structure parameters depending on the FFT length */
|
||||
switch (S->fftLen)
|
||||
{
|
||||
|
||||
case 4096U:
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 1U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 1U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) armBitRevTable;
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.000244140625;
|
||||
break;
|
||||
|
||||
case 2048U:
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 2U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 2U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.00048828125;
|
||||
break;
|
||||
|
||||
case 1024U:
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 4U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 4U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.0009765625f;
|
||||
break;
|
||||
|
||||
case 512U:
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 8U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 8U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.001953125;
|
||||
break;
|
||||
|
||||
case 256U:
|
||||
/* Initializations of structure parameters for 256 point FFT */
|
||||
S->twidCoefModifier = 16U;
|
||||
S->bitRevFactor = 16U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
|
||||
S->onebyfftLen = 0.00390625f;
|
||||
break;
|
||||
|
||||
case 128U:
|
||||
/* Initializations of structure parameters for 128 point FFT */
|
||||
S->twidCoefModifier = 32U;
|
||||
S->bitRevFactor = 32U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
|
||||
S->onebyfftLen = 0.0078125;
|
||||
break;
|
||||
|
||||
case 64U:
|
||||
/* Initializations of structure parameters for 64 point FFT */
|
||||
S->twidCoefModifier = 64U;
|
||||
S->bitRevFactor = 64U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
|
||||
S->onebyfftLen = 0.015625f;
|
||||
break;
|
||||
|
||||
case 32U:
|
||||
/* Initializations of structure parameters for 64 point FFT */
|
||||
S->twidCoefModifier = 128U;
|
||||
S->bitRevFactor = 128U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
|
||||
S->onebyfftLen = 0.03125;
|
||||
break;
|
||||
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->twidCoefModifier = 256U;
|
||||
S->bitRevFactor = 256U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
|
||||
S->onebyfftLen = 0.0625f;
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
@@ -0,0 +1,194 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix2_init_q15.c
|
||||
* Description: Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the Q15 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
|
||||
@param[in,out] S points to an instance of the Q15 CFFT/CIFFT structure.
|
||||
@param[in] fftLen length of the FFT.
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
|
||||
|
||||
@par Details
|
||||
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
|
||||
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
|
||||
@par
|
||||
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
|
||||
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
|
||||
@par
|
||||
The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 32, 64, 128, 256, 512, 1024, 2048 and 4096.
|
||||
@par
|
||||
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
|
||||
*/
|
||||
|
||||
arm_status arm_cfft_radix2_init_q15(
|
||||
arm_cfft_radix2_instance_q15 * S,
|
||||
uint16_t fftLen,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_ARGUMENT_ERROR;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096)
|
||||
|
||||
/* Initialise the default arm status */
|
||||
status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
|
||||
/* Initialise the Flag for selection of CFFT or CIFFT */
|
||||
S->ifftFlag = ifftFlag;
|
||||
/* Initialise the Flag for calculation Bit reversal or not */
|
||||
S->bitReverseFlag = bitReverseFlag;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
|
||||
|
||||
/* Initializations of structure parameters depending on the FFT length */
|
||||
switch (S->fftLen)
|
||||
{
|
||||
case 4096U:
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 1U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 1U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) armBitRevTable;
|
||||
|
||||
break;
|
||||
|
||||
case 2048U:
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 2U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 2U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
|
||||
|
||||
break;
|
||||
|
||||
case 1024U:
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
S->twidCoefModifier = 4U;
|
||||
S->bitRevFactor = 4U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
|
||||
|
||||
break;
|
||||
|
||||
case 512U:
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
S->twidCoefModifier = 8U;
|
||||
S->bitRevFactor = 8U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
|
||||
|
||||
break;
|
||||
|
||||
case 256U:
|
||||
/* Initializations of structure parameters for 256 point FFT */
|
||||
S->twidCoefModifier = 16U;
|
||||
S->bitRevFactor = 16U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
|
||||
|
||||
break;
|
||||
|
||||
case 128U:
|
||||
/* Initializations of structure parameters for 128 point FFT */
|
||||
S->twidCoefModifier = 32U;
|
||||
S->bitRevFactor = 32U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
|
||||
|
||||
break;
|
||||
|
||||
case 64U:
|
||||
/* Initializations of structure parameters for 64 point FFT */
|
||||
S->twidCoefModifier = 64U;
|
||||
S->bitRevFactor = 64U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
|
||||
|
||||
break;
|
||||
|
||||
case 32U:
|
||||
/* Initializations of structure parameters for 32 point FFT */
|
||||
S->twidCoefModifier = 128U;
|
||||
S->bitRevFactor = 128U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
|
||||
|
||||
break;
|
||||
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->twidCoefModifier = 256U;
|
||||
S->bitRevFactor = 256U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
@@ -0,0 +1,191 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix2_init_q31.c
|
||||
* Description: Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the Q31 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
|
||||
@param[in,out] S points to an instance of the Q31 CFFT/CIFFT structure
|
||||
@param[in] fftLen length of the FFT
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
|
||||
|
||||
@par Details
|
||||
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
|
||||
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
|
||||
@par
|
||||
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
|
||||
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
|
||||
@par
|
||||
The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 32, 64, 128, 256, 512, 1024, 2048 and 4096.
|
||||
@par
|
||||
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
|
||||
*/
|
||||
|
||||
arm_status arm_cfft_radix2_init_q31(
|
||||
arm_cfft_radix2_instance_q31 * S,
|
||||
uint16_t fftLen,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_ARGUMENT_ERROR;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096)
|
||||
|
||||
/* Initialise the default arm status */
|
||||
status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
|
||||
|
||||
/* Initialise the Flag for selection of CFFT or CIFFT */
|
||||
S->ifftFlag = ifftFlag;
|
||||
|
||||
/* Initialise the Flag for calculation Bit reversal or not */
|
||||
S->bitReverseFlag = bitReverseFlag;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen)
|
||||
{
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
case 4096U:
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 1U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 1U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) armBitRevTable;
|
||||
break;
|
||||
|
||||
/* Initializations of structure parameters for 2048 point FFT */
|
||||
case 2048U:
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 2U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 2U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
|
||||
break;
|
||||
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 4U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 4U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
|
||||
break;
|
||||
|
||||
/* Initializations of structure parameters for 512 point FFT */
|
||||
case 512U:
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 8U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 8U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
|
||||
break;
|
||||
|
||||
case 256U:
|
||||
/* Initializations of structure parameters for 256 point FFT */
|
||||
S->twidCoefModifier = 16U;
|
||||
S->bitRevFactor = 16U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
|
||||
break;
|
||||
|
||||
case 128U:
|
||||
/* Initializations of structure parameters for 128 point FFT */
|
||||
S->twidCoefModifier = 32U;
|
||||
S->bitRevFactor = 32U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
|
||||
break;
|
||||
|
||||
case 64U:
|
||||
/* Initializations of structure parameters for 64 point FFT */
|
||||
S->twidCoefModifier = 64U;
|
||||
S->bitRevFactor = 64U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
|
||||
break;
|
||||
|
||||
case 32U:
|
||||
/* Initializations of structure parameters for 32 point FFT */
|
||||
S->twidCoefModifier = 128U;
|
||||
S->bitRevFactor = 128U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
|
||||
break;
|
||||
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->twidCoefModifier = 256U;
|
||||
S->bitRevFactor = 256U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
@@ -0,0 +1,689 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix2_q15.c
|
||||
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
void arm_radix2_butterfly_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
void arm_radix2_butterfly_inverse_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
void arm_bitreversal_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the fixed-point CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
|
||||
@param[in] S points to an instance of the fixed-point CFFT/CIFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_radix2_q15(
|
||||
const arm_cfft_radix2_instance_q15 * S,
|
||||
q15_t * pSrc)
|
||||
{
|
||||
|
||||
if (S->ifftFlag == 1U)
|
||||
{
|
||||
arm_radix2_butterfly_inverse_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
else
|
||||
{
|
||||
arm_radix2_butterfly_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
|
||||
arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
void arm_radix2_butterfly_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
uint32_t i, j, k, l;
|
||||
uint32_t n1, n2, ia;
|
||||
q15_t in;
|
||||
q31_t T, S, R;
|
||||
q31_t coeff, out1, out2;
|
||||
|
||||
//N = fftLen;
|
||||
n2 = fftLen;
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
l = i + n2;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
in = ((int16_t) (T & 0xFFFF)) >> 1;
|
||||
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
in = ((int16_t) (S & 0xFFFF)) >> 1;
|
||||
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUAD(coeff, R) >> 16;
|
||||
out2 = __SMUSDX(coeff, R);
|
||||
#else
|
||||
out1 = __SMUSDX(R, coeff) >> 16U;
|
||||
out2 = __SMUAD(coeff, R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
|
||||
|
||||
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
i++;
|
||||
l++;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
in = ((int16_t) (T & 0xFFFF)) >> 1;
|
||||
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
in = ((int16_t) (S & 0xFFFF)) >> 1;
|
||||
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUAD(coeff, R) >> 16;
|
||||
out2 = __SMUSDX(coeff, R);
|
||||
#else
|
||||
|
||||
out1 = __SMUSDX(R, coeff) >> 16U;
|
||||
out2 = __SMUAD(coeff, R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
|
||||
/* loop for stage */
|
||||
for (k = fftLen / 2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
/* loop for groups */
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = j; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUAD(coeff, R) >> 16;
|
||||
out2 = __SMUSDX(coeff, R);
|
||||
#else
|
||||
out1 = __SMUSDX(R, coeff) >> 16U;
|
||||
out2 = __SMUAD(coeff, R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
|
||||
|
||||
i += n1;
|
||||
|
||||
l = i + n2;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUAD(coeff, R) >> 16;
|
||||
out2 = __SMUSDX(coeff, R);
|
||||
#else
|
||||
out1 = __SMUSDX(R, coeff) >> 16U;
|
||||
out2 = __SMUAD(coeff, R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
|
||||
|
||||
} /* butterfly loop end */
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
} /* stages loop end */
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = 0; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __QADD16(T, S));
|
||||
|
||||
write_q15x2 (pSrc + (2 * l), R);
|
||||
|
||||
i += n1;
|
||||
l = i + n2;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __QADD16(T, S));
|
||||
|
||||
write_q15x2 (pSrc + (2 * l), R);
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
|
||||
#else /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
uint32_t i, j, k, l;
|
||||
uint32_t n1, n2, ia;
|
||||
q15_t xt, yt, cosVal, sinVal;
|
||||
|
||||
|
||||
// N = fftLen;
|
||||
n2 = fftLen;
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
/* loop for groups */
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
cosVal = pCoef[(ia * 2)];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = j; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
|
||||
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
|
||||
|
||||
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
|
||||
pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) +
|
||||
(pSrc[2 * i + 1] >> 1U) ) >> 1U;
|
||||
|
||||
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
|
||||
((int16_t) (((q31_t) yt * sinVal) >> 16)));
|
||||
|
||||
pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
|
||||
((int16_t) (((q31_t) xt * sinVal) >> 16)));
|
||||
|
||||
} /* butterfly loop end */
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
|
||||
/* loop for stage */
|
||||
for (k = fftLen / 2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
/* loop for groups */
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = j; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
|
||||
|
||||
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
|
||||
((int16_t) (((q31_t) yt * sinVal) >> 16)));
|
||||
|
||||
pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
|
||||
((int16_t) (((q31_t) xt * sinVal) >> 16)));
|
||||
|
||||
} /* butterfly loop end */
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
} /* stages loop end */
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
/* loop for groups */
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = j; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
|
||||
|
||||
pSrc[2 * l] = xt;
|
||||
|
||||
pSrc[2 * l + 1] = yt;
|
||||
|
||||
} /* butterfly loop end */
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
}
|
||||
|
||||
|
||||
void arm_radix2_butterfly_inverse_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
uint32_t i, j, k, l;
|
||||
uint32_t n1, n2, ia;
|
||||
q15_t in;
|
||||
q31_t T, S, R;
|
||||
q31_t coeff, out1, out2;
|
||||
|
||||
// N = fftLen;
|
||||
n2 = fftLen;
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
/* loop for groups */
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
l = i + n2;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
in = ((int16_t) (T & 0xFFFF)) >> 1;
|
||||
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
in = ((int16_t) (S & 0xFFFF)) >> 1;
|
||||
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUSD(coeff, R) >> 16;
|
||||
out2 = __SMUADX(coeff, R);
|
||||
#else
|
||||
out1 = __SMUADX(R, coeff) >> 16U;
|
||||
out2 = __SMUSD(__QSUB(0, coeff), R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
|
||||
|
||||
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
i++;
|
||||
l++;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
in = ((int16_t) (T & 0xFFFF)) >> 1;
|
||||
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
in = ((int16_t) (S & 0xFFFF)) >> 1;
|
||||
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUSD(coeff, R) >> 16;
|
||||
out2 = __SMUADX(coeff, R);
|
||||
#else
|
||||
out1 = __SMUADX(R, coeff) >> 16U;
|
||||
out2 = __SMUSD(__QSUB(0, coeff), R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
|
||||
/* loop for stage */
|
||||
for (k = fftLen / 2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
/* loop for groups */
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = j; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUSD(coeff, R) >> 16;
|
||||
out2 = __SMUADX(coeff, R);
|
||||
#else
|
||||
out1 = __SMUADX(R, coeff) >> 16U;
|
||||
out2 = __SMUSD(__QSUB(0, coeff), R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
|
||||
|
||||
i += n1;
|
||||
|
||||
l = i + n2;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUSD(coeff, R) >> 16;
|
||||
out2 = __SMUADX(coeff, R);
|
||||
#else
|
||||
out1 = __SMUADX(R, coeff) >> 16U;
|
||||
out2 = __SMUSD(__QSUB(0, coeff), R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
|
||||
|
||||
} /* butterfly loop end */
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
} /* stages loop end */
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
/* loop for groups */
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = j; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
|
||||
T = read_q15x2 (pSrc + (2 * i));
|
||||
|
||||
S = read_q15x2 (pSrc + (2 * l));
|
||||
|
||||
R = __QSUB16(T, S);
|
||||
|
||||
write_q15x2 (pSrc + (2 * i), __QADD16(T, S));
|
||||
|
||||
write_q15x2 (pSrc + (2 * l), R);
|
||||
|
||||
} /* butterfly loop end */
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
|
||||
#else /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
uint32_t i, j, k, l;
|
||||
uint32_t n1, n2, ia;
|
||||
q15_t xt, yt, cosVal, sinVal;
|
||||
|
||||
// N = fftLen;
|
||||
n2 = fftLen;
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
/* loop for groups */
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
cosVal = pCoef[(ia * 2)];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = j; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
|
||||
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
|
||||
|
||||
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
|
||||
pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) +
|
||||
(pSrc[2 * i + 1] >> 1U) ) >> 1U;
|
||||
|
||||
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
|
||||
((int16_t) (((q31_t) yt * sinVal) >> 16)));
|
||||
|
||||
pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
|
||||
((int16_t) (((q31_t) xt * sinVal) >> 16)));
|
||||
|
||||
} /* butterfly loop end */
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
|
||||
/* loop for stage */
|
||||
for (k = fftLen / 2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
/* loop for groups */
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
cosVal = pCoef[(ia * 2)];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = j; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
|
||||
|
||||
pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
|
||||
((int16_t) (((q31_t) yt * sinVal) >> 16)) );
|
||||
|
||||
pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
|
||||
((int16_t) (((q31_t) xt * sinVal) >> 16)) );
|
||||
|
||||
} /* butterfly loop end */
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
} /* stages loop end */
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
cosVal = pCoef[(ia * 2)];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
/* loop for butterfly */
|
||||
for (i = 0; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
|
||||
|
||||
pSrc[2 * l] = xt;
|
||||
|
||||
pSrc[2 * l + 1] = yt;
|
||||
|
||||
} /* groups loop end */
|
||||
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
}
|
||||
@@ -0,0 +1,337 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix2_q31.c
|
||||
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
void arm_radix2_butterfly_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
void arm_radix2_butterfly_inverse_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
void arm_bitreversal_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the fixed-point CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
|
||||
@param[in] S points to an instance of the fixed-point CFFT/CIFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_radix2_q31(
|
||||
const arm_cfft_radix2_instance_q31 * S,
|
||||
q31_t * pSrc)
|
||||
{
|
||||
|
||||
if (S->ifftFlag == 1U)
|
||||
{
|
||||
arm_radix2_butterfly_inverse_q31(pSrc, S->fftLen,
|
||||
S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
else
|
||||
{
|
||||
arm_radix2_butterfly_q31(pSrc, S->fftLen,
|
||||
S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
|
||||
arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
void arm_radix2_butterfly_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
|
||||
unsigned i, j, k, l, m;
|
||||
unsigned n1, n2, ia;
|
||||
q31_t xt, yt, cosVal, sinVal;
|
||||
q31_t p0, p1;
|
||||
|
||||
//N = fftLen;
|
||||
n2 = fftLen;
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
l = i + n2;
|
||||
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
|
||||
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
|
||||
|
||||
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
|
||||
pSrc[2 * i + 1] =
|
||||
((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
|
||||
|
||||
mult_32x32_keep32_R(p0, xt, cosVal);
|
||||
mult_32x32_keep32_R(p1, yt, cosVal);
|
||||
multAcc_32x32_keep32_R(p0, yt, sinVal);
|
||||
multSub_32x32_keep32_R(p1, xt, sinVal);
|
||||
|
||||
pSrc[2U * l] = p0;
|
||||
pSrc[2U * l + 1U] = p1;
|
||||
|
||||
} // groups loop end
|
||||
|
||||
twidCoefModifier <<= 1U;
|
||||
|
||||
// loop for stage
|
||||
for (k = fftLen / 2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
i = j;
|
||||
m = fftLen / n1;
|
||||
do
|
||||
{
|
||||
l = i + n2;
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
|
||||
|
||||
mult_32x32_keep32_R(p0, xt, cosVal);
|
||||
mult_32x32_keep32_R(p1, yt, cosVal);
|
||||
multAcc_32x32_keep32_R(p0, yt, sinVal);
|
||||
multSub_32x32_keep32_R(p1, xt, sinVal);
|
||||
|
||||
pSrc[2U * l] = p0;
|
||||
pSrc[2U * l + 1U] = p1;
|
||||
i += n1;
|
||||
m--;
|
||||
} while ( m > 0); // butterfly loop end
|
||||
|
||||
} // groups loop end
|
||||
|
||||
twidCoefModifier <<= 1U;
|
||||
} // stages loop end
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
for (i = 0; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
|
||||
|
||||
pSrc[2U * l] = xt;
|
||||
|
||||
pSrc[2U * l + 1U] = yt;
|
||||
|
||||
i += n1;
|
||||
l = i + n2;
|
||||
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
|
||||
|
||||
pSrc[2U * l] = xt;
|
||||
|
||||
pSrc[2U * l + 1U] = yt;
|
||||
|
||||
} // butterfly loop end
|
||||
|
||||
}
|
||||
|
||||
|
||||
void arm_radix2_butterfly_inverse_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
|
||||
unsigned i, j, k, l;
|
||||
unsigned n1, n2, ia;
|
||||
q31_t xt, yt, cosVal, sinVal;
|
||||
q31_t p0, p1;
|
||||
|
||||
//N = fftLen;
|
||||
n2 = fftLen;
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
l = i + n2;
|
||||
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
|
||||
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
|
||||
|
||||
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
|
||||
pSrc[2 * i + 1] =
|
||||
((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
|
||||
|
||||
mult_32x32_keep32_R(p0, xt, cosVal);
|
||||
mult_32x32_keep32_R(p1, yt, cosVal);
|
||||
multSub_32x32_keep32_R(p0, yt, sinVal);
|
||||
multAcc_32x32_keep32_R(p1, xt, sinVal);
|
||||
|
||||
pSrc[2U * l] = p0;
|
||||
pSrc[2U * l + 1U] = p1;
|
||||
} // groups loop end
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
|
||||
// loop for stage
|
||||
for (k = fftLen / 2; k > 2; k = k >> 1)
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
// loop for groups
|
||||
for (j = 0; j < n2; j++)
|
||||
{
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
for (i = j; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
|
||||
|
||||
mult_32x32_keep32_R(p0, xt, cosVal);
|
||||
mult_32x32_keep32_R(p1, yt, cosVal);
|
||||
multSub_32x32_keep32_R(p0, yt, sinVal);
|
||||
multAcc_32x32_keep32_R(p1, xt, sinVal);
|
||||
|
||||
pSrc[2U * l] = p0;
|
||||
pSrc[2U * l + 1U] = p1;
|
||||
} // butterfly loop end
|
||||
|
||||
} // groups loop end
|
||||
|
||||
twidCoefModifier = twidCoefModifier << 1U;
|
||||
} // stages loop end
|
||||
|
||||
n1 = n2;
|
||||
n2 = n2 >> 1;
|
||||
ia = 0;
|
||||
|
||||
cosVal = pCoef[ia * 2];
|
||||
sinVal = pCoef[(ia * 2) + 1];
|
||||
ia = ia + twidCoefModifier;
|
||||
|
||||
// loop for butterfly
|
||||
for (i = 0; i < fftLen; i += n1)
|
||||
{
|
||||
l = i + n2;
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
|
||||
|
||||
pSrc[2U * l] = xt;
|
||||
|
||||
pSrc[2U * l + 1U] = yt;
|
||||
|
||||
i += n1;
|
||||
l = i + n2;
|
||||
|
||||
xt = pSrc[2 * i] - pSrc[2 * l];
|
||||
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
|
||||
|
||||
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
|
||||
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
|
||||
|
||||
pSrc[2U * l] = xt;
|
||||
|
||||
pSrc[2U * l + 1U] = yt;
|
||||
|
||||
} // butterfly loop end
|
||||
|
||||
}
|
||||
1272
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c
Normal file
1272
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f16.c
Normal file
@@ -0,0 +1,1272 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix4_f16.c
|
||||
* Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
extern void arm_bitreversal_f16(
|
||||
float16_t * pSrc,
|
||||
uint16_t fftSize,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab);
|
||||
|
||||
void arm_radix4_butterfly_f16(
|
||||
float16_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
void arm_radix4_butterfly_inverse_f16(
|
||||
float16_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier,
|
||||
float16_t onebyfftLen);
|
||||
|
||||
|
||||
void arm_cfft_radix4by2_f16(
|
||||
float16_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float16_t * pCoef);
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/*
|
||||
* @brief Core function for the floating-point CFFT butterfly process.
|
||||
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
|
||||
* @param[in] fftLen length of the FFT.
|
||||
* @param[in] *pCoef points to the twiddle coefficient buffer.
|
||||
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_cfft_radix4by2_f16(
|
||||
float16_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float16_t * pCoef)
|
||||
{
|
||||
uint32_t i, l;
|
||||
uint32_t n2, ia;
|
||||
float16_t xt, yt, cosVal, sinVal;
|
||||
float16_t p0, p1,p2,p3,a0,a1;
|
||||
|
||||
n2 = fftLen >> 1;
|
||||
ia = 0;
|
||||
for (i = 0; i < n2; i++)
|
||||
{
|
||||
cosVal = pCoef[2*ia];
|
||||
sinVal = pCoef[2*ia + 1];
|
||||
ia++;
|
||||
|
||||
l = i + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
a0 = (_Float16)pSrc[2 * i] + (_Float16)pSrc[2 * l];
|
||||
xt = (_Float16)pSrc[2 * i] - (_Float16)pSrc[2 * l];
|
||||
|
||||
yt = (_Float16)pSrc[2 * i + 1] - (_Float16)pSrc[2 * l + 1];
|
||||
a1 = (_Float16)pSrc[2 * l + 1] + (_Float16)pSrc[2 * i + 1];
|
||||
|
||||
p0 = (_Float16)xt * (_Float16)cosVal;
|
||||
p1 = (_Float16)yt * (_Float16)sinVal;
|
||||
p2 = (_Float16)yt * (_Float16)cosVal;
|
||||
p3 = (_Float16)xt * (_Float16)sinVal;
|
||||
|
||||
pSrc[2 * i] = a0;
|
||||
pSrc[2 * i + 1] = a1;
|
||||
|
||||
pSrc[2 * l] = (_Float16)p0 + (_Float16)p1;
|
||||
pSrc[2 * l + 1] = (_Float16)p2 - (_Float16)p3;
|
||||
|
||||
}
|
||||
|
||||
// first col
|
||||
arm_radix4_butterfly_f16( pSrc, n2, (float16_t*)pCoef, 2U);
|
||||
// second col
|
||||
arm_radix4_butterfly_f16( pSrc + fftLen, n2, (float16_t*)pCoef, 2U);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future.
|
||||
@param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_radix4_f16(
|
||||
const arm_cfft_radix4_instance_f16 * S,
|
||||
float16_t * pSrc)
|
||||
{
|
||||
if (S->ifftFlag == 1U)
|
||||
{
|
||||
/* Complex IFFT radix-4 */
|
||||
arm_radix4_butterfly_inverse_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Complex FFT radix-4 */
|
||||
arm_radix4_butterfly_f16(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
|
||||
if (S->bitReverseFlag == 1U)
|
||||
{
|
||||
/* Bit Reversal */
|
||||
arm_bitreversal_f16(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* Internal helper function used by the FFTs
|
||||
* ---------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* @brief Core function for the floating-point CFFT butterfly process.
|
||||
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
|
||||
* @param[in] fftLen length of the FFT.
|
||||
* @param[in] *pCoef points to the twiddle coefficient buffer.
|
||||
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_radix4_butterfly_f16(
|
||||
float16_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
|
||||
float16_t co1, co2, co3, si1, si2, si3;
|
||||
uint32_t ia1, ia2, ia3;
|
||||
uint32_t i0, i1, i2, i3;
|
||||
uint32_t n1, n2, j, k;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
|
||||
float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
|
||||
Ybminusd;
|
||||
float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
|
||||
float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
|
||||
float16_t *ptr1;
|
||||
float16_t p0,p1,p2,p3,p4,p5;
|
||||
float16_t a0,a1,a2,a3,a4,a5,a6,a7;
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
i0 = 0U;
|
||||
ia1 = 0U;
|
||||
|
||||
j = n2;
|
||||
|
||||
/* Calculation of first stage */
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
xaIn = pSrc[(2U * i0)];
|
||||
yaIn = pSrc[(2U * i0) + 1U];
|
||||
|
||||
xbIn = pSrc[(2U * i1)];
|
||||
ybIn = pSrc[(2U * i1) + 1U];
|
||||
|
||||
xcIn = pSrc[(2U * i2)];
|
||||
ycIn = pSrc[(2U * i2) + 1U];
|
||||
|
||||
xdIn = pSrc[(2U * i3)];
|
||||
ydIn = pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
|
||||
/* xb + xd */
|
||||
Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
|
||||
/* ya + yc */
|
||||
Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
|
||||
/* yb + yd */
|
||||
Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
|
||||
/* xb - xd */
|
||||
Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
|
||||
/* ya - yc */
|
||||
Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
|
||||
/* yb - yd */
|
||||
Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
|
||||
/* (xa + xc) - (xb + xd) */
|
||||
Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
|
||||
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia3 = ia2 + ia1;
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
|
||||
Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
|
||||
Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
|
||||
Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
|
||||
Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
|
||||
Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
//Xb12_out -= Yb12C_out * si1;
|
||||
p0 = (_Float16)Yb12C_out * (_Float16)si1;
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
//Yb12_out += Xb12C_out * si1;
|
||||
p1 = (_Float16)Xb12C_out * (_Float16)si1;
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
//Xc12_out -= Yc12C_out * si2;
|
||||
p2 = (_Float16)Yc12C_out * (_Float16)si2;
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
//Yc12_out += Xc12C_out * si2;
|
||||
p3 = (_Float16)Xc12C_out * (_Float16)si2;
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
//Xd12_out -= Yd12C_out * si3;
|
||||
p4 = (_Float16)Yd12C_out * (_Float16)si3;
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
//Yd12_out += Xd12C_out * si3;
|
||||
p5 = (_Float16)Xd12C_out * (_Float16)si3;
|
||||
|
||||
Xb12_out += (_Float16)p0;
|
||||
Yb12_out -= (_Float16)p1;
|
||||
Xc12_out += (_Float16)p2;
|
||||
Yc12_out -= (_Float16)p3;
|
||||
Xd12_out += (_Float16)p4;
|
||||
Yd12_out -= (_Float16)p5;
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = Xc12_out;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = Yc12_out;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = Xb12_out;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = Yb12_out;
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = Xd12_out;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = Yd12_out;
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 += twidCoefModifier;
|
||||
|
||||
/* Updating input index */
|
||||
i0++;
|
||||
|
||||
}
|
||||
while (--j);
|
||||
|
||||
twidCoefModifier <<= 2U;
|
||||
|
||||
/* Calculation of second stage to excluding last stage */
|
||||
for (k = fftLen >> 2U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the first stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* Calculation of first stage */
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 += twidCoefModifier;
|
||||
|
||||
i0 = j;
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
xaIn = pSrc[(2U * i0)];
|
||||
yaIn = pSrc[(2U * i0) + 1U];
|
||||
|
||||
xbIn = pSrc[(2U * i1)];
|
||||
ybIn = pSrc[(2U * i1) + 1U];
|
||||
|
||||
xcIn = pSrc[(2U * i2)];
|
||||
ycIn = pSrc[(2U * i2) + 1U];
|
||||
|
||||
xdIn = pSrc[(2U * i3)];
|
||||
ydIn = pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
|
||||
/* (xb - xd) */
|
||||
Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
|
||||
/* ya - yc */
|
||||
Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
|
||||
/* (yb - yd) */
|
||||
Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
|
||||
/* xb + xd */
|
||||
Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
|
||||
/* ya + yc */
|
||||
Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
|
||||
/* yb + yd */
|
||||
Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
Xb12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
Yb12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
|
||||
/* xa + xc -(xb + xd) */
|
||||
Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
Xd12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yd12C_out = ((_Float16)Xbminusd + (_Float16)Yaminusc);
|
||||
|
||||
pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
|
||||
pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
|
||||
|
||||
Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
|
||||
Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
|
||||
Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
|
||||
Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
|
||||
Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
|
||||
Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
//Xb12_out -= Yb12C_out * si1;
|
||||
p0 = (_Float16)Yb12C_out * (_Float16)si1;
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
//Yb12_out += Xb12C_out * si1;
|
||||
p1 = (_Float16)Xb12C_out * (_Float16)si1;
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
//Xc12_out -= Yc12C_out * si2;
|
||||
p2 = (_Float16)Yc12C_out * (_Float16)si2;
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
//Yc12_out += Xc12C_out * si2;
|
||||
p3 = (_Float16)Xc12C_out * (_Float16)si2;
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
//Xd12_out -= Yd12C_out * si3;
|
||||
p4 = (_Float16)Yd12C_out * (_Float16)si3;
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
//Yd12_out += Xd12C_out * si3;
|
||||
p5 = (_Float16)Xd12C_out * (_Float16)si3;
|
||||
|
||||
Xb12_out += (_Float16)p0;
|
||||
Yb12_out -= (_Float16)p1;
|
||||
Xc12_out += (_Float16)p2;
|
||||
Yc12_out -= (_Float16)p3;
|
||||
Xd12_out += (_Float16)p4;
|
||||
Yd12_out -= (_Float16)p5;
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = Xc12_out;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = Yc12_out;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = Xb12_out;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = Yb12_out;
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = Xd12_out;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = Yd12_out;
|
||||
|
||||
i0 += n1;
|
||||
} while (i0 < fftLen);
|
||||
j++;
|
||||
} while (j <= (n2 - 1U));
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
|
||||
j = fftLen >> 2;
|
||||
ptr1 = &pSrc[0];
|
||||
|
||||
/* Calculations of last stage */
|
||||
do
|
||||
{
|
||||
xaIn = ptr1[0];
|
||||
yaIn = ptr1[1];
|
||||
xbIn = ptr1[2];
|
||||
ybIn = ptr1[3];
|
||||
xcIn = ptr1[4];
|
||||
ycIn = ptr1[5];
|
||||
xdIn = ptr1[6];
|
||||
ydIn = ptr1[7];
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
|
||||
|
||||
/* ya + yc */
|
||||
Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
|
||||
|
||||
/* ya - yc */
|
||||
Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
|
||||
|
||||
/* xb + xd */
|
||||
Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
|
||||
|
||||
/* yb + yd */
|
||||
Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
|
||||
|
||||
/* (xb-xd) */
|
||||
Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
|
||||
|
||||
/* (yb-yd) */
|
||||
Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
|
||||
/* xc' = (xa-xb+xc-xd) */
|
||||
a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
|
||||
/* yc' = (ya-yb+yc-yd) */
|
||||
a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
|
||||
/* xb' = (xa+yb-xc-yd) */
|
||||
a4 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
|
||||
/* yb' = (ya-xb-yc+xd) */
|
||||
a5 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
|
||||
/* xd' = (xa-yb-xc+yd)) */
|
||||
a6 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
|
||||
/* yd' = (ya+xb-yc-xd) */
|
||||
a7 = ((_Float16)Xbminusd + (_Float16)Yaminusc);
|
||||
|
||||
ptr1[0] = a0;
|
||||
ptr1[1] = a1;
|
||||
ptr1[2] = a2;
|
||||
ptr1[3] = a3;
|
||||
ptr1[4] = a4;
|
||||
ptr1[5] = a5;
|
||||
ptr1[6] = a6;
|
||||
ptr1[7] = a7;
|
||||
|
||||
/* increment pointer by 8 */
|
||||
ptr1 += 8U;
|
||||
} while (--j);
|
||||
|
||||
#else
|
||||
|
||||
float16_t t1, t2, r1, r2, s1, s2;
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initializations for the fft calculation */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
for (k = fftLen; k > 1U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the fft calculation */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* FFT Calculation */
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
i0 = j;
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* xa + xc */
|
||||
r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
|
||||
|
||||
/* xa - xc */
|
||||
r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
|
||||
|
||||
/* ya + yc */
|
||||
s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* ya - yc */
|
||||
s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* xb + xd */
|
||||
t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
|
||||
|
||||
/* xa + xc -(xb + xd) */
|
||||
r1 = (_Float16)r1 - (_Float16)t1;
|
||||
|
||||
/* yb + yd */
|
||||
t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = (_Float16)s1 - (_Float16)t2;
|
||||
|
||||
/* (yb - yd) */
|
||||
t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* (xb - xd) */
|
||||
t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) + ((_Float16)s1 * (_Float16)si2);
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) - ((_Float16)r1 * (_Float16)si2);
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
r1 = (_Float16)r2 + (_Float16)t1;
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
r2 = (_Float16)r2 - (_Float16)t1;
|
||||
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
s1 = (_Float16)s2 - (_Float16)t2;
|
||||
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
s2 = (_Float16)s2 + (_Float16)t2;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) + ((_Float16)s1 * (_Float16)si1);
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) - ((_Float16)r1 * (_Float16)si1);
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) + ((_Float16)s2 * (_Float16)si3);
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) - ((_Float16)r2 * (_Float16)si3);
|
||||
|
||||
i0 += n1;
|
||||
} while ( i0 < fftLen);
|
||||
j++;
|
||||
} while (j <= (n2 - 1U));
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* @brief Core function for the half-precision floating-point CIFFT (inverse) radix-4 butterfly process.
|
||||
* @param[in, out] *pSrc points to the buffer of interleaved (real, imaginary) float16 samples; it is read and overwritten in place.
|
||||
* @param[in] fftLen length of the FFT. NOTE(review): presumably a power of 4 and >= 4 (every stage shifts by 2 bits); with fftLen < 4 the first do/while counter starts at 0 and wraps around - confirm callers guarantee this.
|
||||
* @param[in] *pCoef points to twiddle coefficient buffer, read as pairs: pCoef[2*i] is loaded into coN and pCoef[2*i + 1] into siN.
|
||||
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table; it is multiplied by 4 after every stage.
|
||||
* @param[in] onebyfftLen value of 1/fftLen; every output of the last stage is multiplied by it.
|
||||
* @return none.
|
||||
*/
|
||||
|
||||
void arm_radix4_butterfly_inverse_f16(
|
||||
float16_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier,
|
||||
float16_t onebyfftLen)
|
||||
{
|
||||
/* Twiddle factors of the current butterfly: coN/siN are read from pCoef at index iaN */
float16_t co1, co2, co3, si1, si2, si3;
|
||||
/* Twiddle table indices: ia2 = 2*ia1, ia3 = 3*ia1 */
uint32_t ia1, ia2, ia3;
|
||||
/* Complex-sample indices of the four butterfly inputs (i1 = i0 + n2, i2 = i1 + n2, i3 = i2 + n2) */
uint32_t i0, i1, i2, i3;
|
||||
/* n1: butterfly group size of the current stage, n2 = n1/4: distance between the four inputs; j, k: loop counters */
uint32_t n1, n2, j, k;
|
||||
|
||||
/* Variant with explicit temporaries; first stage, middle stages and the twiddle-free last stage are written out separately */
#if defined (ARM_MATH_DSP)
|
||||
|
||||
float16_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
|
||||
float16_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
|
||||
Ybminusd;
|
||||
float16_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
|
||||
float16_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
|
||||
float16_t *ptr1;
|
||||
float16_t p0,p1,p2,p3,p4,p5,p6,p7;
|
||||
float16_t a0,a1,a2,a3,a4,a5,a6,a7;
|
||||
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
i0 = 0U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* One butterfly per iteration: the first stage runs fftLen/4 butterflies */
j = n2;
|
||||
|
||||
/* Calculation of first stage */
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Butterfly implementation: load the four complex inputs a, b, c, d */
|
||||
xaIn = pSrc[(2U * i0)];
|
||||
yaIn = pSrc[(2U * i0) + 1U];
|
||||
|
||||
xcIn = pSrc[(2U * i2)];
|
||||
ycIn = pSrc[(2U * i2) + 1U];
|
||||
|
||||
xbIn = pSrc[(2U * i1)];
|
||||
ybIn = pSrc[(2U * i1) + 1U];
|
||||
|
||||
xdIn = pSrc[(2U * i3)];
|
||||
ydIn = pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
|
||||
/* xb + xd */
|
||||
Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
|
||||
/* ya + yc */
|
||||
Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
|
||||
/* yb + yd */
|
||||
Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
|
||||
/* xb - xd */
|
||||
Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
|
||||
/* ya - yc */
|
||||
Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
|
||||
/* yb - yd */
|
||||
Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
|
||||
/* (xa + xc) - (xb + xd) */
|
||||
Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
|
||||
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia3 = ia2 + ia1;
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Cosine halves of the complex twiddle products */
Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
|
||||
Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
|
||||
Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
|
||||
Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
|
||||
Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
|
||||
Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
|
||||
|
||||
/* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
|
||||
//Xb12_out -= Yb12C_out * si1;
|
||||
p0 = (_Float16)Yb12C_out * (_Float16)si1;
|
||||
/* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
|
||||
//Yb12_out += Xb12C_out * si1;
|
||||
p1 = (_Float16)Xb12C_out * (_Float16)si1;
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
//Xc12_out -= Yc12C_out * si2;
|
||||
p2 = (_Float16)Yc12C_out * (_Float16)si2;
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
//Yc12_out += Xc12C_out * si2;
|
||||
p3 = (_Float16)Xc12C_out * (_Float16)si2;
|
||||
/* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
|
||||
//Xd12_out -= Yd12C_out * si3;
|
||||
p4 = (_Float16)Yd12C_out * (_Float16)si3;
|
||||
/* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
|
||||
//Yd12_out += Xd12C_out * si3;
|
||||
p5 =(_Float16) Xd12C_out * (_Float16)si3;
|
||||
|
||||
/* Sine halves: subtracted from the real parts, added to the imaginary parts */
Xb12_out -= (_Float16)p0;
|
||||
Yb12_out += (_Float16)p1;
|
||||
Xc12_out -= (_Float16)p2;
|
||||
Yc12_out += (_Float16)p3;
|
||||
Xd12_out -= (_Float16)p4;
|
||||
Yd12_out += (_Float16)p5;
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2), stored at index i1 */
|
||||
pSrc[2U * i1] = Xc12_out;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = Yc12_out;
|
||||
|
||||
/* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1), stored at index i2 */
|
||||
pSrc[2U * i2] = Xb12_out;
|
||||
|
||||
/* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = Yb12_out;
|
||||
|
||||
/* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
|
||||
pSrc[2U * i3] = Xd12_out;
|
||||
|
||||
/* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = Yd12_out;
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
/* Updating input index */
|
||||
i0 = i0 + 1U;
|
||||
|
||||
} while (--j);
|
||||
|
||||
/* Twiddle index step is multiplied by 4 for the next stage */
twidCoefModifier <<= 2U;
|
||||
|
||||
/* Calculation of the second stage up to, but excluding, the last stage */
|
||||
for (k = fftLen >> 2U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the current stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* Calculation of the current stage: j selects the twiddle set, i0 walks the groups using it */
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
i0 = j;
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
xaIn = pSrc[(2U * i0)];
|
||||
yaIn = pSrc[(2U * i0) + 1U];
|
||||
|
||||
xbIn = pSrc[(2U * i1)];
|
||||
ybIn = pSrc[(2U * i1) + 1U];
|
||||
|
||||
xcIn = pSrc[(2U * i2)];
|
||||
ycIn = pSrc[(2U * i2) + 1U];
|
||||
|
||||
xdIn = pSrc[(2U * i3)];
|
||||
ydIn = pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
|
||||
/* (xb - xd) */
|
||||
Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
|
||||
/* ya - yc */
|
||||
Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
|
||||
/* (yb - yd) */
|
||||
Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
|
||||
/* xb + xd */
|
||||
Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
|
||||
/* ya + yc */
|
||||
Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
|
||||
/* yb + yd */
|
||||
Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
Xb12C_out = ((_Float16)Xaminusc - (_Float16)Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yb12C_out = ((_Float16)Yaminusc + (_Float16)Xbminusd);
|
||||
/* xa + xc -(xb + xd) */
|
||||
Xc12C_out = ((_Float16)Xaplusc - (_Float16)Xbplusd);
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
Yc12C_out = ((_Float16)Yaplusc - (_Float16)Ybplusd);
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
Xd12C_out = ((_Float16)Xaminusc + (_Float16)Ybminusd);
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
Yd12C_out = ((_Float16)Yaminusc - (_Float16)Xbminusd);
|
||||
|
||||
/* xa' = xa + xb + xc + xd, ya' = ya + yb + yc + yd (no twiddle) */
pSrc[(2U * i0)] = (_Float16)Xaplusc + (_Float16)Xbplusd;
|
||||
pSrc[(2U * i0) + 1U] = (_Float16)Yaplusc + (_Float16)Ybplusd;
|
||||
|
||||
/* Cosine halves of the complex twiddle products */
Xb12_out = (_Float16)Xb12C_out * (_Float16)co1;
|
||||
Yb12_out = (_Float16)Yb12C_out * (_Float16)co1;
|
||||
Xc12_out = (_Float16)Xc12C_out * (_Float16)co2;
|
||||
Yc12_out = (_Float16)Yc12C_out * (_Float16)co2;
|
||||
Xd12_out = (_Float16)Xd12C_out * (_Float16)co3;
|
||||
Yd12_out = (_Float16)Yd12C_out * (_Float16)co3;
|
||||
|
||||
/* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */
|
||||
//Xb12_out -= Yb12C_out * si1;
|
||||
p0 = (_Float16)Yb12C_out * (_Float16)si1;
|
||||
/* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
|
||||
//Yb12_out += Xb12C_out * si1;
|
||||
p1 = (_Float16)Xb12C_out * (_Float16)si1;
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
//Xc12_out -= Yc12C_out * si2;
|
||||
p2 = (_Float16)Yc12C_out * (_Float16)si2;
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
//Yc12_out += Xc12C_out * si2;
|
||||
p3 = (_Float16)Xc12C_out * (_Float16)si2;
|
||||
/* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
|
||||
//Xd12_out -= Yd12C_out * si3;
|
||||
p4 = (_Float16)Yd12C_out * (_Float16)si3;
|
||||
/* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
|
||||
//Yd12_out += Xd12C_out * si3;
|
||||
p5 = (_Float16)Xd12C_out * (_Float16)si3;
|
||||
|
||||
/* Sine halves: subtracted from the real parts, added to the imaginary parts */
Xb12_out -= (_Float16)p0;
|
||||
Yb12_out += (_Float16)p1;
|
||||
Xc12_out -= (_Float16)p2;
|
||||
Yc12_out += (_Float16)p3;
|
||||
Xd12_out -= (_Float16)p4;
|
||||
Yd12_out += (_Float16)p5;
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2), stored at index i1 */
|
||||
pSrc[2U * i1] = Xc12_out;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = Yc12_out;
|
||||
|
||||
/* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1), stored at index i2 */
|
||||
pSrc[2U * i2] = Xb12_out;
|
||||
|
||||
/* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = Yb12_out;
|
||||
|
||||
/* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
|
||||
pSrc[2U * i3] = Xd12_out;
|
||||
|
||||
/* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = Yd12_out;
|
||||
|
||||
/* Advance to the butterfly at the same offset in the next group */
i0 += n1;
|
||||
} while (i0 < fftLen);
|
||||
j++;
|
||||
} while (j <= (n2 - 1U));
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
/* Initializations of last stage: fftLen/4 butterflies over consecutive groups of 4 complex samples */
|
||||
|
||||
j = fftLen >> 2;
|
||||
ptr1 = &pSrc[0];
|
||||
|
||||
/* Calculations of last stage (no twiddle multiplication, outputs scaled by onebyfftLen) */
|
||||
do
|
||||
{
|
||||
xaIn = ptr1[0];
|
||||
yaIn = ptr1[1];
|
||||
xbIn = ptr1[2];
|
||||
ybIn = ptr1[3];
|
||||
xcIn = ptr1[4];
|
||||
ycIn = ptr1[5];
|
||||
xdIn = ptr1[6];
|
||||
ydIn = ptr1[7];
|
||||
|
||||
/* Butterfly implementation */
|
||||
/* xa + xc */
|
||||
Xaplusc = (_Float16)xaIn + (_Float16)xcIn;
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = (_Float16)xaIn - (_Float16)xcIn;
|
||||
|
||||
/* ya + yc */
|
||||
Yaplusc = (_Float16)yaIn + (_Float16)ycIn;
|
||||
|
||||
/* ya - yc */
|
||||
Yaminusc = (_Float16)yaIn - (_Float16)ycIn;
|
||||
|
||||
/* xb + xd */
|
||||
Xbplusd = (_Float16)xbIn + (_Float16)xdIn;
|
||||
|
||||
/* yb + yd */
|
||||
Ybplusd = (_Float16)ybIn + (_Float16)ydIn;
|
||||
|
||||
/* (xb-xd) */
|
||||
Xbminusd = (_Float16)xbIn - (_Float16)xdIn;
|
||||
|
||||
/* (yb-yd) */
|
||||
Ybminusd = (_Float16)ybIn - (_Float16)ydIn;
|
||||
|
||||
/* xa' = (xa+xb+xc+xd), scaled by onebyfftLen below */
|
||||
a0 = ((_Float16)Xaplusc + (_Float16)Xbplusd);
|
||||
/* ya' = (ya+yb+yc+yd), scaled by onebyfftLen below */
|
||||
a1 = ((_Float16)Yaplusc + (_Float16)Ybplusd);
|
||||
/* xc' = (xa-xb+xc-xd), scaled by onebyfftLen below */
|
||||
a2 = ((_Float16)Xaplusc - (_Float16)Xbplusd);
|
||||
/* yc' = (ya-yb+yc-yd), scaled by onebyfftLen below */
|
||||
a3 = ((_Float16)Yaplusc - (_Float16)Ybplusd);
|
||||
/* xb' = (xa-yb-xc+yd), scaled by onebyfftLen below */
|
||||
a4 = ((_Float16)Xaminusc - (_Float16)Ybminusd);
|
||||
/* yb' = (ya+xb-yc-xd), scaled by onebyfftLen below */
|
||||
a5 = ((_Float16)Yaminusc + (_Float16)Xbminusd);
|
||||
/* xd' = (xa+yb-xc-yd), scaled by onebyfftLen below */
|
||||
a6 = ((_Float16)Xaminusc + (_Float16)Ybminusd);
|
||||
/* yd' = (ya-xb-yc+xd), scaled by onebyfftLen below */
|
||||
a7 = ((_Float16)Yaminusc - (_Float16)Xbminusd);
|
||||
|
||||
/* Apply the onebyfftLen scale factor to all eight outputs */
p0 = (_Float16)a0 * (_Float16)onebyfftLen;
|
||||
p1 = (_Float16)a1 * (_Float16)onebyfftLen;
|
||||
p2 = (_Float16)a2 * (_Float16)onebyfftLen;
|
||||
p3 = (_Float16)a3 * (_Float16)onebyfftLen;
|
||||
p4 = (_Float16)a4 * (_Float16)onebyfftLen;
|
||||
p5 = (_Float16)a5 * (_Float16)onebyfftLen;
|
||||
p6 = (_Float16)a6 * (_Float16)onebyfftLen;
|
||||
p7 = (_Float16)a7 * (_Float16)onebyfftLen;
|
||||
|
||||
/* xa' = (xa+xb+xc+xd) * onebyfftLen */
|
||||
ptr1[0] = p0;
|
||||
/* ya' = (ya+yb+yc+yd) * onebyfftLen */
|
||||
ptr1[1] = p1;
|
||||
/* xc' = (xa-xb+xc-xd) * onebyfftLen */
|
||||
ptr1[2] = p2;
|
||||
/* yc' = (ya-yb+yc-yd) * onebyfftLen */
|
||||
ptr1[3] = p3;
|
||||
/* xb' = (xa-yb-xc+yd) * onebyfftLen */
|
||||
ptr1[4] = p4;
|
||||
/* yb' = (ya+xb-yc-xd) * onebyfftLen */
|
||||
ptr1[5] = p5;
|
||||
/* xd' = (xa+yb-xc-yd) * onebyfftLen */
|
||||
ptr1[6] = p6;
|
||||
/* yd' = (ya-xb-yc+xd) * onebyfftLen */
|
||||
ptr1[7] = p7;
|
||||
|
||||
/* increment source pointer by 8 for next calculations */
|
||||
ptr1 = ptr1 + 8U;
|
||||
|
||||
} while (--j);
|
||||
|
||||
#else
|
||||
|
||||
float16_t t1, t2, r1, r2, s1, s2;
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* Calculation of all stages except the last one */
|
||||
for (k = fftLen; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the current stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* Calculation of the current stage: j selects the twiddle set, i0 walks the groups using it */
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
i0 = j;
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* xa + xc */
|
||||
r1 = (_Float16)pSrc[(2U * i0)] + (_Float16)pSrc[(2U * i2)];
|
||||
|
||||
/* xa - xc */
|
||||
r2 = (_Float16)pSrc[(2U * i0)] - (_Float16)pSrc[(2U * i2)];
|
||||
|
||||
/* ya + yc */
|
||||
s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* ya - yc */
|
||||
s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* xb + xd */
|
||||
t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = (_Float16)r1 + (_Float16)t1;
|
||||
|
||||
/* xa + xc -(xb + xd) */
|
||||
r1 = (_Float16)r1 - (_Float16)t1;
|
||||
|
||||
/* yb + yd */
|
||||
t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = (_Float16)s1 + (_Float16)t2;
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = (_Float16)s1 - (_Float16)t2;
|
||||
|
||||
/* (yb - yd) */
|
||||
t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* (xb - xd) */
|
||||
t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2), stored at index i1 */
|
||||
pSrc[2U * i1] = ((_Float16)r1 * (_Float16)co2) - ((_Float16)s1 * (_Float16)si2);
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = ((_Float16)s1 * (_Float16)co2) + ((_Float16)r1 * (_Float16)si2);
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
r1 = (_Float16)r2 - (_Float16)t1;
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
r2 = (_Float16)r2 + (_Float16)t1;
|
||||
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
s1 = (_Float16)s2 + (_Float16)t2;
|
||||
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
s2 = (_Float16)s2 - (_Float16)t2;
|
||||
|
||||
/* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1), stored at index i2 */
|
||||
pSrc[2U * i2] = ((_Float16)r1 * (_Float16)co1) - ((_Float16)s1 * (_Float16)si1);
|
||||
|
||||
/* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = ((_Float16)s1 * (_Float16)co1) + ((_Float16)r1 * (_Float16)si1);
|
||||
|
||||
/* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */
|
||||
pSrc[2U * i3] = ((_Float16)r2 * (_Float16)co3) - ((_Float16)s2 * (_Float16)si3);
|
||||
|
||||
/* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = ((_Float16)s2 * (_Float16)co3) + ((_Float16)r2 * (_Float16)si3);
|
||||
|
||||
/* Advance to the butterfly at the same offset in the next group */
i0 += n1;
|
||||
} while ( i0 < fftLen);
|
||||
j++;
|
||||
} while (j <= (n2 - 1U));
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
/* Initializations of last stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
|
||||
/* Calculations of last stage (no twiddle multiplication, outputs scaled by onebyfftLen) */
|
||||
for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
/* xa + xc */
|
||||
r1 = (_Float16)pSrc[2U * i0] + (_Float16)pSrc[2U * i2];
|
||||
|
||||
/* xa - xc */
|
||||
r2 = (_Float16)pSrc[2U * i0] - (_Float16)pSrc[2U * i2];
|
||||
|
||||
/* ya + yc */
|
||||
s1 = (_Float16)pSrc[(2U * i0) + 1U] + (_Float16)pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* ya - yc */
|
||||
s2 = (_Float16)pSrc[(2U * i0) + 1U] - (_Float16)pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* xb + xd */
|
||||
t1 = (_Float16)pSrc[2U * i1] + (_Float16)pSrc[2U * i3];
|
||||
|
||||
/* xa' = (xa + xb + xc + xd) * onebyfftLen */
|
||||
pSrc[2U * i0] = ((_Float16)r1 + (_Float16)t1) * (_Float16)onebyfftLen;
|
||||
|
||||
/* (xa + xc) - (xb + xd) */
|
||||
r1 = (_Float16)r1 - (_Float16)t1;
|
||||
|
||||
/* yb + yd */
|
||||
t2 = (_Float16)pSrc[(2U * i1) + 1U] + (_Float16)pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* ya' = (ya + yb + yc + yd) * onebyfftLen */
|
||||
pSrc[(2U * i0) + 1U] = ((_Float16)s1 + (_Float16)t2) * (_Float16)onebyfftLen;
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = (_Float16)s1 - (_Float16)t2;
|
||||
|
||||
/* (yb-yd) */
|
||||
t1 = (_Float16)pSrc[(2U * i1) + 1U] - (_Float16)pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* (xb-xd) */
|
||||
t2 = (_Float16)pSrc[2U * i1] - (_Float16)pSrc[2U * i3];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd) * onebyfftLen, stored at index i1 */
|
||||
pSrc[2U * i1] = (_Float16)r1 * (_Float16)onebyfftLen;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd) * onebyfftLen */
|
||||
pSrc[(2U * i1) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
|
||||
|
||||
/* (xa - xc) - (yb-yd) */
|
||||
r1 = (_Float16)r2 - (_Float16)t1;
|
||||
|
||||
/* (xa - xc) + (yb-yd) */
|
||||
r2 = (_Float16)r2 + (_Float16)t1;
|
||||
|
||||
/* (ya - yc) + (xb-xd) */
|
||||
s1 = (_Float16)s2 + (_Float16)t2;
|
||||
|
||||
/* (ya - yc) - (xb-xd) */
|
||||
s2 = (_Float16)s2 - (_Float16)t2;
|
||||
|
||||
/* xb' = (xa-yb-xc+yd) * onebyfftLen, stored at index i2 */
|
||||
pSrc[2U * i2] = (_Float16)r1 * (_Float16)onebyfftLen;
|
||||
|
||||
/* yb' = (ya+xb-yc-xd) * onebyfftLen */
|
||||
pSrc[(2U * i2) + 1U] = (_Float16)s1 * (_Float16)onebyfftLen;
|
||||
|
||||
/* xd' = (xa+yb-xc-yd) * onebyfftLen */
|
||||
pSrc[2U * i3] = (_Float16)r2 * (_Float16)onebyfftLen;
|
||||
|
||||
/* yd' = (ya-xb-yc+xd) * onebyfftLen */
|
||||
pSrc[(2U * i3) + 1U] = (_Float16)s2 * (_Float16)onebyfftLen;
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
}
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
1203
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c
Normal file
1203
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c
Normal file
@@ -0,0 +1,1203 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix4_f32.c
|
||||
* Description: Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
extern void arm_bitreversal_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftSize,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab);
|
||||
|
||||
void arm_radix4_butterfly_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
void arm_radix4_butterfly_inverse_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier,
|
||||
float32_t onebyfftLen);
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
|
||||
@param[in] S points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_cfft_radix4_f32(
|
||||
const arm_cfft_radix4_instance_f32 * S,
|
||||
float32_t * pSrc)
|
||||
{
|
||||
if (S->ifftFlag == 1U)
|
||||
{
|
||||
/* Complex IFFT radix-4 */
|
||||
arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier, S->onebyfftLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Complex FFT radix-4 */
|
||||
arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
|
||||
if (S->bitReverseFlag == 1U)
|
||||
{
|
||||
/* Bit Reversal */
|
||||
arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* Internal helper function used by the FFTs
|
||||
* ---------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
brief Core function for the floating-point CFFT butterfly process.
|
||||
param[in,out] pSrc points to the in-place buffer of floating-point data type
|
||||
param[in] fftLen length of the FFT
|
||||
param[in] pCoef points to the twiddle coefficient buffer
|
||||
param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
|
||||
return none
|
||||
*/
|
||||
|
||||
void arm_radix4_butterfly_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
float32_t co1, co2, co3, si1, si2, si3;
|
||||
uint32_t ia1, ia2, ia3;
|
||||
uint32_t i0, i1, i2, i3;
|
||||
uint32_t n1, n2, j, k;
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
|
||||
float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
|
||||
Ybminusd;
|
||||
float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
|
||||
float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
|
||||
float32_t *ptr1;
|
||||
float32_t p0,p1,p2,p3,p4,p5;
|
||||
float32_t a0,a1,a2,a3,a4,a5,a6,a7;
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
i0 = 0U;
|
||||
ia1 = 0U;
|
||||
|
||||
j = n2;
|
||||
|
||||
/* Calculation of first stage */
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
xaIn = pSrc[(2U * i0)];
|
||||
yaIn = pSrc[(2U * i0) + 1U];
|
||||
|
||||
xbIn = pSrc[(2U * i1)];
|
||||
ybIn = pSrc[(2U * i1) + 1U];
|
||||
|
||||
xcIn = pSrc[(2U * i2)];
|
||||
ycIn = pSrc[(2U * i2) + 1U];
|
||||
|
||||
xdIn = pSrc[(2U * i3)];
|
||||
ydIn = pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = xaIn + xcIn;
|
||||
/* xb + xd */
|
||||
Xbplusd = xbIn + xdIn;
|
||||
/* ya + yc */
|
||||
Yaplusc = yaIn + ycIn;
|
||||
/* yb + yd */
|
||||
Ybplusd = ybIn + ydIn;
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = xaIn - xcIn;
|
||||
/* xb - xd */
|
||||
Xbminusd = xbIn - xdIn;
|
||||
/* ya - yc */
|
||||
Yaminusc = yaIn - ycIn;
|
||||
/* yb - yd */
|
||||
Ybminusd = ybIn - ydIn;
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[(2U * i0)] = Xaplusc + Xbplusd;
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
Xb12C_out = (Xaminusc + Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yb12C_out = (Yaminusc - Xbminusd);
|
||||
/* (xa + xc) - (xb + xd) */
|
||||
Xc12C_out = (Xaplusc - Xbplusd);
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
Yc12C_out = (Yaplusc - Ybplusd);
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
Xd12C_out = (Xaminusc - Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yd12C_out = (Xbminusd + Yaminusc);
|
||||
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia3 = ia2 + ia1;
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
Xb12_out = Xb12C_out * co1;
|
||||
Yb12_out = Yb12C_out * co1;
|
||||
Xc12_out = Xc12C_out * co2;
|
||||
Yc12_out = Yc12C_out * co2;
|
||||
Xd12_out = Xd12C_out * co3;
|
||||
Yd12_out = Yd12C_out * co3;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
//Xb12_out -= Yb12C_out * si1;
|
||||
p0 = Yb12C_out * si1;
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
//Yb12_out += Xb12C_out * si1;
|
||||
p1 = Xb12C_out * si1;
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
//Xc12_out -= Yc12C_out * si2;
|
||||
p2 = Yc12C_out * si2;
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
//Yc12_out += Xc12C_out * si2;
|
||||
p3 = Xc12C_out * si2;
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
//Xd12_out -= Yd12C_out * si3;
|
||||
p4 = Yd12C_out * si3;
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
//Yd12_out += Xd12C_out * si3;
|
||||
p5 = Xd12C_out * si3;
|
||||
|
||||
Xb12_out += p0;
|
||||
Yb12_out -= p1;
|
||||
Xc12_out += p2;
|
||||
Yc12_out -= p3;
|
||||
Xd12_out += p4;
|
||||
Yd12_out -= p5;
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = Xc12_out;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = Yc12_out;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = Xb12_out;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = Yb12_out;
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = Xd12_out;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = Yd12_out;
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 += twidCoefModifier;
|
||||
|
||||
/* Updating input index */
|
||||
i0++;
|
||||
|
||||
}
|
||||
while (--j);
|
||||
|
||||
twidCoefModifier <<= 2U;
|
||||
|
||||
/* Calculation of second stage to excluding last stage */
|
||||
for (k = fftLen >> 2U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the first stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* Calculation of first stage */
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[(ia1 * 2U)];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[(ia2 * 2U)];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[(ia3 * 2U)];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 += twidCoefModifier;
|
||||
|
||||
i0 = j;
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
xaIn = pSrc[(2U * i0)];
|
||||
yaIn = pSrc[(2U * i0) + 1U];
|
||||
|
||||
xbIn = pSrc[(2U * i1)];
|
||||
ybIn = pSrc[(2U * i1) + 1U];
|
||||
|
||||
xcIn = pSrc[(2U * i2)];
|
||||
ycIn = pSrc[(2U * i2) + 1U];
|
||||
|
||||
xdIn = pSrc[(2U * i3)];
|
||||
ydIn = pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = xaIn - xcIn;
|
||||
/* (xb - xd) */
|
||||
Xbminusd = xbIn - xdIn;
|
||||
/* ya - yc */
|
||||
Yaminusc = yaIn - ycIn;
|
||||
/* (yb - yd) */
|
||||
Ybminusd = ybIn - ydIn;
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = xaIn + xcIn;
|
||||
/* xb + xd */
|
||||
Xbplusd = xbIn + xdIn;
|
||||
/* ya + yc */
|
||||
Yaplusc = yaIn + ycIn;
|
||||
/* yb + yd */
|
||||
Ybplusd = ybIn + ydIn;
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
Xb12C_out = (Xaminusc + Ybminusd);
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
Yb12C_out = (Yaminusc - Xbminusd);
|
||||
/* xa + xc -(xb + xd) */
|
||||
Xc12C_out = (Xaplusc - Xbplusd);
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
Yc12C_out = (Yaplusc - Ybplusd);
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
Xd12C_out = (Xaminusc - Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yd12C_out = (Xbminusd + Yaminusc);
|
||||
|
||||
pSrc[(2U * i0)] = Xaplusc + Xbplusd;
|
||||
pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
|
||||
|
||||
Xb12_out = Xb12C_out * co1;
|
||||
Yb12_out = Yb12C_out * co1;
|
||||
Xc12_out = Xc12C_out * co2;
|
||||
Yc12_out = Yc12C_out * co2;
|
||||
Xd12_out = Xd12C_out * co3;
|
||||
Yd12_out = Yd12C_out * co3;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
//Xb12_out -= Yb12C_out * si1;
|
||||
p0 = Yb12C_out * si1;
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
//Yb12_out += Xb12C_out * si1;
|
||||
p1 = Xb12C_out * si1;
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
//Xc12_out -= Yc12C_out * si2;
|
||||
p2 = Yc12C_out * si2;
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
//Yc12_out += Xc12C_out * si2;
|
||||
p3 = Xc12C_out * si2;
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
//Xd12_out -= Yd12C_out * si3;
|
||||
p4 = Yd12C_out * si3;
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
//Yd12_out += Xd12C_out * si3;
|
||||
p5 = Xd12C_out * si3;
|
||||
|
||||
Xb12_out += p0;
|
||||
Yb12_out -= p1;
|
||||
Xc12_out += p2;
|
||||
Yc12_out -= p3;
|
||||
Xd12_out += p4;
|
||||
Yd12_out -= p5;
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = Xc12_out;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = Yc12_out;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = Xb12_out;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = Yb12_out;
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = Xd12_out;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = Yd12_out;
|
||||
|
||||
i0 += n1;
|
||||
} while (i0 < fftLen);
|
||||
j++;
|
||||
} while (j <= (n2 - 1U));
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
|
||||
j = fftLen >> 2;
|
||||
ptr1 = &pSrc[0];
|
||||
|
||||
/* Calculations of last stage */
|
||||
do
|
||||
{
|
||||
xaIn = ptr1[0];
|
||||
yaIn = ptr1[1];
|
||||
xbIn = ptr1[2];
|
||||
ybIn = ptr1[3];
|
||||
xcIn = ptr1[4];
|
||||
ycIn = ptr1[5];
|
||||
xdIn = ptr1[6];
|
||||
ydIn = ptr1[7];
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = xaIn + xcIn;
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = xaIn - xcIn;
|
||||
|
||||
/* ya + yc */
|
||||
Yaplusc = yaIn + ycIn;
|
||||
|
||||
/* ya - yc */
|
||||
Yaminusc = yaIn - ycIn;
|
||||
|
||||
/* xb + xd */
|
||||
Xbplusd = xbIn + xdIn;
|
||||
|
||||
/* yb + yd */
|
||||
Ybplusd = ybIn + ydIn;
|
||||
|
||||
/* (xb-xd) */
|
||||
Xbminusd = xbIn - xdIn;
|
||||
|
||||
/* (yb-yd) */
|
||||
Ybminusd = ybIn - ydIn;
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
a0 = (Xaplusc + Xbplusd);
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
a1 = (Yaplusc + Ybplusd);
|
||||
/* xc' = (xa-xb+xc-xd) */
|
||||
a2 = (Xaplusc - Xbplusd);
|
||||
/* yc' = (ya-yb+yc-yd) */
|
||||
a3 = (Yaplusc - Ybplusd);
|
||||
/* xb' = (xa+yb-xc-yd) */
|
||||
a4 = (Xaminusc + Ybminusd);
|
||||
/* yb' = (ya-xb-yc+xd) */
|
||||
a5 = (Yaminusc - Xbminusd);
|
||||
/* xd' = (xa-yb-xc+yd)) */
|
||||
a6 = (Xaminusc - Ybminusd);
|
||||
/* yd' = (ya+xb-yc-xd) */
|
||||
a7 = (Xbminusd + Yaminusc);
|
||||
|
||||
ptr1[0] = a0;
|
||||
ptr1[1] = a1;
|
||||
ptr1[2] = a2;
|
||||
ptr1[3] = a3;
|
||||
ptr1[4] = a4;
|
||||
ptr1[5] = a5;
|
||||
ptr1[6] = a6;
|
||||
ptr1[7] = a7;
|
||||
|
||||
/* increment pointer by 8 */
|
||||
ptr1 += 8U;
|
||||
} while (--j);
|
||||
|
||||
#else
|
||||
|
||||
float32_t t1, t2, r1, r2, s1, s2;
|
||||
|
||||
/* Initializations for the fft calculation */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
for (k = fftLen; k > 1U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the fft calculation */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* FFT Calculation */
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
i0 = j;
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* xa + xc */
|
||||
r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
|
||||
|
||||
/* xa - xc */
|
||||
r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
|
||||
|
||||
/* ya + yc */
|
||||
s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* ya - yc */
|
||||
s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* xb + xd */
|
||||
t1 = pSrc[2U * i1] + pSrc[2U * i3];
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = r1 + t1;
|
||||
|
||||
/* xa + xc -(xb + xd) */
|
||||
r1 = r1 - t1;
|
||||
|
||||
/* yb + yd */
|
||||
t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = s1 + t2;
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = s1 - t2;
|
||||
|
||||
/* (yb - yd) */
|
||||
t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* (xb - xd) */
|
||||
t2 = pSrc[2U * i1] - pSrc[2U * i3];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = (r1 * co2) + (s1 * si2);
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = (s1 * co2) - (r1 * si2);
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
r1 = r2 + t1;
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
r2 = r2 - t1;
|
||||
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
s1 = s2 - t2;
|
||||
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
s2 = s2 + t2;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = (r1 * co1) + (s1 * si1);
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = (s1 * co1) - (r1 * si1);
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = (r2 * co3) + (s2 * si3);
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = (s2 * co3) - (r2 * si3);
|
||||
|
||||
i0 += n1;
|
||||
} while ( i0 < fftLen);
|
||||
j++;
|
||||
} while (j <= (n2 - 1U));
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
brief Core function for the floating-point CIFFT butterfly process.
|
||||
param[in,out] pSrc points to the in-place buffer of floating-point data type
|
||||
param[in] fftLen length of the FFT
|
||||
param[in] pCoef points to twiddle coefficient buffer
|
||||
param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
param[in] onebyfftLen value of 1/fftLen
|
||||
return none
|
||||
*/
|
||||
|
||||
void arm_radix4_butterfly_inverse_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier,
|
||||
float32_t onebyfftLen)
|
||||
{
|
||||
float32_t co1, co2, co3, si1, si2, si3;
|
||||
uint32_t ia1, ia2, ia3;
|
||||
uint32_t i0, i1, i2, i3;
|
||||
uint32_t n1, n2, j, k;
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
|
||||
float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
|
||||
Ybminusd;
|
||||
float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
|
||||
float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
|
||||
float32_t *ptr1;
|
||||
float32_t p0,p1,p2,p3,p4,p5,p6,p7;
|
||||
float32_t a0,a1,a2,a3,a4,a5,a6,a7;
|
||||
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
i0 = 0U;
|
||||
ia1 = 0U;
|
||||
|
||||
j = n2;
|
||||
|
||||
/* Calculation of first stage */
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
xaIn = pSrc[(2U * i0)];
|
||||
yaIn = pSrc[(2U * i0) + 1U];
|
||||
|
||||
xcIn = pSrc[(2U * i2)];
|
||||
ycIn = pSrc[(2U * i2) + 1U];
|
||||
|
||||
xbIn = pSrc[(2U * i1)];
|
||||
ybIn = pSrc[(2U * i1) + 1U];
|
||||
|
||||
xdIn = pSrc[(2U * i3)];
|
||||
ydIn = pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = xaIn + xcIn;
|
||||
/* xb + xd */
|
||||
Xbplusd = xbIn + xdIn;
|
||||
/* ya + yc */
|
||||
Yaplusc = yaIn + ycIn;
|
||||
/* yb + yd */
|
||||
Ybplusd = ybIn + ydIn;
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = xaIn - xcIn;
|
||||
/* xb - xd */
|
||||
Xbminusd = xbIn - xdIn;
|
||||
/* ya - yc */
|
||||
Yaminusc = yaIn - ycIn;
|
||||
/* yb - yd */
|
||||
Ybminusd = ybIn - ydIn;
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[(2U * i0)] = Xaplusc + Xbplusd;
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
Xb12C_out = (Xaminusc - Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yb12C_out = (Yaminusc + Xbminusd);
|
||||
/* (xa + xc) - (xb + xd) */
|
||||
Xc12C_out = (Xaplusc - Xbplusd);
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
Yc12C_out = (Yaplusc - Ybplusd);
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
Xd12C_out = (Xaminusc + Ybminusd);
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
Yd12C_out = (Yaminusc - Xbminusd);
|
||||
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia3 = ia2 + ia1;
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
Xb12_out = Xb12C_out * co1;
|
||||
Yb12_out = Yb12C_out * co1;
|
||||
Xc12_out = Xc12C_out * co2;
|
||||
Yc12_out = Yc12C_out * co2;
|
||||
Xd12_out = Xd12C_out * co3;
|
||||
Yd12_out = Yd12C_out * co3;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
//Xb12_out -= Yb12C_out * si1;
|
||||
p0 = Yb12C_out * si1;
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
//Yb12_out += Xb12C_out * si1;
|
||||
p1 = Xb12C_out * si1;
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
//Xc12_out -= Yc12C_out * si2;
|
||||
p2 = Yc12C_out * si2;
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
//Yc12_out += Xc12C_out * si2;
|
||||
p3 = Xc12C_out * si2;
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
//Xd12_out -= Yd12C_out * si3;
|
||||
p4 = Yd12C_out * si3;
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
//Yd12_out += Xd12C_out * si3;
|
||||
p5 = Xd12C_out * si3;
|
||||
|
||||
Xb12_out -= p0;
|
||||
Yb12_out += p1;
|
||||
Xc12_out -= p2;
|
||||
Yc12_out += p3;
|
||||
Xd12_out -= p4;
|
||||
Yd12_out += p5;
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = Xc12_out;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = Yc12_out;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = Xb12_out;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = Yb12_out;
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = Xd12_out;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = Yd12_out;
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
/* Updating input index */
|
||||
i0 = i0 + 1U;
|
||||
|
||||
} while (--j);
|
||||
|
||||
twidCoefModifier <<= 2U;
|
||||
|
||||
/* Calculation of second stage to excluding last stage */
|
||||
for (k = fftLen >> 2U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the first stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* Calculation of first stage */
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
i0 = j;
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
xaIn = pSrc[(2U * i0)];
|
||||
yaIn = pSrc[(2U * i0) + 1U];
|
||||
|
||||
xbIn = pSrc[(2U * i1)];
|
||||
ybIn = pSrc[(2U * i1) + 1U];
|
||||
|
||||
xcIn = pSrc[(2U * i2)];
|
||||
ycIn = pSrc[(2U * i2) + 1U];
|
||||
|
||||
xdIn = pSrc[(2U * i3)];
|
||||
ydIn = pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = xaIn - xcIn;
|
||||
/* (xb - xd) */
|
||||
Xbminusd = xbIn - xdIn;
|
||||
/* ya - yc */
|
||||
Yaminusc = yaIn - ycIn;
|
||||
/* (yb - yd) */
|
||||
Ybminusd = ybIn - ydIn;
|
||||
|
||||
/* xa + xc */
|
||||
Xaplusc = xaIn + xcIn;
|
||||
/* xb + xd */
|
||||
Xbplusd = xbIn + xdIn;
|
||||
/* ya + yc */
|
||||
Yaplusc = yaIn + ycIn;
|
||||
/* yb + yd */
|
||||
Ybplusd = ybIn + ydIn;
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
Xb12C_out = (Xaminusc - Ybminusd);
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
Yb12C_out = (Yaminusc + Xbminusd);
|
||||
/* xa + xc -(xb + xd) */
|
||||
Xc12C_out = (Xaplusc - Xbplusd);
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
Yc12C_out = (Yaplusc - Ybplusd);
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
Xd12C_out = (Xaminusc + Ybminusd);
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
Yd12C_out = (Yaminusc - Xbminusd);
|
||||
|
||||
pSrc[(2U * i0)] = Xaplusc + Xbplusd;
|
||||
pSrc[(2U * i0) + 1U] = Yaplusc + Ybplusd;
|
||||
|
||||
Xb12_out = Xb12C_out * co1;
|
||||
Yb12_out = Yb12C_out * co1;
|
||||
Xc12_out = Xc12C_out * co2;
|
||||
Yc12_out = Yc12C_out * co2;
|
||||
Xd12_out = Xd12C_out * co3;
|
||||
Yd12_out = Yd12C_out * co3;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
//Xb12_out -= Yb12C_out * si1;
|
||||
p0 = Yb12C_out * si1;
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
//Yb12_out += Xb12C_out * si1;
|
||||
p1 = Xb12C_out * si1;
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
//Xc12_out -= Yc12C_out * si2;
|
||||
p2 = Yc12C_out * si2;
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
//Yc12_out += Xc12C_out * si2;
|
||||
p3 = Xc12C_out * si2;
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
//Xd12_out -= Yd12C_out * si3;
|
||||
p4 = Yd12C_out * si3;
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
//Yd12_out += Xd12C_out * si3;
|
||||
p5 = Xd12C_out * si3;
|
||||
|
||||
Xb12_out -= p0;
|
||||
Yb12_out += p1;
|
||||
Xc12_out -= p2;
|
||||
Yc12_out += p3;
|
||||
Xd12_out -= p4;
|
||||
Yd12_out += p5;
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = Xc12_out;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = Yc12_out;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = Xb12_out;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = Yb12_out;
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = Xd12_out;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = Yd12_out;
|
||||
|
||||
i0 += n1;
|
||||
} while (i0 < fftLen);
|
||||
j++;
|
||||
} while (j <= (n2 - 1U));
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
/* Initializations of last stage */
|
||||
|
||||
j = fftLen >> 2;
|
||||
ptr1 = &pSrc[0];
|
||||
|
||||
/* Calculations of last stage */
|
||||
do
|
||||
{
|
||||
xaIn = ptr1[0];
|
||||
yaIn = ptr1[1];
|
||||
xbIn = ptr1[2];
|
||||
ybIn = ptr1[3];
|
||||
xcIn = ptr1[4];
|
||||
ycIn = ptr1[5];
|
||||
xdIn = ptr1[6];
|
||||
ydIn = ptr1[7];
|
||||
|
||||
/* Butterfly implementation */
|
||||
/* xa + xc */
|
||||
Xaplusc = xaIn + xcIn;
|
||||
|
||||
/* xa - xc */
|
||||
Xaminusc = xaIn - xcIn;
|
||||
|
||||
/* ya + yc */
|
||||
Yaplusc = yaIn + ycIn;
|
||||
|
||||
/* ya - yc */
|
||||
Yaminusc = yaIn - ycIn;
|
||||
|
||||
/* xb + xd */
|
||||
Xbplusd = xbIn + xdIn;
|
||||
|
||||
/* yb + yd */
|
||||
Ybplusd = ybIn + ydIn;
|
||||
|
||||
/* (xb-xd) */
|
||||
Xbminusd = xbIn - xdIn;
|
||||
|
||||
/* (yb-yd) */
|
||||
Ybminusd = ybIn - ydIn;
|
||||
|
||||
/* xa' = (xa+xb+xc+xd) * onebyfftLen */
|
||||
a0 = (Xaplusc + Xbplusd);
|
||||
/* ya' = (ya+yb+yc+yd) * onebyfftLen */
|
||||
a1 = (Yaplusc + Ybplusd);
|
||||
/* xc' = (xa-xb+xc-xd) * onebyfftLen */
|
||||
a2 = (Xaplusc - Xbplusd);
|
||||
/* yc' = (ya-yb+yc-yd) * onebyfftLen */
|
||||
a3 = (Yaplusc - Ybplusd);
|
||||
/* xb' = (xa-yb-xc+yd) * onebyfftLen */
|
||||
a4 = (Xaminusc - Ybminusd);
|
||||
/* yb' = (ya+xb-yc-xd) * onebyfftLen */
|
||||
a5 = (Yaminusc + Xbminusd);
|
||||
/* xd' = (xa-yb-xc+yd) * onebyfftLen */
|
||||
a6 = (Xaminusc + Ybminusd);
|
||||
/* yd' = (ya-xb-yc+xd) * onebyfftLen */
|
||||
a7 = (Yaminusc - Xbminusd);
|
||||
|
||||
p0 = a0 * onebyfftLen;
|
||||
p1 = a1 * onebyfftLen;
|
||||
p2 = a2 * onebyfftLen;
|
||||
p3 = a3 * onebyfftLen;
|
||||
p4 = a4 * onebyfftLen;
|
||||
p5 = a5 * onebyfftLen;
|
||||
p6 = a6 * onebyfftLen;
|
||||
p7 = a7 * onebyfftLen;
|
||||
|
||||
/* xa' = (xa+xb+xc+xd) * onebyfftLen */
|
||||
ptr1[0] = p0;
|
||||
/* ya' = (ya+yb+yc+yd) * onebyfftLen */
|
||||
ptr1[1] = p1;
|
||||
/* xc' = (xa-xb+xc-xd) * onebyfftLen */
|
||||
ptr1[2] = p2;
|
||||
/* yc' = (ya-yb+yc-yd) * onebyfftLen */
|
||||
ptr1[3] = p3;
|
||||
/* xb' = (xa-yb-xc+yd) * onebyfftLen */
|
||||
ptr1[4] = p4;
|
||||
/* yb' = (ya+xb-yc-xd) * onebyfftLen */
|
||||
ptr1[5] = p5;
|
||||
/* xd' = (xa-yb-xc+yd) * onebyfftLen */
|
||||
ptr1[6] = p6;
|
||||
/* yd' = (ya-xb-yc+xd) * onebyfftLen */
|
||||
ptr1[7] = p7;
|
||||
|
||||
/* increment source pointer by 8 for next calculations */
|
||||
ptr1 = ptr1 + 8U;
|
||||
|
||||
} while (--j);
|
||||
|
||||
#else
|
||||
|
||||
float32_t t1, t2, r1, r2, s1, s2;
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* Calculation of first stage */
|
||||
for (k = fftLen; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the first stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* Calculation of first stage */
|
||||
j = 0;
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
i0 = j;
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* xa + xc */
|
||||
r1 = pSrc[(2U * i0)] + pSrc[(2U * i2)];
|
||||
|
||||
/* xa - xc */
|
||||
r2 = pSrc[(2U * i0)] - pSrc[(2U * i2)];
|
||||
|
||||
/* ya + yc */
|
||||
s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* ya - yc */
|
||||
s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* xb + xd */
|
||||
t1 = pSrc[2U * i1] + pSrc[2U * i3];
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = r1 + t1;
|
||||
|
||||
/* xa + xc -(xb + xd) */
|
||||
r1 = r1 - t1;
|
||||
|
||||
/* yb + yd */
|
||||
t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = s1 + t2;
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = s1 - t2;
|
||||
|
||||
/* (yb - yd) */
|
||||
t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* (xb - xd) */
|
||||
t2 = pSrc[2U * i1] - pSrc[2U * i3];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = (r1 * co2) - (s1 * si2);
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = (s1 * co2) + (r1 * si2);
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
r1 = r2 - t1;
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
r2 = r2 + t1;
|
||||
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
s1 = s2 + t2;
|
||||
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
s2 = s2 - t2;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = (r1 * co1) - (s1 * si1);
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = (s1 * co1) + (r1 * si1);
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = (r2 * co3) - (s2 * si3);
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = (s2 * co3) + (r2 * si3);
|
||||
|
||||
i0 += n1;
|
||||
} while ( i0 < fftLen);
|
||||
j++;
|
||||
} while (j <= (n2 - 1U));
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
/* Initializations of last stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
|
||||
/* Calculations of last stage */
|
||||
for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
/* xa + xc */
|
||||
r1 = pSrc[2U * i0] + pSrc[2U * i2];
|
||||
|
||||
/* xa - xc */
|
||||
r2 = pSrc[2U * i0] - pSrc[2U * i2];
|
||||
|
||||
/* ya + yc */
|
||||
s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* ya - yc */
|
||||
s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* xb + xd */
|
||||
t1 = pSrc[2U * i1] + pSrc[2U * i3];
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = (r1 + t1) * onebyfftLen;
|
||||
|
||||
/* (xa + xc) - (xb + xd) */
|
||||
r1 = r1 - t1;
|
||||
|
||||
/* yb + yd */
|
||||
t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = (s1 + t2) * onebyfftLen;
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = s1 - t2;
|
||||
|
||||
/* (yb-yd) */
|
||||
t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
|
||||
|
||||
/* (xb-xd) */
|
||||
t2 = pSrc[2U * i1] - pSrc[2U * i3];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = r1 * onebyfftLen;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = s1 * onebyfftLen;
|
||||
|
||||
/* (xa - xc) - (yb-yd) */
|
||||
r1 = r2 - t1;
|
||||
|
||||
/* (xa - xc) + (yb-yd) */
|
||||
r2 = r2 + t1;
|
||||
|
||||
/* (ya - yc) + (xb-xd) */
|
||||
s1 = s2 + t2;
|
||||
|
||||
/* (ya - yc) - (xb-xd) */
|
||||
s2 = s2 - t2;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = r1 * onebyfftLen;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = s1 * onebyfftLen;
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = r2 * onebyfftLen;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix4_init_f16.c
|
||||
* Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
#include "arm_common_tables.h"
|
||||
#include "arm_common_tables_f16.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
  @brief         Initialization function for the f16 floating-point radix-4 CFFT/CIFFT.
  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f16 and will be removed in the future.
  @param[in,out] S               points to the CFFT/CIFFT instance structure that is filled in
  @param[in]     fftLen          length of the FFT
  @param[in]     ifftFlag        flag that selects transform direction
                   - value = 0: forward transform
                   - value = 1: inverse transform
  @param[in]     bitReverseFlag  flag that enables / disables bit reversal of output
                   - value = 0: disables bit reversal of output
                   - value = 1: enables bit reversal of output
  @return        execution status
                   - \ref ARM_MATH_SUCCESS        : Operation successful
                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
                                                    (also the result when the required tables are
                                                    compiled out, because the status is never updated)

  @par           Details
                   <code>ifftFlag</code> selects the direction: set it to 1 for a CIFFT,
                   any other setting computes a CFFT.
  @par
                   <code>bitReverseFlag</code> selects the output ordering: set it to 1 for
                   normal order, otherwise the output is left in bit reversed order.
  @par
                   <code>fftLen</code> is the length of the CFFT/CIFFT process.
                   The lengths handled here are 16, 64, 256, 1024 and 4096.
  @par
                   The twiddle factor table pointer and the bit reversal table pointer of
                   the instance are initialized as well.
 */

#if defined(ARM_FLOAT16_SUPPORTED)

arm_status arm_cfft_radix4_init_f16(
  arm_cfft_radix4_instance_f16 * S,
  uint16_t fftLen,
  uint8_t ifftFlag,
  uint8_t bitReverseFlag)
{
  /* Remains an argument error unless the table-dependent code below is compiled in */
  arm_status status = ARM_MATH_ARGUMENT_ERROR;

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F16_4096)

  /* Fields that do not depend on the transform length */
  S->bitReverseFlag = bitReverseFlag;
  S->ifftFlag       = ifftFlag;
  S->pTwiddle       = (float16_t *) twiddleCoefF16;
  S->fftLen         = fftLen;

  /* Optimistic default; the length check below may revert it */
  status = ARM_MATH_SUCCESS;

#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)

  /* Per-length strides into the shared tables, plus the 1/fftLen scale factor */
  switch (fftLen)
  {
    case 16U:
      S->onebyfftLen      = 0.0625f;
      S->pBitRevTable     = (uint16_t *) &armBitRevTable[255];
      S->bitRevFactor     = 256U;
      S->twidCoefModifier = 256U;
      break;

    case 64U:
      S->onebyfftLen      = 0.015625f;
      S->pBitRevTable     = (uint16_t *) &armBitRevTable[63];
      S->bitRevFactor     = 64U;
      S->twidCoefModifier = 64U;
      break;

    case 256U:
      S->onebyfftLen      = 0.00390625f;
      S->pBitRevTable     = (uint16_t *) &armBitRevTable[15];
      S->bitRevFactor     = 16U;
      S->twidCoefModifier = 16U;
      break;

    case 1024U:
      S->onebyfftLen      = 0.0009765625f;
      S->pBitRevTable     = (uint16_t *) &armBitRevTable[3];
      S->bitRevFactor     = 4U;
      S->twidCoefModifier = 4U;
      break;

    case 4096U:
      S->onebyfftLen      = 0.000244140625;
      S->pBitRevTable     = (uint16_t *) armBitRevTable;
      S->bitRevFactor     = 1U;
      S->twidCoefModifier = 1U;
      break;

    default:
      /* Any other length is rejected */
      status = ARM_MATH_ARGUMENT_ERROR;
      break;
  }

#endif /* bit reversal table available */

#endif /* f16 twiddle table available */

#endif /* FFT tables allowed */

  return (status);
}

#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
@@ -0,0 +1,168 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix4_init_f32.c
|
||||
* Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the floating-point CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superceded by \ref arm_cfft_f32 and will be removed in the future.
|
||||
@param[in,out] S points to an instance of the floating-point CFFT/CIFFT structure
|
||||
@param[in] fftLen length of the FFT
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
|
||||
|
||||
@par Details
|
||||
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
|
||||
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
|
||||
@par
|
||||
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
|
||||
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
|
||||
@par
|
||||
The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
|
||||
@par
|
||||
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
|
||||
*/
|
||||
|
||||
arm_status arm_cfft_radix4_init_f32(
|
||||
arm_cfft_radix4_instance_f32 * S,
|
||||
uint16_t fftLen,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_ARGUMENT_ERROR;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096)
|
||||
|
||||
/* Initialise the default arm status */
|
||||
status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = (float32_t *) twiddleCoef;
|
||||
|
||||
/* Initialise the Flag for selection of CFFT or CIFFT */
|
||||
S->ifftFlag = ifftFlag;
|
||||
|
||||
/* Initialise the Flag for calculation Bit reversal or not */
|
||||
S->bitReverseFlag = bitReverseFlag;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_F32_4096)
|
||||
|
||||
/* Initializations of structure parameters depending on the FFT length */
|
||||
switch (S->fftLen)
|
||||
{
|
||||
|
||||
case 4096U:
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 1U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 1U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) armBitRevTable;
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.000244140625;
|
||||
break;
|
||||
|
||||
case 1024U:
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 4U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 4U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
|
||||
/* Initialise the 1/fftLen Value */
|
||||
S->onebyfftLen = 0.0009765625f;
|
||||
break;
|
||||
|
||||
|
||||
case 256U:
|
||||
/* Initializations of structure parameters for 256 point FFT */
|
||||
S->twidCoefModifier = 16U;
|
||||
S->bitRevFactor = 16U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
|
||||
S->onebyfftLen = 0.00390625f;
|
||||
break;
|
||||
|
||||
case 64U:
|
||||
/* Initializations of structure parameters for 64 point FFT */
|
||||
S->twidCoefModifier = 64U;
|
||||
S->bitRevFactor = 64U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
|
||||
S->onebyfftLen = 0.015625f;
|
||||
break;
|
||||
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->twidCoefModifier = 256U;
|
||||
S->bitRevFactor = 256U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
|
||||
S->onebyfftLen = 0.0625f;
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
@@ -0,0 +1,157 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix4_init_q15.c
|
||||
* Description: Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@brief Initialization function for the Q15 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
|
||||
@param[in,out] S points to an instance of the Q15 CFFT/CIFFT structure
|
||||
@param[in] fftLen length of the FFT
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
|
||||
|
||||
@par Details
|
||||
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
|
||||
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
|
||||
@par
|
||||
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
|
||||
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
|
||||
@par
|
||||
The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
|
||||
@par
|
||||
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
|
||||
*/
|
||||
|
||||
arm_status arm_cfft_radix4_init_q15(
|
||||
arm_cfft_radix4_instance_q15 * S,
|
||||
uint16_t fftLen,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_ARGUMENT_ERROR;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q15_4096)
|
||||
|
||||
/* Initialise the default arm status */
|
||||
status = ARM_MATH_SUCCESS;
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
|
||||
/* Initialise the Flag for selection of CFFT or CIFFT */
|
||||
S->ifftFlag = ifftFlag;
|
||||
/* Initialise the Flag for calculation Bit reversal or not */
|
||||
S->bitReverseFlag = bitReverseFlag;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
|
||||
|
||||
/* Initializations of structure parameters depending on the FFT length */
|
||||
switch (S->fftLen)
|
||||
{
|
||||
case 4096U:
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 1U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 1U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) armBitRevTable;
|
||||
|
||||
break;
|
||||
|
||||
case 1024U:
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
S->twidCoefModifier = 4U;
|
||||
S->bitRevFactor = 4U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
|
||||
|
||||
break;
|
||||
|
||||
case 256U:
|
||||
/* Initializations of structure parameters for 256 point FFT */
|
||||
S->twidCoefModifier = 16U;
|
||||
S->bitRevFactor = 16U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
|
||||
|
||||
break;
|
||||
|
||||
case 64U:
|
||||
/* Initializations of structure parameters for 64 point FFT */
|
||||
S->twidCoefModifier = 64U;
|
||||
S->bitRevFactor = 64U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
|
||||
|
||||
break;
|
||||
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->twidCoefModifier = 256U;
|
||||
S->bitRevFactor = 256U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
@@ -0,0 +1,154 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix4_init_q31.c
|
||||
* Description: Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
|
||||
@brief Initialization function for the Q31 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
|
||||
@param[in,out] S points to an instance of the Q31 CFFT/CIFFT structure.
|
||||
@param[in] fftLen length of the FFT.
|
||||
@param[in] ifftFlag flag that selects transform direction
|
||||
- value = 0: forward transform
|
||||
- value = 1: inverse transform
|
||||
@param[in] bitReverseFlag flag that enables / disables bit reversal of output
|
||||
- value = 0: disables bit reversal of output
|
||||
- value = 1: enables bit reversal of output
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
|
||||
|
||||
@par Details
|
||||
The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
|
||||
Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
|
||||
@par
|
||||
The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
|
||||
Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
|
||||
@par
|
||||
The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
|
||||
@par
|
||||
This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
|
||||
*/
|
||||
|
||||
arm_status arm_cfft_radix4_init_q31(
|
||||
arm_cfft_radix4_instance_q31 * S,
|
||||
uint16_t fftLen,
|
||||
uint8_t ifftFlag,
|
||||
uint8_t bitReverseFlag)
|
||||
{
|
||||
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_ARGUMENT_ERROR;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FFT_ALLOW_TABLES)
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_TWIDDLECOEF_Q31_4096)
|
||||
|
||||
/* Initialise the default arm status */
|
||||
status = ARM_MATH_SUCCESS;
|
||||
/* Initialise the FFT length */
|
||||
S->fftLen = fftLen;
|
||||
/* Initialise the Twiddle coefficient pointer */
|
||||
S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
|
||||
/* Initialise the Flag for selection of CFFT or CIFFT */
|
||||
S->ifftFlag = ifftFlag;
|
||||
/* Initialise the Flag for calculation Bit reversal or not */
|
||||
S->bitReverseFlag = bitReverseFlag;
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_BITREV_1024)
|
||||
|
||||
/* Initializations of Instance structure depending on the FFT length */
|
||||
switch (S->fftLen)
|
||||
{
|
||||
/* Initializations of structure parameters for 4096 point FFT */
|
||||
case 4096U:
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 1U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 1U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) armBitRevTable;
|
||||
break;
|
||||
|
||||
/* Initializations of structure parameters for 1024 point FFT */
|
||||
case 1024U:
|
||||
/* Initialise the twiddle coef modifier value */
|
||||
S->twidCoefModifier = 4U;
|
||||
/* Initialise the bit reversal table modifier */
|
||||
S->bitRevFactor = 4U;
|
||||
/* Initialise the bit reversal table pointer */
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
|
||||
break;
|
||||
|
||||
case 256U:
|
||||
/* Initializations of structure parameters for 256 point FFT */
|
||||
S->twidCoefModifier = 16U;
|
||||
S->bitRevFactor = 16U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
|
||||
break;
|
||||
|
||||
case 64U:
|
||||
/* Initializations of structure parameters for 64 point FFT */
|
||||
S->twidCoefModifier = 64U;
|
||||
S->bitRevFactor = 64U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
|
||||
break;
|
||||
|
||||
case 16U:
|
||||
/* Initializations of structure parameters for 16 point FFT */
|
||||
S->twidCoefModifier = 256U;
|
||||
S->bitRevFactor = 256U;
|
||||
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Reporting argument error if fftSize is not valid value */
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
1809
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c
Normal file
1809
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c
Normal file
@@ -0,0 +1,1809 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix4_q15.c
|
||||
* Description: This file has function definition of Radix-4 FFT & IFFT function and
|
||||
* In-place bit reversal using bit reversal table
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
|
||||
void arm_radix4_butterfly_q15(
|
||||
q15_t * pSrc16,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef16,
|
||||
uint32_t twidCoefModifier);
|
||||
|
||||
void arm_radix4_butterfly_inverse_q15(
|
||||
q15_t * pSrc16,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef16,
|
||||
uint32_t twidCoefModifier);
|
||||
|
||||
void arm_bitreversal_q15(
|
||||
q15_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@brief Processing function for the Q15 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
|
||||
@param[in] S points to an instance of the Q15 CFFT/CIFFT structure.
|
||||
@param[in,out] pSrc points to the complex data buffer. Processing occurs in-place.
|
||||
@return none
|
||||
|
||||
@par Input and output formats:
|
||||
Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
|
||||
Hence the output format is different for different FFT sizes.
|
||||
The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
|
||||
@par
|
||||
\image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"
|
||||
\image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"
|
||||
*/
|
||||
|
||||
void arm_cfft_radix4_q15(
|
||||
const arm_cfft_radix4_instance_q15 * S,
|
||||
q15_t * pSrc)
|
||||
{
|
||||
if (S->ifftFlag == 1U)
|
||||
{
|
||||
/* Complex IFFT radix-4 */
|
||||
arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Complex FFT radix-4 */
|
||||
arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
|
||||
if (S->bitReverseFlag == 1U)
|
||||
{
|
||||
/* Bit Reversal */
|
||||
arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
/*
|
||||
* Radix-4 FFT algorithm used is :
|
||||
*
|
||||
* Input real and imaginary data:
|
||||
* x(n) = xa + j * ya
|
||||
* x(n+N/4 ) = xb + j * yb
|
||||
* x(n+N/2 ) = xc + j * yc
|
||||
* x(n+3N/4) = xd + j * yd
|
||||
*
|
||||
*
|
||||
* Output real and imaginary data:
|
||||
* x(4r) = xa'+ j * ya'
|
||||
* x(4r+1) = xb'+ j * yb'
|
||||
* x(4r+2) = xc'+ j * yc'
|
||||
* x(4r+3) = xd'+ j * yd'
|
||||
*
|
||||
*
|
||||
* Twiddle factors for radix-4 FFT:
|
||||
* Wn = co1 + j * (- si1)
|
||||
* W2n = co2 + j * (- si2)
|
||||
* W3n = co3 + j * (- si3)
|
||||
|
||||
* The real and imaginary output values for the radix-4 butterfly are
|
||||
* xa' = xa + xb + xc + xd
|
||||
* ya' = ya + yb + yc + yd
|
||||
* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
|
||||
* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
|
||||
* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
|
||||
* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
|
||||
* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
|
||||
* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Core function for the Q15 CFFT butterfly process.
|
||||
@param[in,out] pSrc16 points to the in-place buffer of Q15 data type
|
||||
@param[in] fftLen length of the FFT
|
||||
@param[in] pCoef16 points to twiddle coefficient buffer
|
||||
@param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_radix4_butterfly_q15(
|
||||
q15_t * pSrc16,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef16,
|
||||
uint32_t twidCoefModifier)
|
||||
{
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
q31_t R, S, T, U;
|
||||
q31_t C1, C2, C3, out1, out2;
|
||||
uint32_t n1, n2, ic, i0, j, k;
|
||||
|
||||
q15_t *ptr1;
|
||||
q15_t *pSi0;
|
||||
q15_t *pSi1;
|
||||
q15_t *pSi2;
|
||||
q15_t *pSi3;
|
||||
|
||||
q31_t xaya, xbyb, xcyc, xdyd;
|
||||
|
||||
/* Total process is divided into three stages */
|
||||
|
||||
/* process first stage, middle stages, & last stage */
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
|
||||
/* Index for twiddle coefficient */
|
||||
ic = 0U;
|
||||
|
||||
/* Index for input read and output write */
|
||||
j = n2;
|
||||
|
||||
pSi0 = pSrc16;
|
||||
pSi1 = pSi0 + 2 * n2;
|
||||
pSi2 = pSi1 + 2 * n2;
|
||||
pSi3 = pSi2 + 2 * n2;
|
||||
|
||||
/* Input is in 1.15(q15) format */
|
||||
|
||||
/* start of first stage process */
|
||||
do
|
||||
{
|
||||
/* Butterfly implementation */
|
||||
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T = read_q15x2 (pSi0);
|
||||
T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */
|
||||
T = __SHADD16(T, 0); /* it turns out doing this twice is 2 cycles, the alternative takes 3 cycles */
|
||||
/*
|
||||
in = ((int16_t) (T & 0xFFFF)) >> 2; // alternative code that takes 3 cycles
|
||||
T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
|
||||
*/
|
||||
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S = read_q15x2 (pSi2);
|
||||
S = __SHADD16(S, 0);
|
||||
S = __SHADD16(S, 0);
|
||||
|
||||
/* R = packed((ya + yc), (xa + xc) ) */
|
||||
R = __QADD16(T, S);
|
||||
|
||||
/* S = packed((ya - yc), (xa - xc) ) */
|
||||
S = __QSUB16(T, S);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T = read_q15x2 (pSi1);
|
||||
T = __SHADD16(T, 0);
|
||||
T = __SHADD16(T, 0);
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U = read_q15x2 (pSi3);
|
||||
U = __SHADD16(U, 0);
|
||||
U = __SHADD16(U, 0);
|
||||
|
||||
/* T = packed((yb + yd), (xb + xd) ) */
|
||||
T = __QADD16(T, U);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
write_q15x2_ia (&pSi0, __SHADD16(R, T));
|
||||
|
||||
/* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
|
||||
R = __QSUB16(R, T);
|
||||
|
||||
/* co2 & si2 are read from SIMD Coefficient pointer */
|
||||
C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
|
||||
out1 = __SMUAD(C2, R) >> 16U;
|
||||
/* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
out2 = __SMUSDX(C2, R);
|
||||
#else
|
||||
/* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
out1 = __SMUSDX(R, C2) >> 16U;
|
||||
/* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
|
||||
out2 = __SMUAD(C2, R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Reading i0+fftLen/4 */
|
||||
/* T = packed(yb, xb) */
|
||||
T = read_q15x2 (pSi1);
|
||||
T = __SHADD16(T, 0);
|
||||
T = __SHADD16(T, 0);
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* writing output(xc', yc') in little endian format */
|
||||
write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));
|
||||
|
||||
/* Butterfly calculations */
|
||||
/* U = packed(yd, xd) */
|
||||
U = read_q15x2 (pSi3);
|
||||
U = __SHADD16(U, 0);
|
||||
U = __SHADD16(U, 0);
|
||||
|
||||
/* T = packed(yb-yd, xb-xd) */
|
||||
T = __QSUB16(T, U);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
|
||||
R = __QASX(S, T);
|
||||
/* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
|
||||
S = __QSAX(S, T);
|
||||
#else
|
||||
/* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
|
||||
R = __QSAX(S, T);
|
||||
/* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
|
||||
S = __QASX(S, T);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* co1 & si1 are read from SIMD Coefficient pointer */
|
||||
C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
|
||||
out1 = __SMUAD(C1, S) >> 16U;
|
||||
/* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
|
||||
out2 = __SMUSDX(C1, S);
|
||||
#else
|
||||
/* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
|
||||
out1 = __SMUSDX(S, C1) >> 16U;
|
||||
/* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
|
||||
out2 = __SMUAD(C1, S);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* writing output(xb', yb') in little endian format */
|
||||
write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));
|
||||
|
||||
/* co3 & si3 are read from SIMD Coefficient pointer */
|
||||
C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
|
||||
/* Butterfly process for the i0+3fftLen/4 sample */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
|
||||
out1 = __SMUAD(C3, R) >> 16U;
|
||||
/* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
|
||||
out2 = __SMUSDX(C3, R);
|
||||
#else
|
||||
/* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
|
||||
out1 = __SMUSDX(R, C3) >> 16U;
|
||||
/* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
|
||||
out2 = __SMUAD(C3, R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* writing output(xd', yd') in little endian format */
|
||||
write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ic = ic + twidCoefModifier;
|
||||
|
||||
} while (--j);
|
||||
/* data is in 4.11(q11) format */
|
||||
|
||||
/* end of first stage process */
|
||||
|
||||
|
||||
/* start of middle stage process */
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
twidCoefModifier <<= 2U;
|
||||
|
||||
/* Calculation of Middle stage */
|
||||
for (k = fftLen / 4U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the middle stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ic = 0U;
|
||||
|
||||
for (j = 0U; j <= (n2 - 1U); j++)
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
|
||||
C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
|
||||
C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ic = ic + twidCoefModifier;
|
||||
|
||||
pSi0 = pSrc16 + 2 * j;
|
||||
pSi1 = pSi0 + 2 * n2;
|
||||
pSi2 = pSi1 + 2 * n2;
|
||||
pSi3 = pSi2 + 2 * n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
for (i0 = j; i0 < fftLen; i0 += n1)
|
||||
{
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T = read_q15x2 (pSi0);
|
||||
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S = read_q15x2 (pSi2);
|
||||
|
||||
/* R = packed( (ya + yc), (xa + xc)) */
|
||||
R = __QADD16(T, S);
|
||||
|
||||
/* S = packed((ya - yc), (xa - xc)) */
|
||||
S = __QSUB16(T, S);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T = read_q15x2 (pSi1);
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U = read_q15x2 (pSi3);
|
||||
|
||||
/* T = packed( (yb + yd), (xb + xd)) */
|
||||
T = __QADD16(T, U);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
out1 = __SHADD16(R, T);
|
||||
out1 = __SHADD16(out1, 0);
|
||||
write_q15x2 (pSi0, out1);
|
||||
pSi0 += 2 * n1;
|
||||
|
||||
/* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
|
||||
R = __SHSUB16(R, T);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
|
||||
out1 = __SMUAD(C2, R) >> 16U;
|
||||
|
||||
/* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
out2 = __SMUSDX(C2, R);
|
||||
#else
|
||||
/* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
out1 = __SMUSDX(R, C2) >> 16U;
|
||||
|
||||
/* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
|
||||
out2 = __SMUAD(C2, R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Reading i0+3fftLen/4 */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T = read_q15x2 (pSi1);
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
|
||||
/* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
|
||||
pSi1 += 2 * n1;
|
||||
|
||||
/* Butterfly calculations */
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U = read_q15x2 (pSi3);
|
||||
|
||||
/* T = packed(yb-yd, xb-xd) */
|
||||
T = __QSUB16(T, U);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
|
||||
R = __SHASX(S, T);
|
||||
|
||||
/* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
|
||||
S = __SHSAX(S, T);
|
||||
|
||||
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
out1 = __SMUAD(C1, S) >> 16U;
|
||||
out2 = __SMUSDX(C1, S);
|
||||
#else
|
||||
/* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
|
||||
R = __SHSAX(S, T);
|
||||
|
||||
/* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
|
||||
S = __SHASX(S, T);
|
||||
|
||||
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
out1 = __SMUSDX(S, C1) >> 16U;
|
||||
out2 = __SMUAD(C1, S);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
|
||||
/* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
|
||||
write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
|
||||
pSi2 += 2 * n1;
|
||||
|
||||
/* Butterfly process for the i0+3fftLen/4 sample */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUAD(C3, R) >> 16U;
|
||||
out2 = __SMUSDX(C3, R);
|
||||
#else
|
||||
out1 = __SMUSDX(R, C3) >> 16U;
|
||||
out2 = __SMUAD(C3, R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
|
||||
/* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
|
||||
write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
|
||||
pSi3 += 2 * n1;
|
||||
}
|
||||
}
|
||||
/* Twiddle coefficients index modifier */
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
/* end of middle stage process */
|
||||
|
||||
|
||||
/* data is in 10.6(q6) format for the 1024 point */
|
||||
/* data is in 8.8(q8) format for the 256 point */
|
||||
/* data is in 6.10(q10) format for the 64 point */
|
||||
/* data is in 4.12(q12) format for the 16 point */
|
||||
|
||||
/* Initializations for the last stage */
|
||||
j = fftLen >> 2;
|
||||
|
||||
ptr1 = &pSrc16[0];
|
||||
|
||||
/* start of last stage process */
|
||||
|
||||
/* Butterfly implementation */
|
||||
do
|
||||
{
|
||||
/* Read xa (real), ya(imag) input */
|
||||
xaya = read_q15x2_ia (&ptr1);
|
||||
|
||||
/* Read xb (real), yb(imag) input */
|
||||
xbyb = read_q15x2_ia (&ptr1);
|
||||
|
||||
/* Read xc (real), yc(imag) input */
|
||||
xcyc = read_q15x2_ia (&ptr1);
|
||||
|
||||
/* Read xd (real), yd(imag) input */
|
||||
xdyd = read_q15x2_ia (&ptr1);
|
||||
|
||||
/* R = packed((ya + yc), (xa + xc)) */
|
||||
R = __QADD16(xaya, xcyc);
|
||||
|
||||
/* T = packed((yb + yd), (xb + xd)) */
|
||||
T = __QADD16(xbyb, xdyd);
|
||||
|
||||
/* pointer updation for writing */
|
||||
ptr1 = ptr1 - 8U;
|
||||
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
write_q15x2_ia (&ptr1, __SHADD16(R, T));
|
||||
|
||||
/* T = packed((yb + yd), (xb + xd)) */
|
||||
T = __QADD16(xbyb, xdyd);
|
||||
|
||||
/* xc' = (xa-xb+xc-xd) */
|
||||
/* yc' = (ya-yb+yc-yd) */
|
||||
write_q15x2_ia (&ptr1, __SHSUB16(R, T));
|
||||
|
||||
/* S = packed((ya - yc), (xa - xc)) */
|
||||
S = __QSUB16(xaya, xcyc);
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
/* T = packed( (yb - yd), (xb - xd)) */
|
||||
U = __QSUB16(xbyb, xdyd);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* xb' = (xa+yb-xc-yd) */
|
||||
/* yb' = (ya-xb-yc+xd) */
|
||||
write_q15x2_ia (&ptr1, __SHSAX(S, U));
|
||||
|
||||
/* xd' = (xa-yb-xc+yd) */
|
||||
/* yd' = (ya+xb-yc-xd) */
|
||||
write_q15x2_ia (&ptr1, __SHASX(S, U));
|
||||
#else
|
||||
/* xb' = (xa+yb-xc-yd) */
|
||||
/* yb' = (ya-xb-yc+xd) */
|
||||
write_q15x2_ia (&ptr1, __SHASX(S, U));
|
||||
|
||||
/* xd' = (xa-yb-xc+yd) */
|
||||
/* yd' = (ya+xb-yc-xd) */
|
||||
write_q15x2_ia (&ptr1, __SHSAX(S, U));
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
} while (--j);
|
||||
|
||||
/* end of last stage process */
|
||||
|
||||
/* output is in 11.5(q5) format for the 1024 point */
|
||||
/* output is in 9.7(q7) format for the 256 point */
|
||||
/* output is in 7.9(q9) format for the 64 point */
|
||||
/* output is in 5.11(q11) format for the 16 point */
|
||||
|
||||
|
||||
#else /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
q15_t R0, R1, S0, S1, T0, T1, U0, U1;
|
||||
q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
|
||||
uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
|
||||
|
||||
/* Total process is divided into three stages */
|
||||
|
||||
/* process first stage, middle stages, & last stage */
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
|
||||
/* Index for twiddle coefficient */
|
||||
ic = 0U;
|
||||
|
||||
/* Index for input read and output write */
|
||||
i0 = 0U;
|
||||
j = n2;
|
||||
|
||||
/* Input is in 1.15(q15) format */
|
||||
|
||||
/* start of first stage process */
|
||||
do
|
||||
{
|
||||
/* Butterfly implementation */
|
||||
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T0 = pSrc16[i0 * 2U] >> 2U;
|
||||
T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
|
||||
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S0 = pSrc16[i2 * 2U] >> 2U;
|
||||
S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;
|
||||
|
||||
/* R0 = (ya + yc) */
|
||||
R0 = __SSAT(T0 + S0, 16U);
|
||||
/* R1 = (xa + xc) */
|
||||
R1 = __SSAT(T1 + S1, 16U);
|
||||
|
||||
/* S0 = (ya - yc) */
|
||||
S0 = __SSAT(T0 - S0, 16);
|
||||
/* S1 = (xa - xc) */
|
||||
S1 = __SSAT(T1 - S1, 16);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U] >> 2U;
|
||||
T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
|
||||
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U0 = pSrc16[i3 * 2U] >> 2U;
|
||||
U1 = pSrc16[(i3 * 2U) + 1] >> 2U;
|
||||
|
||||
/* T0 = (yb + yd) */
|
||||
T0 = __SSAT(T0 + U0, 16U);
|
||||
/* T1 = (xb + xd) */
|
||||
T1 = __SSAT(T1 + U1, 16U);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
|
||||
pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
|
||||
|
||||
/* R0 = (ya + yc) - (yb + yd) */
|
||||
/* R1 = (xa + xc) - (xb + xd) */
|
||||
R0 = __SSAT(R0 - T0, 16U);
|
||||
R1 = __SSAT(R1 - T1, 16U);
|
||||
|
||||
/* co2 & si2 are read from Coefficient pointer */
|
||||
Co2 = pCoef16[2U * ic * 2U];
|
||||
Si2 = pCoef16[(2U * ic * 2U) + 1];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
|
||||
out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
|
||||
/* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
|
||||
|
||||
/* Reading i0+fftLen/4 */
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* T0 = yb, T1 = xb */
|
||||
T0 = pSrc16[i1 * 2U] >> 2;
|
||||
T1 = pSrc16[(i1 * 2U) + 1] >> 2;
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* writing output(xc', yc') in little endian format */
|
||||
pSrc16[i1 * 2U] = out1;
|
||||
pSrc16[(i1 * 2U) + 1] = out2;
|
||||
|
||||
/* Butterfly calculations */
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* U0 = yd, U1 = xd */
|
||||
U0 = pSrc16[i3 * 2U] >> 2;
|
||||
U1 = pSrc16[(i3 * 2U) + 1] >> 2;
|
||||
/* T0 = yb-yd */
|
||||
T0 = __SSAT(T0 - U0, 16);
|
||||
/* T1 = xb-xd */
|
||||
T1 = __SSAT(T1 - U1, 16);
|
||||
|
||||
/* R1 = (ya-yc) + (xb- xd), R0 = (xa-xc) - (yb-yd)) */
|
||||
R0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
|
||||
R1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
|
||||
|
||||
/* S1 = (ya-yc) - (xb- xd), S0 = (xa-xc) + (yb-yd)) */
|
||||
S0 = (q15_t) __SSAT(((q31_t) S0 + T1), 16U);
|
||||
S1 = (q15_t) __SSAT(((q31_t) S1 - T0), 16U);
|
||||
|
||||
/* co1 & si1 are read from Coefficient pointer */
|
||||
Co1 = pCoef16[ic * 2U];
|
||||
Si1 = pCoef16[(ic * 2U) + 1];
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
/* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
|
||||
out1 = (q15_t) ((Si1 * S1 + Co1 * S0) >> 16);
|
||||
/* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
|
||||
out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16);
|
||||
|
||||
/* writing output(xb', yb') in little endian format */
|
||||
pSrc16[i2 * 2U] = out1;
|
||||
pSrc16[(i2 * 2U) + 1] = out2;
|
||||
|
||||
/* Co3 & si3 are read from Coefficient pointer */
|
||||
Co3 = pCoef16[3U * (ic * 2U)];
|
||||
Si3 = pCoef16[(3U * (ic * 2U)) + 1];
|
||||
/* Butterfly process for the i0+3fftLen/4 sample */
|
||||
/* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
|
||||
out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
|
||||
/* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
|
||||
out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
|
||||
/* writing output(xd', yd') in little endian format */
|
||||
pSrc16[i3 * 2U] = out1;
|
||||
pSrc16[(i3 * 2U) + 1] = out2;
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ic = ic + twidCoefModifier;
|
||||
|
||||
/* Updating input index */
|
||||
i0 = i0 + 1U;
|
||||
|
||||
} while (--j);
|
||||
/* data is in 4.11(q11) format */
|
||||
|
||||
/* end of first stage process */
|
||||
|
||||
|
||||
/* start of middle stage process */
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
twidCoefModifier <<= 2U;
|
||||
|
||||
/* Calculation of Middle stage */
|
||||
for (k = fftLen / 4U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the middle stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ic = 0U;
|
||||
|
||||
for (j = 0U; j <= (n2 - 1U); j++)
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
Co1 = pCoef16[ic * 2U];
|
||||
Si1 = pCoef16[(ic * 2U) + 1U];
|
||||
Co2 = pCoef16[2U * (ic * 2U)];
|
||||
Si2 = pCoef16[(2U * (ic * 2U)) + 1U];
|
||||
Co3 = pCoef16[3U * (ic * 2U)];
|
||||
Si3 = pCoef16[(3U * (ic * 2U)) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ic = ic + twidCoefModifier;
|
||||
|
||||
/* Butterfly implementation */
|
||||
for (i0 = j; i0 < fftLen; i0 += n1)
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T0 = pSrc16[i0 * 2U];
|
||||
T1 = pSrc16[(i0 * 2U) + 1U];
|
||||
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S0 = pSrc16[i2 * 2U];
|
||||
S1 = pSrc16[(i2 * 2U) + 1U];
|
||||
|
||||
/* R0 = (ya + yc), R1 = (xa + xc) */
|
||||
R0 = __SSAT(T0 + S0, 16);
|
||||
R1 = __SSAT(T1 + S1, 16);
|
||||
|
||||
/* S0 = (ya - yc), S1 =(xa - xc) */
|
||||
S0 = __SSAT(T0 - S0, 16);
|
||||
S1 = __SSAT(T1 - S1, 16);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U];
|
||||
T1 = pSrc16[(i1 * 2U) + 1U];
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U0 = pSrc16[i3 * 2U];
|
||||
U1 = pSrc16[(i3 * 2U) + 1U];
|
||||
|
||||
|
||||
/* T0 = (yb + yd), T1 = (xb + xd) */
|
||||
T0 = __SSAT(T0 + U0, 16);
|
||||
T1 = __SSAT(T1 + U1, 16);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
out1 = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
|
||||
out2 = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;
|
||||
|
||||
pSrc16[i0 * 2U] = out1;
|
||||
pSrc16[(2U * i0) + 1U] = out2;
|
||||
|
||||
/* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
|
||||
R0 = (R0 >> 1U) - (T0 >> 1U);
|
||||
R1 = (R1 >> 1U) - (T1 >> 1U);
|
||||
|
||||
/* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
|
||||
out1 = (q15_t) ((Co2 * R0 + Si2 * R1) >> 16U);
|
||||
|
||||
/* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
out2 = (q15_t) ((-Si2 * R0 + Co2 * R1) >> 16U);
|
||||
|
||||
/* Reading i0+3fftLen/4 */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U];
|
||||
T1 = pSrc16[(i1 * 2U) + 1U];
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
|
||||
/* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
pSrc16[i1 * 2U] = out1;
|
||||
pSrc16[(i1 * 2U) + 1U] = out2;
|
||||
|
||||
/* Butterfly calculations */
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U0 = pSrc16[i3 * 2U];
|
||||
U1 = pSrc16[(i3 * 2U) + 1U];
|
||||
|
||||
/* T0 = yb-yd, T1 = xb-xd */
|
||||
T0 = __SSAT(T0 - U0, 16);
|
||||
T1 = __SSAT(T1 - U1, 16);
|
||||
|
||||
/* R0 = (ya-yc) + (xb- xd), R1 = (xa-xc) - (yb-yd)) */
|
||||
R0 = (S0 >> 1U) - (T1 >> 1U);
|
||||
R1 = (S1 >> 1U) + (T0 >> 1U);
|
||||
|
||||
/* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd)) */
|
||||
S0 = (S0 >> 1U) + (T1 >> 1U);
|
||||
S1 = (S1 >> 1U) - (T0 >> 1U);
|
||||
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
out1 = (q15_t) ((Co1 * S0 + Si1 * S1) >> 16U);
|
||||
|
||||
out2 = (q15_t) ((-Si1 * S0 + Co1 * S1) >> 16U);
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
|
||||
/* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
|
||||
pSrc16[i2 * 2U] = out1;
|
||||
pSrc16[(i2 * 2U) + 1U] = out2;
|
||||
|
||||
/* Butterfly process for the i0+3fftLen/4 sample */
|
||||
out1 = (q15_t) ((Si3 * R1 + Co3 * R0) >> 16U);
|
||||
|
||||
out2 = (q15_t) ((-Si3 * R0 + Co3 * R1) >> 16U);
|
||||
/* xd' = (xa-yb-xc+yd)* Co3 + (ya+xb-yc-xd)* (si3) */
|
||||
/* yd' = (ya+xb-yc-xd)* Co3 - (xa-yb-xc+yd)* (si3) */
|
||||
pSrc16[i3 * 2U] = out1;
|
||||
pSrc16[(i3 * 2U) + 1U] = out2;
|
||||
}
|
||||
}
|
||||
/* Twiddle coefficients index modifier */
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
/* end of middle stage process */
|
||||
|
||||
|
||||
/* data is in 10.6(q6) format for the 1024 point */
|
||||
/* data is in 8.8(q8) format for the 256 point */
|
||||
/* data is in 6.10(q10) format for the 64 point */
|
||||
/* data is in 4.12(q12) format for the 16 point */
|
||||
|
||||
/* Initializations for the last stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
|
||||
/* start of last stage process */
|
||||
|
||||
/* Butterfly implementation */
|
||||
for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T0 = pSrc16[i0 * 2U];
|
||||
T1 = pSrc16[(i0 * 2U) + 1U];
|
||||
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S0 = pSrc16[i2 * 2U];
|
||||
S1 = pSrc16[(i2 * 2U) + 1U];
|
||||
|
||||
/* R0 = (ya + yc), R1 = (xa + xc) */
|
||||
R0 = __SSAT(T0 + S0, 16U);
|
||||
R1 = __SSAT(T1 + S1, 16U);
|
||||
|
||||
/* S0 = (ya - yc), S1 = (xa - xc) */
|
||||
S0 = __SSAT(T0 - S0, 16U);
|
||||
S1 = __SSAT(T1 - S1, 16U);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U];
|
||||
T1 = pSrc16[(i1 * 2U) + 1U];
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U0 = pSrc16[i3 * 2U];
|
||||
U1 = pSrc16[(i3 * 2U) + 1U];
|
||||
|
||||
/* T0 = (yb + yd), T1 = (xb + xd)) */
|
||||
T0 = __SSAT(T0 + U0, 16U);
|
||||
T1 = __SSAT(T1 + U1, 16U);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
|
||||
pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
|
||||
|
||||
/* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
|
||||
R0 = (R0 >> 1U) - (T0 >> 1U);
|
||||
R1 = (R1 >> 1U) - (T1 >> 1U);
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U];
|
||||
T1 = pSrc16[(i1 * 2U) + 1U];
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* xc' = (xa-xb+xc-xd) */
|
||||
/* yc' = (ya-yb+yc-yd) */
|
||||
pSrc16[i1 * 2U] = R0;
|
||||
pSrc16[(i1 * 2U) + 1U] = R1;
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U0 = pSrc16[i3 * 2U];
|
||||
U1 = pSrc16[(i3 * 2U) + 1U];
|
||||
/* T0 = (yb - yd), T1 = (xb - xd) */
|
||||
T0 = __SSAT(T0 - U0, 16U);
|
||||
T1 = __SSAT(T1 - U1, 16U);
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/2 sample */
|
||||
/* xb' = (xa+yb-xc-yd) */
|
||||
/* yb' = (ya-xb-yc+xd) */
|
||||
pSrc16[i2 * 2U] = (S0 >> 1U) + (T1 >> 1U);
|
||||
pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
|
||||
|
||||
/* writing the butterfly processed i0 + 3fftLen/4 sample */
|
||||
/* xd' = (xa-yb-xc+yd) */
|
||||
/* yd' = (ya+xb-yc-xd) */
|
||||
pSrc16[i3 * 2U] = (S0 >> 1U) - (T1 >> 1U);
|
||||
pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);
|
||||
|
||||
}
|
||||
|
||||
/* end of last stage process */
|
||||
|
||||
/* output is in 11.5(q5) format for the 1024 point */
|
||||
/* output is in 9.7(q7) format for the 256 point */
|
||||
/* output is in 7.9(q9) format for the 64 point */
|
||||
/* output is in 5.11(q11) format for the 16 point */
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief Core function for the Q15 CIFFT butterfly process.
|
||||
@param[in,out] pSrc16 points to the in-place buffer of Q15 data type
|
||||
@param[in] fftLen length of the FFT
|
||||
@param[in] pCoef16 points to twiddle coefficient buffer
|
||||
@param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
@return none
|
||||
*/
|
||||
|
||||
/*
|
||||
* Radix-4 IFFT algorithm used is :
|
||||
*
|
||||
* CIFFT uses same twiddle coefficients as CFFT function
|
||||
* x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
|
||||
*
|
||||
*
|
||||
* IFFT is implemented with following changes in equations from FFT
|
||||
*
|
||||
* Input real and imaginary data:
|
||||
* x(n) = xa + j * ya
|
||||
* x(n+N/4 ) = xb + j * yb
|
||||
* x(n+N/2 ) = xc + j * yc
|
||||
* x(n+3N/4) = xd + j * yd
|
||||
*
|
||||
*
|
||||
* Output real and imaginary data:
|
||||
* x(4r) = xa'+ j * ya'
|
||||
* x(4r+1) = xb'+ j * yb'
|
||||
* x(4r+2) = xc'+ j * yc'
|
||||
* x(4r+3) = xd'+ j * yd'
|
||||
*
|
||||
*
|
||||
* Twiddle factors for radix-4 IFFT:
|
||||
* Wn = co1 + j * (si1)
|
||||
* W2n = co2 + j * (si2)
|
||||
* W3n = co3 + j * (si3)
|
||||
|
||||
* The real and imaginary output values for the radix-4 butterfly are
|
||||
* xa' = xa + xb + xc + xd
|
||||
* ya' = ya + yb + yc + yd
|
||||
* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
|
||||
* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
|
||||
* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
|
||||
* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
|
||||
* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
|
||||
* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_radix4_butterfly_inverse_q15(
|
||||
q15_t * pSrc16,
|
||||
uint32_t fftLen,
|
||||
const q15_t * pCoef16,
|
||||
uint32_t twidCoefModifier)
|
||||
{
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
q31_t R, S, T, U;
|
||||
q31_t C1, C2, C3, out1, out2;
|
||||
uint32_t n1, n2, ic, i0, j, k;
|
||||
|
||||
q15_t *ptr1;
|
||||
q15_t *pSi0;
|
||||
q15_t *pSi1;
|
||||
q15_t *pSi2;
|
||||
q15_t *pSi3;
|
||||
|
||||
q31_t xaya, xbyb, xcyc, xdyd;
|
||||
|
||||
/* Total process is divided into three stages */
|
||||
|
||||
/* process first stage, middle stages, & last stage */
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
|
||||
/* Index for twiddle coefficient */
|
||||
ic = 0U;
|
||||
|
||||
/* Index for input read and output write */
|
||||
j = n2;
|
||||
|
||||
pSi0 = pSrc16;
|
||||
pSi1 = pSi0 + 2 * n2;
|
||||
pSi2 = pSi1 + 2 * n2;
|
||||
pSi3 = pSi2 + 2 * n2;
|
||||
|
||||
/* Input is in 1.15(q15) format */
|
||||
|
||||
/* start of first stage process */
|
||||
do
|
||||
{
|
||||
/* Butterfly implementation */
|
||||
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T = read_q15x2 (pSi0);
|
||||
T = __SHADD16(T, 0);
|
||||
T = __SHADD16(T, 0);
|
||||
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S = read_q15x2 (pSi2);
|
||||
S = __SHADD16(S, 0);
|
||||
S = __SHADD16(S, 0);
|
||||
|
||||
/* R = packed((ya + yc), (xa + xc) ) */
|
||||
R = __QADD16(T, S);
|
||||
|
||||
/* S = packed((ya - yc), (xa - xc) ) */
|
||||
S = __QSUB16(T, S);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T = read_q15x2 (pSi1);
|
||||
T = __SHADD16(T, 0);
|
||||
T = __SHADD16(T, 0);
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U = read_q15x2 (pSi3);
|
||||
U = __SHADD16(U, 0);
|
||||
U = __SHADD16(U, 0);
|
||||
|
||||
/* T = packed((yb + yd), (xb + xd) ) */
|
||||
T = __QADD16(T, U);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
write_q15x2_ia (&pSi0, __SHADD16(R, T));
|
||||
|
||||
/* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
|
||||
R = __QSUB16(R, T);
|
||||
|
||||
/* co2 & si2 are read from SIMD Coefficient pointer */
|
||||
C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
|
||||
out1 = __SMUSD(C2, R) >> 16U;
|
||||
/* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
|
||||
out2 = __SMUADX(C2, R);
|
||||
#else
|
||||
/* xc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
out1 = __SMUADX(C2, R) >> 16U;
|
||||
/* yc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
|
||||
out2 = __SMUSD(__QSUB16(0, C2), R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Reading i0+fftLen/4 */
|
||||
/* T = packed(yb, xb) */
|
||||
T = read_q15x2 (pSi1);
|
||||
T = __SHADD16(T, 0);
|
||||
T = __SHADD16(T, 0);
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* writing output(xc', yc') in little endian format */
|
||||
write_q15x2_ia (&pSi1, (q31_t) __PKHBT( out1, out2, 0 ));
|
||||
|
||||
/* Butterfly calculations */
|
||||
/* U = packed(yd, xd) */
|
||||
U = read_q15x2 (pSi3);
|
||||
U = __SHADD16(U, 0);
|
||||
U = __SHADD16(U, 0);
|
||||
|
||||
/* T = packed(yb-yd, xb-xd) */
|
||||
T = __QSUB16(T, U);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */
|
||||
R = __QSAX(S, T);
|
||||
/* S = packed((ya-yc) + (xb- xd), (xa-xc) - (yb-yd)) */
|
||||
S = __QASX(S, T);
|
||||
#else
|
||||
/* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
|
||||
R = __QASX(S, T);
|
||||
/* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
|
||||
S = __QSAX(S, T);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* co1 & si1 are read from SIMD Coefficient pointer */
|
||||
C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
|
||||
out1 = __SMUSD(C1, S) >> 16U;
|
||||
/* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
|
||||
out2 = __SMUADX(C1, S);
|
||||
#else
|
||||
/* xb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
|
||||
out1 = __SMUADX(C1, S) >> 16U;
|
||||
/* yb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
|
||||
out2 = __SMUSD(__QSUB16(0, C1), S);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* writing output(xb', yb') in little endian format */
|
||||
write_q15x2_ia (&pSi2, __PKHBT( out1, out2, 0 ));
|
||||
|
||||
/* co3 & si3 are read from SIMD Coefficient pointer */
|
||||
C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
|
||||
/* Butterfly process for the i0+3fftLen/4 sample */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
|
||||
out1 = __SMUSD(C3, R) >> 16U;
|
||||
/* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
|
||||
out2 = __SMUADX(C3, R);
|
||||
#else
|
||||
/* xd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
|
||||
out1 = __SMUADX(C3, R) >> 16U;
|
||||
/* yd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
|
||||
out2 = __SMUSD(__QSUB16(0, C3), R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* writing output(xd', yd') in little endian format */
|
||||
write_q15x2_ia (&pSi3, __PKHBT( out1, out2, 0 ));
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ic = ic + twidCoefModifier;
|
||||
|
||||
} while (--j);
|
||||
/* data is in 4.11(q11) format */
|
||||
|
||||
/* end of first stage process */
|
||||
|
||||
|
||||
/* start of middle stage process */
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
twidCoefModifier <<= 2U;
|
||||
|
||||
/* Calculation of Middle stage */
|
||||
for (k = fftLen / 4U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the middle stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ic = 0U;
|
||||
|
||||
for (j = 0U; j <= (n2 - 1U); j++)
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
C1 = read_q15x2 ((q15_t *) pCoef16 + (2U * ic));
|
||||
C2 = read_q15x2 ((q15_t *) pCoef16 + (4U * ic));
|
||||
C3 = read_q15x2 ((q15_t *) pCoef16 + (6U * ic));
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ic = ic + twidCoefModifier;
|
||||
|
||||
pSi0 = pSrc16 + 2 * j;
|
||||
pSi1 = pSi0 + 2 * n2;
|
||||
pSi2 = pSi1 + 2 * n2;
|
||||
pSi3 = pSi2 + 2 * n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
for (i0 = j; i0 < fftLen; i0 += n1)
|
||||
{
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T = read_q15x2 (pSi0);
|
||||
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S = read_q15x2 (pSi2);
|
||||
|
||||
/* R = packed( (ya + yc), (xa + xc)) */
|
||||
R = __QADD16(T, S);
|
||||
|
||||
/* S = packed((ya - yc), (xa - xc)) */
|
||||
S = __QSUB16(T, S);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T = read_q15x2 (pSi1);
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U = read_q15x2 (pSi3);
|
||||
|
||||
/* T = packed( (yb + yd), (xb + xd)) */
|
||||
T = __QADD16(T, U);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
out1 = __SHADD16(R, T);
|
||||
out1 = __SHADD16(out1, 0);
|
||||
write_q15x2 (pSi0, out1);
|
||||
pSi0 += 2 * n1;
|
||||
|
||||
/* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
|
||||
R = __SHSUB16(R, T);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
|
||||
out1 = __SMUSD(C2, R) >> 16U;
|
||||
|
||||
/* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
out2 = __SMUADX(C2, R);
|
||||
#else
|
||||
/* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
out1 = __SMUADX(R, C2) >> 16U;
|
||||
|
||||
/* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
|
||||
out2 = __SMUSD(__QSUB16(0, C2), R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Reading i0+3fftLen/4 */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T = read_q15x2 (pSi1);
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
|
||||
/* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
|
||||
write_q15x2 (pSi1, __PKHBT( out1, out2, 0 ));
|
||||
pSi1 += 2 * n1;
|
||||
|
||||
/* Butterfly calculations */
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U = read_q15x2 (pSi3);
|
||||
|
||||
/* T = packed(yb-yd, xb-xd) */
|
||||
T = __QSUB16(T, U);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
|
||||
R = __SHSAX(S, T);
|
||||
|
||||
/* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
|
||||
S = __SHASX(S, T);
|
||||
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
out1 = __SMUSD(C1, S) >> 16U;
|
||||
out2 = __SMUADX(C1, S);
|
||||
#else
|
||||
/* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
|
||||
R = __SHASX(S, T);
|
||||
|
||||
/* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
|
||||
S = __SHSAX(S, T);
|
||||
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
out1 = __SMUADX(S, C1) >> 16U;
|
||||
out2 = __SMUSD(__QSUB16(0, C1), S);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
|
||||
/* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
|
||||
write_q15x2 (pSi2, __PKHBT( out1, out2, 0 ));
|
||||
pSi2 += 2 * n1;
|
||||
|
||||
/* Butterfly process for the i0+3fftLen/4 sample */
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __SMUSD(C3, R) >> 16U;
|
||||
out2 = __SMUADX(C3, R);
|
||||
#else
|
||||
out1 = __SMUADX(C3, R) >> 16U;
|
||||
out2 = __SMUSD(__QSUB16(0, C3), R);
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
|
||||
/* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
|
||||
write_q15x2 (pSi3, __PKHBT( out1, out2, 0 ));
|
||||
pSi3 += 2 * n1;
|
||||
}
|
||||
}
|
||||
/* Twiddle coefficients index modifier */
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
/* end of middle stage process */
|
||||
|
||||
/* data is in 10.6(q6) format for the 1024 point */
|
||||
/* data is in 8.8(q8) format for the 256 point */
|
||||
/* data is in 6.10(q10) format for the 64 point */
|
||||
/* data is in 4.12(q12) format for the 16 point */
|
||||
|
||||
/* Initializations for the last stage */
|
||||
j = fftLen >> 2;
|
||||
|
||||
ptr1 = &pSrc16[0];
|
||||
|
||||
/* start of last stage process */
|
||||
|
||||
/* Butterfly implementation */
|
||||
do
|
||||
{
|
||||
/* Read xa (real), ya(imag) input */
|
||||
xaya = read_q15x2_ia (&ptr1);
|
||||
|
||||
/* Read xb (real), yb(imag) input */
|
||||
xbyb = read_q15x2_ia (&ptr1);
|
||||
|
||||
/* Read xc (real), yc(imag) input */
|
||||
xcyc = read_q15x2_ia (&ptr1);
|
||||
|
||||
/* Read xd (real), yd(imag) input */
|
||||
xdyd = read_q15x2_ia (&ptr1);
|
||||
|
||||
/* R = packed((ya + yc), (xa + xc)) */
|
||||
R = __QADD16(xaya, xcyc);
|
||||
|
||||
/* T = packed((yb + yd), (xb + xd)) */
|
||||
T = __QADD16(xbyb, xdyd);
|
||||
|
||||
/* pointer updation for writing */
|
||||
ptr1 = ptr1 - 8U;
|
||||
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
write_q15x2_ia (&ptr1, __SHADD16(R, T));
|
||||
|
||||
/* T = packed((yb + yd), (xb + xd)) */
|
||||
T = __QADD16(xbyb, xdyd);
|
||||
|
||||
/* xc' = (xa-xb+xc-xd) */
|
||||
/* yc' = (ya-yb+yc-yd) */
|
||||
write_q15x2_ia (&ptr1, __SHSUB16(R, T));
|
||||
|
||||
/* S = packed((ya - yc), (xa - xc)) */
|
||||
S = __QSUB16(xaya, xcyc);
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
/* T = packed( (yb - yd), (xb - xd)) */
|
||||
U = __QSUB16(xbyb, xdyd);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
/* xb' = (xa+yb-xc-yd) */
|
||||
/* yb' = (ya-xb-yc+xd) */
|
||||
write_q15x2_ia (&ptr1, __SHASX(S, U));
|
||||
|
||||
/* xd' = (xa-yb-xc+yd) */
|
||||
/* yd' = (ya+xb-yc-xd) */
|
||||
write_q15x2_ia (&ptr1, __SHSAX(S, U));
|
||||
#else
|
||||
/* xb' = (xa+yb-xc-yd) */
|
||||
/* yb' = (ya-xb-yc+xd) */
|
||||
write_q15x2_ia (&ptr1, __SHSAX(S, U));
|
||||
|
||||
/* xd' = (xa-yb-xc+yd) */
|
||||
/* yd' = (ya+xb-yc-xd) */
|
||||
write_q15x2_ia (&ptr1, __SHASX(S, U));
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
} while (--j);
|
||||
|
||||
/* end of last stage process */
|
||||
|
||||
/* output is in 11.5(q5) format for the 1024 point */
|
||||
/* output is in 9.7(q7) format for the 256 point */
|
||||
/* output is in 7.9(q9) format for the 64 point */
|
||||
/* output is in 5.11(q11) format for the 16 point */
|
||||
|
||||
|
||||
#else /* arm_radix4_butterfly_inverse_q15 */
|
||||
|
||||
q15_t R0, R1, S0, S1, T0, T1, U0, U1;
|
||||
q15_t Co1, Si1, Co2, Si2, Co3, Si3, out1, out2;
|
||||
uint32_t n1, n2, ic, i0, i1, i2, i3, j, k;
|
||||
|
||||
/* Total process is divided into three stages */
|
||||
|
||||
/* process first stage, middle stages, & last stage */
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
|
||||
/* Index for twiddle coefficient */
|
||||
ic = 0U;
|
||||
|
||||
/* Index for input read and output write */
|
||||
i0 = 0U;
|
||||
|
||||
j = n2;
|
||||
|
||||
/* Input is in 1.15(q15) format */
|
||||
|
||||
/* Start of first stage process */
|
||||
do
|
||||
{
|
||||
/* Butterfly implementation */
|
||||
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T0 = pSrc16[i0 * 2U] >> 2U;
|
||||
T1 = pSrc16[(i0 * 2U) + 1U] >> 2U;
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S0 = pSrc16[i2 * 2U] >> 2U;
|
||||
S1 = pSrc16[(i2 * 2U) + 1U] >> 2U;
|
||||
|
||||
/* R0 = (ya + yc), R1 = (xa + xc) */
|
||||
R0 = __SSAT(T0 + S0, 16U);
|
||||
R1 = __SSAT(T1 + S1, 16U);
|
||||
/* S0 = (ya - yc), S1 = (xa - xc) */
|
||||
S0 = __SSAT(T0 - S0, 16U);
|
||||
S1 = __SSAT(T1 - S1, 16U);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U] >> 2U;
|
||||
T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
|
||||
/* Read yd (real), xd(imag) input */
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
U0 = pSrc16[i3 * 2U] >> 2U;
|
||||
U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;
|
||||
|
||||
/* T0 = (yb + yd), T1 = (xb + xd) */
|
||||
T0 = __SSAT(T0 + U0, 16U);
|
||||
T1 = __SSAT(T1 + U1, 16U);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
|
||||
pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
|
||||
|
||||
/* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc)- (xb + xd) */
|
||||
R0 = __SSAT(R0 - T0, 16U);
|
||||
R1 = __SSAT(R1 - T1, 16U);
|
||||
/* co2 & si2 are read from Coefficient pointer */
|
||||
Co2 = pCoef16[2U * ic * 2U];
|
||||
Si2 = pCoef16[(2U * ic * 2U) + 1U];
|
||||
/* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
|
||||
out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16U);
|
||||
/* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
|
||||
out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16U);
|
||||
|
||||
/* Reading i0+fftLen/4 */
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* T0 = yb, T1 = xb */
|
||||
T0 = pSrc16[i1 * 2U] >> 2U;
|
||||
T1 = pSrc16[(i1 * 2U) + 1U] >> 2U;
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* writing output(xc', yc') in little endian format */
|
||||
pSrc16[i1 * 2U] = out1;
|
||||
pSrc16[(i1 * 2U) + 1U] = out2;
|
||||
|
||||
/* Butterfly calculations */
|
||||
/* input is down scale by 4 to avoid overflow */
|
||||
/* U0 = yd, U1 = xd) */
|
||||
U0 = pSrc16[i3 * 2U] >> 2U;
|
||||
U1 = pSrc16[(i3 * 2U) + 1U] >> 2U;
|
||||
|
||||
/* T0 = yb-yd, T1 = xb-xd) */
|
||||
T0 = __SSAT(T0 - U0, 16U);
|
||||
T1 = __SSAT(T1 - U1, 16U);
|
||||
/* R0 = (ya-yc) + (xb- xd) , R1 = (xa-xc) - (yb-yd) */
|
||||
R0 = (q15_t) __SSAT((q31_t) (S0 + T1), 16);
|
||||
R1 = (q15_t) __SSAT((q31_t) (S1 - T0), 16);
|
||||
/* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd) */
|
||||
S0 = (q15_t) __SSAT((q31_t) (S0 - T1), 16);
|
||||
S1 = (q15_t) __SSAT((q31_t) (S1 + T0), 16);
|
||||
|
||||
/* co1 & si1 are read from Coefficient pointer */
|
||||
Co1 = pCoef16[ic * 2U];
|
||||
Si1 = pCoef16[(ic * 2U) + 1U];
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
/* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
|
||||
out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
|
||||
/* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
|
||||
out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
|
||||
/* writing output(xb', yb') in little endian format */
|
||||
pSrc16[i2 * 2U] = out1;
|
||||
pSrc16[(i2 * 2U) + 1U] = out2;
|
||||
|
||||
/* Co3 & si3 are read from Coefficient pointer */
|
||||
Co3 = pCoef16[3U * ic * 2U];
|
||||
Si3 = pCoef16[(3U * ic * 2U) + 1U];
|
||||
/* Butterfly process for the i0+3fftLen/4 sample */
|
||||
/* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
|
||||
out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
|
||||
/* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
|
||||
out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
|
||||
/* writing output(xd', yd') in little endian format */
|
||||
pSrc16[i3 * 2U] = out1;
|
||||
pSrc16[(i3 * 2U) + 1U] = out2;
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ic = ic + twidCoefModifier;
|
||||
|
||||
/* Updating input index */
|
||||
i0 = i0 + 1U;
|
||||
|
||||
} while (--j);
|
||||
|
||||
/* End of first stage process */
|
||||
|
||||
/* data is in 4.11(q11) format */
|
||||
|
||||
|
||||
/* Start of Middle stage process */
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
twidCoefModifier <<= 2U;
|
||||
|
||||
/* Calculation of Middle stage */
|
||||
for (k = fftLen / 4U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the middle stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ic = 0U;
|
||||
|
||||
for (j = 0U; j <= (n2 - 1U); j++)
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
Co1 = pCoef16[ic * 2U];
|
||||
Si1 = pCoef16[(ic * 2U) + 1U];
|
||||
Co2 = pCoef16[2U * ic * 2U];
|
||||
Si2 = pCoef16[2U * ic * 2U + 1U];
|
||||
Co3 = pCoef16[3U * ic * 2U];
|
||||
Si3 = pCoef16[(3U * ic * 2U) + 1U];
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ic = ic + twidCoefModifier;
|
||||
|
||||
/* Butterfly implementation */
|
||||
for (i0 = j; i0 < fftLen; i0 += n1)
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T0 = pSrc16[i0 * 2U];
|
||||
T1 = pSrc16[(i0 * 2U) + 1U];
|
||||
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S0 = pSrc16[i2 * 2U];
|
||||
S1 = pSrc16[(i2 * 2U) + 1U];
|
||||
|
||||
|
||||
/* R0 = (ya + yc), R1 = (xa + xc) */
|
||||
R0 = __SSAT(T0 + S0, 16U);
|
||||
R1 = __SSAT(T1 + S1, 16U);
|
||||
/* S0 = (ya - yc), S1 = (xa - xc) */
|
||||
S0 = __SSAT(T0 - S0, 16U);
|
||||
S1 = __SSAT(T1 - S1, 16U);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U];
|
||||
T1 = pSrc16[(i1 * 2U) + 1U];
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U0 = pSrc16[i3 * 2U];
|
||||
U1 = pSrc16[(i3 * 2U) + 1U];
|
||||
|
||||
/* T0 = (yb + yd), T1 = (xb + xd) */
|
||||
T0 = __SSAT(T0 + U0, 16U);
|
||||
T1 = __SSAT(T1 + U1, 16U);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc16[i0 * 2U] = ((R0 >> 1U) + (T0 >> 1U)) >> 1U;
|
||||
pSrc16[(i0 * 2U) + 1U] = ((R1 >> 1U) + (T1 >> 1U)) >> 1U;
|
||||
|
||||
/* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
|
||||
R0 = (R0 >> 1U) - (T0 >> 1U);
|
||||
R1 = (R1 >> 1U) - (T1 >> 1U);
|
||||
|
||||
/* (ya-yb+yc-yd)* (si2) - (xa-xb+xc-xd)* co2 */
|
||||
out1 = (q15_t) ((Co2 * R0 - Si2 * R1) >> 16);
|
||||
/* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
|
||||
out2 = (q15_t) ((Si2 * R0 + Co2 * R1) >> 16);
|
||||
|
||||
/* Reading i0+3fftLen/4 */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U];
|
||||
T1 = pSrc16[(i1 * 2U) + 1U];
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */
|
||||
/* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */
|
||||
pSrc16[i1 * 2U] = out1;
|
||||
pSrc16[(i1 * 2U) + 1U] = out2;
|
||||
|
||||
/* Butterfly calculations */
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U0 = pSrc16[i3 * 2U];
|
||||
U1 = pSrc16[(i3 * 2U) + 1U];
|
||||
|
||||
/* T0 = yb-yd, T1 = xb-xd) */
|
||||
T0 = __SSAT(T0 - U0, 16U);
|
||||
T1 = __SSAT(T1 - U1, 16U);
|
||||
|
||||
/* R0 = (ya-yc) + (xb- xd) , R1 = (xa-xc) - (yb-yd) */
|
||||
R0 = (S0 >> 1U) + (T1 >> 1U);
|
||||
R1 = (S1 >> 1U) - (T0 >> 1U);
|
||||
|
||||
/* S0 = (ya-yc) - (xb- xd), S1 = (xa-xc) + (yb-yd) */
|
||||
S0 = (S0 >> 1U) - (T1 >> 1U);
|
||||
S1 = (S1 >> 1U) + (T0 >> 1U);
|
||||
|
||||
/* Butterfly process for the i0+fftLen/2 sample */
|
||||
out1 = (q15_t) ((Co1 * S0 - Si1 * S1) >> 16U);
|
||||
out2 = (q15_t) ((Si1 * S0 + Co1 * S1) >> 16U);
|
||||
/* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */
|
||||
/* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */
|
||||
pSrc16[i2 * 2U] = out1;
|
||||
pSrc16[(i2 * 2U) + 1U] = out2;
|
||||
|
||||
/* Butterfly process for the i0+3fftLen/4 sample */
|
||||
out1 = (q15_t) ((Co3 * R0 - Si3 * R1) >> 16U);
|
||||
|
||||
out2 = (q15_t) ((Si3 * R0 + Co3 * R1) >> 16U);
|
||||
/* xd' = (xa+yb-xc-yd)* Co3 - (ya-xb-yc+xd)* (si3) */
|
||||
/* yd' = (ya-xb-yc+xd)* Co3 + (xa+yb-xc-yd)* (si3) */
|
||||
pSrc16[i3 * 2U] = out1;
|
||||
pSrc16[(i3 * 2U) + 1U] = out2;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
/* Twiddle coefficients index modifier */
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
/* End of Middle stages process */
|
||||
|
||||
|
||||
/* data is in 10.6(q6) format for the 1024 point */
|
||||
/* data is in 8.8(q8) format for the 256 point */
|
||||
/* data is in 6.10(q10) format for the 64 point */
|
||||
/* data is in 4.12(q12) format for the 16 point */
|
||||
|
||||
/* start of last stage process */
|
||||
|
||||
|
||||
/* Initializations for the last stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
|
||||
/* Butterfly implementation */
|
||||
for (i0 = 0U; i0 <= (fftLen - n1); i0 += n1)
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc16[i0 + 0], pSrc16[i0 + fftLen/4], pSrc16[i0 + fftLen/2], pSrc16[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Reading i0, i0+fftLen/2 inputs */
|
||||
/* Read ya (real), xa(imag) input */
|
||||
T0 = pSrc16[i0 * 2U];
|
||||
T1 = pSrc16[(i0 * 2U) + 1U];
|
||||
/* Read yc (real), xc(imag) input */
|
||||
S0 = pSrc16[i2 * 2U];
|
||||
S1 = pSrc16[(i2 * 2U) + 1U];
|
||||
|
||||
/* R0 = (ya + yc), R1 = (xa + xc) */
|
||||
R0 = __SSAT(T0 + S0, 16U);
|
||||
R1 = __SSAT(T1 + S1, 16U);
|
||||
/* S0 = (ya - yc), S1 = (xa - xc) */
|
||||
S0 = __SSAT(T0 - S0, 16U);
|
||||
S1 = __SSAT(T1 - S1, 16U);
|
||||
|
||||
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U];
|
||||
T1 = pSrc16[(i1 * 2U) + 1U];
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U0 = pSrc16[i3 * 2U];
|
||||
U1 = pSrc16[(i3 * 2U) + 1U];
|
||||
|
||||
/* T0 = (yb + yd), T1 = (xb + xd) */
|
||||
T0 = __SSAT(T0 + U0, 16U);
|
||||
T1 = __SSAT(T1 + U1, 16U);
|
||||
|
||||
/* writing the butterfly processed i0 sample */
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc16[i0 * 2U] = (R0 >> 1U) + (T0 >> 1U);
|
||||
pSrc16[(i0 * 2U) + 1U] = (R1 >> 1U) + (T1 >> 1U);
|
||||
|
||||
/* R0 = (ya + yc) - (yb + yd), R1 = (xa + xc) - (xb + xd) */
|
||||
R0 = (R0 >> 1U) - (T0 >> 1U);
|
||||
R1 = (R1 >> 1U) - (T1 >> 1U);
|
||||
|
||||
/* Read yb (real), xb(imag) input */
|
||||
T0 = pSrc16[i1 * 2U];
|
||||
T1 = pSrc16[(i1 * 2U) + 1U];
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/4 sample */
|
||||
/* xc' = (xa-xb+xc-xd) */
|
||||
/* yc' = (ya-yb+yc-yd) */
|
||||
pSrc16[i1 * 2U] = R0;
|
||||
pSrc16[(i1 * 2U) + 1U] = R1;
|
||||
|
||||
/* Read yd (real), xd(imag) input */
|
||||
U0 = pSrc16[i3 * 2U];
|
||||
U1 = pSrc16[(i3 * 2U) + 1U];
|
||||
/* T0 = (yb - yd), T1 = (xb - xd) */
|
||||
T0 = __SSAT(T0 - U0, 16U);
|
||||
T1 = __SSAT(T1 - U1, 16U);
|
||||
|
||||
/* writing the butterfly processed i0 + fftLen/2 sample */
|
||||
/* xb' = (xa-yb-xc+yd) */
|
||||
/* yb' = (ya+xb-yc-xd) */
|
||||
pSrc16[i2 * 2U] = (S0 >> 1U) - (T1 >> 1U);
|
||||
pSrc16[(i2 * 2U) + 1U] = (S1 >> 1U) + (T0 >> 1U);
|
||||
|
||||
|
||||
/* writing the butterfly processed i0 + 3fftLen/4 sample */
|
||||
/* xd' = (xa+yb-xc-yd) */
|
||||
/* yd' = (ya-xb-yc+xd) */
|
||||
pSrc16[i3 * 2U] = (S0 >> 1U) + (T1 >> 1U);
|
||||
pSrc16[(i3 * 2U) + 1U] = (S1 >> 1U) - (T0 >> 1U);
|
||||
}
|
||||
/* end of last stage process */
|
||||
|
||||
/* output is in 11.5(q5) format for the 1024 point */
|
||||
/* output is in 9.7(q7) format for the 256 point */
|
||||
/* output is in 7.9(q9) format for the 64 point */
|
||||
/* output is in 5.11(q11) format for the 16 point */
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
}
|
||||
@@ -0,0 +1,827 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix4_q31.c
|
||||
* Description: This file has function definition of Radix-4 FFT & IFFT function and
|
||||
* In-place bit reversal using bit reversal table
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
void arm_radix4_butterfly_inverse_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint32_t twidCoefModifier);
|
||||
|
||||
void arm_radix4_butterfly_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint32_t twidCoefModifier);
|
||||
|
||||
void arm_bitreversal_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup ComplexFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the Q31 CFFT/CIFFT.
|
||||
@deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
|
||||
@param[in] S points to an instance of the Q31 CFFT/CIFFT structure
|
||||
@param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
|
||||
@return none
|
||||
|
||||
@par Input and output formats:
|
||||
Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
|
||||
Hence the output format is different for different FFT sizes.
|
||||
The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
|
||||
@par
|
||||
\image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
|
||||
\image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
|
||||
*/
|
||||
|
||||
void arm_cfft_radix4_q31(
|
||||
const arm_cfft_radix4_instance_q31 * S,
|
||||
q31_t * pSrc)
|
||||
{
|
||||
if (S->ifftFlag == 1U)
|
||||
{
|
||||
/* Complex IFFT radix-4 */
|
||||
arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Complex FFT radix-4 */
|
||||
arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
|
||||
}
|
||||
|
||||
if (S->bitReverseFlag == 1U)
|
||||
{
|
||||
/* Bit Reversal */
|
||||
arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of ComplexFFT group
|
||||
*/
|
||||
|
||||
/*
|
||||
* Radix-4 FFT algorithm used is :
|
||||
*
|
||||
* Input real and imaginary data:
|
||||
* x(n) = xa + j * ya
|
||||
* x(n+N/4 ) = xb + j * yb
|
||||
* x(n+N/2 ) = xc + j * yc
|
||||
* x(n+3N/4) = xd + j * yd
|
||||
*
|
||||
*
|
||||
* Output real and imaginary data:
|
||||
* x(4r) = xa'+ j * ya'
|
||||
* x(4r+1) = xb'+ j * yb'
|
||||
* x(4r+2) = xc'+ j * yc'
|
||||
* x(4r+3) = xd'+ j * yd'
|
||||
*
|
||||
*
|
||||
* Twiddle factors for radix-4 FFT:
|
||||
* Wn = co1 + j * (- si1)
|
||||
* W2n = co2 + j * (- si2)
|
||||
* W3n = co3 + j * (- si3)
|
||||
*
|
||||
* Butterfly implementation:
|
||||
* xa' = xa + xb + xc + xd
|
||||
* ya' = ya + yb + yc + yd
|
||||
* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
|
||||
* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
|
||||
* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
|
||||
* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
|
||||
* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
|
||||
* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Core function for the Q31 CFFT butterfly process.
|
||||
@param[in,out] pSrc points to the in-place buffer of Q31 data type.
|
||||
@param[in] fftLen length of the FFT.
|
||||
@param[in] pCoef points to twiddle coefficient buffer.
|
||||
@param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_radix4_butterfly_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint32_t twidCoefModifier)
|
||||
{
|
||||
uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
|
||||
q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
|
||||
|
||||
q31_t xa, xb, xc, xd;
|
||||
q31_t ya, yb, yc, yd;
|
||||
q31_t xa_out, xb_out, xc_out, xd_out;
|
||||
q31_t ya_out, yb_out, yc_out, yd_out;
|
||||
|
||||
q31_t *ptr1;
|
||||
|
||||
/* Total process is divided into three stages */
|
||||
|
||||
/* process first stage, middle stages, & last stage */
|
||||
|
||||
|
||||
/* start of first stage process */
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
i0 = 0U;
|
||||
ia1 = 0U;
|
||||
|
||||
j = n2;
|
||||
|
||||
/* Calculation of first stage */
|
||||
do
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* input is in 1.31(q31) format and provide 4 guard bits for the input */
|
||||
|
||||
/* Butterfly implementation */
|
||||
/* xa + xc */
|
||||
r1 = (pSrc[(2U * i0)] >> 4U) + (pSrc[(2U * i2)] >> 4U);
|
||||
/* xa - xc */
|
||||
r2 = (pSrc[(2U * i0)] >> 4U) - (pSrc[(2U * i2)] >> 4U);
|
||||
|
||||
/* xb + xd */
|
||||
t1 = (pSrc[(2U * i1)] >> 4U) + (pSrc[(2U * i3)] >> 4U);
|
||||
|
||||
/* ya + yc */
|
||||
s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
|
||||
/* ya - yc */
|
||||
s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = (r1 + t1);
|
||||
/* (xa + xc) - (xb + xd) */
|
||||
r1 = r1 - t1;
|
||||
/* yb + yd */
|
||||
t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = (s1 + t2);
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = s1 - t2;
|
||||
|
||||
/* yb - yd */
|
||||
t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
|
||||
/* xb - xd */
|
||||
t2 = (pSrc[(2U * i1)] >> 4U) - (pSrc[(2U * i3)] >> 4U);
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = 2U * ia1;
|
||||
co2 = pCoef[(ia2 * 2U)];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
|
||||
((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
|
||||
((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
r1 = r2 + t1;
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
r2 = r2 - t1;
|
||||
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
s1 = s2 - t2;
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
s2 = s2 + t2;
|
||||
|
||||
co1 = pCoef[(ia1 * 2U)];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
|
||||
((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
|
||||
((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia3 = 3U * ia1;
|
||||
co3 = pCoef[(ia3 * 2U)];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
|
||||
((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
|
||||
((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
/* Updating input index */
|
||||
i0 = i0 + 1U;
|
||||
|
||||
} while (--j);
|
||||
|
||||
/* end of first stage process */
|
||||
|
||||
/* data is in 5.27(q27) format */
|
||||
|
||||
|
||||
/* start of Middle stages process */
|
||||
|
||||
|
||||
/* each stage in middle stages provides two down scaling of the input */
|
||||
|
||||
twidCoefModifier <<= 2U;
|
||||
|
||||
|
||||
for (k = fftLen / 4U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the first stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
/* Calculation of first stage */
|
||||
for (j = 0U; j <= (n2 - 1U); j++)
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[(ia1 * 2U)];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[(ia2 * 2U)];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[(ia3 * 2U)];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
for (i0 = j; i0 < fftLen; i0 += n1)
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
/* xa + xc */
|
||||
r1 = pSrc[2U * i0] + pSrc[2U * i2];
|
||||
/* xa - xc */
|
||||
r2 = pSrc[2U * i0] - pSrc[2U * i2];
|
||||
|
||||
/* ya + yc */
|
||||
s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
|
||||
/* ya - yc */
|
||||
s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* xb + xd */
|
||||
t1 = pSrc[2U * i1] + pSrc[2U * i3];
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = (r1 + t1) >> 2U;
|
||||
/* xa + xc -(xb + xd) */
|
||||
r1 = r1 - t1;
|
||||
|
||||
/* yb + yd */
|
||||
t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = s1 - t2;
|
||||
|
||||
/* (yb - yd) */
|
||||
t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
|
||||
/* (xb - xd) */
|
||||
t2 = pSrc[2U * i1] - pSrc[2U * i3];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
|
||||
((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1U;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
|
||||
((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1U;
|
||||
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
r1 = r2 + t1;
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
r2 = r2 - t1;
|
||||
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
s1 = s2 - t2;
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
s2 = s2 + t2;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
|
||||
((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
|
||||
((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
|
||||
((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
|
||||
((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
|
||||
}
|
||||
}
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
|
||||
/* End of Middle stages process */
|
||||
|
||||
/* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
|
||||
/* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
|
||||
/* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
|
||||
/* data is in 5.27(q27) format for the 16 point as there are no middle stages */
|
||||
|
||||
|
||||
/* start of Last stage process */
|
||||
/* Initializations for the last stage */
|
||||
j = fftLen >> 2;
|
||||
ptr1 = &pSrc[0];
|
||||
|
||||
/* Calculations of last stage */
|
||||
do
|
||||
{
|
||||
/* Read xa (real), ya(imag) input */
|
||||
xa = *ptr1++;
|
||||
ya = *ptr1++;
|
||||
|
||||
/* Read xb (real), yb(imag) input */
|
||||
xb = *ptr1++;
|
||||
yb = *ptr1++;
|
||||
|
||||
/* Read xc (real), yc(imag) input */
|
||||
xc = *ptr1++;
|
||||
yc = *ptr1++;
|
||||
|
||||
/* Read xc (real), yc(imag) input */
|
||||
xd = *ptr1++;
|
||||
yd = *ptr1++;
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
xa_out = xa + xb + xc + xd;
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
ya_out = ya + yb + yc + yd;
|
||||
|
||||
/* pointer updation for writing */
|
||||
ptr1 = ptr1 - 8U;
|
||||
|
||||
/* writing xa' and ya' */
|
||||
*ptr1++ = xa_out;
|
||||
*ptr1++ = ya_out;
|
||||
|
||||
xc_out = (xa - xb + xc - xd);
|
||||
yc_out = (ya - yb + yc - yd);
|
||||
|
||||
/* writing xc' and yc' */
|
||||
*ptr1++ = xc_out;
|
||||
*ptr1++ = yc_out;
|
||||
|
||||
xb_out = (xa + yb - xc - yd);
|
||||
yb_out = (ya - xb - yc + xd);
|
||||
|
||||
/* writing xb' and yb' */
|
||||
*ptr1++ = xb_out;
|
||||
*ptr1++ = yb_out;
|
||||
|
||||
xd_out = (xa - yb - xc + yd);
|
||||
yd_out = (ya + xb - yc - xd);
|
||||
|
||||
/* writing xd' and yd' */
|
||||
*ptr1++ = xd_out;
|
||||
*ptr1++ = yd_out;
|
||||
|
||||
|
||||
} while (--j);
|
||||
|
||||
/* output is in 11.21(q21) format for the 1024 point */
|
||||
/* output is in 9.23(q23) format for the 256 point */
|
||||
/* output is in 7.25(q25) format for the 64 point */
|
||||
/* output is in 5.27(q27) format for the 16 point */
|
||||
|
||||
/* End of last stage process */
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief Core function for the Q31 CIFFT butterfly process.
|
||||
@param[in,out] pSrc points to the in-place buffer of Q31 data type.
|
||||
@param[in] fftLen length of the FFT.
|
||||
@param[in] pCoef points to twiddle coefficient buffer.
|
||||
@param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
@return none
|
||||
*/
|
||||
|
||||
/*
|
||||
* Radix-4 IFFT algorithm used is :
|
||||
*
|
||||
* CIFFT uses same twiddle coefficients as CFFT Function
|
||||
* x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]
|
||||
*
|
||||
*
|
||||
* IFFT is implemented with following changes in equations from FFT
|
||||
*
|
||||
* Input real and imaginary data:
|
||||
* x(n) = xa + j * ya
|
||||
* x(n+N/4 ) = xb + j * yb
|
||||
* x(n+N/2 ) = xc + j * yc
|
||||
* x(n+3N/4) = xd + j * yd
|
||||
*
|
||||
*
|
||||
* Output real and imaginary data:
|
||||
* x(4r) = xa'+ j * ya'
|
||||
* x(4r+1) = xb'+ j * yb'
|
||||
* x(4r+2) = xc'+ j * yc'
|
||||
* x(4r+3) = xd'+ j * yd'
|
||||
*
|
||||
*
|
||||
* Twiddle factors for radix-4 IFFT:
|
||||
* Wn = co1 + j * (si1)
|
||||
* W2n = co2 + j * (si2)
|
||||
* W3n = co3 + j * (si3)
|
||||
|
||||
* The real and imaginary output values for the radix-4 butterfly are
|
||||
* xa' = xa + xb + xc + xd
|
||||
* ya' = ya + yb + yc + yd
|
||||
* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
|
||||
* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
|
||||
* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
|
||||
* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
|
||||
* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
|
||||
* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
|
||||
*
|
||||
*/
|
||||
|
||||
void arm_radix4_butterfly_inverse_q31(
|
||||
q31_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const q31_t * pCoef,
|
||||
uint32_t twidCoefModifier)
|
||||
{
|
||||
uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
|
||||
q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
|
||||
q31_t xa, xb, xc, xd;
|
||||
q31_t ya, yb, yc, yd;
|
||||
q31_t xa_out, xb_out, xc_out, xd_out;
|
||||
q31_t ya_out, yb_out, yc_out, yd_out;
|
||||
|
||||
q31_t *ptr1;
|
||||
|
||||
/* input is be 1.31(q31) format for all FFT sizes */
|
||||
/* Total process is divided into three stages */
|
||||
/* process first stage, middle stages, & last stage */
|
||||
|
||||
/* Start of first stage process */
|
||||
|
||||
/* Initializations for the first stage */
|
||||
n2 = fftLen;
|
||||
n1 = n2;
|
||||
/* n2 = fftLen/4 */
|
||||
n2 >>= 2U;
|
||||
i0 = 0U;
|
||||
ia1 = 0U;
|
||||
|
||||
j = n2;
|
||||
|
||||
do
|
||||
{
|
||||
/* input is in 1.31(q31) format and provide 4 guard bits for the input */
|
||||
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
/* xa + xc */
|
||||
r1 = (pSrc[2U * i0] >> 4U) + (pSrc[2U * i2] >> 4U);
|
||||
/* xa - xc */
|
||||
r2 = (pSrc[2U * i0] >> 4U) - (pSrc[2U * i2] >> 4U);
|
||||
|
||||
/* xb + xd */
|
||||
t1 = (pSrc[2U * i1] >> 4U) + (pSrc[2U * i3] >> 4U);
|
||||
|
||||
/* ya + yc */
|
||||
s1 = (pSrc[(2U * i0) + 1U] >> 4U) + (pSrc[(2U * i2) + 1U] >> 4U);
|
||||
/* ya - yc */
|
||||
s2 = (pSrc[(2U * i0) + 1U] >> 4U) - (pSrc[(2U * i2) + 1U] >> 4U);
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = (r1 + t1);
|
||||
/* (xa + xc) - (xb + xd) */
|
||||
r1 = r1 - t1;
|
||||
/* yb + yd */
|
||||
t2 = (pSrc[(2U * i1) + 1U] >> 4U) + (pSrc[(2U * i3) + 1U] >> 4U);
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = (s1 + t2);
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = s1 - t2;
|
||||
|
||||
/* yb - yd */
|
||||
t1 = (pSrc[(2U * i1) + 1U] >> 4U) - (pSrc[(2U * i3) + 1U] >> 4U);
|
||||
/* xb - xd */
|
||||
t2 = (pSrc[2U * i1] >> 4U) - (pSrc[2U * i3] >> 4U);
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = 2U * ia1;
|
||||
co2 = pCoef[ia2 * 2U];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
|
||||
((int32_t) (((q63_t) s1 * si2) >> 32))) << 1U;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
pSrc[2U * i1 + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
|
||||
((int32_t) (((q63_t) r1 * si2) >> 32))) << 1U;
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
r1 = r2 - t1;
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
r2 = r2 + t1;
|
||||
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
s1 = s2 + t2;
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
s2 = s2 - t2;
|
||||
|
||||
co1 = pCoef[ia1 * 2U];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
|
||||
((int32_t) (((q63_t) s1 * si1) >> 32))) << 1U;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
|
||||
((int32_t) (((q63_t) r1 * si1) >> 32))) << 1U;
|
||||
|
||||
/* index calculation for the coefficients */
|
||||
ia3 = 3U * ia1;
|
||||
co3 = pCoef[ia3 * 2U];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
pSrc[2U * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
|
||||
((int32_t) (((q63_t) s2 * si3) >> 32))) << 1U;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
|
||||
((int32_t) (((q63_t) r2 * si3) >> 32))) << 1U;
|
||||
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
/* Updating input index */
|
||||
i0 = i0 + 1U;
|
||||
|
||||
} while (--j);
|
||||
|
||||
/* data is in 5.27(q27) format */
|
||||
/* each stage provides two down scaling of the input */
|
||||
|
||||
|
||||
/* Start of Middle stages process */
|
||||
|
||||
twidCoefModifier <<= 2U;
|
||||
|
||||
/* Calculation of second stage to excluding last stage */
|
||||
for (k = fftLen / 4U; k > 4U; k >>= 2U)
|
||||
{
|
||||
/* Initializations for the first stage */
|
||||
n1 = n2;
|
||||
n2 >>= 2U;
|
||||
ia1 = 0U;
|
||||
|
||||
for (j = 0; j <= (n2 - 1U); j++)
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
ia2 = ia1 + ia1;
|
||||
ia3 = ia2 + ia1;
|
||||
co1 = pCoef[(ia1 * 2U)];
|
||||
si1 = pCoef[(ia1 * 2U) + 1U];
|
||||
co2 = pCoef[(ia2 * 2U)];
|
||||
si2 = pCoef[(ia2 * 2U) + 1U];
|
||||
co3 = pCoef[(ia3 * 2U)];
|
||||
si3 = pCoef[(ia3 * 2U) + 1U];
|
||||
/* Twiddle coefficients index modifier */
|
||||
ia1 = ia1 + twidCoefModifier;
|
||||
|
||||
for (i0 = j; i0 < fftLen; i0 += n1)
|
||||
{
|
||||
/* index calculation for the input as, */
|
||||
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2U], pSrc[i0 + 3fftLen/4] */
|
||||
i1 = i0 + n2;
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
|
||||
/* Butterfly implementation */
|
||||
/* xa + xc */
|
||||
r1 = pSrc[2U * i0] + pSrc[2U * i2];
|
||||
/* xa - xc */
|
||||
r2 = pSrc[2U * i0] - pSrc[2U * i2];
|
||||
|
||||
/* ya + yc */
|
||||
s1 = pSrc[(2U * i0) + 1U] + pSrc[(2U * i2) + 1U];
|
||||
/* ya - yc */
|
||||
s2 = pSrc[(2U * i0) + 1U] - pSrc[(2U * i2) + 1U];
|
||||
|
||||
/* xb + xd */
|
||||
t1 = pSrc[2U * i1] + pSrc[2U * i3];
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
pSrc[2U * i0] = (r1 + t1) >> 2U;
|
||||
/* xa + xc -(xb + xd) */
|
||||
r1 = r1 - t1;
|
||||
/* yb + yd */
|
||||
t2 = pSrc[(2U * i1) + 1U] + pSrc[(2U * i3) + 1U];
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
pSrc[(2U * i0) + 1U] = (s1 + t2) >> 2U;
|
||||
|
||||
/* (ya + yc) - (yb + yd) */
|
||||
s1 = s1 - t2;
|
||||
|
||||
/* (yb - yd) */
|
||||
t1 = pSrc[(2U * i1) + 1U] - pSrc[(2U * i3) + 1U];
|
||||
/* (xb - xd) */
|
||||
t2 = pSrc[2U * i1] - pSrc[2U * i3];
|
||||
|
||||
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
|
||||
pSrc[2U * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32U)) -
|
||||
((int32_t) (((q63_t) s1 * si2) >> 32U))) >> 1U;
|
||||
|
||||
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
|
||||
pSrc[(2U * i1) + 1U] = (((int32_t) (((q63_t) s1 * co2) >> 32U)) +
|
||||
((int32_t) (((q63_t) r1 * si2) >> 32U))) >> 1U;
|
||||
|
||||
/* (xa - xc) - (yb - yd) */
|
||||
r1 = r2 - t1;
|
||||
/* (xa - xc) + (yb - yd) */
|
||||
r2 = r2 + t1;
|
||||
|
||||
/* (ya - yc) + (xb - xd) */
|
||||
s1 = s2 + t2;
|
||||
/* (ya - yc) - (xb - xd) */
|
||||
s2 = s2 - t2;
|
||||
|
||||
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
|
||||
pSrc[2U * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
|
||||
((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1U;
|
||||
|
||||
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
|
||||
pSrc[(2U * i2) + 1U] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
|
||||
((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1U;
|
||||
|
||||
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
|
||||
pSrc[(2U * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
|
||||
((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1U;
|
||||
|
||||
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
|
||||
pSrc[(2U * i3) + 1U] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
|
||||
((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1U;
|
||||
}
|
||||
}
|
||||
twidCoefModifier <<= 2U;
|
||||
}
|
||||
|
||||
/* End of Middle stages process */
|
||||
|
||||
/* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
|
||||
/* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
|
||||
/* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
|
||||
/* data is in 5.27(q27) format for the 16 point as there are no middle stages */
|
||||
|
||||
|
||||
/* Start of last stage process */
|
||||
|
||||
|
||||
/* Initializations for the last stage */
|
||||
j = fftLen >> 2;
|
||||
ptr1 = &pSrc[0];
|
||||
|
||||
/* Calculations of last stage */
|
||||
do
|
||||
{
|
||||
/* Read xa (real), ya(imag) input */
|
||||
xa = *ptr1++;
|
||||
ya = *ptr1++;
|
||||
|
||||
/* Read xb (real), yb(imag) input */
|
||||
xb = *ptr1++;
|
||||
yb = *ptr1++;
|
||||
|
||||
/* Read xc (real), yc(imag) input */
|
||||
xc = *ptr1++;
|
||||
yc = *ptr1++;
|
||||
|
||||
/* Read xc (real), yc(imag) input */
|
||||
xd = *ptr1++;
|
||||
yd = *ptr1++;
|
||||
|
||||
/* xa' = xa + xb + xc + xd */
|
||||
xa_out = xa + xb + xc + xd;
|
||||
|
||||
/* ya' = ya + yb + yc + yd */
|
||||
ya_out = ya + yb + yc + yd;
|
||||
|
||||
/* pointer updation for writing */
|
||||
ptr1 = ptr1 - 8U;
|
||||
|
||||
/* writing xa' and ya' */
|
||||
*ptr1++ = xa_out;
|
||||
*ptr1++ = ya_out;
|
||||
|
||||
xc_out = (xa - xb + xc - xd);
|
||||
yc_out = (ya - yb + yc - yd);
|
||||
|
||||
/* writing xc' and yc' */
|
||||
*ptr1++ = xc_out;
|
||||
*ptr1++ = yc_out;
|
||||
|
||||
xb_out = (xa - yb - xc + yd);
|
||||
yb_out = (ya + xb - yc - xd);
|
||||
|
||||
/* writing xb' and yb' */
|
||||
*ptr1++ = xb_out;
|
||||
*ptr1++ = yb_out;
|
||||
|
||||
xd_out = (xa + yb - xc - yd);
|
||||
yd_out = (ya - xb - yc + xd);
|
||||
|
||||
/* writing xd' and yd' */
|
||||
*ptr1++ = xd_out;
|
||||
*ptr1++ = yd_out;
|
||||
|
||||
} while (--j);
|
||||
|
||||
/* output is in 11.21(q21) format for the 1024 point */
|
||||
/* output is in 9.23(q23) format for the 256 point */
|
||||
/* output is in 7.25(q25) format for the 64 point */
|
||||
/* output is in 5.27(q27) format for the 16 point */
|
||||
|
||||
/* End of last stage process */
|
||||
}
|
||||
@@ -0,0 +1,289 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix8_f16.c
|
||||
* Description: Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* Internal helper function used by the FFTs
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
brief Core function for the floating-point CFFT butterfly process.
|
||||
param[in,out] pSrc points to the in-place buffer of floating-point data type.
|
||||
param[in] fftLen length of the FFT.
|
||||
param[in] pCoef points to the twiddle coefficient buffer.
|
||||
param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
return none
|
||||
*/
|
||||
|
||||
void arm_radix8_butterfly_f16(
|
||||
float16_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float16_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
|
||||
uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
|
||||
uint32_t id;
|
||||
uint32_t n1, n2, j;
|
||||
|
||||
float16_t r1, r2, r3, r4, r5, r6, r7, r8;
|
||||
float16_t t1, t2;
|
||||
float16_t s1, s2, s3, s4, s5, s6, s7, s8;
|
||||
float16_t p1, p2, p3, p4;
|
||||
float16_t co2, co3, co4, co5, co6, co7, co8;
|
||||
float16_t si2, si3, si4, si5, si6, si7, si8;
|
||||
const float16_t C81 = 0.70710678118f16;
|
||||
|
||||
n2 = fftLen;
|
||||
|
||||
do
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 3;
|
||||
i1 = 0;
|
||||
|
||||
do
|
||||
{
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
i4 = i3 + n2;
|
||||
i5 = i4 + n2;
|
||||
i6 = i5 + n2;
|
||||
i7 = i6 + n2;
|
||||
i8 = i7 + n2;
|
||||
r1 = (_Float16)pSrc[2 * i1] + (_Float16)pSrc[2 * i5];
|
||||
r5 = (_Float16)pSrc[2 * i1] - (_Float16)pSrc[2 * i5];
|
||||
r2 = (_Float16)pSrc[2 * i2] + (_Float16)pSrc[2 * i6];
|
||||
r6 = (_Float16)pSrc[2 * i2] - (_Float16)pSrc[2 * i6];
|
||||
r3 = (_Float16)pSrc[2 * i3] + (_Float16)pSrc[2 * i7];
|
||||
r7 = (_Float16)pSrc[2 * i3] - (_Float16)pSrc[2 * i7];
|
||||
r4 = (_Float16)pSrc[2 * i4] + (_Float16)pSrc[2 * i8];
|
||||
r8 = (_Float16)pSrc[2 * i4] - (_Float16)pSrc[2 * i8];
|
||||
t1 = (_Float16)r1 - (_Float16)r3;
|
||||
r1 = (_Float16)r1 + (_Float16)r3;
|
||||
r3 = (_Float16)r2 - (_Float16)r4;
|
||||
r2 = (_Float16)r2 + (_Float16)r4;
|
||||
pSrc[2 * i1] = (_Float16)r1 + (_Float16)r2;
|
||||
pSrc[2 * i5] = (_Float16)r1 - (_Float16)r2;
|
||||
r1 = (_Float16)pSrc[2 * i1 + 1] + (_Float16)pSrc[2 * i5 + 1];
|
||||
s5 = (_Float16)pSrc[2 * i1 + 1] - (_Float16)pSrc[2 * i5 + 1];
|
||||
r2 = (_Float16)pSrc[2 * i2 + 1] + (_Float16)pSrc[2 * i6 + 1];
|
||||
s6 = (_Float16)pSrc[2 * i2 + 1] - (_Float16)pSrc[2 * i6 + 1];
|
||||
s3 = (_Float16)pSrc[2 * i3 + 1] + (_Float16)pSrc[2 * i7 + 1];
|
||||
s7 = (_Float16)pSrc[2 * i3 + 1] - (_Float16)pSrc[2 * i7 + 1];
|
||||
r4 = (_Float16)pSrc[2 * i4 + 1] + (_Float16)pSrc[2 * i8 + 1];
|
||||
s8 = (_Float16)pSrc[2 * i4 + 1] - (_Float16)pSrc[2 * i8 + 1];
|
||||
t2 = (_Float16)r1 - (_Float16)s3;
|
||||
r1 = (_Float16)r1 + (_Float16)s3;
|
||||
s3 = (_Float16)r2 - (_Float16)r4;
|
||||
r2 = (_Float16)r2 + (_Float16)r4;
|
||||
pSrc[2 * i1 + 1] = (_Float16)r1 + (_Float16)r2;
|
||||
pSrc[2 * i5 + 1] = (_Float16)r1 - (_Float16)r2;
|
||||
pSrc[2 * i3] = (_Float16)t1 + (_Float16)s3;
|
||||
pSrc[2 * i7] = (_Float16)t1 - (_Float16)s3;
|
||||
pSrc[2 * i3 + 1] = (_Float16)t2 - (_Float16)r3;
|
||||
pSrc[2 * i7 + 1] = (_Float16)t2 + (_Float16)r3;
|
||||
r1 = ((_Float16)r6 - (_Float16)r8) * (_Float16)C81;
|
||||
r6 = ((_Float16)r6 + (_Float16)r8) * (_Float16)C81;
|
||||
r2 = ((_Float16)s6 - (_Float16)s8) * (_Float16)C81;
|
||||
s6 = ((_Float16)s6 + (_Float16)s8) * (_Float16)C81;
|
||||
t1 = (_Float16)r5 - (_Float16)r1;
|
||||
r5 = (_Float16)r5 + (_Float16)r1;
|
||||
r8 = (_Float16)r7 - (_Float16)r6;
|
||||
r7 = (_Float16)r7 + (_Float16)r6;
|
||||
t2 = (_Float16)s5 - (_Float16)r2;
|
||||
s5 = (_Float16)s5 + (_Float16)r2;
|
||||
s8 = (_Float16)s7 - (_Float16)s6;
|
||||
s7 = (_Float16)s7 + (_Float16)s6;
|
||||
pSrc[2 * i2] = (_Float16)r5 + (_Float16)s7;
|
||||
pSrc[2 * i8] = (_Float16)r5 - (_Float16)s7;
|
||||
pSrc[2 * i6] = (_Float16)t1 + (_Float16)s8;
|
||||
pSrc[2 * i4] = (_Float16)t1 - (_Float16)s8;
|
||||
pSrc[2 * i2 + 1] = (_Float16)s5 - (_Float16)r7;
|
||||
pSrc[2 * i8 + 1] = (_Float16)s5 + (_Float16)r7;
|
||||
pSrc[2 * i6 + 1] = (_Float16)t2 - (_Float16)r8;
|
||||
pSrc[2 * i4 + 1] = (_Float16)t2 + (_Float16)r8;
|
||||
|
||||
i1 += n1;
|
||||
} while (i1 < fftLen);
|
||||
|
||||
if (n2 < 8)
|
||||
break;
|
||||
|
||||
ia1 = 0;
|
||||
j = 1;
|
||||
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
id = ia1 + twidCoefModifier;
|
||||
ia1 = id;
|
||||
ia2 = ia1 + id;
|
||||
ia3 = ia2 + id;
|
||||
ia4 = ia3 + id;
|
||||
ia5 = ia4 + id;
|
||||
ia6 = ia5 + id;
|
||||
ia7 = ia6 + id;
|
||||
|
||||
co2 = pCoef[2 * ia1];
|
||||
co3 = pCoef[2 * ia2];
|
||||
co4 = pCoef[2 * ia3];
|
||||
co5 = pCoef[2 * ia4];
|
||||
co6 = pCoef[2 * ia5];
|
||||
co7 = pCoef[2 * ia6];
|
||||
co8 = pCoef[2 * ia7];
|
||||
si2 = pCoef[2 * ia1 + 1];
|
||||
si3 = pCoef[2 * ia2 + 1];
|
||||
si4 = pCoef[2 * ia3 + 1];
|
||||
si5 = pCoef[2 * ia4 + 1];
|
||||
si6 = pCoef[2 * ia5 + 1];
|
||||
si7 = pCoef[2 * ia6 + 1];
|
||||
si8 = pCoef[2 * ia7 + 1];
|
||||
|
||||
i1 = j;
|
||||
|
||||
do
|
||||
{
|
||||
/* index calculation for the input */
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
i4 = i3 + n2;
|
||||
i5 = i4 + n2;
|
||||
i6 = i5 + n2;
|
||||
i7 = i6 + n2;
|
||||
i8 = i7 + n2;
|
||||
r1 = (_Float16)pSrc[2 * i1] + (_Float16)pSrc[2 * i5];
|
||||
r5 = (_Float16)pSrc[2 * i1] - (_Float16)pSrc[2 * i5];
|
||||
r2 = (_Float16)pSrc[2 * i2] + (_Float16)pSrc[2 * i6];
|
||||
r6 = (_Float16)pSrc[2 * i2] - (_Float16)pSrc[2 * i6];
|
||||
r3 = (_Float16)pSrc[2 * i3] + (_Float16)pSrc[2 * i7];
|
||||
r7 = (_Float16)pSrc[2 * i3] - (_Float16)pSrc[2 * i7];
|
||||
r4 = (_Float16)pSrc[2 * i4] + (_Float16)pSrc[2 * i8];
|
||||
r8 = (_Float16)pSrc[2 * i4] - (_Float16)pSrc[2 * i8];
|
||||
t1 = (_Float16)r1 - (_Float16)r3;
|
||||
r1 = (_Float16)r1 + (_Float16)r3;
|
||||
r3 = (_Float16)r2 - (_Float16)r4;
|
||||
r2 = (_Float16)r2 + (_Float16)r4;
|
||||
pSrc[2 * i1] = (_Float16)r1 + (_Float16)r2;
|
||||
r2 = (_Float16)r1 - (_Float16)r2;
|
||||
s1 = (_Float16)pSrc[2 * i1 + 1] + (_Float16)pSrc[2 * i5 + 1];
|
||||
s5 = (_Float16)pSrc[2 * i1 + 1] - (_Float16)pSrc[2 * i5 + 1];
|
||||
s2 = (_Float16)pSrc[2 * i2 + 1] + (_Float16)pSrc[2 * i6 + 1];
|
||||
s6 = (_Float16)pSrc[2 * i2 + 1] - (_Float16)pSrc[2 * i6 + 1];
|
||||
s3 = (_Float16)pSrc[2 * i3 + 1] + (_Float16)pSrc[2 * i7 + 1];
|
||||
s7 = (_Float16)pSrc[2 * i3 + 1] - (_Float16)pSrc[2 * i7 + 1];
|
||||
s4 = (_Float16)pSrc[2 * i4 + 1] + (_Float16)pSrc[2 * i8 + 1];
|
||||
s8 = (_Float16)pSrc[2 * i4 + 1] - (_Float16)pSrc[2 * i8 + 1];
|
||||
t2 = (_Float16)s1 - (_Float16)s3;
|
||||
s1 = (_Float16)s1 + (_Float16)s3;
|
||||
s3 = (_Float16)s2 - (_Float16)s4;
|
||||
s2 = (_Float16)s2 + (_Float16)s4;
|
||||
r1 = (_Float16)t1 + (_Float16)s3;
|
||||
t1 = (_Float16)t1 - (_Float16)s3;
|
||||
pSrc[2 * i1 + 1] = (_Float16)s1 + (_Float16)s2;
|
||||
s2 = (_Float16)s1 - (_Float16)s2;
|
||||
s1 = (_Float16)t2 - (_Float16)r3;
|
||||
t2 = (_Float16)t2 + (_Float16)r3;
|
||||
p1 = (_Float16)co5 * (_Float16)r2;
|
||||
p2 = (_Float16)si5 * (_Float16)s2;
|
||||
p3 = (_Float16)co5 * (_Float16)s2;
|
||||
p4 = (_Float16)si5 * (_Float16)r2;
|
||||
pSrc[2 * i5] = (_Float16)p1 + (_Float16)p2;
|
||||
pSrc[2 * i5 + 1] = (_Float16)p3 - (_Float16)p4;
|
||||
p1 = (_Float16)co3 * (_Float16)r1;
|
||||
p2 = (_Float16)si3 * (_Float16)s1;
|
||||
p3 = (_Float16)co3 * (_Float16)s1;
|
||||
p4 = (_Float16)si3 * (_Float16)r1;
|
||||
pSrc[2 * i3] = (_Float16)p1 + (_Float16)p2;
|
||||
pSrc[2 * i3 + 1] = (_Float16)p3 - (_Float16)p4;
|
||||
p1 = (_Float16)co7 * (_Float16)t1;
|
||||
p2 = (_Float16)si7 * (_Float16)t2;
|
||||
p3 = (_Float16)co7 * (_Float16)t2;
|
||||
p4 = (_Float16)si7 * (_Float16)t1;
|
||||
pSrc[2 * i7] = (_Float16)p1 + (_Float16)p2;
|
||||
pSrc[2 * i7 + 1] = (_Float16)p3 - (_Float16)p4;
|
||||
r1 = ((_Float16)r6 - (_Float16)r8) * (_Float16)C81;
|
||||
r6 = ((_Float16)r6 + (_Float16)r8) * (_Float16)C81;
|
||||
s1 = ((_Float16)s6 - (_Float16)s8) * (_Float16)C81;
|
||||
s6 = ((_Float16)s6 + (_Float16)s8) * (_Float16)C81;
|
||||
t1 = (_Float16)r5 - (_Float16)r1;
|
||||
r5 = (_Float16)r5 + (_Float16)r1;
|
||||
r8 = (_Float16)r7 - (_Float16)r6;
|
||||
r7 = (_Float16)r7 + (_Float16)r6;
|
||||
t2 = (_Float16)s5 - (_Float16)s1;
|
||||
s5 = (_Float16)s5 + (_Float16)s1;
|
||||
s8 = (_Float16)s7 - (_Float16)s6;
|
||||
s7 = (_Float16)s7 + (_Float16)s6;
|
||||
r1 = (_Float16)r5 + (_Float16)s7;
|
||||
r5 = (_Float16)r5 - (_Float16)s7;
|
||||
r6 = (_Float16)t1 + (_Float16)s8;
|
||||
t1 = (_Float16)t1 - (_Float16)s8;
|
||||
s1 = (_Float16)s5 - (_Float16)r7;
|
||||
s5 = (_Float16)s5 + (_Float16)r7;
|
||||
s6 = (_Float16)t2 - (_Float16)r8;
|
||||
t2 = (_Float16)t2 + (_Float16)r8;
|
||||
p1 = (_Float16)co2 * (_Float16)r1;
|
||||
p2 = (_Float16)si2 * (_Float16)s1;
|
||||
p3 = (_Float16)co2 * (_Float16)s1;
|
||||
p4 = (_Float16)si2 * (_Float16)r1;
|
||||
pSrc[2 * i2] = (_Float16)p1 + (_Float16)p2;
|
||||
pSrc[2 * i2 + 1] = (_Float16)p3 - (_Float16)p4;
|
||||
p1 = (_Float16)co8 * (_Float16)r5;
|
||||
p2 = (_Float16)si8 * (_Float16)s5;
|
||||
p3 = (_Float16)co8 * (_Float16)s5;
|
||||
p4 = (_Float16)si8 * (_Float16)r5;
|
||||
pSrc[2 * i8] = (_Float16)p1 + (_Float16)p2;
|
||||
pSrc[2 * i8 + 1] = (_Float16)p3 - (_Float16)p4;
|
||||
p1 = (_Float16)co6 * (_Float16)r6;
|
||||
p2 = (_Float16)si6 * (_Float16)s6;
|
||||
p3 = (_Float16)co6 * (_Float16)s6;
|
||||
p4 = (_Float16)si6 * (_Float16)r6;
|
||||
pSrc[2 * i6] = (_Float16)p1 + (_Float16)p2;
|
||||
pSrc[2 * i6 + 1] = (_Float16)p3 - (_Float16)p4;
|
||||
p1 = (_Float16)co4 * (_Float16)t1;
|
||||
p2 = (_Float16)si4 * (_Float16)t2;
|
||||
p3 = (_Float16)co4 * (_Float16)t2;
|
||||
p4 = (_Float16)si4 * (_Float16)t1;
|
||||
pSrc[2 * i4] = (_Float16)p1 + (_Float16)p2;
|
||||
pSrc[2 * i4 + 1] = (_Float16)p3 - (_Float16)p4;
|
||||
|
||||
i1 += n1;
|
||||
} while (i1 < fftLen);
|
||||
|
||||
j++;
|
||||
} while (j < n2);
|
||||
|
||||
twidCoefModifier <<= 3;
|
||||
} while (n2 > 7);
|
||||
}
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
@@ -0,0 +1,285 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_cfft_radix8_f32.c
|
||||
* Description: Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* Internal helper function used by the FFTs
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
brief Core function for the floating-point CFFT butterfly process.
|
||||
param[in,out] pSrc points to the in-place buffer of floating-point data type.
|
||||
param[in] fftLen length of the FFT.
|
||||
param[in] pCoef points to the twiddle coefficient buffer.
|
||||
param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
|
||||
return none
|
||||
*/
|
||||
|
||||
void arm_radix8_butterfly_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier)
|
||||
{
|
||||
/* In-place radix-8 butterfly passes over pSrc. The buffer is accessed as
 * interleaved pairs pSrc[2*i] / pSrc[2*i + 1] (real / imaginary part of
 * sample i, per the CFFT description in the file header). Every pass of the
 * outer loop divides the butterfly span n2 by 8 and multiplies
 * twidCoefModifier by 8. */
/* ia1..ia7: twiddle table indices, equal to 1x..7x the per-j step 'id'. */
uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
|
||||
/* i1..i8: indices of the eight samples of one butterfly, spaced n2 apart. */
uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
|
||||
uint32_t id;
|
||||
/* n1: distance between successive butterflies of the same j; n2: n1 / 8. */
uint32_t n1, n2, j;
|
||||
|
||||
/* r*, s*: real and imaginary temporaries; they are reused several times
 * within one butterfly, so the statement order below matters. */
float32_t r1, r2, r3, r4, r5, r6, r7, r8;
|
||||
float32_t t1, t2;
|
||||
float32_t s1, s2, s3, s4, s5, s6, s7, s8;
|
||||
float32_t p1, p2, p3, p4;
|
||||
/* coN / siN: the coefficient pair applied to output iN (N = 2..8). */
float32_t co2, co3, co4, co5, co6, co7, co8;
|
||||
float32_t si2, si3, si4, si5, si6, si7, si8;
|
||||
/* Approximately 1/sqrt(2). */
const float32_t C81 = 0.70710678118f;
|
||||
|
||||
n2 = fftLen;
|
||||
|
||||
/* One iteration of this loop per radix-8 stage. */
do
|
||||
{
|
||||
n1 = n2;
|
||||
n2 = n2 >> 3;
|
||||
i1 = 0;
|
||||
|
||||
/* Butterflies starting at i1 = 0, n1, 2*n1, ...; no coefficient from pCoef
 * is applied in this loop. */
do
|
||||
{
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
i4 = i3 + n2;
|
||||
i5 = i4 + n2;
|
||||
i6 = i5 + n2;
|
||||
i7 = i6 + n2;
|
||||
i8 = i7 + n2;
|
||||
/* Real parts: sums and differences of the samples that are 4*n2 apart. */
r1 = pSrc[2 * i1] + pSrc[2 * i5];
|
||||
r5 = pSrc[2 * i1] - pSrc[2 * i5];
|
||||
r2 = pSrc[2 * i2] + pSrc[2 * i6];
|
||||
r6 = pSrc[2 * i2] - pSrc[2 * i6];
|
||||
r3 = pSrc[2 * i3] + pSrc[2 * i7];
|
||||
r7 = pSrc[2 * i3] - pSrc[2 * i7];
|
||||
r4 = pSrc[2 * i4] + pSrc[2 * i8];
|
||||
r8 = pSrc[2 * i4] - pSrc[2 * i8];
|
||||
t1 = r1 - r3;
|
||||
r1 = r1 + r3;
|
||||
r3 = r2 - r4;
|
||||
r2 = r2 + r4;
|
||||
/* Only the real slots of i1 and i5 are overwritten here; their imaginary
 * slots are still read below. */
pSrc[2 * i1] = r1 + r2;
|
||||
pSrc[2 * i5] = r1 - r2;
|
||||
/* Imaginary parts (r1, r2 and r4 are reused as imaginary temporaries). */
r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
|
||||
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
|
||||
r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
|
||||
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
|
||||
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
|
||||
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
|
||||
r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
|
||||
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
|
||||
t2 = r1 - s3;
|
||||
r1 = r1 + s3;
|
||||
s3 = r2 - r4;
|
||||
r2 = r2 + r4;
|
||||
pSrc[2 * i1 + 1] = r1 + r2;
|
||||
pSrc[2 * i5 + 1] = r1 - r2;
|
||||
pSrc[2 * i3] = t1 + s3;
|
||||
pSrc[2 * i7] = t1 - s3;
|
||||
pSrc[2 * i3 + 1] = t2 - r3;
|
||||
pSrc[2 * i7 + 1] = t2 + r3;
|
||||
/* Remaining four outputs (i2, i4, i6, i8) combine the difference terms,
 * scaled by C81. */
r1 = (r6 - r8) * C81;
|
||||
r6 = (r6 + r8) * C81;
|
||||
r2 = (s6 - s8) * C81;
|
||||
s6 = (s6 + s8) * C81;
|
||||
t1 = r5 - r1;
|
||||
r5 = r5 + r1;
|
||||
r8 = r7 - r6;
|
||||
r7 = r7 + r6;
|
||||
t2 = s5 - r2;
|
||||
s5 = s5 + r2;
|
||||
s8 = s7 - s6;
|
||||
s7 = s7 + s6;
|
||||
pSrc[2 * i2] = r5 + s7;
|
||||
pSrc[2 * i8] = r5 - s7;
|
||||
pSrc[2 * i6] = t1 + s8;
|
||||
pSrc[2 * i4] = t1 - s8;
|
||||
pSrc[2 * i2 + 1] = s5 - r7;
|
||||
pSrc[2 * i8 + 1] = s5 + r7;
|
||||
pSrc[2 * i6 + 1] = t2 - r8;
|
||||
pSrc[2 * i4 + 1] = t2 + r8;
|
||||
|
||||
i1 += n1;
|
||||
} while (i1 < fftLen);
|
||||
|
||||
/* When the span has dropped below 8 the stage loop is left here, so the
 * coefficient-weighted butterflies below are skipped for that stage. */
if (n2 < 8)
|
||||
break;
|
||||
|
||||
/* Remaining butterflies of this stage: j = 1 .. n2 - 1, each with its own
 * set of seven coefficient pairs. */
ia1 = 0;
|
||||
j = 1;
|
||||
|
||||
do
|
||||
{
|
||||
/* index calculation for the coefficients */
|
||||
/* id advances by twidCoefModifier for every j; ia2..ia7 are 2x..7x id. */
id = ia1 + twidCoefModifier;
|
||||
ia1 = id;
|
||||
ia2 = ia1 + id;
|
||||
ia3 = ia2 + id;
|
||||
ia4 = ia3 + id;
|
||||
ia5 = ia4 + id;
|
||||
ia6 = ia5 + id;
|
||||
ia7 = ia6 + id;
|
||||
|
||||
/* coN / siN are loaded from the pair pCoef[2*ia(N-1)], pCoef[2*ia(N-1) + 1];
 * note the numbering offset between coN/siN and iaN. */
co2 = pCoef[2 * ia1];
|
||||
co3 = pCoef[2 * ia2];
|
||||
co4 = pCoef[2 * ia3];
|
||||
co5 = pCoef[2 * ia4];
|
||||
co6 = pCoef[2 * ia5];
|
||||
co7 = pCoef[2 * ia6];
|
||||
co8 = pCoef[2 * ia7];
|
||||
si2 = pCoef[2 * ia1 + 1];
|
||||
si3 = pCoef[2 * ia2 + 1];
|
||||
si4 = pCoef[2 * ia3 + 1];
|
||||
si5 = pCoef[2 * ia4 + 1];
|
||||
si6 = pCoef[2 * ia5 + 1];
|
||||
si7 = pCoef[2 * ia6 + 1];
|
||||
si8 = pCoef[2 * ia7 + 1];
|
||||
|
||||
i1 = j;
|
||||
|
||||
do
|
||||
{
|
||||
/* index calculation for the input */
|
||||
i2 = i1 + n2;
|
||||
i3 = i2 + n2;
|
||||
i4 = i3 + n2;
|
||||
i5 = i4 + n2;
|
||||
i6 = i5 + n2;
|
||||
i7 = i6 + n2;
|
||||
i8 = i7 + n2;
|
||||
r1 = pSrc[2 * i1] + pSrc[2 * i5];
|
||||
r5 = pSrc[2 * i1] - pSrc[2 * i5];
|
||||
r2 = pSrc[2 * i2] + pSrc[2 * i6];
|
||||
r6 = pSrc[2 * i2] - pSrc[2 * i6];
|
||||
r3 = pSrc[2 * i3] + pSrc[2 * i7];
|
||||
r7 = pSrc[2 * i3] - pSrc[2 * i7];
|
||||
r4 = pSrc[2 * i4] + pSrc[2 * i8];
|
||||
r8 = pSrc[2 * i4] - pSrc[2 * i8];
|
||||
t1 = r1 - r3;
|
||||
r1 = r1 + r3;
|
||||
r3 = r2 - r4;
|
||||
r2 = r2 + r4;
|
||||
/* Output i1 is stored without a coefficient multiplication. */
pSrc[2 * i1] = r1 + r2;
|
||||
r2 = r1 - r2;
|
||||
s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
|
||||
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
|
||||
s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
|
||||
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
|
||||
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
|
||||
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
|
||||
s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
|
||||
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
|
||||
t2 = s1 - s3;
|
||||
s1 = s1 + s3;
|
||||
s3 = s2 - s4;
|
||||
s2 = s2 + s4;
|
||||
r1 = t1 + s3;
|
||||
t1 = t1 - s3;
|
||||
pSrc[2 * i1 + 1] = s1 + s2;
|
||||
s2 = s1 - s2;
|
||||
s1 = t2 - r3;
|
||||
t2 = t2 + r3;
|
||||
/* Each remaining output (r, s) is stored as
 * (co*r + si*s, co*s - si*r), i.e. (r + j*s) * (co - j*si). */
p1 = co5 * r2;
|
||||
p2 = si5 * s2;
|
||||
p3 = co5 * s2;
|
||||
p4 = si5 * r2;
|
||||
pSrc[2 * i5] = p1 + p2;
|
||||
pSrc[2 * i5 + 1] = p3 - p4;
|
||||
p1 = co3 * r1;
|
||||
p2 = si3 * s1;
|
||||
p3 = co3 * s1;
|
||||
p4 = si3 * r1;
|
||||
pSrc[2 * i3] = p1 + p2;
|
||||
pSrc[2 * i3 + 1] = p3 - p4;
|
||||
p1 = co7 * t1;
|
||||
p2 = si7 * t2;
|
||||
p3 = co7 * t2;
|
||||
p4 = si7 * t1;
|
||||
pSrc[2 * i7] = p1 + p2;
|
||||
pSrc[2 * i7 + 1] = p3 - p4;
|
||||
/* Difference terms scaled by C81, feeding outputs i2, i4, i6 and i8. */
r1 = (r6 - r8) * C81;
|
||||
r6 = (r6 + r8) * C81;
|
||||
s1 = (s6 - s8) * C81;
|
||||
s6 = (s6 + s8) * C81;
|
||||
t1 = r5 - r1;
|
||||
r5 = r5 + r1;
|
||||
r8 = r7 - r6;
|
||||
r7 = r7 + r6;
|
||||
t2 = s5 - s1;
|
||||
s5 = s5 + s1;
|
||||
s8 = s7 - s6;
|
||||
s7 = s7 + s6;
|
||||
r1 = r5 + s7;
|
||||
r5 = r5 - s7;
|
||||
r6 = t1 + s8;
|
||||
t1 = t1 - s8;
|
||||
s1 = s5 - r7;
|
||||
s5 = s5 + r7;
|
||||
s6 = t2 - r8;
|
||||
t2 = t2 + r8;
|
||||
p1 = co2 * r1;
|
||||
p2 = si2 * s1;
|
||||
p3 = co2 * s1;
|
||||
p4 = si2 * r1;
|
||||
pSrc[2 * i2] = p1 + p2;
|
||||
pSrc[2 * i2 + 1] = p3 - p4;
|
||||
p1 = co8 * r5;
|
||||
p2 = si8 * s5;
|
||||
p3 = co8 * s5;
|
||||
p4 = si8 * r5;
|
||||
pSrc[2 * i8] = p1 + p2;
|
||||
pSrc[2 * i8 + 1] = p3 - p4;
|
||||
p1 = co6 * r6;
|
||||
p2 = si6 * s6;
|
||||
p3 = co6 * s6;
|
||||
p4 = si6 * r6;
|
||||
pSrc[2 * i6] = p1 + p2;
|
||||
pSrc[2 * i6 + 1] = p3 - p4;
|
||||
p1 = co4 * t1;
|
||||
p2 = si4 * t2;
|
||||
p3 = co4 * t2;
|
||||
p4 = si4 * t1;
|
||||
pSrc[2 * i4] = p1 + p2;
|
||||
pSrc[2 * i4 + 1] = p3 - p4;
|
||||
|
||||
i1 += n1;
|
||||
} while (i1 < fftLen);
|
||||
|
||||
j++;
|
||||
} while (j < n2);
|
||||
|
||||
/* The next stage steps through the coefficient table 8 times faster. */
twidCoefModifier <<= 3;
|
||||
} while (n2 > 7);
|
||||
}
|
||||
448
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c
Normal file
448
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_f32.c
Normal file
@@ -0,0 +1,448 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dct4_f32.c
|
||||
* Description: Processing function of DCT4 & IDCT4 F32
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup DCT4_IDCT4 DCT Type IV Functions
|
||||
|
||||
Representation of signals by minimum number of values is important for storage and transmission.
|
||||
The possibility of large discontinuity between the beginning and end of a period of a signal
|
||||
in DFT can be avoided by extending the signal so that it is even-symmetric.
|
||||
Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the
|
||||
spectrum and is very widely used in signal and image coding applications.
|
||||
The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.
|
||||
DCT has an excellent energy-packing capability and hence has many applications, in data compression in particular.
|
||||
|
||||
DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.
|
||||
Reordering of the input data makes the computation of DCT just a problem of
|
||||
computing the DFT of a real signal with a few additional operations.
|
||||
This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.
|
||||
|
||||
DCT type-II can be implemented using the Fast Fourier Transform (FFT) internally; as the transform is applied to real values, the Real FFT can be used.
|
||||
DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.
|
||||
DCT2 implementation can be described in the following steps:
|
||||
- Re-ordering input
|
||||
- Calculating Real FFT
|
||||
- Multiplication of weights and Real FFT output and getting real part from the product.
|
||||
|
||||
This process is explained by the block diagram below:
|
||||
\image html DCT4.gif "Discrete Cosine Transform - type-IV"
|
||||
|
||||
@par Algorithm
|
||||
The N-point type-IV DCT is defined as a real, linear transformation by the formula:
|
||||
\image html DCT4Equation.gif
|
||||
where <code>k = 0, 1, 2, ..., N-1</code>
|
||||
@par
|
||||
Its inverse is defined as follows:
|
||||
\image html IDCT4Equation.gif
|
||||
where <code>n = 0, 1, 2, ..., N-1</code>
|
||||
@par
|
||||
The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).
|
||||
The symmetry of the transform matrix indicates that the fast algorithms for the forward
|
||||
and inverse transform computation are identical.
|
||||
Note that the implementation of the Inverse DCT4 and the DCT4 is the same, hence the same process function can be used for both.
|
||||
|
||||
@par Lengths supported by the transform:
|
||||
As DCT4 internally uses Real FFT, it supports all the lengths 128, 512, 2048 and 8192.
|
||||
The library provides separate functions for Q15, Q31, and floating-point data types.
|
||||
|
||||
@par Instance Structure
|
||||
The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.
|
||||
A separate instance structure must be defined for each transform.
|
||||
There are separate instance structure declarations for each of the 3 supported data types.
|
||||
|
||||
@par Initialization Functions
|
||||
There is also an associated initialization function for each data type.
|
||||
The initialization function performs the following operations:
|
||||
- Sets the values of the internal structure fields.
|
||||
- Initializes Real FFT as its process function is used internally in DCT4, by calling \ref arm_rfft_init_f32().
|
||||
@par
|
||||
Use of the initialization function is optional.
|
||||
However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
|
||||
To place an instance structure into a const data section, the instance structure must be manually initialized.
|
||||
Manually initialize the instance structure as follows:
|
||||
<pre>
|
||||
arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
|
||||
arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
|
||||
arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
|
||||
</pre>
|
||||
where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4;
|
||||
\c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;
|
||||
\c pTwiddle points to the twiddle factor table;
|
||||
\c pCosFactor points to the cosFactor table;
|
||||
\c pRfft points to the real FFT instance;
|
||||
\c pCfft points to the complex FFT instance;
|
||||
The CFFT and RFFT structures also need to be initialized; refer to arm_cfft_radix4_f32()
|
||||
and arm_rfft_f32() respectively for details regarding static initialization.
|
||||
|
||||
@par Fixed-Point Behavior
|
||||
Care must be taken when using the fixed-point versions of the DCT4 transform functions.
|
||||
In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
|
||||
Refer to the function specific documentation below for usage guidelines.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup DCT4_IDCT4
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the floating-point DCT4/IDCT4.
|
||||
@param[in] S points to an instance of the floating-point DCT4/IDCT4 structure
|
||||
@param[in] pState points to state buffer
|
||||
@param[in,out] pInlineBuffer points to the in-place input and output buffer
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_dct4_f32(
|
||||
const arm_dct4_instance_f32 * S,
|
||||
float32_t * pState,
|
||||
float32_t * pInlineBuffer)
|
||||
{
|
||||
const float32_t *weights = S->pTwiddle; /* Pointer to the Weights table */
|
||||
const float32_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
|
||||
float32_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
|
||||
float32_t in; /* Temporary variable */
|
||||
uint32_t i; /* Loop counter */
|
||||
|
||||
|
||||
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
|
||||
* along with some pre-processing and post-processing.
|
||||
* Computational procedure is explained as follows:
|
||||
* (a) Pre-processing involves multiplying input with cos factor,
|
||||
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
|
||||
* where,
|
||||
* r(n) -- output of preprocessing
|
||||
* u(n) -- input to preprocessing(actual Source buffer)
|
||||
* (b) Calculation of DCT2 using FFT is divided into three steps:
|
||||
* Step1: Re-ordering of even and odd elements of input.
|
||||
* Step2: Calculating FFT of the re-ordered input.
|
||||
* Step3: Taking the real part of the product of FFT output and weights.
|
||||
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
|
||||
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
|
||||
* where,
|
||||
* Y4 -- DCT4 output, Y2 -- DCT2 output
|
||||
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
|
||||
*/
|
||||
|
||||
/*-------- Pre-processing ------------*/
|
||||
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
|
||||
arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N);
|
||||
arm_mult_f32(pInlineBuffer, cosFact, pInlineBuffer, S->N);
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* Step1: Re-ordering of even and odd elements as
|
||||
* pState[i] = pInlineBuffer[2*i] and
|
||||
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
|
||||
---------------------------------------------------------------------*/
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
|
||||
pS2 = pState + (S->N - 1U);
|
||||
|
||||
/* pbuff initialized to input buffer */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
|
||||
i = S->Nby2 >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
do
|
||||
{
|
||||
/* Re-ordering of even and odd elements */
|
||||
/* pState[i] = pInlineBuffer[2*i] */
|
||||
*pS1++ = *pbuff++;
|
||||
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
*pS1++ = *pbuff++;
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
*pS1++ = *pbuff++;
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
*pS1++ = *pbuff++;
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/* pbuff initialized to input buffer */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
|
||||
i = S->N >> 2U;
|
||||
|
||||
/* Processing with loop unrolling 4 times as N is always multiple of 4.
|
||||
* Compute 4 outputs at a time */
|
||||
do
|
||||
{
|
||||
/* Writing the re-ordered output back to inplace input buffer */
|
||||
*pbuff++ = *pS1++;
|
||||
*pbuff++ = *pS1++;
|
||||
*pbuff++ = *pS1++;
|
||||
*pbuff++ = *pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
* Step2: Calculate RFFT for N-point input
|
||||
* ---------------------------------------------------------- */
|
||||
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
|
||||
arm_rfft_f32 (S->pRfft, pInlineBuffer, pState);
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
* Step3: Multiply the FFT output with the weights.
|
||||
*----------------------------------------------------------------------*/
|
||||
arm_cmplx_mult_cmplx_f32 (pState, weights, pState, S->N);
|
||||
|
||||
/* ----------- Post-processing ---------- */
|
||||
/* DCT-IV can be obtained from DCT-II by the equation,
|
||||
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
|
||||
* Hence, Y4(0) = Y2(0)/2 */
|
||||
/* Getting only real part from the output and Converting to DCT-IV */
|
||||
|
||||
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
|
||||
i = (S->N - 1U) >> 2U;
|
||||
|
||||
/* pbuff initialized to input buffer. */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
|
||||
in = *pS1++ * (float32_t) 0.5;
|
||||
/* input buffer acts as inplace, so output values are stored in the input itself. */
|
||||
*pbuff++ = in;
|
||||
|
||||
/* pState pointer is incremented twice as the real values are located alternatively in the array */
|
||||
pS1++;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
do
|
||||
{
|
||||
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
|
||||
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
/* points to the next real value */
|
||||
pS1++;
|
||||
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
pS1++;
|
||||
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
pS1++;
|
||||
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
i = (S->N - 1U) % 0x4U;
|
||||
|
||||
while (i > 0U)
|
||||
{
|
||||
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
|
||||
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
|
||||
/* points to the next real value */
|
||||
pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
}
|
||||
|
||||
|
||||
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
|
||||
|
||||
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
|
||||
i = S->N >> 2U;
|
||||
|
||||
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
|
||||
do
|
||||
{
|
||||
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
|
||||
in = *pbuff;
|
||||
*pbuff++ = in * S->normalize;
|
||||
|
||||
in = *pbuff;
|
||||
*pbuff++ = in * S->normalize;
|
||||
|
||||
in = *pbuff;
|
||||
*pbuff++ = in * S->normalize;
|
||||
|
||||
in = *pbuff;
|
||||
*pbuff++ = in * S->normalize;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Initializing the loop counter to N/2 */
|
||||
i = S->Nby2;
|
||||
|
||||
do
|
||||
{
|
||||
/* Re-ordering of even and odd elements */
|
||||
/* pState[i] = pInlineBuffer[2*i] */
|
||||
*pS1++ = *pbuff++;
|
||||
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/* pbuff initialized to input buffer */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Initializing the loop counter */
|
||||
i = S->N;
|
||||
|
||||
do
|
||||
{
|
||||
/* Writing the re-ordered output back to inplace input buffer */
|
||||
*pbuff++ = *pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
* Step2: Calculate RFFT for N-point input
|
||||
* ---------------------------------------------------------- */
|
||||
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
|
||||
arm_rfft_f32 (S->pRfft, pInlineBuffer, pState);
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
* Step3: Multiply the FFT output with the weights.
|
||||
*----------------------------------------------------------------------*/
|
||||
arm_cmplx_mult_cmplx_f32 (pState, weights, pState, S->N);
|
||||
|
||||
/* ----------- Post-processing ---------- */
|
||||
/* DCT-IV can be obtained from DCT-II by the equation,
|
||||
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
|
||||
* Hence, Y4(0) = Y2(0)/2 */
|
||||
/* Getting only real part from the output and Converting to DCT-IV */
|
||||
|
||||
/* pbuff initialized to input buffer. */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
|
||||
in = *pS1++ * (float32_t) 0.5;
|
||||
/* input buffer acts as inplace, so output values are stored in the input itself. */
|
||||
*pbuff++ = in;
|
||||
|
||||
/* pState pointer is incremented twice as the real values are located alternatively in the array */
|
||||
pS1++;
|
||||
|
||||
/* Initializing the loop counter */
|
||||
i = (S->N - 1U);
|
||||
|
||||
do
|
||||
{
|
||||
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
|
||||
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
|
||||
/* points to the next real value */
|
||||
pS1++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
|
||||
|
||||
/* Initializing loop counter */
|
||||
i = S->N;
|
||||
|
||||
/* pbuff initialized to the pInlineBuffer (now contains the output values) */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
do
|
||||
{
|
||||
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
|
||||
in = *pbuff;
|
||||
*pbuff++ = in * S->normalize;
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of DCT4_IDCT4 group
|
||||
*/
|
||||
130
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c
Normal file
130
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_f32.c
Normal file
@@ -0,0 +1,130 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dct4_init_f32.c
|
||||
* Description: Initialization function of DCT-4 & IDCT4 F32
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup DCT4_IDCT4
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the floating-point DCT4/IDCT4.
|
||||
@param[in,out] S points to an instance of floating-point DCT4/IDCT4 structure
|
||||
@param[in] S_RFFT points to an instance of floating-point RFFT/RIFFT structure
|
||||
@param[in] S_CFFT points to an instance of floating-point CFFT/CIFFT structure
|
||||
@param[in] N length of the DCT4
|
||||
@param[in] Nby2 half of the length of the DCT4
|
||||
@param[in] normalize normalizing factor.
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length
|
||||
|
||||
@par Normalizing factor
|
||||
The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
|
||||
Floating-point normalizing factors are mentioned in the table below for different DCT sizes:
|
||||
|
||||
\image html dct4NormalizingF32Table.gif
|
||||
*/
|
||||
|
||||
arm_status arm_dct4_init_f32(
|
||||
arm_dct4_instance_f32 * S,
|
||||
arm_rfft_instance_f32 * S_RFFT,
|
||||
arm_cfft_radix4_instance_f32 * S_CFFT,
|
||||
uint16_t N,
|
||||
uint16_t Nby2,
|
||||
float32_t normalize)
|
||||
{
|
||||
/* Initialize the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
|
||||
/* Initialize the DCT4 length */
|
||||
S->N = N;
|
||||
|
||||
/* Initialize the half of DCT4 length */
|
||||
S->Nby2 = Nby2;
|
||||
|
||||
/* Initialize the DCT4 Normalizing factor */
|
||||
S->normalize = normalize;
|
||||
|
||||
/* Initialize Real FFT Instance */
|
||||
S->pRfft = S_RFFT;
|
||||
|
||||
/* Initialize Complex FFT Instance */
|
||||
S->pCfft = S_CFFT;
|
||||
|
||||
switch (N)
|
||||
{
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_8192)
|
||||
/* Initialize the table modifier values */
|
||||
case 8192U:
|
||||
S->pTwiddle = Weights_8192;
|
||||
S->pCosFactor = cos_factors_8192;
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_2048)
|
||||
case 2048U:
|
||||
S->pTwiddle = Weights_2048;
|
||||
S->pCosFactor = cos_factors_2048;
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_512)
|
||||
case 512U:
|
||||
S->pTwiddle = Weights_512;
|
||||
S->pCosFactor = cos_factors_512;
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_F32_128)
|
||||
case 128U:
|
||||
S->pTwiddle = Weights_128;
|
||||
S->pCosFactor = cos_factors_128;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
}
|
||||
|
||||
/* Initialize the RFFT/RIFFT Function */
|
||||
arm_rfft_init_f32(S->pRfft, S->pCfft, S->N, 0U, 1U);
|
||||
|
||||
/* return the status of DCT4 Init function */
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of DCT4_IDCT4 group
|
||||
*/
|
||||
130
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c
Normal file
130
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q15.c
Normal file
@@ -0,0 +1,130 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dct4_init_q15.c
|
||||
* Description: Initialization function of DCT-4 & IDCT4 Q15
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup DCT4_IDCT4
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the Q15 DCT4/IDCT4.
|
||||
@param[in,out] S points to an instance of Q15 DCT4/IDCT4 structure
|
||||
@param[in] S_RFFT points to an instance of Q15 RFFT/RIFFT structure
|
||||
@param[in] S_CFFT points to an instance of Q15 CFFT/CIFFT structure
|
||||
@param[in] N length of the DCT4
|
||||
@param[in] Nby2 half of the length of the DCT4
|
||||
@param[in] normalize normalizing factor
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length
|
||||
|
||||
@par Normalizing factor
|
||||
The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
|
||||
Normalizing factors in 1.15 format are mentioned in the table below for different DCT sizes:
|
||||
|
||||
\image html dct4NormalizingQ15Table.gif
|
||||
*/
|
||||
|
||||
arm_status arm_dct4_init_q15(
|
||||
arm_dct4_instance_q15 * S,
|
||||
arm_rfft_instance_q15 * S_RFFT,
|
||||
arm_cfft_radix4_instance_q15 * S_CFFT,
|
||||
uint16_t N,
|
||||
uint16_t Nby2,
|
||||
q15_t normalize)
|
||||
{
|
||||
/* Initialise the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialize the DCT4 length */
|
||||
S->N = N;
|
||||
|
||||
/* Initialize the half of DCT4 length */
|
||||
S->Nby2 = Nby2;
|
||||
|
||||
/* Initialize the DCT4 Normalizing factor */
|
||||
S->normalize = normalize;
|
||||
|
||||
/* Initialize Real FFT Instance */
|
||||
S->pRfft = S_RFFT;
|
||||
|
||||
/* Initialize Complex FFT Instance */
|
||||
S->pCfft = S_CFFT;
|
||||
|
||||
switch (N)
|
||||
{
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_8192)
|
||||
/* Initialize the table modifier values */
|
||||
case 8192U:
|
||||
S->pTwiddle = WeightsQ15_8192;
|
||||
S->pCosFactor = cos_factorsQ15_8192;
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_2048)
|
||||
case 2048U:
|
||||
S->pTwiddle = WeightsQ15_2048;
|
||||
S->pCosFactor = cos_factorsQ15_2048;
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_512)
|
||||
case 512U:
|
||||
S->pTwiddle = WeightsQ15_512;
|
||||
S->pCosFactor = cos_factorsQ15_512;
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q15_128)
|
||||
case 128U:
|
||||
S->pTwiddle = WeightsQ15_128;
|
||||
S->pCosFactor = cos_factorsQ15_128;
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
}
|
||||
|
||||
/* Initialize the RFFT/RIFFT */
|
||||
arm_rfft_init_q15(S->pRfft, S->N, 0U, 1U);
|
||||
|
||||
/* return the status of DCT4 Init function */
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of DCT4_IDCT4 group
|
||||
*/
|
||||
129
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c
Normal file
129
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_init_q31.c
Normal file
@@ -0,0 +1,129 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dct4_init_q31.c
|
||||
* Description: Initialization function of DCT-4 & IDCT4 Q31
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "arm_common_tables.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup DCT4_IDCT4
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Initialization function for the Q31 DCT4/IDCT4.
|
||||
@param[in,out] S points to an instance of Q31 DCT4/IDCT4 structure.
|
||||
@param[in] S_RFFT points to an instance of Q31 RFFT/RIFFT structure
|
||||
@param[in] S_CFFT points to an instance of Q31 CFFT/CIFFT structure
|
||||
@param[in] N length of the DCT4.
|
||||
@param[in] Nby2 half of the length of the DCT4.
|
||||
@param[in] normalize normalizing factor.
|
||||
@return execution status
|
||||
- \ref ARM_MATH_SUCCESS : Operation successful
|
||||
- \ref ARM_MATH_ARGUMENT_ERROR : <code>N</code> is not a supported transform length
|
||||
|
||||
@par Normalizing factor:
|
||||
The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
|
||||
Normalizing factors in 1.31 format are mentioned in the table below for different DCT sizes:
|
||||
|
||||
\image html dct4NormalizingQ31Table.gif
|
||||
*/
|
||||
|
||||
arm_status arm_dct4_init_q31(
|
||||
arm_dct4_instance_q31 * S,
|
||||
arm_rfft_instance_q31 * S_RFFT,
|
||||
arm_cfft_radix4_instance_q31 * S_CFFT,
|
||||
uint16_t N,
|
||||
uint16_t Nby2,
|
||||
q31_t normalize)
|
||||
{
|
||||
/* Initialize the default arm status */
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
/* Initialize the DCT4 length */
|
||||
S->N = N;
|
||||
|
||||
/* Initialize the half of DCT4 length */
|
||||
S->Nby2 = Nby2;
|
||||
|
||||
/* Initialize the DCT4 Normalizing factor */
|
||||
S->normalize = normalize;
|
||||
|
||||
/* Initialize Real FFT Instance */
|
||||
S->pRfft = S_RFFT;
|
||||
|
||||
/* Initialize Complex FFT Instance */
|
||||
S->pCfft = S_CFFT;
|
||||
|
||||
switch (N)
|
||||
{
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_8192)
|
||||
/* Initialize the table modifier values */
|
||||
case 8192U:
|
||||
S->pTwiddle = WeightsQ31_8192;
|
||||
S->pCosFactor = cos_factorsQ31_8192;
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_2048)
|
||||
case 2048U:
|
||||
S->pTwiddle = WeightsQ31_2048;
|
||||
S->pCosFactor = cos_factorsQ31_2048;
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_512)
|
||||
case 512U:
|
||||
S->pTwiddle = WeightsQ31_512;
|
||||
S->pCosFactor = cos_factorsQ31_512;
|
||||
break;
|
||||
#endif
|
||||
|
||||
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || defined(ARM_TABLE_DCT4_Q31_128)
|
||||
case 128U:
|
||||
S->pTwiddle = WeightsQ31_128;
|
||||
S->pCosFactor = cos_factorsQ31_128;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
status = ARM_MATH_ARGUMENT_ERROR;
|
||||
}
|
||||
|
||||
/* Initialize the RFFT/RIFFT Function */
|
||||
arm_rfft_init_q31(S->pRfft, S->N, 0U, 1U);
|
||||
|
||||
/* return the status of DCT4 Init function */
|
||||
return (status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of DCT4_IDCT4 group
|
||||
*/
|
||||
381
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c
Normal file
381
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q15.c
Normal file
@@ -0,0 +1,381 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dct4_q15.c
|
||||
* Description: Processing function of DCT4 & IDCT4 Q15
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
/**
|
||||
@addtogroup DCT4_IDCT4
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the Q15 DCT4/IDCT4.
|
||||
@param[in] S points to an instance of the Q15 DCT4 structure.
|
||||
@param[in] pState points to state buffer.
|
||||
@param[in,out] pInlineBuffer points to the in-place input and output buffer.
|
||||
@return none
|
||||
|
||||
@par Input an output formats
|
||||
Internally inputs are downscaled in the RFFT process function to avoid overflows.
|
||||
Number of bits downscaled, depends on the size of the transform. The input and output
|
||||
formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
|
||||
|
||||
\image html dct4FormatsQ15Table.gif
|
||||
*/
|
||||
|
||||
void arm_dct4_q15(
|
||||
const arm_dct4_instance_q15 * S,
|
||||
q15_t * pState,
|
||||
q15_t * pInlineBuffer)
|
||||
{
|
||||
const q15_t *weights = S->pTwiddle; /* Pointer to the Weights table */
|
||||
const q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
|
||||
q15_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
|
||||
q15_t in; /* Temporary variable */
|
||||
uint32_t i; /* Loop counter */
|
||||
|
||||
|
||||
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
|
||||
* along with some pre-processing and post-processing.
|
||||
* Computational procedure is explained as follows:
|
||||
* (a) Pre-processing involves multiplying input with cos factor,
|
||||
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
|
||||
* where,
|
||||
* r(n) -- output of preprocessing
|
||||
* u(n) -- input to preprocessing(actual Source buffer)
|
||||
* (b) Calculation of DCT2 using FFT is divided into three steps:
|
||||
* Step1: Re-ordering of even and odd elements of input.
|
||||
* Step2: Calculating FFT of the re-ordered input.
|
||||
* Step3: Taking the real part of the product of FFT output and weights.
|
||||
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
|
||||
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
|
||||
* where,
|
||||
* Y4 -- DCT4 output, Y2 -- DCT2 output
|
||||
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
|
||||
*/
|
||||
|
||||
/*-------- Pre-processing ------------*/
|
||||
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
|
||||
arm_mult_q15 (pInlineBuffer, cosFact, pInlineBuffer, S->N);
|
||||
arm_shift_q15 (pInlineBuffer, 1, pInlineBuffer, S->N);
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* Step1: Re-ordering of even and odd elements as
|
||||
* pState[i] = pInlineBuffer[2*i] and
|
||||
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
|
||||
---------------------------------------------------------------------*/
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
|
||||
pS2 = pState + (S->N - 1U);
|
||||
|
||||
/* pbuff initialized to input buffer */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
|
||||
i = S->Nby2 >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
do
|
||||
{
|
||||
/* Re-ordering of even and odd elements */
|
||||
/* pState[i] = pInlineBuffer[2*i] */
|
||||
*pS1++ = *pbuff++;
|
||||
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
*pS1++ = *pbuff++;
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
*pS1++ = *pbuff++;
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
*pS1++ = *pbuff++;
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/* pbuff initialized to input buffer */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
|
||||
i = S->N >> 2U;
|
||||
|
||||
/* Processing with loop unrolling 4 times as N is always multiple of 4.
|
||||
* Compute 4 outputs at a time */
|
||||
do
|
||||
{
|
||||
/* Writing the re-ordered output back to inplace input buffer */
|
||||
*pbuff++ = *pS1++;
|
||||
*pbuff++ = *pS1++;
|
||||
*pbuff++ = *pS1++;
|
||||
*pbuff++ = *pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
* Step2: Calculate RFFT for N-point input
|
||||
* ---------------------------------------------------------- */
|
||||
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
|
||||
arm_rfft_q15 (S->pRfft, pInlineBuffer, pState);
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
* Step3: Multiply the FFT output with the weights.
|
||||
*----------------------------------------------------------------------*/
|
||||
arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N);
|
||||
|
||||
/* The output of complex multiplication is in 3.13 format.
|
||||
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
|
||||
arm_shift_q15 (pState, 2, pState, S->N * 2);
|
||||
|
||||
/* ----------- Post-processing ---------- */
|
||||
/* DCT-IV can be obtained from DCT-II by the equation,
|
||||
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
|
||||
* Hence, Y4(0) = Y2(0)/2 */
|
||||
/* Getting only real part from the output and Converting to DCT-IV */
|
||||
|
||||
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
|
||||
i = (S->N - 1U) >> 2U;
|
||||
|
||||
/* pbuff initialized to input buffer. */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
|
||||
in = *pS1++ >> 1U;
|
||||
/* input buffer acts as inplace, so output values are stored in the input itself. */
|
||||
*pbuff++ = in;
|
||||
|
||||
/* pState pointer is incremented twice as the real values are located alternatively in the array */
|
||||
pS1++;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
do
|
||||
{
|
||||
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
|
||||
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
/* points to the next real value */
|
||||
pS1++;
|
||||
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
pS1++;
|
||||
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
pS1++;
|
||||
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
i = (S->N - 1U) % 0x4U;
|
||||
|
||||
while (i > 0U)
|
||||
{
|
||||
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
|
||||
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
|
||||
/* points to the next real value */
|
||||
pS1++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
}
|
||||
|
||||
|
||||
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
|
||||
|
||||
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
|
||||
i = S->N >> 2U;
|
||||
|
||||
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
|
||||
do
|
||||
{
|
||||
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
|
||||
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
|
||||
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
|
||||
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Initializing the loop counter to N/2 */
|
||||
i = S->Nby2;
|
||||
|
||||
do
|
||||
{
|
||||
/* Re-ordering of even and odd elements */
|
||||
/* pState[i] = pInlineBuffer[2*i] */
|
||||
*pS1++ = *pbuff++;
|
||||
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/* pbuff initialized to input buffer */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Initializing the loop counter */
|
||||
i = S->N;
|
||||
|
||||
do
|
||||
{
|
||||
/* Writing the re-ordered output back to inplace input buffer */
|
||||
*pbuff++ = *pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
* Step2: Calculate RFFT for N-point input
|
||||
* ---------------------------------------------------------- */
|
||||
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
|
||||
arm_rfft_q15 (S->pRfft, pInlineBuffer, pState);
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
* Step3: Multiply the FFT output with the weights.
|
||||
*----------------------------------------------------------------------*/
|
||||
arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N);
|
||||
|
||||
/* The output of complex multiplication is in 3.13 format.
|
||||
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
|
||||
arm_shift_q15 (pState, 2, pState, S->N * 2);
|
||||
|
||||
/* ----------- Post-processing ---------- */
|
||||
/* DCT-IV can be obtained from DCT-II by the equation,
|
||||
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
|
||||
* Hence, Y4(0) = Y2(0)/2 */
|
||||
/* Getting only real part from the output and Converting to DCT-IV */
|
||||
|
||||
/* pbuff initialized to input buffer. */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
|
||||
in = *pS1++ >> 1U;
|
||||
/* input buffer acts as inplace, so output values are stored in the input itself. */
|
||||
*pbuff++ = in;
|
||||
|
||||
/* pState pointer is incremented twice as the real values are located alternatively in the array */
|
||||
pS1++;
|
||||
|
||||
/* Initializing the loop counter */
|
||||
i = (S->N - 1U);
|
||||
|
||||
do
|
||||
{
|
||||
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
|
||||
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
|
||||
/* points to the next real value */
|
||||
pS1++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
|
||||
|
||||
/* Initializing loop counter */
|
||||
i = S->N;
|
||||
|
||||
/* pbuff initialized to the pInlineBuffer (now contains the output values) */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
do
|
||||
{
|
||||
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
|
||||
} while (i > 0U);
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of DCT4_IDCT4 group
|
||||
*/
|
||||
383
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c
Normal file
383
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_dct4_q31.c
Normal file
@@ -0,0 +1,383 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_dct4_q31.c
|
||||
* Description: Processing function of DCT4 & IDCT4 Q31
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
/**
|
||||
@addtogroup DCT4_IDCT4
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the Q31 DCT4/IDCT4.
|
||||
@param[in] S points to an instance of the Q31 DCT4 structure.
|
||||
@param[in] pState points to state buffer.
|
||||
@param[in,out] pInlineBuffer points to the in-place input and output buffer.
|
||||
@return none
|
||||
|
||||
@par Input and output formats
|
||||
Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,
|
||||
as the conversion from DCT2 to DCT4 involves one subtraction.
|
||||
Internally inputs are downscaled in the RFFT process function to avoid overflows.
|
||||
Number of bits downscaled, depends on the size of the transform.
|
||||
The input and output formats for different DCT sizes and number of bits to upscale are
|
||||
mentioned in the table below:
|
||||
|
||||
\image html dct4FormatsQ31Table.gif
|
||||
*/
|
||||
|
||||
void arm_dct4_q31(
|
||||
const arm_dct4_instance_q31 * S,
|
||||
q31_t * pState,
|
||||
q31_t * pInlineBuffer)
|
||||
{
|
||||
const q31_t *weights = S->pTwiddle; /* Pointer to the Weights table */
|
||||
const q31_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
|
||||
q31_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
|
||||
q31_t in; /* Temporary variable */
|
||||
uint32_t i; /* Loop counter */
|
||||
|
||||
|
||||
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
|
||||
* along with some pre-processing and post-processing.
|
||||
* Computational procedure is explained as follows:
|
||||
* (a) Pre-processing involves multiplying input with cos factor,
|
||||
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
|
||||
* where,
|
||||
* r(n) -- output of preprocessing
|
||||
* u(n) -- input to preprocessing(actual Source buffer)
|
||||
* (b) Calculation of DCT2 using FFT is divided into three steps:
|
||||
* Step1: Re-ordering of even and odd elements of input.
|
||||
* Step2: Calculating FFT of the re-ordered input.
|
||||
* Step3: Taking the real part of the product of FFT output and weights.
|
||||
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
|
||||
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
|
||||
* where,
|
||||
* Y4 -- DCT4 output, Y2 -- DCT2 output
|
||||
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
|
||||
*/
|
||||
|
||||
/*-------- Pre-processing ------------*/
|
||||
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
|
||||
arm_mult_q31 (pInlineBuffer, cosFact, pInlineBuffer, S->N);
|
||||
arm_shift_q31 (pInlineBuffer, 1, pInlineBuffer, S->N);
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* Step1: Re-ordering of even and odd elements as
|
||||
* pState[i] = pInlineBuffer[2*i] and
|
||||
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
|
||||
---------------------------------------------------------------------*/
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
|
||||
pS2 = pState + (S->N - 1U);
|
||||
|
||||
/* pbuff initialized to input buffer */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
|
||||
i = S->Nby2 >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
do
|
||||
{
|
||||
/* Re-ordering of even and odd elements */
|
||||
/* pState[i] = pInlineBuffer[2*i] */
|
||||
*pS1++ = *pbuff++;
|
||||
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
*pS1++ = *pbuff++;
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
*pS1++ = *pbuff++;
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
*pS1++ = *pbuff++;
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/* pbuff initialized to input buffer */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
|
||||
i = S->N >> 2U;
|
||||
|
||||
/* Processing with loop unrolling 4 times as N is always multiple of 4.
|
||||
* Compute 4 outputs at a time */
|
||||
do
|
||||
{
|
||||
/* Writing the re-ordered output back to inplace input buffer */
|
||||
*pbuff++ = *pS1++;
|
||||
*pbuff++ = *pS1++;
|
||||
*pbuff++ = *pS1++;
|
||||
*pbuff++ = *pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
* Step2: Calculate RFFT for N-point input
|
||||
* ---------------------------------------------------------- */
|
||||
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
|
||||
arm_rfft_q31 (S->pRfft, pInlineBuffer, pState);
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
* Step3: Multiply the FFT output with the weights.
|
||||
*----------------------------------------------------------------------*/
|
||||
arm_cmplx_mult_cmplx_q31 (pState, weights, pState, S->N);
|
||||
|
||||
/* The output of complex multiplication is in 3.29 format.
|
||||
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
|
||||
arm_shift_q31 (pState, 2, pState, S->N * 2);
|
||||
|
||||
/* ----------- Post-processing ---------- */
|
||||
/* DCT-IV can be obtained from DCT-II by the equation,
|
||||
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
|
||||
* Hence, Y4(0) = Y2(0)/2 */
|
||||
/* Getting only real part from the output and Converting to DCT-IV */
|
||||
|
||||
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
|
||||
i = (S->N - 1U) >> 2U;
|
||||
|
||||
/* pbuff initialized to input buffer. */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
|
||||
in = *pS1++ >> 1U;
|
||||
/* input buffer acts as inplace, so output values are stored in the input itself. */
|
||||
*pbuff++ = in;
|
||||
|
||||
/* pState pointer is incremented twice as the real values are located alternatively in the array */
|
||||
pS1++;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
do
|
||||
{
|
||||
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
|
||||
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
/* points to the next real value */
|
||||
pS1++;
|
||||
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
pS1++;
|
||||
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
pS1++;
|
||||
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
i = (S->N - 1U) % 0x4U;
|
||||
|
||||
while (i > 0U)
|
||||
{
|
||||
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
|
||||
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
|
||||
/* points to the next real value */
|
||||
pS1++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
}
|
||||
|
||||
|
||||
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
|
||||
|
||||
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
|
||||
i = S->N >> 2U;
|
||||
|
||||
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
|
||||
do
|
||||
{
|
||||
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
|
||||
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
|
||||
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
|
||||
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Initializing the loop counter to N/2 */
|
||||
i = S->Nby2;
|
||||
|
||||
do
|
||||
{
|
||||
/* Re-ordering of even and odd elements */
|
||||
/* pState[i] = pInlineBuffer[2*i] */
|
||||
*pS1++ = *pbuff++;
|
||||
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
|
||||
*pS2-- = *pbuff++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
/* pbuff initialized to input buffer */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Initializing the loop counter */
|
||||
i = S->N;
|
||||
|
||||
do
|
||||
{
|
||||
/* Writing the re-ordered output back to inplace input buffer */
|
||||
*pbuff++ = *pS1++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
|
||||
/* ---------------------------------------------------------
|
||||
* Step2: Calculate RFFT for N-point input
|
||||
* ---------------------------------------------------------- */
|
||||
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
|
||||
arm_rfft_q31 (S->pRfft, pInlineBuffer, pState);
|
||||
|
||||
/*----------------------------------------------------------------------
|
||||
* Step3: Multiply the FFT output with the weights.
|
||||
*----------------------------------------------------------------------*/
|
||||
arm_cmplx_mult_cmplx_q31 (pState, weights, pState, S->N);
|
||||
|
||||
/* The output of complex multiplication is in 3.29 format.
|
||||
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
|
||||
arm_shift_q31(pState, 2, pState, S->N * 2);
|
||||
|
||||
/* ----------- Post-processing ---------- */
|
||||
/* DCT-IV can be obtained from DCT-II by the equation,
|
||||
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
|
||||
* Hence, Y4(0) = Y2(0)/2 */
|
||||
/* Getting only real part from the output and Converting to DCT-IV */
|
||||
|
||||
/* pbuff initialized to input buffer. */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
/* pS1 initialized to pState */
|
||||
pS1 = pState;
|
||||
|
||||
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
|
||||
in = *pS1++ >> 1U;
|
||||
/* input buffer acts as inplace, so output values are stored in the input itself. */
|
||||
*pbuff++ = in;
|
||||
|
||||
/* pState pointer is incremented twice as the real values are located alternatively in the array */
|
||||
pS1++;
|
||||
|
||||
/* Initializing the loop counter */
|
||||
i = (S->N - 1U);
|
||||
|
||||
while (i > 0U)
|
||||
{
|
||||
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
|
||||
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
|
||||
in = *pS1++ - in;
|
||||
*pbuff++ = in;
|
||||
|
||||
/* points to the next real value */
|
||||
pS1++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
}
|
||||
|
||||
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
|
||||
|
||||
/* Initializing loop counter */
|
||||
i = S->N;
|
||||
|
||||
/* pbuff initialized to the pInlineBuffer (now contains the output values) */
|
||||
pbuff = pInlineBuffer;
|
||||
|
||||
do
|
||||
{
|
||||
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
|
||||
in = *pbuff;
|
||||
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
|
||||
|
||||
/* Decrement loop counter */
|
||||
i--;
|
||||
} while (i > 0U);
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of DCT4_IDCT4 group
|
||||
*/
|
||||
161
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f16.c
Normal file
161
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f16.c
Normal file
@@ -0,0 +1,161 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mfcc_f16.c
|
||||
* Description: MFCC function for the f16 version
|
||||
*
|
||||
* $Date: 07 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
#include "dsp/statistics_functions_f16.h"
|
||||
#include "dsp/basic_math_functions_f16.h"
|
||||
#include "dsp/complex_math_functions_f16.h"
|
||||
#include "dsp/fast_math_functions_f16.h"
|
||||
#include "dsp/matrix_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@defgroup MFCC MFCC
|
||||
|
||||
MFCC Transform
|
||||
|
||||
There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MFCC
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief MFCC F16
|
||||
@param[in] S points to the mfcc instance structure
|
||||
@param[in] pSrc points to the input samples
|
||||
@param[out] pDst points to the output MFCC values
|
||||
@param[inout] pTmp points to a temporary buffer of complex
|
||||
|
||||
@return none
|
||||
|
||||
@par Description
|
||||
The number of input samples is the FFT length used
|
||||
when initializing the instance data structure.
|
||||
|
||||
The temporary buffer has a 2*fft length size when MFCC
|
||||
is implemented with CFFT.
|
||||
It has length FFT Length + 2 when implemented with RFFT
|
||||
(default implementation).
|
||||
|
||||
The source buffer is modified by this function.
|
||||
|
||||
*/
|
||||
void arm_mfcc_f16(
|
||||
const arm_mfcc_instance_f16 * S,
|
||||
float16_t *pSrc,
|
||||
float16_t *pDst,
|
||||
float16_t *pTmp
|
||||
)
|
||||
{
|
||||
float16_t maxValue;
|
||||
uint32_t index;
|
||||
uint32_t i;
|
||||
float16_t result;
|
||||
const float16_t *coefs=S->filterCoefs;
|
||||
arm_matrix_instance_f16 pDctMat;
|
||||
|
||||
/* Normalize */
|
||||
arm_absmax_f16(pSrc,S->fftLen,&maxValue,&index);
|
||||
|
||||
arm_scale_f16(pSrc,1.0f16/(_Float16)maxValue,pSrc,S->fftLen);
|
||||
|
||||
/* Multiply by window */
|
||||
arm_mult_f16(pSrc,S->windowCoefs,pSrc,S->fftLen);
|
||||
|
||||
/* Compute spectrum magnitude
|
||||
*/
|
||||
#if defined(ARM_MFCC_CFFT_BASED)
|
||||
/* some HW accelerator for CMSIS-DSP used in some boards
|
||||
are only providing acceleration for CFFT.
|
||||
With ARM_MFCC_CFFT_BASED enabled, CFFT is used and the MFCC
|
||||
will be accelerated on those boards.
|
||||
|
||||
The default is to use RFFT
|
||||
*/
|
||||
/* Convert from real to complex */
|
||||
for(i=0; i < S->fftLen ; i++)
|
||||
{
|
||||
pTmp[2*i] = pSrc[i];
|
||||
pTmp[2*i+1] = 0.0f16;
|
||||
}
|
||||
arm_cfft_f16(&(S->cfft),pTmp,0,1);
|
||||
#else
|
||||
/* Default RFFT based implementation */
|
||||
arm_rfft_fast_f16(&(S->rfft),pSrc,pTmp,0);
|
||||
/* Unpack real values */
|
||||
pTmp[S->fftLen]=pTmp[1];
|
||||
pTmp[S->fftLen+1]=0.0f16;
|
||||
pTmp[1]=0.0f;
|
||||
#endif
|
||||
arm_cmplx_mag_f16(pTmp,pSrc,S->fftLen);
|
||||
|
||||
/* Apply MEL filters */
|
||||
for(i=0; i<S->nbMelFilters; i++)
|
||||
{
|
||||
arm_dot_prod_f16(pSrc+S->filterPos[i],
|
||||
coefs,
|
||||
S->filterLengths[i],
|
||||
&result);
|
||||
|
||||
coefs += S->filterLengths[i];
|
||||
|
||||
pTmp[i] = result;
|
||||
|
||||
}
|
||||
|
||||
/* Compute the log */
|
||||
arm_offset_f16(pTmp,1.0e-4f16,pTmp,S->nbMelFilters);
|
||||
arm_vlog_f16(pTmp,pTmp,S->nbMelFilters);
|
||||
|
||||
/* Multiply with the DCT matrix */
|
||||
|
||||
pDctMat.numRows=S->nbDctOutputs;
|
||||
pDctMat.numCols=S->nbMelFilters;
|
||||
pDctMat.pData=(float16_t*)S->dctCoefs;
|
||||
|
||||
arm_mat_vec_mult_f16(&pDctMat, pTmp, pDst);
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif /* defined(ARM_FLOAT16_SUPPORTED) */
|
||||
/**
|
||||
@} end of MFCC group
|
||||
*/
|
||||
159
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f32.c
Normal file
159
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_f32.c
Normal file
@@ -0,0 +1,159 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mfcc_f32.c
|
||||
* Description: MFCC function for the f32 version
|
||||
*
|
||||
* $Date: 07 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "dsp/statistics_functions.h"
|
||||
#include "dsp/basic_math_functions.h"
|
||||
#include "dsp/complex_math_functions.h"
|
||||
#include "dsp/fast_math_functions.h"
|
||||
#include "dsp/matrix_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@defgroup MFCC MFCC
|
||||
|
||||
MFCC Transform
|
||||
|
||||
There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MFCC
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief MFCC F32
|
||||
@param[in] S points to the mfcc instance structure
|
||||
@param[in] pSrc points to the input samples
|
||||
@param[out] pDst points to the output MFCC values
|
||||
@param[inout] pTmp points to a temporary buffer of complex
|
||||
|
||||
@return none
|
||||
|
||||
@par Description
|
||||
The number of input samples is the FFT length used
|
||||
when initializing the instance data structure.
|
||||
|
||||
The temporary buffer has a 2*fft length size when MFCC
|
||||
is implemented with CFFT.
|
||||
It has length FFT Length + 2 when implemented with RFFT
|
||||
(default implementation).
|
||||
|
||||
The source buffer is modified by this function.
|
||||
|
||||
*/
|
||||
void arm_mfcc_f32(
|
||||
const arm_mfcc_instance_f32 * S,
|
||||
float32_t *pSrc,
|
||||
float32_t *pDst,
|
||||
float32_t *pTmp
|
||||
)
|
||||
{
|
||||
float32_t maxValue;
|
||||
uint32_t index;
|
||||
uint32_t i;
|
||||
float32_t result;
|
||||
const float32_t *coefs=S->filterCoefs;
|
||||
arm_matrix_instance_f32 pDctMat;
|
||||
|
||||
/* Normalize */
|
||||
arm_absmax_f32(pSrc,S->fftLen,&maxValue,&index);
|
||||
|
||||
arm_scale_f32(pSrc,1.0f/maxValue,pSrc,S->fftLen);
|
||||
|
||||
/* Multiply by window */
|
||||
arm_mult_f32(pSrc,S->windowCoefs,pSrc,S->fftLen);
|
||||
|
||||
/* Compute spectrum magnitude
|
||||
*/
|
||||
#if defined(ARM_MFCC_CFFT_BASED)
|
||||
/* some HW accelerator for CMSIS-DSP used in some boards
|
||||
are only providing acceleration for CFFT.
|
||||
With ARM_MFCC_CFFT_BASED enabled, CFFT is used and the MFCC
|
||||
will be accelerated on those boards.
|
||||
|
||||
The default is to use RFFT
|
||||
*/
|
||||
/* Convert from real to complex */
|
||||
for(i=0; i < S->fftLen ; i++)
|
||||
{
|
||||
pTmp[2*i] = pSrc[i];
|
||||
pTmp[2*i+1] = 0.0f;
|
||||
}
|
||||
arm_cfft_f32(&(S->cfft),pTmp,0,1);
|
||||
#else
|
||||
/* Default RFFT based implementation */
|
||||
arm_rfft_fast_f32(&(S->rfft),pSrc,pTmp,0);
|
||||
/* Unpack real values */
|
||||
pTmp[S->fftLen]=pTmp[1];
|
||||
pTmp[S->fftLen+1]=0.0f;
|
||||
pTmp[1]=0.0f;
|
||||
#endif
|
||||
arm_cmplx_mag_f32(pTmp,pSrc,S->fftLen);
|
||||
|
||||
/* Apply MEL filters */
|
||||
for(i=0; i<S->nbMelFilters; i++)
|
||||
{
|
||||
arm_dot_prod_f32(pSrc+S->filterPos[i],
|
||||
coefs,
|
||||
S->filterLengths[i],
|
||||
&result);
|
||||
|
||||
coefs += S->filterLengths[i];
|
||||
|
||||
pTmp[i] = result;
|
||||
|
||||
}
|
||||
|
||||
/* Compute the log */
|
||||
arm_offset_f32(pTmp,1.0e-6f,pTmp,S->nbMelFilters);
|
||||
arm_vlog_f32(pTmp,pTmp,S->nbMelFilters);
|
||||
|
||||
/* Multiply with the DCT matrix */
|
||||
|
||||
pDctMat.numRows=S->nbDctOutputs;
|
||||
pDctMat.numCols=S->nbMelFilters;
|
||||
pDctMat.pData=(float32_t*)S->dctCoefs;
|
||||
|
||||
arm_mat_vec_mult_f32(&pDctMat, pTmp, pDst);
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of MFCC group
|
||||
*/
|
||||
110
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f16.c
Normal file
110
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f16.c
Normal file
@@ -0,0 +1,110 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mfcc_init_f16.c
|
||||
* Description: MFCC initialization function for the f16 version
|
||||
*
|
||||
* $Date: 07 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MFCC
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
#include "dsp/transform_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@brief Initialization of the MFCC F16 instance structure
|
||||
@param[out] S points to the mfcc instance structure
|
||||
@param[in] fftLen fft length
|
||||
@param[in] nbMelFilters number of Mel filters
|
||||
@param[in] nbDctOutputs number of Dct outputs
|
||||
@param[in] dctCoefs points to an array of DCT coefficients
|
||||
@param[in]     filterPos points to the array of filter positions
|
||||
@param[in] filterLengths points to the array of filter lengths
|
||||
@param[in] filterCoefs points to the array of filter coefficients
|
||||
@param[in] windowCoefs points to the array of window coefficients
|
||||
|
||||
@return error status
|
||||
|
||||
@par Description
|
||||
The matrix of Mel filter coefficients is sparse.
|
||||
Most of the coefficients are zero.
|
||||
To avoid multiplying the spectrogram by those zeros, the
|
||||
filter is applied only to a given position in the spectrogram
|
||||
and on a given number of FFT bins (the filter length).
|
||||
It is the reason for the arrays filterPos and filterLengths.
|
||||
|
||||
window coefficients can describe (for instance) a Hamming window.
|
||||
The array has the same size as the FFT length.
|
||||
|
||||
The folder Scripts is containing a Python script which can be used
|
||||
to generate the filter, dct and window arrays.
|
||||
*/
|
||||
|
||||
arm_status arm_mfcc_init_f16(
|
||||
arm_mfcc_instance_f16 * S,
|
||||
uint32_t fftLen,
|
||||
uint32_t nbMelFilters,
|
||||
uint32_t nbDctOutputs,
|
||||
const float16_t *dctCoefs,
|
||||
const uint32_t *filterPos,
|
||||
const uint32_t *filterLengths,
|
||||
const float16_t *filterCoefs,
|
||||
const float16_t *windowCoefs
|
||||
)
|
||||
{
|
||||
arm_status status;
|
||||
|
||||
S->fftLen=fftLen;
|
||||
S->nbMelFilters=nbMelFilters;
|
||||
S->nbDctOutputs=nbDctOutputs;
|
||||
S->dctCoefs=dctCoefs;
|
||||
S->filterPos=filterPos;
|
||||
S->filterLengths=filterLengths;
|
||||
S->filterCoefs=filterCoefs;
|
||||
S->windowCoefs=windowCoefs;
|
||||
|
||||
#if defined(ARM_MFCC_CFFT_BASED)
|
||||
status=arm_cfft_init_f16(&(S->cfft),fftLen);
|
||||
#else
|
||||
status=arm_rfft_fast_init_f16(&(S->rfft),fftLen);
|
||||
#endif
|
||||
|
||||
return(status);
|
||||
}
|
||||
|
||||
#endif /* defined(ARM_FLOAT16_SUPPORTED) */
|
||||
/**
|
||||
@} end of MFCC group
|
||||
*/
|
||||
107
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f32.c
Normal file
107
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_f32.c
Normal file
@@ -0,0 +1,107 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mfcc_init_f32.c
|
||||
* Description: MFCC initialization function for the f32 version
|
||||
*
|
||||
* $Date: 07 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MFCC
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@brief Initialization of the MFCC F32 instance structure
|
||||
@param[out] S points to the mfcc instance structure
|
||||
@param[in] fftLen fft length
|
||||
@param[in] nbMelFilters number of Mel filters
|
||||
@param[in] nbDctOutputs number of Dct outputs
|
||||
@param[in] dctCoefs points to an array of DCT coefficients
|
||||
@param[in]     filterPos points to the array of filter positions
|
||||
@param[in] filterLengths points to the array of filter lengths
|
||||
@param[in] filterCoefs points to the array of filter coefficients
|
||||
@param[in] windowCoefs points to the array of window coefficients
|
||||
|
||||
@return error status
|
||||
|
||||
@par Description
|
||||
The matrix of Mel filter coefficients is sparse.
|
||||
Most of the coefficients are zero.
|
||||
To avoid multiplying the spectrogram by those zeros, the
|
||||
filter is applied only to a given position in the spectrogram
|
||||
and on a given number of FFT bins (the filter length).
|
||||
It is the reason for the arrays filterPos and filterLengths.
|
||||
|
||||
window coefficients can describe (for instance) a Hamming window.
|
||||
The array has the same size as the FFT length.
|
||||
|
||||
The folder Scripts is containing a Python script which can be used
|
||||
to generate the filter, dct and window arrays.
|
||||
*/
|
||||
|
||||
arm_status arm_mfcc_init_f32(
|
||||
arm_mfcc_instance_f32 * S,
|
||||
uint32_t fftLen,
|
||||
uint32_t nbMelFilters,
|
||||
uint32_t nbDctOutputs,
|
||||
const float32_t *dctCoefs,
|
||||
const uint32_t *filterPos,
|
||||
const uint32_t *filterLengths,
|
||||
const float32_t *filterCoefs,
|
||||
const float32_t *windowCoefs
|
||||
)
|
||||
{
|
||||
arm_status status;
|
||||
|
||||
S->fftLen=fftLen;
|
||||
S->nbMelFilters=nbMelFilters;
|
||||
S->nbDctOutputs=nbDctOutputs;
|
||||
S->dctCoefs=dctCoefs;
|
||||
S->filterPos=filterPos;
|
||||
S->filterLengths=filterLengths;
|
||||
S->filterCoefs=filterCoefs;
|
||||
S->windowCoefs=windowCoefs;
|
||||
|
||||
#if defined(ARM_MFCC_CFFT_BASED)
|
||||
status=arm_cfft_init_f32(&(S->cfft),fftLen);
|
||||
#else
|
||||
status=arm_rfft_fast_init_f32(&(S->rfft),fftLen);
|
||||
#endif
|
||||
|
||||
return(status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of MFCC group
|
||||
*/
|
||||
107
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q15.c
Normal file
107
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q15.c
Normal file
@@ -0,0 +1,107 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mfcc_init_q15.c
|
||||
* Description: MFCC initialization function for the q15 version
|
||||
*
|
||||
* $Date: 07 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MFCC
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@brief         Initialization of the MFCC Q15 instance structure
|
||||
@param[out] S points to the mfcc instance structure
|
||||
@param[in] fftLen fft length
|
||||
@param[in] nbMelFilters number of Mel filters
|
||||
@param[in] nbDctOutputs number of Dct outputs
|
||||
@param[in] dctCoefs points to an array of DCT coefficients
|
||||
@param[in]     filterPos points to the array of filter positions
|
||||
@param[in] filterLengths points to the array of filter lengths
|
||||
@param[in] filterCoefs points to the array of filter coefficients
|
||||
@param[in] windowCoefs points to the array of window coefficients
|
||||
|
||||
@return error status
|
||||
|
||||
@par Description
|
||||
The matrix of Mel filter coefficients is sparse.
|
||||
Most of the coefficients are zero.
|
||||
To avoid multiplying the spectrogram by those zeros, the
|
||||
filter is applied only to a given position in the spectrogram
|
||||
and on a given number of FFT bins (the filter length).
|
||||
It is the reason for the arrays filterPos and filterLengths.
|
||||
|
||||
window coefficients can describe (for instance) a Hamming window.
|
||||
The array has the same size as the FFT length.
|
||||
|
||||
The folder Scripts is containing a Python script which can be used
|
||||
to generate the filter, dct and window arrays.
|
||||
*/
|
||||
|
||||
arm_status arm_mfcc_init_q15(
|
||||
arm_mfcc_instance_q15 * S,
|
||||
uint32_t fftLen,
|
||||
uint32_t nbMelFilters,
|
||||
uint32_t nbDctOutputs,
|
||||
const q15_t *dctCoefs,
|
||||
const uint32_t *filterPos,
|
||||
const uint32_t *filterLengths,
|
||||
const q15_t *filterCoefs,
|
||||
const q15_t *windowCoefs
|
||||
)
|
||||
{
|
||||
arm_status status;
|
||||
|
||||
S->fftLen=fftLen;
|
||||
S->nbMelFilters=nbMelFilters;
|
||||
S->nbDctOutputs=nbDctOutputs;
|
||||
S->dctCoefs=dctCoefs;
|
||||
S->filterPos=filterPos;
|
||||
S->filterLengths=filterLengths;
|
||||
S->filterCoefs=filterCoefs;
|
||||
S->windowCoefs=windowCoefs;
|
||||
|
||||
#if defined(ARM_MFCC_CFFT_BASED)
|
||||
status=arm_cfft_init_q15(&(S->cfft),fftLen);
|
||||
#else
|
||||
status=arm_rfft_init_q15(&(S->rfft),fftLen,0,1);
|
||||
#endif
|
||||
|
||||
return(status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of MFCC group
|
||||
*/
|
||||
107
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q31.c
Normal file
107
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_init_q31.c
Normal file
@@ -0,0 +1,107 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mfcc_init_q31.c
|
||||
* Description: MFCC initialization function for the q31 version
|
||||
*
|
||||
* $Date: 07 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MFCC
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@brief         Initialization of the MFCC Q31 instance structure
|
||||
@param[out] S points to the mfcc instance structure
|
||||
@param[in] fftLen fft length
|
||||
@param[in] nbMelFilters number of Mel filters
|
||||
@param[in] nbDctOutputs number of Dct outputs
|
||||
@param[in] dctCoefs points to an array of DCT coefficients
|
||||
@param[in]     filterPos points to the array of filter positions
|
||||
@param[in] filterLengths points to the array of filter lengths
|
||||
@param[in] filterCoefs points to the array of filter coefficients
|
||||
@param[in] windowCoefs points to the array of window coefficients
|
||||
|
||||
@return error status
|
||||
|
||||
@par Description
|
||||
The matrix of Mel filter coefficients is sparse.
|
||||
Most of the coefficients are zero.
|
||||
To avoid multiplying the spectrogram by those zeros, the
|
||||
filter is applied only to a given position in the spectrogram
|
||||
and on a given number of FFT bins (the filter length).
|
||||
It is the reason for the arrays filterPos and filterLengths.
|
||||
|
||||
window coefficients can describe (for instance) a Hamming window.
|
||||
The array has the same size as the FFT length.
|
||||
|
||||
The folder Scripts is containing a Python script which can be used
|
||||
to generate the filter, dct and window arrays.
|
||||
*/
|
||||
|
||||
arm_status arm_mfcc_init_q31(
|
||||
arm_mfcc_instance_q31 * S,
|
||||
uint32_t fftLen,
|
||||
uint32_t nbMelFilters,
|
||||
uint32_t nbDctOutputs,
|
||||
const q31_t *dctCoefs,
|
||||
const uint32_t *filterPos,
|
||||
const uint32_t *filterLengths,
|
||||
const q31_t *filterCoefs,
|
||||
const q31_t *windowCoefs
|
||||
)
|
||||
{
|
||||
arm_status status;
|
||||
|
||||
S->fftLen=fftLen;
|
||||
S->nbMelFilters=nbMelFilters;
|
||||
S->nbDctOutputs=nbDctOutputs;
|
||||
S->dctCoefs=dctCoefs;
|
||||
S->filterPos=filterPos;
|
||||
S->filterLengths=filterLengths;
|
||||
S->filterCoefs=filterCoefs;
|
||||
S->windowCoefs=windowCoefs;
|
||||
|
||||
#if defined(ARM_MFCC_CFFT_BASED)
|
||||
status=arm_cfft_init_q31(&(S->cfft),fftLen);
|
||||
#else
|
||||
status=arm_rfft_init_q31(&(S->rfft),fftLen,0,1);
|
||||
#endif
|
||||
|
||||
return(status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of MFCC group
|
||||
*/
|
||||
208
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q15.c
Normal file
208
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q15.c
Normal file
@@ -0,0 +1,208 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mfcc_q15.c
|
||||
* Description: MFCC function for the q15 version
|
||||
*
|
||||
* $Date: 07 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "dsp/statistics_functions.h"
|
||||
#include "dsp/basic_math_functions.h"
|
||||
#include "dsp/complex_math_functions.h"
|
||||
#include "dsp/fast_math_functions.h"
|
||||
#include "dsp/matrix_functions.h"
|
||||
|
||||
/* Constants for Q15 implementation */
|
||||
#define LOG2TOLOG_Q15 0x02C5C860
|
||||
#define MICRO_Q15 0x00000219
|
||||
#define SHIFT_MELFILTER_SATURATION_Q15 10
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@defgroup MFCC MFCC
|
||||
|
||||
MFCC Transform
|
||||
|
||||
There are separate functions for floating-point, Q15, and Q31 data types.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MFCC
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief MFCC Q15
|
||||
@param[in] S points to the mfcc instance structure
|
||||
@param[in] pSrc points to the input samples in Q15
|
||||
@param[out] pDst points to the output MFCC values in q8.7 format
|
||||
@param[inout] pTmp points to a temporary buffer of complex
|
||||
|
||||
@return none
|
||||
|
||||
@par Description
|
||||
The number of input samples is the FFT length used
|
||||
when initializing the instance data structure.
|
||||
|
||||
The temporary buffer has a 2*fft length.
|
||||
|
||||
The source buffer is modified by this function.
|
||||
|
||||
The function may saturate. If the FFT length is too
|
||||
big and the number of MEL filters too small then the fixed
|
||||
point computations may saturate.
|
||||
|
||||
*/
|
||||
|
||||
arm_status arm_mfcc_q15(
|
||||
const arm_mfcc_instance_q15 * S,
|
||||
q15_t *pSrc,
|
||||
q15_t *pDst,
|
||||
q31_t *pTmp
|
||||
)
|
||||
{
|
||||
q15_t m;
|
||||
uint32_t index;
|
||||
uint32_t fftShift=0;
|
||||
q31_t logExponent;
|
||||
q63_t result;
|
||||
arm_matrix_instance_q15 pDctMat;
|
||||
uint32_t i;
|
||||
uint32_t coefsPos;
|
||||
uint32_t filterLimit;
|
||||
q15_t *pTmp2=(q15_t*)pTmp;
|
||||
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
// q15
|
||||
arm_absmax_q15(pSrc,S->fftLen,&m,&index);
|
||||
|
||||
if (m !=0)
|
||||
{
|
||||
q15_t quotient;
|
||||
int16_t shift;
|
||||
|
||||
status = arm_divide_q15(0x7FFF,m,"ient,&shift);
|
||||
if (status != ARM_MATH_SUCCESS)
|
||||
{
|
||||
return(status);
|
||||
}
|
||||
|
||||
arm_scale_q15(pSrc,quotient,shift,pSrc,S->fftLen);
|
||||
}
|
||||
|
||||
|
||||
// q15
|
||||
arm_mult_q15(pSrc,S->windowCoefs, pSrc, S->fftLen);
|
||||
|
||||
|
||||
/* Compute spectrum magnitude
|
||||
*/
|
||||
fftShift = 31 - __CLZ(S->fftLen);
|
||||
#if defined(ARM_MFCC_CFFT_BASED)
|
||||
/* some HW accelerator for CMSIS-DSP used in some boards
|
||||
are only providing acceleration for CFFT.
|
||||
With ARM_MFCC_CFFT_BASED enabled, CFFT is used and the MFCC
|
||||
will be accelerated on those boards.
|
||||
|
||||
The default is to use RFFT
|
||||
*/
|
||||
/* Convert from real to complex */
|
||||
for(i=0; i < S->fftLen ; i++)
|
||||
{
|
||||
pTmp2[2*i] = pSrc[i];
|
||||
pTmp2[2*i+1] = 0;
|
||||
}
|
||||
arm_cfft_q15(&(S->cfft),pTmp2,0,1);
|
||||
#else
|
||||
/* Default RFFT based implementation */
|
||||
arm_rfft_q15(&(S->rfft),pSrc,pTmp2);
|
||||
#endif
|
||||
filterLimit = 1 + (S->fftLen >> 1);
|
||||
|
||||
|
||||
// q15 - fftShift
|
||||
arm_cmplx_mag_q15(pTmp2,pSrc,filterLimit);
|
||||
// q14 - fftShift
|
||||
|
||||
/* Apply MEL filters */
|
||||
coefsPos = 0;
|
||||
for(i=0; i<S->nbMelFilters; i++)
|
||||
{
|
||||
arm_dot_prod_q15(pSrc+S->filterPos[i],
|
||||
&(S->filterCoefs[coefsPos]),
|
||||
S->filterLengths[i],
|
||||
&result);
|
||||
|
||||
coefsPos += S->filterLengths[i];
|
||||
|
||||
// q34.29 - fftShift
|
||||
result += MICRO_Q15;
|
||||
result >>= SHIFT_MELFILTER_SATURATION_Q15;
|
||||
// q34.29 - fftShift - satShift
|
||||
pTmp[i] = __SSAT(result,31) ;
|
||||
|
||||
}
|
||||
|
||||
|
||||
// q34.29 - fftShift - satShift
|
||||
/* Compute the log */
|
||||
arm_vlog_q31(pTmp,pTmp,S->nbMelFilters);
|
||||
|
||||
|
||||
// q5.26
|
||||
|
||||
logExponent = fftShift + 2 + SHIFT_MELFILTER_SATURATION_Q15;
|
||||
logExponent = logExponent * LOG2TOLOG_Q15;
|
||||
|
||||
|
||||
// q8.26
|
||||
arm_offset_q31(pTmp,logExponent,pTmp,S->nbMelFilters);
|
||||
arm_shift_q31(pTmp,-19,pTmp,S->nbMelFilters);
|
||||
for(i=0; i<S->nbMelFilters; i++)
|
||||
{
|
||||
pSrc[i] = __SSAT((q15_t)pTmp[i],16);
|
||||
}
|
||||
|
||||
// q8.7
|
||||
|
||||
pDctMat.numRows=S->nbDctOutputs;
|
||||
pDctMat.numCols=S->nbMelFilters;
|
||||
pDctMat.pData=(q15_t*)S->dctCoefs;
|
||||
|
||||
arm_mat_vec_mult_q15(&pDctMat, pSrc, pDst);
|
||||
|
||||
return(status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of MFCC group
|
||||
*/
|
||||
207
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q31.c
Normal file
207
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_mfcc_q31.c
Normal file
@@ -0,0 +1,207 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_mfcc_q31.c
|
||||
* Description: MFCC function for the q31 version
|
||||
*
|
||||
* $Date: 07 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
#include "dsp/statistics_functions.h"
|
||||
#include "dsp/basic_math_functions.h"
|
||||
#include "dsp/complex_math_functions.h"
|
||||
#include "dsp/fast_math_functions.h"
|
||||
#include "dsp/matrix_functions.h"
|
||||
|
||||
/* Constants for Q31 implementation */
|
||||
#define LOG2TOLOG_Q31 0x02C5C860
|
||||
#define MICRO_Q31 0x08637BD0
|
||||
#define SHIFT_MELFILTER_SATURATION_Q31 10
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@defgroup MFCC MFCC
|
||||
|
||||
MFCC Transform
|
||||
|
||||
There are separate functions for floating-point, Q31, and Q15 data types.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup MFCC
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief MFCC Q31
|
||||
@param[in] S points to the mfcc instance structure
|
||||
@param[in] pSrc points to the input samples in Q31
|
||||
@param[out] pDst points to the output MFCC values in q8.23 format
|
||||
@param[inout] pTmp points to a temporary buffer of complex
|
||||
|
||||
@return none
|
||||
|
||||
@par Description
|
||||
The number of input samples is the FFT length used
|
||||
when initializing the instance data structure.
|
||||
|
||||
The temporary buffer has a 2*fft length.
|
||||
|
||||
The source buffer is modified by this function.
|
||||
|
||||
The function may saturate. If the FFT length is too
|
||||
big and the number of MEL filters too small then the fixed
|
||||
point computations may saturate.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
arm_status arm_mfcc_q31(
|
||||
const arm_mfcc_instance_q31 * S,
|
||||
q31_t *pSrc,
|
||||
q31_t *pDst,
|
||||
q31_t *pTmp
|
||||
)
|
||||
{
|
||||
q31_t m;
|
||||
uint32_t index;
|
||||
uint32_t fftShift=0;
|
||||
q31_t logExponent;
|
||||
q63_t result;
|
||||
arm_matrix_instance_q31 pDctMat;
|
||||
uint32_t i;
|
||||
uint32_t coefsPos;
|
||||
uint32_t filterLimit;
|
||||
q31_t *pTmp2=(q31_t*)pTmp;
|
||||
|
||||
arm_status status = ARM_MATH_SUCCESS;
|
||||
|
||||
// q31
|
||||
arm_absmax_q31(pSrc,S->fftLen,&m,&index);
|
||||
|
||||
if (m !=0)
|
||||
{
|
||||
q31_t quotient;
|
||||
int16_t shift;
|
||||
|
||||
status = arm_divide_q31(0x7FFFFFFF,m,"ient,&shift);
|
||||
if (status != ARM_MATH_SUCCESS)
|
||||
{
|
||||
return(status);
|
||||
}
|
||||
|
||||
arm_scale_q31(pSrc,quotient,shift,pSrc,S->fftLen);
|
||||
}
|
||||
|
||||
|
||||
// q31
|
||||
arm_mult_q31(pSrc,S->windowCoefs, pSrc, S->fftLen);
|
||||
|
||||
|
||||
/* Compute spectrum magnitude
|
||||
*/
|
||||
fftShift = 31 - __CLZ(S->fftLen);
|
||||
#if defined(ARM_MFCC_CFFT_BASED)
|
||||
/* some HW accelerator for CMSIS-DSP used in some boards
|
||||
are only providing acceleration for CFFT.
|
||||
With ARM_MFCC_CFFT_BASED enabled, CFFT is used and the MFCC
|
||||
will be accelerated on those boards.
|
||||
|
||||
The default is to use RFFT
|
||||
*/
|
||||
/* Convert from real to complex */
|
||||
for(i=0; i < S->fftLen ; i++)
|
||||
{
|
||||
pTmp2[2*i] = pSrc[i];
|
||||
pTmp2[2*i+1] = 0;
|
||||
}
|
||||
arm_cfft_q31(&(S->cfft),pTmp2,0,1);
|
||||
#else
|
||||
/* Default RFFT based implementation */
|
||||
arm_rfft_q31(&(S->rfft),pSrc,pTmp2);
|
||||
#endif
|
||||
filterLimit = 1 + (S->fftLen >> 1);
|
||||
|
||||
|
||||
// q31 - fftShift
|
||||
arm_cmplx_mag_q31(pTmp2,pSrc,filterLimit);
|
||||
// q30 - fftShift
|
||||
|
||||
|
||||
/* Apply MEL filters */
|
||||
coefsPos = 0;
|
||||
for(i=0; i<S->nbMelFilters; i++)
|
||||
{
|
||||
arm_dot_prod_q31(pSrc+S->filterPos[i],
|
||||
&(S->filterCoefs[coefsPos]),
|
||||
S->filterLengths[i],
|
||||
&result);
|
||||
|
||||
coefsPos += S->filterLengths[i];
|
||||
|
||||
// q16.48 - fftShift
|
||||
result += MICRO_Q31;
|
||||
result >>= (SHIFT_MELFILTER_SATURATION_Q31 + 18);
|
||||
// q16.29 - fftShift - satShift
|
||||
pTmp[i] = __SSAT(result,31) ;
|
||||
|
||||
}
|
||||
|
||||
|
||||
// q16.29 - fftShift - satShift
|
||||
/* Compute the log */
|
||||
arm_vlog_q31(pTmp,pTmp,S->nbMelFilters);
|
||||
|
||||
|
||||
// q5.26
|
||||
|
||||
logExponent = fftShift + 2 + SHIFT_MELFILTER_SATURATION_Q31;
|
||||
logExponent = logExponent * LOG2TOLOG_Q31;
|
||||
|
||||
|
||||
// q5.26
|
||||
arm_offset_q31(pTmp,logExponent,pTmp,S->nbMelFilters);
|
||||
arm_shift_q31(pTmp,-3,pTmp,S->nbMelFilters);
|
||||
|
||||
|
||||
// q8.23
|
||||
|
||||
pDctMat.numRows=S->nbDctOutputs;
|
||||
pDctMat.numCols=S->nbMelFilters;
|
||||
pDctMat.pData=(q31_t*)S->dctCoefs;
|
||||
|
||||
arm_mat_vec_mult_q31(&pDctMat, pTmp, pDst);
|
||||
|
||||
return(status);
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of MFCC group
|
||||
*/
|
||||
318
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c
Normal file
318
Drivers/CMSIS/DSP/Source/TransformFunctions/arm_rfft_f32.c
Normal file
@@ -0,0 +1,318 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_rfft_f32.c
|
||||
* Description: RFFT & RIFFT Floating point process function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/transform_functions.h"
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
* Internal functions prototypes
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
extern void arm_radix4_butterfly_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier);
|
||||
|
||||
extern void arm_radix4_butterfly_inverse_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftLen,
|
||||
const float32_t * pCoef,
|
||||
uint16_t twidCoefModifier,
|
||||
float32_t onebyfftLen);
|
||||
|
||||
extern void arm_bitreversal_f32(
|
||||
float32_t * pSrc,
|
||||
uint16_t fftSize,
|
||||
uint16_t bitRevFactor,
|
||||
const uint16_t * pBitRevTab);
|
||||
|
||||
void arm_split_rfft_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float32_t * pATable,
|
||||
const float32_t * pBTable,
|
||||
float32_t * pDst,
|
||||
uint32_t modifier);
|
||||
|
||||
void arm_split_rifft_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float32_t * pATable,
|
||||
const float32_t * pBTable,
|
||||
float32_t * pDst,
|
||||
uint32_t modifier);
|
||||
|
||||
/**
|
||||
@ingroup groupTransforms
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup RealFFT
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Processing function for the floating-point RFFT/RIFFT.
|
||||
Source buffer is modified by this function.
|
||||
|
||||
@deprecated Do not use this function. It has been superceded by \ref arm_rfft_fast_f32 and will be removed in the future.
|
||||
@param[in] S points to an instance of the floating-point RFFT/RIFFT structure
|
||||
@param[in] pSrc points to the input buffer
|
||||
@param[out] pDst points to the output buffer
|
||||
@return none
|
||||
|
||||
@par
|
||||
For the RIFFT, the source buffer must at least have length
|
||||
fftLenReal + 2.
|
||||
The last two elements must be equal to what would be generated
|
||||
by the RFFT:
|
||||
(pSrc[0] - pSrc[1]) and 0.0f
|
||||
*/
|
||||
|
||||
void arm_rfft_f32(
|
||||
const arm_rfft_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst)
|
||||
{
|
||||
const arm_cfft_radix4_instance_f32 *S_CFFT = S->pCfft;
|
||||
|
||||
/* Calculation of Real IFFT of input */
|
||||
if (S->ifftFlagR == 1U)
|
||||
{
|
||||
/* Real IFFT core process */
|
||||
arm_split_rifft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
|
||||
|
||||
|
||||
/* Complex radix-4 IFFT process */
|
||||
arm_radix4_butterfly_inverse_f32 (pDst, S_CFFT->fftLen, S_CFFT->pTwiddle, S_CFFT->twidCoefModifier, S_CFFT->onebyfftLen);
|
||||
|
||||
/* Bit reversal process */
|
||||
if (S->bitReverseFlagR == 1U)
|
||||
{
|
||||
arm_bitreversal_f32 (pDst, S_CFFT->fftLen, S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Calculation of RFFT of input */
|
||||
|
||||
/* Complex radix-4 FFT process */
|
||||
arm_radix4_butterfly_f32 (pSrc, S_CFFT->fftLen, S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
|
||||
|
||||
/* Bit reversal process */
|
||||
if (S->bitReverseFlagR == 1U)
|
||||
{
|
||||
arm_bitreversal_f32 (pSrc, S_CFFT->fftLen, S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
|
||||
}
|
||||
|
||||
/* Real FFT core process */
|
||||
arm_split_rfft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of RealFFT group
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Core Real FFT process
|
||||
@param[in] pSrc points to input buffer
|
||||
@param[in] fftLen length of FFT
|
||||
@param[in] pATable points to twiddle Coef A buffer
|
||||
@param[in] pBTable points to twiddle Coef B buffer
|
||||
@param[out] pDst points to output buffer
|
||||
@param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_split_rfft_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float32_t * pATable,
|
||||
const float32_t * pBTable,
|
||||
float32_t * pDst,
|
||||
uint32_t modifier)
|
||||
{
|
||||
uint32_t i; /* Loop Counter */
|
||||
float32_t outR, outI; /* Temporary variables for output */
|
||||
const float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
|
||||
float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
|
||||
float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4U * fftLen) - 1U]; /* temp pointers for output buffer */
|
||||
float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2U * fftLen) - 1U]; /* temp pointers for input buffer */
|
||||
|
||||
/* Init coefficient pointers */
|
||||
pCoefA = &pATable[modifier * 2];
|
||||
pCoefB = &pBTable[modifier * 2];
|
||||
|
||||
i = fftLen - 1U;
|
||||
|
||||
while (i > 0U)
|
||||
{
|
||||
/*
|
||||
outR = ( pSrc[2 * i] * pATable[2 * i]
|
||||
- pSrc[2 * i + 1] * pATable[2 * i + 1]
|
||||
+ pSrc[2 * n - 2 * i] * pBTable[2 * i]
|
||||
+ pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
|
||||
|
||||
outI = ( pIn[2 * i + 1] * pATable[2 * i]
|
||||
+ pIn[2 * i] * pATable[2 * i + 1]
|
||||
+ pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
|
||||
- pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
|
||||
*/
|
||||
|
||||
/* read pATable[2 * i] */
|
||||
CoefA1 = *pCoefA++;
|
||||
/* pATable[2 * i + 1] */
|
||||
CoefA2 = *pCoefA;
|
||||
|
||||
/* pSrc[2 * i] * pATable[2 * i] */
|
||||
outR = *pSrc1 * CoefA1;
|
||||
/* pSrc[2 * i] * CoefA2 */
|
||||
outI = *pSrc1++ * CoefA2;
|
||||
|
||||
/* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
|
||||
outR -= (*pSrc1 + *pSrc2) * CoefA2;
|
||||
/* pSrc[2 * i + 1] * CoefA1 */
|
||||
outI += *pSrc1++ * CoefA1;
|
||||
|
||||
CoefB1 = *pCoefB;
|
||||
|
||||
/* pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
|
||||
outI -= *pSrc2-- * CoefB1;
|
||||
/* pSrc[2 * fftLen - 2 * i] * CoefA2 */
|
||||
outI -= *pSrc2 * CoefA2;
|
||||
|
||||
/* pSrc[2 * fftLen - 2 * i] * CoefB1 */
|
||||
outR += *pSrc2-- * CoefB1;
|
||||
|
||||
/* write output */
|
||||
*pDst1++ = outR;
|
||||
*pDst1++ = outI;
|
||||
|
||||
/* write complex conjugate output */
|
||||
*pDst2-- = -outI;
|
||||
*pDst2-- = outR;
|
||||
|
||||
/* update coefficient pointer */
|
||||
pCoefB = pCoefB + (modifier * 2U);
|
||||
pCoefA = pCoefA + ((modifier * 2U) - 1U);
|
||||
|
||||
i--;
|
||||
|
||||
}
|
||||
|
||||
pDst[2U * fftLen] = pSrc[0] - pSrc[1];
|
||||
pDst[(2U * fftLen) + 1U] = 0.0f;
|
||||
|
||||
pDst[0] = pSrc[0] + pSrc[1];
|
||||
pDst[1] = 0.0f;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief Core Real IFFT process
|
||||
@param[in] pSrc points to input buffer
|
||||
@param[in] fftLen length of FFT
|
||||
@param[in] pATable points to twiddle Coef A buffer
|
||||
@param[in] pBTable points to twiddle Coef B buffer
|
||||
@param[out] pDst points to output buffer
|
||||
@param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_split_rifft_f32(
|
||||
float32_t * pSrc,
|
||||
uint32_t fftLen,
|
||||
const float32_t * pATable,
|
||||
const float32_t * pBTable,
|
||||
float32_t * pDst,
|
||||
uint32_t modifier)
|
||||
{
|
||||
float32_t outR, outI; /* Temporary variables for output */
|
||||
const float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
|
||||
float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
|
||||
float32_t *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2U * fftLen) + 1U];
|
||||
|
||||
pCoefA = &pATable[0];
|
||||
pCoefB = &pBTable[0];
|
||||
|
||||
while (fftLen > 0U)
|
||||
{
|
||||
/*
|
||||
outR = ( pIn[2 * i] * pATable[2 * i]
|
||||
+ pIn[2 * i + 1] * pATable[2 * i + 1]
|
||||
+ pIn[2 * n - 2 * i] * pBTable[2 * i]
|
||||
- pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
|
||||
|
||||
outI = ( pIn[2 * i + 1] * pATable[2 * i]
|
||||
- pIn[2 * i] * pATable[2 * i + 1]
|
||||
- pIn[2 * n - 2 * i] * pBTable[2 * i + 1]
|
||||
- pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
|
||||
*/
|
||||
|
||||
CoefA1 = *pCoefA++;
|
||||
CoefA2 = *pCoefA;
|
||||
|
||||
/* outR = (pSrc[2 * i] * CoefA1 */
|
||||
outR = *pSrc1 * CoefA1;
|
||||
|
||||
/* - pSrc[2 * i] * CoefA2 */
|
||||
outI = -(*pSrc1++) * CoefA2;
|
||||
|
||||
/* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
|
||||
outR += (*pSrc1 + *pSrc2) * CoefA2;
|
||||
|
||||
/* pSrc[2 * i + 1] * CoefA1 */
|
||||
outI += (*pSrc1++) * CoefA1;
|
||||
|
||||
CoefB1 = *pCoefB;
|
||||
|
||||
/* - pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
|
||||
outI -= *pSrc2-- * CoefB1;
|
||||
|
||||
/* pSrc[2 * fftLen - 2 * i] * CoefB1 */
|
||||
outR += *pSrc2 * CoefB1;
|
||||
|
||||
/* pSrc[2 * fftLen - 2 * i] * CoefA2 */
|
||||
outI += *pSrc2-- * CoefA2;
|
||||
|
||||
/* write output */
|
||||
*pDst++ = outR;
|
||||
*pDst++ = outI;
|
||||
|
||||
/* update coefficient pointer */
|
||||
pCoefB = pCoefB + (modifier * 2);
|
||||
pCoefA = pCoefA + (modifier * 2 - 1);
|
||||
|
||||
/* Decrement loop count */
|
||||
fftLen--;
|
||||
}
|
||||
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user