начало

This commit is contained in:
2025-10-17 15:50:34 +03:00
parent 01bb6ae396
commit 980eb2e91d
1580 changed files with 1884898 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
cmake_minimum_required(VERSION 3.14)

project(CMSISDSPStatistics)

# Project-provided helper modules; they define the configLib() and configDsp()
# commands invoked further down.
include(configLib)
include(configDsp)

# Static library holding the statistics kernels. Sources are attached below.
add_library(CMSISDSPStatistics STATIC)

# Sources that are always built (f32 / f64 / fixed-point kernels).
# NOTE(review): arm_absmax_f64.c and the arm_absmax_no_idx_*.c files that appear
# in the same commit are not listed here - confirm whether that is intentional.
target_sources(CMSISDSPStatistics PRIVATE
  arm_entropy_f32.c
  arm_entropy_f64.c
  arm_kullback_leibler_f32.c
  arm_kullback_leibler_f64.c
  arm_logsumexp_dot_prod_f32.c
  arm_logsumexp_f32.c
  arm_max_f32.c
  arm_max_no_idx_f32.c
  arm_max_q15.c
  arm_max_q31.c
  arm_max_q7.c
  arm_mean_f32.c
  arm_mean_q15.c
  arm_mean_q31.c
  arm_mean_q7.c
  arm_min_f32.c
  arm_min_q15.c
  arm_min_q31.c
  arm_min_q7.c
  arm_power_f32.c
  arm_power_q15.c
  arm_power_q31.c
  arm_power_q7.c
  arm_rms_f32.c
  arm_rms_q15.c
  arm_rms_q31.c
  arm_std_f32.c
  arm_std_q15.c
  arm_std_q31.c
  arm_var_f32.c
  arm_var_q15.c
  arm_var_q31.c
  arm_absmax_f32.c
  arm_absmax_q15.c
  arm_absmax_q31.c
  arm_absmax_q7.c
  arm_absmin_f32.c
  arm_absmin_q15.c
  arm_absmin_q31.c
  arm_absmin_q7.c
)

# Apply the shared library / DSP configuration to the target.
configLib(CMSISDSPStatistics ${ROOT})
configDsp(CMSISDSPStatistics ${ROOT})

### Includes
target_include_directories(CMSISDSPStatistics PUBLIC "${DSP}/Include")

# Half-precision kernels are added unless ARMAC5 or DISABLEFLOAT16 is set.
if (NOT (ARMAC5 OR DISABLEFLOAT16))
  target_sources(CMSISDSPStatistics PRIVATE
    arm_max_f16.c
    arm_min_f16.c
    arm_mean_f16.c
    arm_power_f16.c
    arm_rms_f16.c
    arm_std_f16.c
    arm_var_f16.c
    arm_entropy_f16.c
    arm_kullback_leibler_f16.c
    arm_logsumexp_dot_prod_f16.c
    arm_logsumexp_f16.c
    arm_max_no_idx_f16.c
    arm_absmax_f16.c
    arm_absmin_f16.c
  )
endif()

View File

@@ -0,0 +1,68 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: StatisticsFunctions.c
* Description: Combination of all statistics function source files.
*
* $Date: 16. March 2020
* $Revision: V1.1.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_entropy_f32.c"
#include "arm_entropy_f64.c"
#include "arm_kullback_leibler_f32.c"
#include "arm_kullback_leibler_f64.c"
#include "arm_logsumexp_dot_prod_f32.c"
#include "arm_logsumexp_f32.c"
#include "arm_max_f32.c"
#include "arm_max_q15.c"
#include "arm_max_q31.c"
#include "arm_max_q7.c"
#include "arm_max_no_idx_f32.c"
#include "arm_mean_f32.c"
#include "arm_mean_q15.c"
#include "arm_mean_q31.c"
#include "arm_mean_q7.c"
#include "arm_min_f32.c"
#include "arm_min_q15.c"
#include "arm_min_q31.c"
#include "arm_min_q7.c"
#include "arm_power_f32.c"
#include "arm_power_q15.c"
#include "arm_power_q31.c"
#include "arm_power_q7.c"
#include "arm_rms_f32.c"
#include "arm_rms_q15.c"
#include "arm_rms_q31.c"
#include "arm_std_f32.c"
#include "arm_std_q15.c"
#include "arm_std_q31.c"
#include "arm_var_f32.c"
#include "arm_var_q15.c"
#include "arm_var_q31.c"
#include "arm_absmax_f32.c"
#include "arm_absmax_q15.c"
#include "arm_absmax_q31.c"
#include "arm_absmax_q7.c"
#include "arm_absmin_f32.c"
#include "arm_absmin_q15.c"
#include "arm_absmin_q31.c"
#include "arm_absmin_q7.c"

View File

@@ -0,0 +1,42 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: StatisticsFunctionsF16.c
* Description: Combination of all half-precision (f16) statistics function source files.
*
* $Date: 16. March 2020
* $Revision: V1.1.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_max_f16.c"
#include "arm_min_f16.c"
#include "arm_mean_f16.c"
#include "arm_power_f16.c"
#include "arm_rms_f16.c"
#include "arm_std_f16.c"
#include "arm_var_f16.c"
#include "arm_entropy_f16.c"
#include "arm_kullback_leibler_f16.c"
#include "arm_logsumexp_dot_prod_f16.c"
#include "arm_logsumexp_f16.c"
#include "arm_max_no_idx_f16.c"
#include "arm_absmax_f16.c"
#include "arm_absmin_f16.c"

View File

@@ -0,0 +1,274 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_f16.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation: maximum of |pSrc[i]| and its index.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 * pIndex    receives the index associated with that value
 *
 * Eight half-precision lanes are processed per iteration. Each lane tracks its
 * own running maximum and the index at which it was taken; the lanes are
 * reduced to a single value / index at the end.
 *
 * NOTE(review): lane indices are held in 16-bit elements, so the returned
 * index is presumably only meaningful for blockSize <= 65535 - confirm.
 */
void arm_absmax_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    uint32_t         blkCnt;   /* loop counter (32-bit: blockSize >> 3 may exceed 16 bits) */
    f16x8_t          vecSrc;
    float16_t const *pSrcVec;
    f16x8_t          curExtremValVec = vdupq_n_f16(F16_ABSMIN);
    float16_t        maxValue = F16_ABSMIN;
    uint16_t         idx = blockSize;
    uint16x8_t       indexVec;
    uint16x8_t       curExtremIdxVec;
    mve_pred16_t     p0;

    /* Lane indices start at 0..7; every lane's "best index" starts at 0. */
    indexVec = vidupq_u16((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u16(0);

    pSrcVec = (float16_t const *) pSrc;

    /* Full vectors of 8 samples. */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        vecSrc = vldrhq_f16(pSrcVec);
        pSrcVec += 8;
        vecSrc = vabsq(vecSrc);

        /*
         * Per lane: where the new magnitude is >= the current one, take the
         * new value and remember its index.
         */
        p0 = vcmpgeq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        indexVec = indexVec + 8;

        blkCnt--;
    }

    /* Tail: 1..7 remaining samples, handled with a predicate. */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        p0 = vctp16q(blkCnt);

        /*
         * Predicated (zeroing) load so that lanes beyond the end of the
         * input buffer are never read from memory.
         */
        vecSrc = vldrhq_z_f16(pSrcVec, p0);
        vecSrc = vabsq(vecSrc);

        /* Compare / select only in the lanes that hold real samples. */
        p0 = vcmpgeq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }

    /* Reduce the per-lane maxima to a single value. */
    maxValue = vmaxnmvq(maxValue, curExtremValVec);

    /*
     * Lanes that do not hold the overall maximum get the largest possible
     * index (blockSize) so that they lose the minimum reduction below.
     */
    p0 = vcmpgeq(curExtremValVec, maxValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);

    /* Smallest index among the lanes holding the maximum. */
    idx = vminvq(idx, indexVec);

    /* Save result */
    *pIndex = idx;
    *pResult = maxValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
/*
 * Scalar implementation, unrolled by 4: maximum of |pSrc[i]| and its index.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 * pIndex    receives the index of the first sample reaching that value
 *
 * An empty input (blockSize == 0) yields a result of 0 at index 0 instead of
 * reading from pSrc.
 */
void arm_absmax_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    float16_t cur_absmax, out;   /* current magnitude, running maximum */
    uint32_t  blkCnt, outIndex;  /* loop counter, index of running maximum */
    uint32_t  index;             /* index of the last sample of the previous group of 4 */

    /* Guard: (blockSize - 1U) below would wrap around for an empty input. */
    if (blockSize == 0U)
    {
        *pResult = 0.0f16;
        *pIndex = 0U;
        return;
    }

    /* The first sample seeds the running maximum. */
    outIndex = 0U;
    out = *pSrc++;
    out = (out > 0.0f16) ? out : -out;
    index = 0U;

    /* Loop unrolling: process 4 samples per iteration. */
    blkCnt = (blockSize - 1U) >> 2U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            outIndex = index + 1U;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            outIndex = index + 2U;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            outIndex = index + 3U;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            outIndex = index + 4U;
        }

        index += 4U;

        blkCnt--;
    }

    /* Remaining 0..3 samples. */
    blkCnt = (blockSize - 1U) % 4U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f16) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            /* blkCnt counts down, so this is the index of the current sample. */
            outIndex = blockSize - blkCnt;
        }

        blkCnt--;
    }

    /* Store the maximum magnitude and its index. */
    *pResult = out;
    *pIndex = outIndex;
}
#else
/*
 * Plain scalar implementation: maximum of |pSrc[i]| and its index.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 * pIndex    receives the index of the first sample reaching that value
 *
 * An empty input (blockSize == 0) yields a result of 0 at index 0 instead of
 * reading from pSrc.
 */
void arm_absmax_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    float16_t maxVal, out;       /* current magnitude, running maximum */
    uint32_t  blkCnt, outIndex;  /* loop counter, index of running maximum */

    /* Guard: (blockSize - 1U) below would wrap around for an empty input. */
    if (blockSize == 0U)
    {
        *pResult = (float16_t)0.0f;
        *pIndex = 0U;
        return;
    }

    outIndex = 0U;

    /*
     * The first sample seeds the running maximum. fabsf() works in single
     * precision, so the conversions to and from float16_t are spelled out.
     */
    out = (float16_t)fabsf((float32_t)*pSrc++);

    /* One sample has been consumed already. */
    blkCnt = (blockSize - 1U);

    while (blkCnt > 0U)
    {
        maxVal = (float16_t)fabsf((float32_t)*pSrc++);

        if (out < maxVal)
        {
            out = maxVal;
            /* blkCnt counts down, so this is the index of the current sample. */
            outIndex = blockSize - blkCnt;
        }

        blkCnt--;
    }

    /* Store the maximum magnitude and its index. */
    *pResult = out;
    *pIndex = outIndex;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,260 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_f32.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@defgroup AbsMax Absolute Maximum
Computes the maximum value of absolute values of an array of data.
The function returns both the maximum value and its position within the array.
There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation: maximum of |pSrc[i]| and its index.
 *
 * Four single-precision lanes are processed per iteration under a tail
 * predicate. Each lane tracks its own running maximum and the index at which
 * it was taken; the lanes are reduced to one value / index at the end.
 */
void arm_absmax_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    int32_t      remaining = blockSize;                 /* samples left to process */
    uint32_t     nextIdx = 0;                           /* write-back counter for vidupq */
    uint32_t     bestIdx = blockSize;
    float32_t    bestAbs = F32_ABSMIN;
    f32x4_t      laneBest = vdupq_n_f32(F32_ABSMIN);    /* per-lane running maximum */
    f32x4_t      absVec;
    uint32x4_t   laneIdx;                               /* indices of the current samples */
    uint32x4_t   laneBestIdx;                           /* per-lane index of the maximum */
    mve_pred16_t take;

    laneIdx = vidupq_wb_u32(&nextIdx, 1);
    laneBestIdx = vdupq_n_u32(0);

    do
    {
        /* Lanes beyond the end of the input are disabled. */
        mve_pred16_t active = vctp32q(remaining);

        absVec = vldrwq_z_f32((float32_t const *) pSrc, active);
        absVec = vabsq_m(vuninitializedq_f32(), absVec, active);

        /*
         * In active lanes where the new magnitude is >= the current one,
         * take the new value and remember its index.
         */
        take = vcmpgeq_m(absVec, laneBest, active);
        laneBest = vpselq(absVec, laneBest, take);
        laneBestIdx = vpselq(laneIdx, laneBestIdx, take);

        /* Indices for the next group of four samples. */
        laneIdx = vidupq_wb_u32(&nextIdx, 1);

        pSrc += 4;
        remaining -= 4;
    }
    while (remaining > 0);

    /* Reduce the per-lane maxima to a single value. */
    bestAbs = vmaxnmvq(bestAbs, laneBest);

    /*
     * Lanes that do not hold the overall maximum get the largest possible
     * index (blockSize) so that they lose the minimum reduction below.
     */
    take = vcmpgeq(laneBest, bestAbs);
    laneIdx = vpselq(laneBestIdx, vdupq_n_u32(blockSize), take);

    /* Smallest index among the lanes holding the maximum. */
    bestIdx = vminvq(bestIdx, laneIdx);

    /* Save result */
    *pIndex = bestIdx;
    *pResult = bestAbs;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
/*
 * Scalar implementation, unrolled by 4: maximum of |pSrc[i]| and its index.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 * pIndex    receives the index of the first sample reaching that value
 *
 * An empty input (blockSize == 0) yields a result of 0 at index 0 instead of
 * reading from pSrc.
 */
void arm_absmax_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    float32_t cur_absmax, out;   /* current magnitude, running maximum */
    uint32_t  blkCnt, outIndex;  /* loop counter, index of running maximum */
    uint32_t  index;             /* index of the last sample of the previous group of 4 */

    /* Guard: (blockSize - 1U) below would wrap around for an empty input. */
    if (blockSize == 0U)
    {
        *pResult = 0.0f;
        *pIndex = 0U;
        return;
    }

    /* The first sample seeds the running maximum. */
    outIndex = 0U;
    out = *pSrc++;
    out = (out > 0.0f) ? out : -out;
    index = 0U;

    /* Loop unrolling: process 4 samples per iteration. */
    blkCnt = (blockSize - 1U) >> 2U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            outIndex = index + 1U;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            outIndex = index + 2U;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            outIndex = index + 3U;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            outIndex = index + 4U;
        }

        index += 4U;

        blkCnt--;
    }

    /* Remaining 0..3 samples. */
    blkCnt = (blockSize - 1U) % 4U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
            /* blkCnt counts down, so this is the index of the current sample. */
            outIndex = blockSize - blkCnt;
        }

        blkCnt--;
    }

    /* Store the maximum magnitude and its index. */
    *pResult = out;
    *pIndex = outIndex;
}
#else
/*
 * Plain scalar implementation: maximum of |pSrc[i]| and its index.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 * pIndex    receives the index of the first sample reaching that value
 *
 * An empty input (blockSize == 0) yields a result of 0 at index 0 instead of
 * reading from pSrc.
 */
void arm_absmax_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    float32_t maxVal, out;       /* current magnitude, running maximum */
    uint32_t  blkCnt, outIndex;  /* loop counter, index of running maximum */

    /* Guard: (blockSize - 1U) below would wrap around for an empty input. */
    if (blockSize == 0U)
    {
        *pResult = 0.0f;
        *pIndex = 0U;
        return;
    }

    outIndex = 0U;

    /* The first sample seeds the running maximum. */
    out = fabsf(*pSrc++);

    /* One sample has been consumed already. */
    blkCnt = (blockSize - 1U);

    while (blkCnt > 0U)
    {
        maxVal = fabsf(*pSrc++);

        if (out < maxVal)
        {
            out = maxVal;
            /* blkCnt counts down, so this is the index of the current sample. */
            outIndex = blockSize - blkCnt;
        }

        blkCnt--;
    }

    /* Store the maximum magnitude and its index. */
    *pResult = out;
    *pIndex = outIndex;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,92 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_f64.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
/*
 * Double-precision scalar implementation: maximum of |pSrc[i]| and its index.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 * pIndex    receives the index of the first sample reaching that value
 *
 * An empty input (blockSize == 0) yields a result of 0 at index 0 instead of
 * reading from pSrc.
 */
void arm_absmax_f64(
  const float64_t * pSrc,
        uint32_t blockSize,
        float64_t * pResult,
        uint32_t * pIndex)
{
    float64_t maxVal, out;       /* current magnitude, running maximum */
    uint32_t  blkCnt, outIndex;  /* loop counter, index of running maximum */

    /* Guard: (blockSize - 1U) below would wrap around for an empty input. */
    if (blockSize == 0U)
    {
        *pResult = 0.0;
        *pIndex = 0U;
        return;
    }

    outIndex = 0U;

    /* The first sample seeds the running maximum. */
    out = fabs(*pSrc++);

    /* One sample has been consumed already. */
    blkCnt = (blockSize - 1U);

    while (blkCnt > 0U)
    {
        maxVal = fabs(*pSrc++);

        if (out < maxVal)
        {
            out = maxVal;
            /* blkCnt counts down, so this is the index of the current sample. */
            outIndex = blockSize - blkCnt;
        }

        blkCnt--;
    }

    /* Store the maximum magnitude and its index. */
    *pResult = out;
    *pIndex = outIndex;
}
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,228 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_f16.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation: maximum of |pSrc[i]|, no index returned.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 *
 * Eight half-precision lanes are processed per iteration; vmaxnmaq keeps the
 * larger magnitude per lane and vmaxnmavq reduces the lanes at the end.
 */
void arm_absmax_no_idx_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    uint32_t         blkCnt;   /* loop counter (32-bit: blockSize >> 3 may exceed 16 bits) */
    f16x8_t          vecSrc;
    float16_t const *pSrcVec;
    f16x8_t          curExtremValVec = vdupq_n_f16(F16_ABSMIN);
    float16_t        maxValue = F16_ABSMIN;
    mve_pred16_t     p0;

    pSrcVec = (float16_t const *) pSrc;

    /* Full vectors of 8 samples. */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        vecSrc = vldrhq_f16(pSrcVec);
        pSrcVec += 8;

        /* Update the per-lane maximum magnitude. */
        curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec);

        blkCnt--;
    }

    /* Tail: 1..7 remaining samples, handled with a predicate. */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        p0 = vctp16q(blkCnt);

        /*
         * Predicated (zeroing) load so that lanes beyond the end of the
         * input buffer are never read from memory.
         */
        vecSrc = vldrhq_z_f16(pSrcVec, p0);

        /* Only lanes holding real samples are updated. */
        curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0);
    }

    /* Reduce the per-lane maxima to a single value. */
    maxValue = vmaxnmavq(maxValue, curExtremValVec);
    *pResult = maxValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
/*
 * Scalar implementation, unrolled by 4: maximum of |pSrc[i]|, no index.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 *
 * An empty input (blockSize == 0) yields a result of 0 instead of reading
 * from pSrc.
 */
void arm_absmax_no_idx_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    float16_t cur_absmax, out;   /* current magnitude, running maximum */
    uint32_t  blkCnt;            /* loop counter */

    /* Guard: (blockSize - 1U) below would wrap around for an empty input. */
    if (blockSize == 0U)
    {
        *pResult = 0.0f16;
        return;
    }

    /* The first sample seeds the running maximum. */
    out = *pSrc++;
    out = ((_Float16)out > 0.0f16) ? out : -(_Float16)out;

    /* Loop unrolling: process 4 samples per iteration. */
    blkCnt = (blockSize - 1U) >> 2U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
        if ((_Float16)cur_absmax > (_Float16)out)
        {
            out = cur_absmax;
        }

        cur_absmax = *pSrc++;
        cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
        if ((_Float16)cur_absmax > (_Float16)out)
        {
            out = cur_absmax;
        }

        cur_absmax = *pSrc++;
        cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
        if ((_Float16)cur_absmax > (_Float16)out)
        {
            out = cur_absmax;
        }

        cur_absmax = *pSrc++;
        cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
        if ((_Float16)cur_absmax > (_Float16)out)
        {
            out = cur_absmax;
        }

        blkCnt--;
    }

    /* Remaining 0..3 samples. */
    blkCnt = (blockSize - 1U) % 4U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = ((_Float16)cur_absmax > 0.0f16) ? cur_absmax : -(_Float16)cur_absmax;
        if ((_Float16)cur_absmax > (_Float16)out)
        {
            out = cur_absmax;
        }

        blkCnt--;
    }

    /* Store the maximum magnitude. */
    *pResult = out;
}
#else
/*
 * Plain scalar implementation: maximum of |pSrc[i]|, no index returned.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 *
 * An empty input (blockSize == 0) yields a result of 0 instead of reading
 * from pSrc.
 */
void arm_absmax_no_idx_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    float16_t maxVal, out;   /* current magnitude, running maximum */
    uint32_t  blkCnt;        /* loop counter */

    /* Guard: (blockSize - 1U) below would wrap around for an empty input. */
    if (blockSize == 0U)
    {
        *pResult = (_Float16)0.0f;
        return;
    }

    /*
     * The first sample seeds the running maximum. fabsf() works in single
     * precision, hence the explicit conversions.
     */
    out = (_Float16)fabsf((float32_t)*pSrc++);

    /* One sample has been consumed already. */
    blkCnt = (blockSize - 1U);

    while (blkCnt > 0U)
    {
        maxVal = (_Float16)fabsf((float32_t)*pSrc++);

        if ((_Float16)out < (_Float16)maxVal)
        {
            out = maxVal;
        }

        blkCnt--;
    }

    /* Store the maximum magnitude. */
    *pResult = out;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,225 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_f32.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation: maximum of |pSrc[i]|, no index returned.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 *
 * Four single-precision lanes are processed per iteration; vmaxnmaq keeps the
 * larger magnitude per lane and vmaxnmavq reduces the lanes at the end.
 */
void arm_absmax_no_idx_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
    int32_t          blkCnt;   /* loop counter */
    f32x4_t          vecSrc;
    float32_t const *pSrcVec;
    f32x4_t          curExtremValVec = vdupq_n_f32(F32_ABSMIN);
    float32_t        maxValue = F32_ABSMIN;
    mve_pred16_t     p0;

    pSrcVec = (float32_t const *) pSrc;

    /* Full vectors of 4 samples. */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0)
    {
        vecSrc = vldrwq_f32(pSrcVec);
        pSrcVec += 4;

        /* Update the per-lane maximum magnitude. */
        curExtremValVec = vmaxnmaq(vecSrc, curExtremValVec);

        blkCnt--;
    }

    /* Tail: 1..3 remaining samples, handled with a predicate. */
    blkCnt = blockSize & 3;
    if (blkCnt > 0)
    {
        p0 = vctp32q(blkCnt);

        /*
         * Predicated (zeroing) load so that lanes beyond the end of the
         * input buffer are never read from memory.
         */
        vecSrc = vldrwq_z_f32(pSrcVec, p0);

        /* Only lanes holding real samples are updated. */
        curExtremValVec = vmaxnmaq_m(curExtremValVec, vecSrc, p0);
    }

    /* Reduce the per-lane maxima to a single value. */
    maxValue = vmaxnmavq(maxValue, curExtremValVec);
    *pResult = maxValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
/*
 * Scalar implementation, unrolled by 4: maximum of |pSrc[i]|, no index.
 *
 * pSrc      input vector
 * blockSize number of samples in pSrc
 * pResult   receives the maximum absolute value
 *
 * An empty input (blockSize == 0) yields a result of 0 instead of reading
 * from pSrc.
 */
void arm_absmax_no_idx_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
    float32_t cur_absmax, out;   /* current magnitude, running maximum */
    uint32_t  blkCnt;            /* loop counter */

    /* Guard: (blockSize - 1U) below would wrap around for an empty input. */
    if (blockSize == 0U)
    {
        *pResult = 0.0f;
        return;
    }

    /* The first sample seeds the running maximum. */
    out = *pSrc++;
    out = (out > 0.0f) ? out : -out;

    /* Loop unrolling: process 4 samples per iteration. */
    blkCnt = (blockSize - 1U) >> 2U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        blkCnt--;
    }

    /* Remaining 0..3 samples. */
    blkCnt = (blockSize - 1U) % 4U;

    while (blkCnt > 0U)
    {
        cur_absmax = *pSrc++;
        cur_absmax = (cur_absmax > 0.0f) ? cur_absmax : -cur_absmax;
        if (cur_absmax > out)
        {
            out = cur_absmax;
        }

        blkCnt--;
    }

    /* Store the maximum magnitude. */
    *pResult = out;
}
#else
/*
 * Plain scalar variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_no_idx_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
    float32_t candidate;  /* magnitude of the sample being examined */
    float32_t best;       /* largest magnitude found so far */
    uint32_t  remaining;  /* samples still to examine */

    /* The first sample is the initial reference value */
    best = fabsf(*pSrc++);

    /* Walk over the other blockSize-1 samples */
    for (remaining = blockSize - 1U; remaining > 0U; remaining--)
    {
        candidate = fabsf(*pSrc++);

        /* Keep the larger of the two magnitudes */
        if (best < candidate)
        {
            best = candidate;
        }
    }

    /* Hand the maximum magnitude back to the caller */
    *pResult = best;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,87 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_f64.c
* Description: Maximum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
/*
 * Double-precision scalar implementation.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_no_idx_f64(
  const float64_t * pSrc,
        uint32_t blockSize,
        float64_t * pResult)
{
    float64_t candidate;  /* magnitude of the sample being examined */
    float64_t best;       /* largest magnitude found so far */
    uint32_t  remaining;  /* samples still to examine */

    /* The first sample is the initial reference value */
    best = fabs(*pSrc++);

    /* Walk over the other blockSize-1 samples */
    for (remaining = blockSize - 1U; remaining > 0U; remaining--)
    {
        candidate = fabs(*pSrc++);

        /* Keep the larger of the two magnitudes */
        if (best < candidate)
        {
            best = candidate;
        }
    }

    /* Hand the maximum magnitude back to the caller */
    *pResult = best;
}
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,220 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_q15.c
* Description: Maximum value of absolute values of a Q15 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult,
 * processing eight Q15 samples per vector.
 */
void arm_absmax_no_idx_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult)
{
    /* Same width as blockSize: a 16-bit counter would truncate blockSize >> 3
     * for vectors of 2^19 samples or more. */
    uint32_t        blkCnt;
    q15x8_t         vecSrc;
    q15_t const    *pSrcVec = pSrc;
    /* Per-lane running maxima, kept as unsigned magnitudes */
    uint16x8_t      curExtremValVec = vdupq_n_u16(Q15_ABSMIN);
    uint16_t        maxValue;
    mve_pred16_t    p0;

    /* Full vectors of eight samples */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 8;
        /* update per-lane max of |sample| */
        curExtremValVec = vmaxaq(curExtremValVec, vecSrc);
        blkCnt--;
    }

    /* Tail of 1..7 samples, handled with a predicate */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        p0 = vctp16q(blkCnt);
        /* Predicated (zeroing) load: inactive lanes are not fetched, so the
         * tail never reads past the end of the input buffer. */
        vecSrc = vld1q_z_s16(pSrcVec, p0);
        /* Only the active lanes take part in the max update */
        curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0);
    }

    /* Reduce the per-lane maxima to a single unsigned magnitude */
    maxValue = vmaxavq(Q15_ABSMIN, (q15x8_t)curExtremValVec);

    /* The magnitude of the most negative Q15 value (0x8000) does not fit in
     * q15_t; saturate to 0x7FFF, as the scalar variants of this function do. */
    *pResult = (maxValue > 0x7FFFU) ? (q15_t)0x7FFF : (q15_t)maxValue;
}
#else
#if defined(ARM_MATH_DSP)
/*
 * DSP-extension scalar variant with the main loop unrolled by four.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult;
 * negation of non-positive samples goes through __QSUB16.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_no_idx_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult)
{
    q15_t    sample;     /* magnitude of the sample being examined */
    q15_t    best;       /* largest magnitude found so far */
    uint32_t remaining;  /* loop counter */

    /* Seed the running maximum with |pSrc[0]| */
    best = *pSrc++;
    if (best <= 0)
    {
        best = (q15_t)__QSUB16(0, best);
    }

    /* Unrolled part: four of the remaining blockSize-1 samples per pass */
    for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        best   = (sample > best) ? sample : best;

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        best   = (sample > best) ? sample : best;

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        best   = (sample > best) ? sample : best;

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        best   = (sample > best) ? sample : best;
    }

    /* Leftover samples that did not fill a group of four */
    for (remaining = (blockSize - 1U) % 4U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        best   = (sample > best) ? sample : best;
    }

    /* Hand the maximum magnitude back to the caller */
    *pResult = best;
}
#else
/*
 * Plain scalar variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult.
 * The magnitude of 0x8000 is saturated to 0x7fff.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_no_idx_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult)
{
    q15_t    raw;        /* sample as read from the input */
    q15_t    candidate;  /* saturated magnitude of raw */
    q15_t    best;       /* largest magnitude found so far */
    uint32_t remaining;  /* samples still to examine */

    /* The first sample is the initial reference value */
    raw = *pSrc++;
    if (raw > 0)
    {
        best = raw;
    }
    else if (raw == (q15_t) 0x8000)
    {
        best = 0x7fff;
    }
    else
    {
        best = -raw;
    }

    /* Walk over the other blockSize-1 samples */
    for (remaining = blockSize - 1U; remaining > 0U; remaining--)
    {
        raw = *pSrc++;
        if (raw > 0)
        {
            candidate = raw;
        }
        else if (raw == (q15_t) 0x8000)
        {
            candidate = 0x7fff;
        }
        else
        {
            candidate = -raw;
        }

        /* Keep the larger of the two magnitudes */
        if (best < candidate)
        {
            best = candidate;
        }
    }

    /* Hand the maximum magnitude back to the caller */
    *pResult = best;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,220 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_q31.c
* Description: Maximum value of absolute values of a Q31 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult,
 * processing four Q31 samples per vector.
 */
void arm_absmax_no_idx_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult)
{
    int32_t         blkCnt;           /* loop counter */
    q31x4_t         vecSrc;
    q31_t const    *pSrcVec = pSrc;
    /* Per-lane running maxima, kept as unsigned magnitudes */
    uint32x4_t      curExtremValVec = vdupq_n_u32(Q31_ABSMIN);
    uint32_t        maxValue;
    mve_pred16_t    p0;

    /* Full vectors of four samples */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0)
    {
        vecSrc = vldrwq_s32(pSrcVec);
        pSrcVec += 4;
        /* update per-lane max of |sample| */
        curExtremValVec = vmaxaq(curExtremValVec, vecSrc);
        blkCnt--;
    }

    /* Tail of 1..3 samples, handled with a predicate */
    blkCnt = blockSize & 3;
    if (blkCnt > 0)
    {
        p0 = vctp32q(blkCnt);
        /* Predicated (zeroing) load: inactive lanes are not fetched, so the
         * tail never reads past the end of the input buffer. */
        vecSrc = vld1q_z_s32(pSrcVec, p0);
        /* Only the active lanes take part in the max update */
        curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0);
    }

    /* Reduce the per-lane maxima to a single unsigned magnitude */
    maxValue = vmaxavq(Q31_ABSMIN, (q31x4_t)curExtremValVec);

    /* The magnitude of INT32_MIN does not fit in q31_t; saturate to
     * INT32_MAX, as the scalar variants of this function do. */
    *pResult = (maxValue > (uint32_t)INT32_MAX) ? INT32_MAX : (q31_t)maxValue;
}
#else
#if defined(ARM_MATH_DSP)
/*
 * DSP-extension scalar variant with the main loop unrolled by four.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult;
 * negation of non-positive samples goes through __QSUB.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_no_idx_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult)
{
    q31_t    sample;     /* magnitude of the sample being examined */
    q31_t    best;       /* largest magnitude found so far */
    uint32_t remaining;  /* loop counter */

    /* Seed the running maximum with |pSrc[0]| */
    best = *pSrc++;
    if (best <= 0)
    {
        best = (q31_t)__QSUB(0, best);
    }

    /* Unrolled part: four of the remaining blockSize-1 samples per pass */
    for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        best   = (sample > best) ? sample : best;

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        best   = (sample > best) ? sample : best;

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        best   = (sample > best) ? sample : best;

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        best   = (sample > best) ? sample : best;
    }

    /* Leftover samples that did not fill a group of four */
    for (remaining = (blockSize - 1U) % 4U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        best   = (sample > best) ? sample : best;
    }

    /* Hand the maximum magnitude back to the caller */
    *pResult = best;
}
#else
/*
 * Plain scalar variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult.
 * The magnitude of INT32_MIN is saturated to INT32_MAX.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_no_idx_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult)
{
    q31_t    raw;        /* sample as read from the input */
    q31_t    candidate;  /* saturated magnitude of raw */
    q31_t    best;       /* largest magnitude found so far */
    uint32_t remaining;  /* samples still to examine */

    /* The first sample is the initial reference value */
    raw = *pSrc++;
    if (raw > 0)
    {
        best = raw;
    }
    else if (raw == INT32_MIN)
    {
        best = INT32_MAX;
    }
    else
    {
        best = -raw;
    }

    /* Walk over the other blockSize-1 samples */
    for (remaining = blockSize - 1U; remaining > 0U; remaining--)
    {
        raw = *pSrc++;
        if (raw > 0)
        {
            candidate = raw;
        }
        else if (raw == INT32_MIN)
        {
            candidate = INT32_MAX;
        }
        else
        {
            candidate = -raw;
        }

        /* Keep the larger of the two magnitudes */
        if (best < candidate)
        {
            best = candidate;
        }
    }

    /* Hand the maximum magnitude back to the caller */
    *pResult = best;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,224 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_no_idx_q7.c
* Description: Maximum value of absolute values of a Q7 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <stdint.h>
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult,
 * processing sixteen Q7 samples per vector.
 */
void arm_absmax_no_idx_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult)
{
    int32_t         blkCnt;           /* loop counter */
    q7x16_t         vecSrc;
    q7_t const     *pSrcVec = pSrc;
    /* Per-lane running maxima, kept as unsigned magnitudes */
    uint8x16_t      curExtremValVec = vdupq_n_u8(Q7_ABSMIN);
    uint8_t         maxValue;
    mve_pred16_t    p0;

    /* Full vectors of sixteen samples */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 16;
        /* update per-lane max of |sample| */
        curExtremValVec = vmaxaq(curExtremValVec, vecSrc);
        blkCnt--;
    }

    /* Tail of 1..15 samples, handled with a predicate */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0)
    {
        p0 = vctp8q(blkCnt);
        /* Predicated (zeroing) load: inactive lanes are not fetched, so the
         * tail never reads past the end of the input buffer. */
        vecSrc = vld1q_z_s8(pSrcVec, p0);
        /* Only the active lanes take part in the max update */
        curExtremValVec = vmaxaq_m(curExtremValVec, vecSrc, p0);
    }

    /* Reduce the per-lane maxima to a single unsigned magnitude */
    maxValue = vmaxavq(Q7_ABSMIN, (q7x16_t)curExtremValVec);

    /* The magnitude of the most negative Q7 value (0x80) does not fit in
     * q7_t; saturate to 0x7F, as the scalar variants of this function do. */
    *pResult = (maxValue > 0x7FU) ? (q7_t)0x7F : (q7_t)maxValue;
}
#else
#if defined(ARM_MATH_DSP)
/*
 * DSP-extension scalar variant with the main loop unrolled by four.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult;
 * negation of non-positive samples goes through __QSUB8.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_no_idx_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult)
{
    q7_t     sample;     /* magnitude of the sample being examined */
    q7_t     best;       /* largest magnitude found so far */
    uint32_t remaining;  /* loop counter */

    /* Seed the running maximum with |pSrc[0]| */
    best = *pSrc++;
    if (best <= 0)
    {
        best = (q7_t)__QSUB8(0, best);
    }

    /* Unrolled part: four of the remaining blockSize-1 samples per pass */
    for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q7_t)__QSUB8(0, sample);
        best   = (sample > best) ? sample : best;

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q7_t)__QSUB8(0, sample);
        best   = (sample > best) ? sample : best;

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q7_t)__QSUB8(0, sample);
        best   = (sample > best) ? sample : best;

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q7_t)__QSUB8(0, sample);
        best   = (sample > best) ? sample : best;
    }

    /* Leftover samples that did not fill a group of four */
    for (remaining = (blockSize - 1U) % 4U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q7_t)__QSUB8(0, sample);
        best   = (sample > best) ? sample : best;
    }

    /* Hand the maximum magnitude back to the caller */
    *pResult = best;
}
#else
/*
 * Plain scalar variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult.
 * The magnitude of 0x80 is saturated to 0x7f.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_no_idx_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult)
{
    q7_t     raw;        /* sample as read from the input */
    q7_t     candidate;  /* saturated magnitude of raw */
    q7_t     best;       /* largest magnitude found so far */
    uint32_t remaining;  /* samples still to examine */

    /* The first sample is the initial reference value */
    raw = *pSrc++;
    if (raw > 0)
    {
        best = raw;
    }
    else if (raw == (q7_t) 0x80)
    {
        best = (q7_t) 0x7f;
    }
    else
    {
        best = -raw;
    }

    /* Walk over the other blockSize-1 samples */
    for (remaining = blockSize - 1U; remaining > 0U; remaining--)
    {
        raw = *pSrc++;
        if (raw > 0)
        {
            candidate = raw;
        }
        else if (raw == (q7_t) 0x80)
        {
            candidate = (q7_t) 0x7f;
        }
        else
        {
            candidate = -raw;
        }

        /* Keep the larger of the two magnitudes */
        if (best < candidate)
        {
            best = candidate;
        }
    }

    /* Hand the maximum magnitude back to the caller */
    *pResult = best;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,236 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_q15.c
* Description: Maximum value of absolute values of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult
 * and a position holding it to *pIndex, processing eight Q15 samples per
 * vector with tail predication.
 *
 * Positions are tracked in 16-bit lanes.
 * NOTE(review): positions of 65536 and above presumably wrap - confirm the
 * supported blockSize range with the callers.
 */
void arm_absmax_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult,
        uint32_t * pIndex)
{
    int32_t         blkCnt;           /* samples still to process */
    q15x8_t         extremValVec = vdupq_n_s16(Q15_ABSMIN);
    q15_t           maxValue = Q15_ABSMIN;
    uint16x8_t      indexVec;
    uint16x8_t      extremIdxVec;
    mve_pred16_t    p0;
    uint16_t        extremIdxArr[8];

    /* Pre-fill the per-lane index store with the largest possible index so
     * that lanes which are never written below (blockSize < 8) are not read
     * back uninitialised in the final reduction. */
    vst1q_u16(extremIdxArr, vdupq_n_u16(blockSize - 1));

    indexVec = vidupq_u16(0U, 1);
    blkCnt = blockSize;
    do {
        /* Activate only the lanes that still map to valid samples */
        mve_pred16_t    p = vctp16q(blkCnt);
        q15x8_t         extremIdxVal = vld1q_z_s16(pSrc, p);

        /* Saturating absolute value: 0x8000 becomes 0x7FFF, matching the
         * scalar variants. A plain vabsq would leave it negative, so the
         * most negative sample could never be selected as the maximum. */
        extremIdxVal = vqabsq(extremIdxVal);
        /*
         * Get current max per lane and current index per lane
         * when a max is selected
         */
        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
        /* store per-lane extrema indexes */
        vst1q_p_u16(extremIdxArr, indexVec, p0);
        indexVec += 8;
        pSrc += 8;
        blkCnt -= 8;
    }
    while (blkCnt > 0);

    /* Get max value across the vector */
    maxValue = vmaxvq(maxValue, extremValVec);
    /* set index for lower values to max possible index */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u16(extremIdxArr);
    indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);
    /* Smallest index among the lanes that hold the maximum */
    *pIndex = vminvq(blockSize - 1, indexVec);
    *pResult = maxValue;
}
#else
#if defined(ARM_MATH_DSP)
/*
 * DSP-extension scalar variant with the main loop unrolled by four.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult
 * and the position of its first occurrence to *pIndex; negation of
 * non-positive samples goes through __QSUB16.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult,
        uint32_t * pIndex)
{
    q15_t    sample;         /* magnitude of the sample being examined */
    q15_t    best;           /* largest magnitude found so far */
    uint32_t remaining;      /* loop counter */
    uint32_t bestIdx = 0U;   /* position of best */
    uint32_t base = 0U;      /* position just before the current group of four */

    /* Seed the running maximum with |pSrc[0]| */
    best = *pSrc++;
    if (best <= 0)
    {
        best = (q15_t)__QSUB16(0, best);
    }

    /* Unrolled part: four of the remaining blockSize-1 samples per pass */
    for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        if (sample > best)
        {
            best    = sample;
            bestIdx = base + 1U;
        }

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        if (sample > best)
        {
            best    = sample;
            bestIdx = base + 2U;
        }

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        if (sample > best)
        {
            best    = sample;
            bestIdx = base + 3U;
        }

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        if (sample > best)
        {
            best    = sample;
            bestIdx = base + 4U;
        }

        base += 4U;
    }

    /* Leftover samples that did not fill a group of four */
    for (remaining = (blockSize - 1U) % 4U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q15_t)__QSUB16(0, sample);
        if (sample > best)
        {
            best    = sample;
            /* remaining counts down to the end of the vector */
            bestIdx = blockSize - remaining;
        }
    }

    /* Hand the maximum magnitude and its position back to the caller */
    *pResult = best;
    *pIndex = bestIdx;
}
#else
/*
 * Plain scalar variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult
 * and the position of its first occurrence to *pIndex.
 * The magnitude of 0x8000 is saturated to 0x7fff.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult,
        uint32_t * pIndex)
{
    q15_t    raw;            /* sample as read from the input */
    q15_t    candidate;      /* saturated magnitude of raw */
    q15_t    best;           /* largest magnitude found so far */
    uint32_t remaining;      /* samples still to examine */
    uint32_t bestIdx = 0U;   /* position of best */

    /* The first sample is the initial reference value */
    raw = *pSrc++;
    if (raw > 0)
    {
        best = raw;
    }
    else if (raw == (q15_t) 0x8000)
    {
        best = 0x7fff;
    }
    else
    {
        best = -raw;
    }

    /* Walk over the other blockSize-1 samples */
    for (remaining = blockSize - 1U; remaining > 0U; remaining--)
    {
        raw = *pSrc++;
        if (raw > 0)
        {
            candidate = raw;
        }
        else if (raw == (q15_t) 0x8000)
        {
            candidate = 0x7fff;
        }
        else
        {
            candidate = -raw;
        }

        /* A strictly larger magnitude replaces the current maximum */
        if (best < candidate)
        {
            best    = candidate;
            /* remaining counts down to the end of the vector */
            bestIdx = blockSize - remaining;
        }
    }

    /* Hand the maximum magnitude and its position back to the caller */
    *pResult = best;
    *pIndex = bestIdx;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,236 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_q31.c
* Description: Maximum value of absolute values of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult
 * and a position holding it to *pIndex, processing four Q31 samples per
 * vector with tail predication.
 */
void arm_absmax_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult,
        uint32_t * pIndex)
{
    int32_t         blkCnt;           /* samples still to process */
    q31x4_t         extremValVec = vdupq_n_s32(Q31_ABSMIN);
    q31_t           maxValue = Q31_ABSMIN;
    uint32x4_t      indexVec;
    uint32x4_t      extremIdxVec;
    mve_pred16_t    p0;
    uint32_t        extremIdxArr[4];

    /* Pre-fill the per-lane index store with the largest possible index so
     * that lanes which are never written below (blockSize < 4) are not read
     * back uninitialised in the final reduction. */
    vst1q_u32(extremIdxArr, vdupq_n_u32(blockSize - 1));

    indexVec = vidupq_u32(0U, 1);
    blkCnt = blockSize;
    do {
        /* Activate only the lanes that still map to valid samples */
        mve_pred16_t    p = vctp32q(blkCnt);
        q31x4_t         extremIdxVal = vld1q_z_s32(pSrc, p);

        /* Saturating absolute value: INT32_MIN becomes INT32_MAX, matching
         * the scalar variants. A plain vabsq would leave it negative, so the
         * most negative sample could never be selected as the maximum. */
        extremIdxVal = vqabsq(extremIdxVal);
        /*
         * Get current max per lane and current index per lane
         * when a max is selected
         */
        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
        /* store per-lane extrema indexes */
        vst1q_p_u32(extremIdxArr, indexVec, p0);
        indexVec += 4;
        pSrc += 4;
        blkCnt -= 4;
    }
    while (blkCnt > 0);

    /* Get max value across the vector */
    maxValue = vmaxvq(maxValue, extremValVec);
    /* set index for lower values to max possible index */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u32(extremIdxArr);
    indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);
    /* Smallest index among the lanes that hold the maximum */
    *pIndex = vminvq(blockSize - 1, indexVec);
    *pResult = maxValue;
}
#else
#if defined(ARM_MATH_DSP)
/*
 * DSP-extension scalar variant with the main loop unrolled by four.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult
 * and the position of its first occurrence to *pIndex; negation of
 * non-positive samples goes through __QSUB.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult,
        uint32_t * pIndex)
{
    q31_t    sample;         /* magnitude of the sample being examined */
    q31_t    best;           /* largest magnitude found so far */
    uint32_t remaining;      /* loop counter */
    uint32_t bestIdx = 0U;   /* position of best */
    uint32_t base = 0U;      /* position just before the current group of four */

    /* Seed the running maximum with |pSrc[0]| */
    best = *pSrc++;
    if (best <= 0)
    {
        best = (q31_t)__QSUB(0, best);
    }

    /* Unrolled part: four of the remaining blockSize-1 samples per pass */
    for (remaining = (blockSize - 1U) >> 2U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        if (sample > best)
        {
            best    = sample;
            bestIdx = base + 1U;
        }

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        if (sample > best)
        {
            best    = sample;
            bestIdx = base + 2U;
        }

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        if (sample > best)
        {
            best    = sample;
            bestIdx = base + 3U;
        }

        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        if (sample > best)
        {
            best    = sample;
            bestIdx = base + 4U;
        }

        base += 4U;
    }

    /* Leftover samples that did not fill a group of four */
    for (remaining = (blockSize - 1U) % 4U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        sample = (sample > 0) ? sample : (q31_t)__QSUB(0, sample);
        if (sample > best)
        {
            best    = sample;
            /* remaining counts down to the end of the vector */
            bestIdx = blockSize - remaining;
        }
    }

    /* Hand the maximum magnitude and its position back to the caller */
    *pResult = best;
    *pIndex = bestIdx;
}
#else
/*
 * Plain scalar variant.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult
 * and the position of its first occurrence to *pIndex.
 * The magnitude of INT32_MIN is saturated to INT32_MAX.
 * pSrc[0] is read unconditionally to seed the search, so blockSize must be
 * at least 1.
 */
void arm_absmax_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult,
        uint32_t * pIndex)
{
    q31_t    raw;            /* sample as read from the input */
    q31_t    candidate;      /* saturated magnitude of raw */
    q31_t    best;           /* largest magnitude found so far */
    uint32_t remaining;      /* samples still to examine */
    uint32_t bestIdx = 0U;   /* position of best */

    /* The first sample is the initial reference value */
    raw = *pSrc++;
    if (raw > 0)
    {
        best = raw;
    }
    else if (raw == INT32_MIN)
    {
        best = INT32_MAX;
    }
    else
    {
        best = -raw;
    }

    /* Walk over the other blockSize-1 samples */
    for (remaining = blockSize - 1U; remaining > 0U; remaining--)
    {
        raw = *pSrc++;
        if (raw > 0)
        {
            candidate = raw;
        }
        else if (raw == INT32_MIN)
        {
            candidate = INT32_MAX;
        }
        else
        {
            candidate = -raw;
        }

        /* A strictly larger magnitude replaces the current maximum */
        if (best < candidate)
        {
            best    = candidate;
            /* remaining counts down to the end of the vector */
            bestIdx = blockSize - remaining;
        }
    }

    /* Hand the maximum magnitude and its position back to the caller */
    *pResult = best;
    *pIndex = bestIdx;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,294 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmax_q7.c
* Description: Maximum value of absolute values of a Q7 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMax
@{
*/
/**
@brief Maximum value of absolute values of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <stdint.h>
#include "arm_helium_utils.h"
#define MAX_BLKSZ_S8 (UINT8_MAX+1)
/*
 * Helium (MVE) helper for one small block.
 * Writes the largest absolute value of pSrc[0 .. blockSize-1] to *pResult
 * and a position holding it to *pIndex, processing sixteen Q7 samples per
 * vector with tail predication.
 *
 * Positions are tracked in 8-bit lanes; the caller in this file passes
 * blocks of at most MAX_BLKSZ_S8 samples.
 */
static void arm_small_blk_absmax_q7(
    const q7_t * pSrc,
    uint16_t blockSize,
    q7_t * pResult,
    uint32_t * pIndex)
{
    int32_t         blkCnt;           /* samples still to process */
    q7x16_t         extremValVec = vdupq_n_s8(Q7_ABSMIN);
    q7_t            maxValue = Q7_ABSMIN;
    uint8x16_t      indexVec;
    uint8x16_t      extremIdxVec;
    mve_pred16_t    p0;
    uint8_t         extremIdxArr[16];

    /* Pre-fill the per-lane index store with the largest possible index so
     * that lanes which are never written below (blockSize < 16) are not read
     * back uninitialised in the final reduction. */
    vst1q_u8(extremIdxArr, vdupq_n_u8(blockSize - 1));

    indexVec = vidupq_u8(0U, 1);
    blkCnt = blockSize;
    do {
        /* Activate only the lanes that still map to valid samples */
        mve_pred16_t    p = vctp8q(blkCnt);
        q7x16_t         extremIdxVal = vld1q_z_s8(pSrc, p);

        /* Saturating absolute value: 0x80 becomes 0x7F, matching the scalar
         * variants. A plain vabsq would leave it negative, so the most
         * negative sample could never be selected as the maximum. */
        extremIdxVal = vqabsq(extremIdxVal);
        /*
         * Get current max per lane and current index per lane
         * when a max is selected
         */
        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);
        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);
        /* store per-lane extrema indexes */
        vst1q_p_u8(extremIdxArr, indexVec, p0);
        indexVec += 16;
        pSrc += 16;
        blkCnt -= 16;
    }
    while (blkCnt > 0);

    /* Get max value across the vector */
    maxValue = vmaxvq(maxValue, extremValVec);
    /* set index for lower values to max possible index */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u8(extremIdxArr);
    indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);
    /* Smallest index among the lanes that hold the maximum */
    *pIndex = vminvq_u8(blockSize - 1, indexVec);
    *pResult = maxValue;
}
/*
 * Helium (MVE) implementation of arm_absmax_q7.
 *
 * The helper arm_small_blk_absmax_q7 handles at most MAX_BLKSZ_S8 samples per
 * call, so longer inputs are scanned block by block while the best value, its
 * position inside its block, and the number of that block are remembered.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the maximum absolute value
 * pIndex     receives the index of that value in the whole input
 */
void arm_absmax_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
    int32_t totalSize = blockSize;

    if (totalSize <= MAX_BLKSZ_S8)
    {
        /* Short input: a single helper call covers everything. */
        arm_small_blk_absmax_q7(pSrc, blockSize, pResult, pIndex);
    }
    else
    {
        uint32_t curIdx = 0;            /* number of the block being scanned */
        q7_t     curBlkExtr = Q7_MIN;   /* best value over the blocks seen so far */
        uint32_t curBlkPos = 0;         /* position of that value inside its block */
        uint32_t curBlkIdx = 0;         /* number of the block holding that value */

        /*
         * process full blocks of MAX_BLKSZ_S8 elements
         */
        while (totalSize >= MAX_BLKSZ_S8)
        {
            arm_small_blk_absmax_q7(pSrc, MAX_BLKSZ_S8, pResult, pIndex);
            if (*pResult > curBlkExtr)
            {
                /*
                 * update partial extrema
                 */
                curBlkExtr = *pResult;
                curBlkPos = *pIndex;
                curBlkIdx = curIdx;
            }
            curIdx++;
            pSrc += MAX_BLKSZ_S8;
            totalSize -= MAX_BLKSZ_S8;
        }

        /*
         * remainder: skipped when the input is an exact multiple of the block
         * size, so the helper is never run on an empty block
         */
        if (totalSize > 0)
        {
            arm_small_blk_absmax_q7(pSrc, totalSize, pResult, pIndex);
            if (*pResult > curBlkExtr)
            {
                curBlkExtr = *pResult;
                curBlkPos = *pIndex;
                curBlkIdx = curIdx;
            }
        }

        /* Convert (block number, position in block) into a global index. */
        *pIndex = curBlkIdx * MAX_BLKSZ_S8 + curBlkPos;
        *pResult = curBlkExtr;
    }
}
#else
#if defined(ARM_MATH_DSP)
/*
 * DSP-extension implementation of arm_absmax_q7.
 *
 * Scans the input once, four samples per loop pass plus a short leftover
 * loop, and reports the first position holding the largest absolute value.
 * Non-positive samples are negated through __QSUB8(0, x); this is presumably
 * the CMSIS saturating byte subtract, so -128 would map to 127 (confirm
 * against the intrinsic reference).
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the maximum absolute value
 * pIndex     receives the index of that value
 */
void arm_absmax_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
    q7_t     best;              /* largest absolute value seen so far */
    q7_t     cand;              /* absolute value of the sample under test */
    uint32_t bestIdx = 0U;      /* position of 'best' in the input */
    uint32_t base = 0U;         /* index of the last sample consumed before the current group */
    uint32_t groups;            /* groups of four samples still to process */
    uint32_t left;              /* leftover samples still to process */

    /* The first sample seeds the search. */
    best = pSrc[0];
    if (!(best > 0))
    {
        best = (q7_t)__QSUB8(0, best);
    }
    pSrc++;

    /* Manually unrolled part: four candidates per pass. */
    for (groups = (blockSize - 1U) >> 2U; groups != 0U; groups--)
    {
        cand = pSrc[0];
        if (!(cand > 0))
        {
            cand = (q7_t)__QSUB8(0, cand);
        }
        if (cand > best)
        {
            best = cand;
            bestIdx = base + 1U;
        }

        cand = pSrc[1];
        if (!(cand > 0))
        {
            cand = (q7_t)__QSUB8(0, cand);
        }
        if (cand > best)
        {
            best = cand;
            bestIdx = base + 2U;
        }

        cand = pSrc[2];
        if (!(cand > 0))
        {
            cand = (q7_t)__QSUB8(0, cand);
        }
        if (cand > best)
        {
            best = cand;
            bestIdx = base + 3U;
        }

        cand = pSrc[3];
        if (!(cand > 0))
        {
            cand = (q7_t)__QSUB8(0, cand);
        }
        if (cand > best)
        {
            best = cand;
            bestIdx = base + 4U;
        }

        pSrc += 4;
        base += 4U;
    }

    /* Leftover samples; the index is derived from the number still pending. */
    for (left = (blockSize - 1U) % 4U; left != 0U; left--)
    {
        cand = *pSrc++;
        if (!(cand > 0))
        {
            cand = (q7_t)__QSUB8(0, cand);
        }
        if (cand > best)
        {
            best = cand;
            bestIdx = blockSize - left;
        }
    }

    *pResult = best;
    *pIndex = bestIdx;
}
#else
/*
 * Portable implementation of arm_absmax_q7.
 *
 * Walks the input once and reports the first position holding the largest
 * absolute value. The most negative sample (0x80) is mapped to 0x7f instead
 * of being negated.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the maximum absolute value
 * pIndex     receives the index of that value
 */
void arm_absmax_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
    q7_t     raw;               /* sample as read from the input */
    q7_t     cand;              /* its saturated absolute value */
    q7_t     best;              /* largest absolute value seen so far */
    uint32_t bestIdx = 0U;      /* position of 'best' in the input */
    uint32_t remaining;         /* samples still to examine */

    /* The first sample seeds the search. */
    raw = *pSrc++;
    if (raw > 0)
    {
        best = raw;
    }
    else if (raw == (q7_t) 0x80)
    {
        best = (q7_t) 0x7f;
    }
    else
    {
        best = -raw;
    }

    for (remaining = blockSize - 1U; remaining != 0U; remaining--)
    {
        raw = *pSrc++;
        if (raw > 0)
        {
            cand = raw;
        }
        else if (raw == (q7_t) 0x80)
        {
            cand = (q7_t) 0x7f;
        }
        else
        {
            cand = -raw;
        }

        /* Strict comparison keeps the earliest position on ties. */
        if (cand > best)
        {
            best = cand;
            bestIdx = blockSize - remaining;
        }
    }

    *pResult = best;
    *pIndex = bestIdx;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMax group
*/

View File

@@ -0,0 +1,276 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_f16.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation of arm_absmin_f16.
 *
 * Keeps a per-lane running minimum of |x| together with the element index
 * that produced it, then reduces across lanes. Indices live in 16-bit lanes
 * (and in a uint16_t scalar), so block sizes that do not fit in 16 bits are
 * not representable by this variant.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the minimum absolute value
 * pIndex     receives the index of that value
 */
void arm_absmin_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    uint16_t        blkCnt;     /* loop counter */
    f16x8_t         vecSrc;
    float16_t const *pSrcVec;
    f16x8_t         curExtremValVec = vdupq_n_f16(F16_ABSMAX);
    float16_t       minValue = F16_ABSMAX;
    uint16_t        idx = blockSize;
    uint16x8_t      indexVec;
    uint16x8_t      curExtremIdxVec;
    mve_pred16_t    p0;

    indexVec = vidupq_u16((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u16(0);
    pSrcVec = (float16_t const *) pSrc;

    /* Full vectors of eight samples. */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        vecSrc = vldrhq_f16(pSrcVec);
        pSrcVec += 8;
        vecSrc = vabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
        indexVec = indexVec + 8;
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail: fewer than eight samples remain
     */
    blkCnt = blockSize & 7;
    if (blkCnt > 0U)
    {
        p0 = vctp16q(blkCnt);
        /*
         * Predicated (zeroing) load: lanes beyond the end of the input are
         * not read from memory. They are also excluded from the comparison
         * below, so the zeros never influence the result.
         */
        vecSrc = vldrhq_z_f16(pSrcVec, p0);
        vecSrc = vabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminnmvq(minValue, curExtremValVec);
    /*
     * Lanes that do not hold the minimum get the largest possible index
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
    /*
     * The smallest remaining index belongs to a lane holding the minimum
     */
    idx = vminvq(idx, indexVec);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
/*
 * Loop-unrolled scalar implementation of arm_absmin_f16.
 *
 * Scans the input once, four samples per loop pass plus a short leftover
 * loop, and reports the first position holding the smallest absolute value.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the minimum absolute value
 * pIndex     receives the index of that value
 */
void arm_absmin_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    float16_t best;             /* smallest absolute value seen so far */
    float16_t cand;             /* absolute value of the sample under test */
    uint32_t  bestIdx = 0U;     /* position of 'best' in the input */
    uint32_t  base = 0U;        /* index of the last sample consumed before the current group */
    uint32_t  groups;           /* groups of four samples still to process */
    uint32_t  left;             /* leftover samples still to process */

    /* The first sample seeds the search. */
    best = pSrc[0];
    if (!(best > 0.0f16))
    {
        best = -best;
    }
    pSrc++;

    /* Manually unrolled part: four candidates per pass. */
    for (groups = (blockSize - 1U) >> 2U; groups != 0U; groups--)
    {
        cand = pSrc[0];
        if (!(cand > 0.0f16))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = base + 1U;
        }

        cand = pSrc[1];
        if (!(cand > 0.0f16))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = base + 2U;
        }

        cand = pSrc[2];
        if (!(cand > 0.0f16))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = base + 3U;
        }

        cand = pSrc[3];
        if (!(cand > 0.0f16))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = base + 4U;
        }

        pSrc += 4;
        base += 4U;
    }

    /* Leftover samples; the index is derived from the number still pending. */
    for (left = (blockSize - 1U) % 4U; left != 0U; left--)
    {
        cand = *pSrc++;
        if (!(cand > 0.0f16))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = blockSize - left;
        }
    }

    *pResult = best;
    *pIndex = bestIdx;
}
#else
/*
 * Portable implementation of arm_absmin_f16.
 *
 * Walks the input once and reports the first position holding the smallest
 * absolute value. Absolute values are obtained through fabsf.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the minimum absolute value
 * pIndex     receives the index of that value
 */
void arm_absmin_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    const float16_t *cursor = pSrc;     /* sample most recently examined */
    float16_t smallest;                 /* smallest absolute value seen so far */
    float16_t magnitude;                /* absolute value of the current sample */
    uint32_t  where = 0U;               /* position of 'smallest' in the input */
    uint32_t  remaining;                /* samples still to examine */

    /* The first sample seeds the search. */
    smallest = fabsf(*cursor);

    for (remaining = blockSize - 1U; remaining != 0U; remaining--)
    {
        cursor++;
        magnitude = fabsf(*cursor);

        /* Strict comparison keeps the earliest position on ties. */
        if (smallest > magnitude)
        {
            smallest = magnitude;
            where = blockSize - remaining;
        }
    }

    *pResult = smallest;
    *pIndex = where;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,279 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_f32.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@defgroup AbsMin Absolute Minimum
Computes the minimum value of absolute values of an array of data.
The function returns both the minimum value and its position within the array.
There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation of arm_absmin_f32.
 *
 * Keeps a per-lane running minimum of |x| together with the element index
 * that produced it, then reduces across the four lanes.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the minimum absolute value
 * pIndex     receives the index of that value
 */
void arm_absmin_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    int32_t         blkCnt;     /* loop counter */
    f32x4_t         vecSrc;
    float32_t const *pSrcVec;
    f32x4_t         curExtremValVec = vdupq_n_f32(F32_ABSMAX);
    float32_t       minValue = F32_ABSMAX;
    uint32_t        idx = blockSize;
    uint32x4_t      indexVec;
    uint32x4_t      curExtremIdxVec;
    mve_pred16_t    p0;

    indexVec = vidupq_u32((uint32_t)0, 1);
    curExtremIdxVec = vdupq_n_u32(0);
    pSrcVec = (float32_t const *) pSrc;

    /* Full vectors of four samples. */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0)
    {
        vecSrc = vldrwq_f32(pSrcVec);
        pSrcVec += 4;
        vecSrc = vabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
        indexVec = indexVec + 4;
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail: fewer than four samples remain
     */
    blkCnt = blockSize & 3;
    if (blkCnt > 0)
    {
        p0 = vctp32q(blkCnt);
        /*
         * Predicated (zeroing) load: lanes beyond the end of the input are
         * not read from memory. They are also excluded from the comparison
         * below, so the zeros never influence the result.
         */
        vecSrc = vldrwq_z_f32(pSrcVec, p0);
        vecSrc = vabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminnmvq(minValue, curExtremValVec);
    /*
     * Lanes that do not hold the minimum get the largest possible index
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
    /*
     * The smallest remaining index belongs to a lane holding the minimum
     */
    idx = vminvq(idx, indexVec);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
/*
 * Loop-unrolled scalar implementation of arm_absmin_f32.
 *
 * Scans the input once, four samples per loop pass plus a short leftover
 * loop, and reports the first position holding the smallest absolute value.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the minimum absolute value
 * pIndex     receives the index of that value
 */
void arm_absmin_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    float32_t best;             /* smallest absolute value seen so far */
    float32_t cand;             /* absolute value of the sample under test */
    uint32_t  bestIdx = 0U;     /* position of 'best' in the input */
    uint32_t  base = 0U;        /* index of the last sample consumed before the current group */
    uint32_t  groups;           /* groups of four samples still to process */
    uint32_t  left;             /* leftover samples still to process */

    /* The first sample seeds the search. */
    best = pSrc[0];
    if (!(best > 0.0f))
    {
        best = -best;
    }
    pSrc++;

    /* Manually unrolled part: four candidates per pass. */
    for (groups = (blockSize - 1U) >> 2U; groups != 0U; groups--)
    {
        cand = pSrc[0];
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = base + 1U;
        }

        cand = pSrc[1];
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = base + 2U;
        }

        cand = pSrc[2];
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = base + 3U;
        }

        cand = pSrc[3];
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = base + 4U;
        }

        pSrc += 4;
        base += 4U;
    }

    /* Leftover samples; the index is derived from the number still pending. */
    for (left = (blockSize - 1U) % 4U; left != 0U; left--)
    {
        cand = *pSrc++;
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
            bestIdx = blockSize - left;
        }
    }

    *pResult = best;
    *pIndex = bestIdx;
}
#else
/*
 * Portable implementation of arm_absmin_f32.
 *
 * Walks the input once and reports the first position holding the smallest
 * absolute value.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the minimum absolute value
 * pIndex     receives the index of that value
 */
void arm_absmin_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    const float32_t *cursor = pSrc;     /* sample most recently examined */
    float32_t smallest;                 /* smallest absolute value seen so far */
    float32_t magnitude;                /* absolute value of the current sample */
    uint32_t  where = 0U;               /* position of 'smallest' in the input */
    uint32_t  remaining;                /* samples still to examine */

    /* The first sample seeds the search. */
    smallest = fabsf(*cursor);

    for (remaining = blockSize - 1U; remaining != 0U; remaining--)
    {
        cursor++;
        magnitude = fabsf(*cursor);

        /* Strict comparison keeps the earliest position on ties. */
        if (smallest > magnitude)
        {
            smallest = magnitude;
            where = blockSize - remaining;
        }
    }

    *pResult = smallest;
    *pIndex = where;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,90 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_f64.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
/*
 * Double-precision absolute minimum with index.
 *
 * Walks the input once and reports the first position holding the smallest
 * absolute value.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the minimum absolute value
 * pIndex     receives the index of that value
 */
void arm_absmin_f64(
  const float64_t * pSrc,
        uint32_t blockSize,
        float64_t * pResult,
        uint32_t * pIndex)
{
    const float64_t *cursor = pSrc;     /* sample most recently examined */
    float64_t smallest;                 /* smallest absolute value seen so far */
    float64_t magnitude;                /* absolute value of the current sample */
    uint32_t  where = 0U;               /* position of 'smallest' in the input */
    uint32_t  remaining;                /* samples still to examine */

    /* The first sample seeds the search. */
    smallest = fabs(*cursor);

    for (remaining = blockSize - 1U; remaining != 0U; remaining--)
    {
        cursor++;
        magnitude = fabs(*cursor);

        /* Strict comparison keeps the earliest position on ties. */
        if (smallest > magnitude)
        {
            smallest = magnitude;
            where = blockSize - remaining;
        }
    }

    *pResult = smallest;
    *pIndex = where;
}
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,230 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_f16.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation of arm_absmin_no_idx_f16.
 *
 * Maintains a per-lane running minimum with the vminnma family (which, per
 * the Arm intrinsic description, works on absolute values) and reduces it
 * across lanes at the end.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the minimum absolute value
 */
void arm_absmin_no_idx_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    int32_t         blkCnt;     /* loop counter */
    f16x8_t         vecSrc;
    float16_t const *pSrcVec;
    f16x8_t         curExtremValVec = vdupq_n_f16(F16_ABSMAX);
    float16_t       minValue = F16_ABSMAX;
    mve_pred16_t    p0;

    pSrcVec = (float16_t const *) pSrc;

    /* Full vectors of eight samples. */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 8;
        /*
         * update per-lane min.
         */
        curExtremValVec = vminnmaq(vecSrc, curExtremValVec);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail: fewer than eight samples remain
     */
    blkCnt = blockSize & 7;
    if (blkCnt > 0)
    {
        p0 = vctp16q(blkCnt);
        /*
         * Predicated (zeroing) load: lanes beyond the end of the input are
         * not read from memory. The predicated update below leaves those
         * lanes of the running minimum untouched.
         */
        vecSrc = vld1q_z(pSrcVec, p0);
        /*
         * update per-lane min. on the active lanes only
         */
        curExtremValVec = vminnmaq_m(curExtremValVec, vecSrc, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminnmavq(minValue, curExtremValVec);
    *pResult = minValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
/*
 * Loop-unrolled scalar implementation of arm_absmin_no_idx_f16.
 *
 * Scans the input once, four samples per loop pass plus a short leftover
 * loop, and reports the smallest absolute value.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the minimum absolute value
 */
void arm_absmin_no_idx_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    float16_t best;     /* smallest absolute value seen so far */
    float16_t cand;     /* absolute value of the sample under test */
    uint32_t  groups;   /* groups of four samples still to process */
    uint32_t  left;     /* leftover samples still to process */

    /* The first sample seeds the search. */
    best = pSrc[0];
    if (!((_Float16)best > 0.0f16))
    {
        best = -(_Float16)best;
    }
    pSrc++;

    /* Manually unrolled part: four candidates per pass. */
    for (groups = (blockSize - 1U) >> 2U; groups != 0U; groups--)
    {
        cand = pSrc[0];
        if (!((_Float16)cand > 0.0f16))
        {
            cand = -(_Float16)cand;
        }
        if ((_Float16)cand < (_Float16)best)
        {
            best = cand;
        }

        cand = pSrc[1];
        if (!((_Float16)cand > 0.0f16))
        {
            cand = -(_Float16)cand;
        }
        if ((_Float16)cand < (_Float16)best)
        {
            best = cand;
        }

        cand = pSrc[2];
        if (!((_Float16)cand > 0.0f16))
        {
            cand = -(_Float16)cand;
        }
        if ((_Float16)cand < (_Float16)best)
        {
            best = cand;
        }

        cand = pSrc[3];
        if (!((_Float16)cand > 0.0f16))
        {
            cand = -(_Float16)cand;
        }
        if ((_Float16)cand < (_Float16)best)
        {
            best = cand;
        }

        pSrc += 4;
    }

    /* Leftover samples. */
    for (left = (blockSize - 1U) % 4U; left != 0U; left--)
    {
        cand = *pSrc++;
        if (!((_Float16)cand > 0.0f16))
        {
            cand = -(_Float16)cand;
        }
        if ((_Float16)cand < (_Float16)best)
        {
            best = cand;
        }
    }

    *pResult = best;
}
#else
/*
 * Portable implementation of arm_absmin_no_idx_f16.
 *
 * Walks the input once and reports the smallest absolute value. Each sample
 * is widened to float32_t for fabsf and narrowed back afterwards.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the minimum absolute value
 */
void arm_absmin_no_idx_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    const float16_t *cursor = pSrc;     /* sample most recently examined */
    float16_t smallest;                 /* smallest absolute value seen so far */
    float16_t magnitude;                /* absolute value of the current sample */
    uint32_t  remaining;                /* samples still to examine */

    /* The first sample seeds the search. */
    smallest = (_Float16)fabsf((float32_t)*cursor);

    for (remaining = blockSize - 1U; remaining != 0U; remaining--)
    {
        cursor++;
        magnitude = (_Float16)fabsf((float32_t)*cursor);

        if ((_Float16)smallest > (_Float16)magnitude)
        {
            smallest = magnitude;
        }
    }

    *pResult = smallest;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,226 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_f32.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation of arm_absmin_no_idx_f32.
 *
 * Maintains a per-lane running minimum with the vminnma family (which, per
 * the Arm intrinsic description, works on absolute values) and reduces it
 * across lanes at the end.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector
 * pResult    receives the minimum absolute value
 */
void arm_absmin_no_idx_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
    int32_t         blkCnt;     /* loop counter */
    f32x4_t         vecSrc;
    float32_t const *pSrcVec;
    f32x4_t         curExtremValVec = vdupq_n_f32(F32_ABSMAX);
    float32_t       minValue = F32_ABSMAX;
    mve_pred16_t    p0;

    pSrcVec = (float32_t const *) pSrc;

    /* Full vectors of four samples. */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0)
    {
        vecSrc = vldrwq_f32(pSrcVec);
        pSrcVec += 4;
        /*
         * update per-lane min.
         */
        curExtremValVec = vminnmaq(vecSrc, curExtremValVec);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail: fewer than four samples remain
     */
    blkCnt = blockSize & 3;
    if (blkCnt > 0)
    {
        p0 = vctp32q(blkCnt);
        /*
         * Predicated (zeroing) load: lanes beyond the end of the input are
         * not read from memory. The predicated update below leaves those
         * lanes of the running minimum untouched.
         */
        vecSrc = vldrwq_z_f32(pSrcVec, p0);
        /*
         * update per-lane min. on the active lanes only
         */
        curExtremValVec = vminnmaq_m(curExtremValVec, vecSrc, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminnmavq(minValue, curExtremValVec);
    *pResult = minValue;
}
#else
#if defined(ARM_MATH_LOOPUNROLL)
/*
 * Loop-unrolled scalar implementation of arm_absmin_no_idx_f32.
 *
 * Scans the input once, four samples per loop pass plus a short leftover
 * loop, and reports the smallest absolute value.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the minimum absolute value
 */
void arm_absmin_no_idx_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
    float32_t best;     /* smallest absolute value seen so far */
    float32_t cand;     /* absolute value of the sample under test */
    uint32_t  groups;   /* groups of four samples still to process */
    uint32_t  left;     /* leftover samples still to process */

    /* The first sample seeds the search. */
    best = pSrc[0];
    if (!(best > 0.0f))
    {
        best = -best;
    }
    pSrc++;

    /* Manually unrolled part: four candidates per pass. */
    for (groups = (blockSize - 1U) >> 2U; groups != 0U; groups--)
    {
        cand = pSrc[0];
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
        }

        cand = pSrc[1];
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
        }

        cand = pSrc[2];
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
        }

        cand = pSrc[3];
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
        }

        pSrc += 4;
    }

    /* Leftover samples. */
    for (left = (blockSize - 1U) % 4U; left != 0U; left--)
    {
        cand = *pSrc++;
        if (!(cand > 0.0f))
        {
            cand = -cand;
        }
        if (cand < best)
        {
            best = cand;
        }
    }

    *pResult = best;
}
#else
/*
 * Portable implementation of arm_absmin_no_idx_f32.
 *
 * Walks the input once and reports the smallest absolute value.
 *
 * pSrc       input vector
 * blockSize  number of samples in the input vector (at least one is read)
 * pResult    receives the minimum absolute value
 */
void arm_absmin_no_idx_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult)
{
    const float32_t *cursor = pSrc;     /* sample most recently examined */
    float32_t smallest;                 /* smallest absolute value seen so far */
    float32_t magnitude;                /* absolute value of the current sample */
    uint32_t  remaining;                /* samples still to examine */

    /* The first sample seeds the search. */
    smallest = fabsf(*cursor);

    for (remaining = blockSize - 1U; remaining != 0U; remaining--)
    {
        cursor++;
        magnitude = fabsf(*cursor);

        if (smallest > magnitude)
        {
            smallest = magnitude;
        }
    }

    *pResult = smallest;
}
#endif /* defined(ARM_MATH_LOOPUNROLL) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,84 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_f64.c
* Description: Minimum value of absolute values of a floating-point vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
void arm_absmin_no_idx_f64(
  const float64_t * pSrc,
        uint32_t blockSize,
        float64_t * pResult)
{
  /* One pass over the input, keeping the smallest |x| seen so far. */
  const float64_t *pIn = pSrc;   /* read cursor over the input vector */
  float64_t smallest;            /* running minimum of the magnitudes */
  uint32_t remaining;            /* samples still to be examined */

  /* The first sample is read unconditionally and seeds the running minimum. */
  smallest = fabs(*pIn++);

  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
  {
    const float64_t magnitude = fabs(*pIn++);

    /* Strict comparison: an equal magnitude does not replace the current minimum. */
    if (magnitude < smallest)
    {
      smallest = magnitude;
    }
  }

  /* Hand the minimum magnitude back to the caller. */
  *pResult = smallest;
}
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,222 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_q15.c
* Description: Minimum value of absolute values of a Q15 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_no_idx_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult)
{
    /*
     * Helium path: a per-lane running minimum of |x| is kept with vminaq and
     * reduced across lanes at the end with vminavq.
     * With blockSize == 0 no sample is read and Q15_ABSMAX is returned.
     */
    uint32_t        blkCnt;           /* loop counter; 32-bit so that blockSize >> 3 is never truncated */
    q15x8_t         vecSrc;
    q15_t const    *pSrcVec;
    /* Explicit cast: vdupq_n_s16 yields a signed vector, the accumulator is unsigned. */
    uint16x8_t      curExtremValVec = (uint16x8_t) vdupq_n_s16(Q15_ABSMAX);
    q15_t           minValue = Q15_ABSMAX;
    mve_pred16_t    p0;

    pSrcVec = (q15_t const *) pSrc;

    /* Main loop: 8 samples per iteration. */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 8;
        /*
         * update per-lane min.
         */
        curExtremValVec = vminaq(curExtremValVec, vecSrc);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail (1 to 7 remaining samples, handled through tail predication)
     */
    blkCnt = blockSize & 7U;
    if (blkCnt > 0U)
    {
        p0 = vctp16q(blkCnt);
        /* Predicated load: lanes beyond the end of the input are not read from memory. */
        vecSrc = vld1q_z(pSrcVec, p0);
        /*
         * Update the per-lane min for the active lanes only
         */
        curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminavq(minValue, (q15x8_t)curExtremValVec);
    *pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_no_idx_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult)
{
  /* DSP-extension path: non-positive samples are negated through
     __QSUB16(0, x) and the scan is unrolled by four. */
  q15_t sample;        /* magnitude of the sample under test */
  q15_t smallest;      /* running minimum of the magnitudes */
  uint32_t groups;     /* full groups of four samples after the seed */
  uint32_t leftover;   /* samples left once the unrolled part is done */

  /* The first sample is read unconditionally and seeds the running minimum. */
  smallest = *pSrc++;
  if (smallest <= 0) { smallest = (q15_t)__QSUB16(0, smallest); }

  /* Unrolled part: four samples per iteration. */
  for (groups = (blockSize - 1U) >> 2U; groups > 0U; groups--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; }
  }

  /* Remaining zero to three samples. */
  for (leftover = (blockSize - 1U) % 4U; leftover > 0U; leftover--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; }
  }

  /* Hand the minimum magnitude back to the caller. */
  *pResult = smallest;
}
#else
void arm_absmin_no_idx_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult)
{
  /* Plain C path: one pass keeping the smallest |x|; |0x8000| is clamped to 0x7fff. */
  q15_t sample;         /* raw sample under test */
  q15_t magnitude;      /* its absolute value */
  q15_t smallest;       /* running minimum of the magnitudes */
  uint32_t remaining;   /* samples still to be examined */

  /* The first sample is read unconditionally and seeds the running minimum. */
  sample = *pSrc++;
  if (sample > 0)
  {
    smallest = sample;
  }
  else if (sample == (q15_t) 0x8000)
  {
    smallest = 0x7fff;
  }
  else
  {
    smallest = -sample;
  }

  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
  {
    sample = *pSrc++;
    if (sample > 0)
    {
      magnitude = sample;
    }
    else if (sample == (q15_t) 0x8000)
    {
      magnitude = 0x7fff;
    }
    else
    {
      magnitude = -sample;
    }

    /* Strict comparison: an equal magnitude does not replace the current minimum. */
    if (magnitude < smallest)
    {
      smallest = magnitude;
    }
  }

  /* Hand the minimum magnitude back to the caller. */
  *pResult = smallest;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,221 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_q31.c
* Description: Minimum value of absolute values of a Q31 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_no_idx_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult)
{
    /*
     * Helium path: a per-lane running minimum of |x| is kept with vminaq and
     * reduced across lanes at the end with vminavq.
     * With blockSize == 0 no sample is read and Q31_ABSMAX is returned.
     */
    int32_t         blkCnt;           /* loop counters */
    q31x4_t         vecSrc;
    q31_t const    *pSrcVec;
    /* Explicit cast: vdupq_n_s32 yields a signed vector, the accumulator is unsigned. */
    uint32x4_t      curExtremValVec = (uint32x4_t) vdupq_n_s32(Q31_ABSMAX);
    q31_t           minValue = Q31_ABSMAX;
    mve_pred16_t    p0;

    pSrcVec = (q31_t const *) pSrc;

    /* Main loop: 4 samples per iteration. */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0)
    {
        vecSrc = vldrwq_s32(pSrcVec);
        pSrcVec += 4;
        /*
         * update per-lane min.
         */
        curExtremValVec = vminaq(curExtremValVec, vecSrc);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail (1 to 3 remaining samples, handled through tail predication)
     */
    blkCnt = blockSize & 3;
    if (blkCnt > 0)
    {
        p0 = vctp32q(blkCnt);
        /* Predicated load: lanes beyond the end of the input are not read from memory. */
        vecSrc = vldrwq_z_s32(pSrcVec, p0);
        /*
         * Update the per-lane min for the active lanes only
         */
        curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminavq(minValue, (q31x4_t)curExtremValVec);
    *pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_no_idx_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult)
{
  /* DSP-extension path: non-positive samples are negated through
     __QSUB(0, x) and the scan is unrolled by four. */
  q31_t sample;        /* magnitude of the sample under test */
  q31_t smallest;      /* running minimum of the magnitudes */
  uint32_t groups;     /* full groups of four samples after the seed */
  uint32_t leftover;   /* samples left once the unrolled part is done */

  /* The first sample is read unconditionally and seeds the running minimum. */
  smallest = *pSrc++;
  if (smallest <= 0) { smallest = (q31_t)__QSUB(0, smallest); }

  /* Unrolled part: four samples per iteration. */
  for (groups = (blockSize - 1U) >> 2U; groups > 0U; groups--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; }
  }

  /* Remaining zero to three samples. */
  for (leftover = (blockSize - 1U) % 4U; leftover > 0U; leftover--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; }
  }

  /* Hand the minimum magnitude back to the caller. */
  *pResult = smallest;
}
#else
void arm_absmin_no_idx_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult)
{
  /* Plain C path: one pass keeping the smallest |x|; |INT32_MIN| is clamped to INT32_MAX. */
  q31_t sample;         /* raw sample under test */
  q31_t magnitude;      /* its absolute value */
  q31_t smallest;       /* running minimum of the magnitudes */
  uint32_t remaining;   /* samples still to be examined */

  /* The first sample is read unconditionally and seeds the running minimum. */
  sample = *pSrc++;
  if (sample > 0)
  {
    smallest = sample;
  }
  else if (sample == INT32_MIN)
  {
    smallest = INT32_MAX;
  }
  else
  {
    smallest = -sample;
  }

  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
  {
    sample = *pSrc++;
    if (sample > 0)
    {
      magnitude = sample;
    }
    else if (sample == INT32_MIN)
    {
      magnitude = INT32_MAX;
    }
    else
    {
      magnitude = -sample;
    }

    /* Strict comparison: an equal magnitude does not replace the current minimum. */
    if (magnitude < smallest)
    {
      smallest = magnitude;
    }
  }

  /* Hand the minimum magnitude back to the caller. */
  *pResult = smallest;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,223 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_no_idx_q7.c
* Description: Minimum value of absolute values of a Q7 vector
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <stdint.h>
#include "arm_helium_utils.h"
void arm_absmin_no_idx_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult)
{
    /*
     * Helium path: a per-lane running minimum of |x| is kept with vminaq and
     * reduced across lanes at the end with vminavq.
     * With blockSize == 0 no sample is read and Q7_ABSMAX is returned.
     */
    int32_t         blkCnt;           /* loop counters */
    q7x16_t         vecSrc;
    q7_t const     *pSrcVec;
    /* Explicit cast: vdupq_n_s8 yields a signed vector, the accumulator is unsigned. */
    uint8x16_t      curExtremValVec = (uint8x16_t) vdupq_n_s8(Q7_ABSMAX);
    q7_t            minValue = Q7_ABSMAX;
    mve_pred16_t    p0;

    pSrcVec = (q7_t const *) pSrc;

    /* Main loop: 16 samples per iteration. */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 16;
        /*
         * update per-lane min.
         */
        curExtremValVec = vminaq(curExtremValVec, vecSrc);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail (1 to 15 remaining samples, handled through tail predication)
     */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0)
    {
        p0 = vctp8q(blkCnt);
        /* Predicated load: lanes beyond the end of the input are not read from memory. */
        vecSrc = vld1q_z(pSrcVec, p0);
        /*
         * Update the per-lane min for the active lanes only
         */
        curExtremValVec = vminaq_m(curExtremValVec, vecSrc, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminavq(minValue, (q7x16_t)curExtremValVec);
    *pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_no_idx_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult)
{
  /* DSP-extension path: non-positive samples are negated through
     __QSUB8(0, x) and the scan is unrolled by four. */
  q7_t sample;         /* magnitude of the sample under test */
  q7_t smallest;       /* running minimum of the magnitudes */
  uint32_t groups;     /* full groups of four samples after the seed */
  uint32_t leftover;   /* samples left once the unrolled part is done */

  /* The first sample is read unconditionally and seeds the running minimum. */
  smallest = *pSrc++;
  if (smallest <= 0) { smallest = (q7_t)__QSUB8(0, smallest); }

  /* Unrolled part: four samples per iteration. */
  for (groups = (blockSize - 1U) >> 2U; groups > 0U; groups--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q7_t)__QSUB8(0, sample); }
    if (sample < smallest) { smallest = sample; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q7_t)__QSUB8(0, sample); }
    if (sample < smallest) { smallest = sample; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q7_t)__QSUB8(0, sample); }
    if (sample < smallest) { smallest = sample; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q7_t)__QSUB8(0, sample); }
    if (sample < smallest) { smallest = sample; }
  }

  /* Remaining zero to three samples. */
  for (leftover = (blockSize - 1U) % 4U; leftover > 0U; leftover--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q7_t)__QSUB8(0, sample); }
    if (sample < smallest) { smallest = sample; }
  }

  /* Hand the minimum magnitude back to the caller. */
  *pResult = smallest;
}
#else
void arm_absmin_no_idx_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult)
{
  /* Plain C path: one pass keeping the smallest |x|; |0x80| is clamped to 0x7f. */
  q7_t sample;          /* raw sample under test */
  q7_t magnitude;       /* its absolute value */
  q7_t smallest;        /* running minimum of the magnitudes */
  uint32_t remaining;   /* samples still to be examined */

  /* The first sample is read unconditionally and seeds the running minimum. */
  sample = *pSrc++;
  if (sample > 0)
  {
    smallest = sample;
  }
  else if (sample == (q7_t) 0x80)
  {
    smallest = (q7_t) 0x7f;
  }
  else
  {
    smallest = -sample;
  }

  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
  {
    sample = *pSrc++;
    if (sample > 0)
    {
      magnitude = sample;
    }
    else if (sample == (q7_t) 0x80)
    {
      magnitude = (q7_t) 0x7f;
    }
    else
    {
      magnitude = -sample;
    }

    /* Strict comparison: an equal magnitude does not replace the current minimum. */
    if (magnitude < smallest)
    {
      smallest = magnitude;
    }
  }

  /* Hand the minimum magnitude back to the caller. */
  *pResult = smallest;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,269 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_q15.c
* Description: Minimum value of absolute values of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult,
        uint32_t * pIndex)
{
    /*
     * Helium path: each lane tracks its own minimum |x| and the index at which
     * it was selected; both are reduced across lanes at the end.
     *
     * NOTE(review): lane indices are held in a uint16x8_t, so the reported
     * index wraps for blockSize > 65535 -- confirm callers stay below that.
     */
    uint32_t        blkCnt;           /* loop counter; 32-bit so that blockSize >> 3 is never truncated */
    q15x8_t         vecSrc;
    q15_t const    *pSrcVec;
    q15x8_t         curExtremValVec = vdupq_n_s16(Q15_ABSMAX);
    q15_t           minValue = Q15_ABSMAX;
    uint16_t        idx = blockSize;
    uint16x8_t      indexVec;
    uint16x8_t      curExtremIdxVec;
    uint32_t        startIdx = 0;
    mve_pred16_t    p0;

    indexVec = vidupq_wb_u16(&startIdx, 1);
    curExtremIdxVec = vdupq_n_u16(0);
    pSrcVec = (q15_t const *) pSrc;

    /* Main loop: 8 samples per iteration. */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 8;
        /* Saturating abs: |0x8000| becomes 0x7FFF, matching the scalar implementations. */
        vecSrc = vqabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
        indexVec = vidupq_wb_u16(&startIdx, 1);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail (1 to 7 remaining samples, handled through tail predication)
     */
    blkCnt = blockSize & 7U;
    if (blkCnt > 0U)
    {
        p0 = vctp16q(blkCnt);
        /* Predicated load: lanes beyond the end of the input are not read from memory. */
        vecSrc = vld1q_z(pSrcVec, p0);
        vecSrc = vqabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected (active lanes only)
         */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminvq(minValue, curExtremValVec);
    /*
     * Lanes that do not hold the minimum get index blockSize so that they
     * cannot win the index reduction below
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
    /*
     * Get min index which is thus for a min value
     */
    idx = vminvq(idx, indexVec);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult,
        uint32_t * pIndex)
{
  /* DSP-extension path: non-positive samples are negated through
     __QSUB16(0, x) and the scan is unrolled by four. */
  q15_t sample;              /* magnitude of the sample under test */
  q15_t smallest;            /* running minimum of the magnitudes */
  uint32_t groups;           /* full groups of four samples after the seed */
  uint32_t leftover;         /* samples left once the unrolled part is done */
  uint32_t base = 0U;        /* index of the last sample examined before the current group */
  uint32_t bestIndex = 0U;   /* index at which the running minimum was found */

  /* The first sample is read unconditionally and seeds the running minimum (index 0). */
  smallest = *pSrc++;
  if (smallest <= 0) { smallest = (q15_t)__QSUB16(0, smallest); }

  /* Unrolled part: four samples per iteration. */
  for (groups = (blockSize - 1U) >> 2U; groups > 0U; groups--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = base + 1U; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = base + 2U; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = base + 3U; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = base + 4U; }

    base += 4U;
  }

  /* Remaining zero to three samples; their index is counted back from the end. */
  for (leftover = (blockSize - 1U) % 4U; leftover > 0U; leftover--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q15_t)__QSUB16(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = blockSize - leftover; }
  }

  /* Hand the minimum magnitude and its index back to the caller. */
  *pResult = smallest;
  *pIndex = bestIndex;
}
#else
void arm_absmin_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult,
        uint32_t * pIndex)
{
  /* Plain C path: one pass keeping the smallest |x| and where it was first
     seen; |0x8000| is clamped to 0x7fff. */
  q15_t sample;              /* raw sample under test */
  q15_t magnitude;           /* its absolute value */
  q15_t smallest;            /* running minimum of the magnitudes */
  uint32_t remaining;        /* samples still to be examined */
  uint32_t bestIndex = 0U;   /* index at which the running minimum was found */

  /* The first sample is read unconditionally and seeds the running minimum (index 0). */
  sample = *pSrc++;
  if (sample > 0)
  {
    smallest = sample;
  }
  else if (sample == (q15_t) 0x8000)
  {
    smallest = 0x7fff;
  }
  else
  {
    smallest = -sample;
  }

  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
  {
    sample = *pSrc++;
    if (sample > 0)
    {
      magnitude = sample;
    }
    else if (sample == (q15_t) 0x8000)
    {
      magnitude = 0x7fff;
    }
    else
    {
      magnitude = -sample;
    }

    /* Strict comparison: on ties the earliest index is kept. */
    if (magnitude < smallest)
    {
      smallest = magnitude;
      bestIndex = blockSize - remaining;
    }
  }

  /* Hand the minimum magnitude and its index back to the caller. */
  *pResult = smallest;
  *pIndex = bestIndex;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,269 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_q31.c
* Description: Minimum value of absolute values of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_absmin_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult,
        uint32_t * pIndex)
{
    /*
     * Helium path: each lane tracks its own minimum |x| and the index at which
     * it was selected; both are reduced across lanes at the end.
     */
    uint32_t        blkCnt;           /* loop counter; 32-bit so that blockSize >> 2 is never truncated */
    q31x4_t         vecSrc;
    q31_t const    *pSrcVec;
    q31x4_t         curExtremValVec = vdupq_n_s32(Q31_ABSMAX);
    q31_t           minValue = Q31_ABSMAX;
    uint32_t        idx = blockSize;  /* 32-bit like the lane indices, so large indices are not truncated */
    uint32x4_t      indexVec;
    uint32x4_t      curExtremIdxVec;
    uint32_t        startIdx = 0;
    mve_pred16_t    p0;

    indexVec = vidupq_wb_u32(&startIdx, 1);
    curExtremIdxVec = vdupq_n_u32(0);
    pSrcVec = (q31_t const *) pSrc;

    /* Main loop: 4 samples per iteration. */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0U)
    {
        vecSrc = vldrwq_s32(pSrcVec);
        pSrcVec += 4;
        /* Saturating abs: |INT32_MIN| becomes INT32_MAX, matching the scalar implementations. */
        vecSrc = vqabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
        indexVec = vidupq_wb_u32(&startIdx, 1);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail (1 to 3 remaining samples, handled through tail predication)
     */
    blkCnt = blockSize & 3U;
    if (blkCnt > 0U)
    {
        p0 = vctp32q(blkCnt);
        /* Predicated load: lanes beyond the end of the input are not read from memory. */
        vecSrc = vldrwq_z_s32(pSrcVec, p0);
        vecSrc = vqabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected (active lanes only)
         */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminvq(minValue, curExtremValVec);
    /*
     * Lanes that do not hold the minimum get index blockSize so that they
     * cannot win the index reduction below
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
    /*
     * Get min index which is thus for a min value
     */
    idx = vminvq(idx, indexVec);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
#else
#if defined(ARM_MATH_DSP)
void arm_absmin_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult,
        uint32_t * pIndex)
{
  /* DSP-extension path: non-positive samples are negated through
     __QSUB(0, x) and the scan is unrolled by four. */
  q31_t sample;              /* magnitude of the sample under test */
  q31_t smallest;            /* running minimum of the magnitudes */
  uint32_t groups;           /* full groups of four samples after the seed */
  uint32_t leftover;         /* samples left once the unrolled part is done */
  uint32_t base = 0U;        /* index of the last sample examined before the current group */
  uint32_t bestIndex = 0U;   /* index at which the running minimum was found */

  /* The first sample is read unconditionally and seeds the running minimum (index 0). */
  smallest = *pSrc++;
  if (smallest <= 0) { smallest = (q31_t)__QSUB(0, smallest); }

  /* Unrolled part: four samples per iteration. */
  for (groups = (blockSize - 1U) >> 2U; groups > 0U; groups--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = base + 1U; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = base + 2U; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = base + 3U; }

    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = base + 4U; }

    base += 4U;
  }

  /* Remaining zero to three samples; their index is counted back from the end. */
  for (leftover = (blockSize - 1U) % 4U; leftover > 0U; leftover--)
  {
    sample = *pSrc++;
    if (sample <= 0) { sample = (q31_t)__QSUB(0, sample); }
    if (sample < smallest) { smallest = sample; bestIndex = blockSize - leftover; }
  }

  /* Hand the minimum magnitude and its index back to the caller. */
  *pResult = smallest;
  *pIndex = bestIndex;
}
#else
void arm_absmin_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult,
        uint32_t * pIndex)
{
  /* Plain C path: one pass keeping the smallest |x| and where it was first
     seen; |INT32_MIN| is clamped to INT32_MAX. */
  q31_t sample;              /* raw sample under test */
  q31_t magnitude;           /* its absolute value */
  q31_t smallest;            /* running minimum of the magnitudes */
  uint32_t remaining;        /* samples still to be examined */
  uint32_t bestIndex = 0U;   /* index at which the running minimum was found */

  /* The first sample is read unconditionally and seeds the running minimum (index 0). */
  sample = *pSrc++;
  if (sample > 0)
  {
    smallest = sample;
  }
  else if (sample == INT32_MIN)
  {
    smallest = INT32_MAX;
  }
  else
  {
    smallest = -sample;
  }

  for (remaining = blockSize - 1U; remaining > 0U; remaining--)
  {
    sample = *pSrc++;
    if (sample > 0)
    {
      magnitude = sample;
    }
    else if (sample == INT32_MIN)
    {
      magnitude = INT32_MAX;
    }
    else
    {
      magnitude = -sample;
    }

    /* Strict comparison: on ties the earliest index is kept. */
    if (magnitude < smallest)
    {
      smallest = magnitude;
      bestIndex = blockSize - remaining;
    }
  }

  /* Hand the minimum magnitude and its index back to the caller. */
  *pResult = smallest;
  *pIndex = bestIndex;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,322 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_absmin_q7.c
* Description: Minimum value of absolute values of a Q7 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup AbsMin
@{
*/
/**
@brief Minimum value of absolute values of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult minimum value returned here
@param[out] pIndex index of minimum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <stdint.h>
#include "arm_helium_utils.h"
#define MAX_BLKSZ_S8 (UINT8_MAX+1)
/*
 * Helium helper: minimum |x| and its index for a block whose indices fit in
 * 8 bits. The only caller in this file passes at most MAX_BLKSZ_S8 samples.
 */
static void arm_small_blk_absmin_q7(
    const q7_t *pSrc,
    uint32_t blockSize,
    q7_t *pResult,
    uint32_t *pIndex)
{
    uint16_t        blkCnt;           /* loop counters */
    q7x16_t         vecSrc;
    q7_t const     *pSrcVec;
    q7x16_t         curExtremValVec = vdupq_n_s8(Q7_ABSMAX);
    q7_t            minValue = Q7_ABSMAX;
    uint16_t        idx = blockSize - 1;
    uint8x16_t      indexVec;
    uint8x16_t      curExtremIdxVec;
    uint32_t        startIdx = 0;
    mve_pred16_t    p0;

    indexVec = vidupq_wb_u8(&startIdx, 1);
    curExtremIdxVec = vdupq_n_u8(0);
    pSrcVec = (q7_t const *) pSrc;

    /* Main loop: 16 samples per iteration. */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 16;
        /* Saturating abs: |0x80| becomes 0x7F, matching the scalar implementations. */
        vecSrc = vqabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected
         */
        p0 = vcmpleq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
        indexVec = vidupq_wb_u8(&startIdx, 1);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }

    /*
     * tail (1 to 15 remaining samples, handled through tail predication)
     */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0U)
    {
        p0 = vctp8q(blkCnt);
        /* Predicated load: lanes beyond the end of the input are not read from memory. */
        vecSrc = vld1q_z(pSrcVec, p0);
        vecSrc = vqabsq(vecSrc);
        /*
         * Get current min per lane and current index per lane
         * when a min is selected (active lanes only)
         */
        p0 = vcmpleq_m(vecSrc, curExtremValVec, p0);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);
    }

    /*
     * Get min value across the vector
     */
    minValue = vminvq(minValue, curExtremValVec);
    /*
     * Reduce the index only over the lanes that hold the minimum value
     */
    p0 = vcmpleq(curExtremValVec, minValue);
    idx = vminvq_p_u8(idx, curExtremIdxVec, p0);
    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = minValue;
}
/*
 * Helium (MVE) entry point. Inputs of at most MAX_BLKSZ_S8 samples go
 * straight to arm_small_blk_absmin_q7; longer inputs are cut into chunks of
 * MAX_BLKSZ_S8 (UINT8_MAX + 1 = 256) samples and the best chunk result is kept.
 */
void arm_absmin_q7(
    const q7_t * pSrc,
    uint32_t blockSize,
    q7_t * pResult,
    uint32_t * pIndex)
{
    int32_t remaining = blockSize;
    uint32_t chunkNo;           /* number of the chunk being processed */
    uint32_t bestChunk;         /* chunk that produced the best value */
    uint32_t bestPos;           /* position of the best value inside that chunk */
    q7_t bestVal;               /* best value found so far */

    if (remaining <= MAX_BLKSZ_S8)
    {
        arm_small_blk_absmin_q7(pSrc, blockSize, pResult, pIndex);
        return;
    }

    bestVal = Q7_MAX;
    bestPos = 0;
    bestChunk = 0;

    /* Full chunks; pResult / pIndex serve as scratch outputs of the helper */
    for (chunkNo = 0; remaining >= MAX_BLKSZ_S8; chunkNo++)
    {
        arm_small_blk_absmin_q7(pSrc, MAX_BLKSZ_S8, pResult, pIndex);
        if (*pResult < bestVal)
        {
            bestVal = *pResult;
            bestPos = *pIndex;
            bestChunk = chunkNo;
        }
        pSrc += MAX_BLKSZ_S8;
        remaining -= MAX_BLKSZ_S8;
    }

    /* Trailing partial chunk (chunkNo now designates it) */
    arm_small_blk_absmin_q7(pSrc, remaining, pResult, pIndex);
    if (*pResult < bestVal)
    {
        bestVal = *pResult;
        bestPos = *pIndex;
        bestChunk = chunkNo;
    }

    /* Convert (chunk, position) back to an index into the whole input */
    *pIndex = bestChunk * MAX_BLKSZ_S8 + bestPos;
    *pResult = bestVal;
}
#else
#if defined(ARM_MATH_DSP)
/*
 * ARM_MATH_DSP build: scalar search, unrolled four samples per iteration.
 * The absolute value of a non-positive sample is obtained with
 * __QSUB8(0, x) and narrowed back to q7_t.
 */
void arm_absmin_q7(
    const q7_t * pSrc,
    uint32_t blockSize,
    q7_t * pResult,
    uint32_t * pIndex)
{
    q7_t candidate;        /* |x| of the sample under test */
    q7_t best;             /* smallest |x| found so far */
    uint32_t loopCnt;      /* loop counter */
    uint32_t bestIdx;      /* index of the minimum found so far */
    uint32_t base;         /* index of the last sample consumed before the current group */

    /* The first sample is the initial reference */
    bestIdx = 0U;
    best = *pSrc++;
    if (!(best > 0))
    {
        best = (q7_t)__QSUB8(0, best);
    }
    base = 0U;

    /* Unrolled part: four samples per iteration */
    for (loopCnt = (blockSize - 1U) >> 2U; loopCnt > 0U; loopCnt--)
    {
        candidate = *pSrc++;
        if (!(candidate > 0))
        {
            candidate = (q7_t)__QSUB8(0, candidate);
        }
        if (candidate < best)
        {
            best = candidate;
            bestIdx = base + 1U;
        }

        candidate = *pSrc++;
        if (!(candidate > 0))
        {
            candidate = (q7_t)__QSUB8(0, candidate);
        }
        if (candidate < best)
        {
            best = candidate;
            bestIdx = base + 2U;
        }

        candidate = *pSrc++;
        if (!(candidate > 0))
        {
            candidate = (q7_t)__QSUB8(0, candidate);
        }
        if (candidate < best)
        {
            best = candidate;
            bestIdx = base + 3U;
        }

        candidate = *pSrc++;
        if (!(candidate > 0))
        {
            candidate = (q7_t)__QSUB8(0, candidate);
        }
        if (candidate < best)
        {
            best = candidate;
            bestIdx = base + 4U;
        }

        base += 4U;
    }

    /* Leftover samples: (blockSize - 1) modulo 4 */
    for (loopCnt = (blockSize - 1U) % 4U; loopCnt > 0U; loopCnt--)
    {
        candidate = *pSrc++;
        if (!(candidate > 0))
        {
            candidate = (q7_t)__QSUB8(0, candidate);
        }
        if (candidate < best)
        {
            best = candidate;
            /* loopCnt samples are left, so this one sits at blockSize - loopCnt */
            bestIdx = blockSize - loopCnt;
        }
    }

    /* Publish the minimum and its index */
    *pResult = best;
    *pIndex = bestIdx;
}
#else
/*
 * Portable build: one sample per iteration. The absolute value maps 0x80 to
 * 0x7f so that the result always fits in q7_t.
 */
void arm_absmin_q7(
    const q7_t * pSrc,
    uint32_t blockSize,
    q7_t * pResult,
    uint32_t * pIndex)
{
    q7_t sample;                /* raw input sample */
    q7_t candidate;             /* |sample| */
    q7_t best;                  /* smallest |sample| found so far */
    uint32_t remaining;         /* samples still to examine */
    uint32_t bestIdx = 0U;      /* index of the minimum found so far */

    /* The first sample is the initial reference */
    sample = *pSrc++;
    if (sample > 0)
    {
        best = sample;
    }
    else if (sample == (q7_t) 0x80)
    {
        best = (q7_t) 0x7f;
    }
    else
    {
        best = -sample;
    }

    for (remaining = blockSize - 1U; remaining > 0U; remaining--)
    {
        sample = *pSrc++;
        if (sample > 0)
        {
            candidate = sample;
        }
        else if (sample == (q7_t) 0x80)
        {
            candidate = (q7_t) 0x7f;
        }
        else
        {
            candidate = -sample;
        }

        /* Strict comparison: an equal value found later does not replace the earlier one */
        if (candidate < best)
        {
            best = candidate;
            bestIdx = blockSize - remaining;
        }
    }

    /* Publish the minimum and its index */
    *pResult = best;
    *pIndex = bestIdx;
}
#endif /* defined(ARM_MATH_DSP) */
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of AbsMin group
*/

View File

@@ -0,0 +1,140 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_entropy_f16.c
* Description: Entropy
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
@ingroup groupStats
*/
/**
@defgroup Entropy Entropy
Computes the entropy of a distribution
*/
/**
* @addtogroup Entropy
* @{
*/
/**
* @brief Entropy
*
* @param[in] pSrcA Array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return Entropy -Sum(p ln p)
*
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
/* Helium (MVE) build: eight half-precision samples per vector iteration. */
float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
{
    f16x8_t vAcc = vdupq_n_f16(0.0f);   /* per-lane accumulator of p * ln(p) */
    _Float16 total = 0.0f16;
    _Float16 prob;
    uint32_t n;

    /* Vector part: 8 samples at a time */
    for (n = blockSize >> 3U; n > 0U; n--)
    {
        f16x8_t vP = vld1q(pSrcA);

        vAcc = vaddq_f16(vAcc, vmulq(vP, vlogq_f16(vP)));
        pSrcA += 8;
    }

    /* Reduce the lane accumulator to a scalar */
    total = vecAddAcrossF16Mve(vAcc);

    /* Scalar tail: the last blockSize % 8 samples */
    for (n = blockSize & 0x7; n > 0; n--)
    {
        prob = *pSrcA++;
        total += prob * logf(prob);
    }

    return (-total);
}
#else
/* Portable build: accumulate p * ln(p) one sample at a time. */
float16_t arm_entropy_f16(const float16_t * pSrcA,uint32_t blockSize)
{
    _Float16 total = 0.0f;
    _Float16 prob;
    uint32_t i;

    for (i = 0; i < blockSize; i++)
    {
        prob = pSrcA[i];
        total += prob * logf(prob);
    }

    /* Entropy is the negated sum */
    return(-total);
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Entropy group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,174 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_entropy_f32.c
* Description: Entropy
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup Entropy
* @{
*/
/**
* @brief Entropy
*
* @param[in] pSrcA Array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return Entropy -Sum(p ln p)
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math.h"
/* Helium (MVE) build: four single-precision samples per vector iteration. */
float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize)
{
    f32x4_t vAcc = vdupq_n_f32(0.0f);   /* per-lane accumulator of p * ln(p) */
    float32_t total = 0.0f;
    float32_t prob;
    uint32_t n;

    /* Vector part: 4 samples at a time */
    for (n = blockSize >> 2U; n > 0U; n--)
    {
        f32x4_t vP = vld1q(pSrcA);

        vAcc = vaddq_f32(vAcc, vmulq(vP, vlogq_f32(vP)));
        pSrcA += 4;
    }

    /* Reduce the lane accumulator to a scalar */
    total = vecAddAcrossF32Mve(vAcc);

    /* Scalar tail: the last blockSize % 4 samples */
    for (n = blockSize & 0x3; n > 0; n--)
    {
        prob = *pSrcA++;
        total += prob * logf(prob);
    }

    return (-total);
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "NEMath.h"
/* Neon build: four samples per vector iteration, scalar tail. */
float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize)
{
    const float32_t *pCur = pSrcA;
    float32x4_t vAcc = vdupq_n_f32(0.0f);   /* per-lane accumulator of p * ln(p) */
    float32x4_t vP, vLogP;
    float32x2_t vPair;
    float32_t total;
    float32_t prob;
    uint32_t n;

    /* Vector part: acc += p * ln(p) on 4 lanes */
    for (n = blockSize >> 2; n > 0; n--)
    {
        vP = vld1q_f32(pCur);
        pCur += 4;
        vLogP = vlogq_f32(vP);
        vAcc = vmlaq_f32(vAcc, vP, vLogP);
    }

    /* Fold the four lanes: pairwise add, then add the two remaining lanes */
    vPair = vpadd_f32(vget_low_f32(vAcc),vget_high_f32(vAcc));
    total = vget_lane_f32(vPair, 0) + vget_lane_f32(vPair, 1);

    /* Scalar tail: the last blockSize % 4 samples */
    for (n = blockSize & 3; n > 0; n--)
    {
        prob = *pCur++;
        total += prob * logf(prob);
    }

    return(-total);
}
#else
/* Portable build: accumulate p * ln(p) one sample at a time. */
float32_t arm_entropy_f32(const float32_t * pSrcA,uint32_t blockSize)
{
    float32_t total = 0.0f;
    float32_t prob;
    uint32_t i;

    for (i = 0; i < blockSize; i++)
    {
        prob = pSrcA[i];
        total += prob * logf(prob);
    }

    /* Entropy is the negated sum */
    return(-total);
}
#endif
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Entropy group
*/

View File

@@ -0,0 +1,73 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_entropy_f64.c
* Description: Entropy
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup Entropy
* @{
*/
/**
* @brief Entropy
*
* @param[in] pSrcA Array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return Entropy -Sum(p ln p)
*
*/
float64_t arm_entropy_f64(const float64_t * pSrcA, uint32_t blockSize)
{
    float64_t total = 0.0f;
    float64_t prob;
    uint32_t i;

    /* Accumulate p * ln(p) over the whole input */
    for (i = 0; i < blockSize; i++)
    {
        prob = pSrcA[i];
        total += prob * log(prob);
    }

    /* Entropy is the negated sum */
    return(-total);
}
/**
* @} end of Entropy group
*/

View File

@@ -0,0 +1,152 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_kullback_leibler_f16.c
* Description: Kullback-Leibler divergence
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
@ingroup groupStats
*/
/**
@defgroup Kullback-Leibler Kullback-Leibler divergence
Computes the Kullback-Leibler divergence between two distributions
*/
/**
* @addtogroup Kullback-Leibler
* @{
*/
/**
* @brief Kullback-Leibler
*
* Distribution A may contain 0 with Neon version.
* Result will be right but some exception flags will be set.
*
* Distribution B must not contain 0 probability.
*
* @param[in] *pSrcA points to an array of input values for probability distribution A.
* @param[in] *pSrcB points to an array of input values for probability distribution B.
* @param[in] blockSize number of samples in the input array.
* @return Kullback-Leibler divergence D(A || B)
*
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
/* Helium (MVE) build: eight half-precision samples per vector iteration. */
float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize)
{
    f16x8_t vAcc = vdupq_n_f16(0.0f);   /* per-lane accumulator of a * ln(b / a) */
    _Float16 total;
    _Float16 pA,pB;
    uint32_t n;

    /* Vector part: 8 samples at a time */
    for (n = blockSize >> 3; n > 0; n--)
    {
        f16x8_t vA = vld1q(pSrcA);
        f16x8_t vB = vld1q(pSrcB);
        f16x8_t vQuot = vdiv_f16(vB, vA);

        vAcc = vaddq_f16(vAcc, vmulq(vA, vlogq_f16(vQuot)));
        pSrcA += 8;
        pSrcB += 8;
    }

    /* Reduce the lane accumulator to a scalar */
    total = vecAddAcrossF16Mve(vAcc);

    /* Scalar tail: the last blockSize % 8 samples */
    for (n = blockSize & 7; n > 0; n--)
    {
        pA = *pSrcA++;
        pB = *pSrcB++;
        total += pA * logf(pB / pA);
    }

    /* The sum uses ln(B / A); negating it gives sum of A * ln(A / B) */
    return(-total);
}
#else
/* Portable build: accumulate a * ln(b / a) one sample pair at a time. */
float16_t arm_kullback_leibler_f16(const float16_t * pSrcA,const float16_t * pSrcB,uint32_t blockSize)
{
    _Float16 total = 0.0f;
    _Float16 pA,pB;
    uint32_t i;

    for (i = 0; i < blockSize; i++)
    {
        pA = pSrcA[i];
        pB = pSrcB[i];
        total += pA * logf(pB / pA);
    }

    /* The sum uses ln(B / A); negating it gives sum of A * ln(A / B) */
    return(-total);
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Kullback-Leibler group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,193 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_kullback_leibler_f32.c
* Description: Kullback-Leibler divergence
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup Kullback-Leibler
* @{
*/
/**
* @brief Kullback-Leibler
*
* Distribution A may contain 0 with Neon version.
* Result will be right but some exception flags will be set.
*
* Distribution B must not contain 0 probability.
*
* @param[in] *pSrcA points to an array of input values for probability distribution A.
* @param[in] *pSrcB points to an array of input values for probability distribution B.
* @param[in] blockSize number of samples in the input array.
* @return Kullback-Leibler divergence D(A || B)
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math.h"
/* Helium (MVE) build: four single-precision samples per vector iteration. */
float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSrcB,uint32_t blockSize)
{
    f32x4_t vAcc = vdupq_n_f32(0.0f);   /* per-lane accumulator of a * ln(b / a) */
    float32_t total;
    float32_t pA,pB;
    uint32_t n;

    /* Vector part: 4 samples at a time */
    for (n = blockSize >> 2; n > 0; n--)
    {
        f32x4_t vA = vld1q(pSrcA);
        f32x4_t vB = vld1q(pSrcB);
        f32x4_t vQuot = vdiv_f32(vB, vA);

        vAcc = vaddq_f32(vAcc, vmulq(vA, vlogq_f32(vQuot)));
        pSrcA += 4;
        pSrcB += 4;
    }

    /* Reduce the lane accumulator to a scalar */
    total = vecAddAcrossF32Mve(vAcc);

    /* Scalar tail: the last blockSize % 4 samples */
    for (n = blockSize & 3; n > 0; n--)
    {
        pA = *pSrcA++;
        pB = *pSrcB++;
        total += pA * logf(pB / pA);
    }

    /* The sum uses ln(B / A); negating it gives sum of A * ln(A / B) */
    return(-total);
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "NEMath.h"
/* Neon build: four sample pairs per vector iteration, scalar tail. */
float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSrcB,uint32_t blockSize)
{
    const float32_t *pCurA = pSrcA;
    const float32_t *pCurB = pSrcB;
    float32x4_t vAcc = vdupq_n_f32(0.0f);   /* per-lane accumulator of a * ln(b / a) */
    float32x4_t vA, vB, vInvA;
    float32x2_t vPair;
    float32_t total;
    float32_t pA,pB;
    uint32_t n;

    for (n = blockSize >> 2; n > 0; n--)
    {
        vA = vld1q_f32(pCurA);
        pCurA += 4;
        vB = vld1q_f32(pCurB);
        pCurB += 4;

        /* b / a is formed as b * vinvq_f32(a) */
        vInvA = vinvq_f32(vA);
        vB = vmulq_f32(vB, vInvA);
        vB = vlogq_f32(vB);
        vAcc = vmlaq_f32(vAcc, vA, vB);
    }

    /* Fold the four lanes: pairwise add, then add the two remaining lanes */
    vPair = vpadd_f32(vget_low_f32(vAcc),vget_high_f32(vAcc));
    total = vget_lane_f32(vPair, 0) + vget_lane_f32(vPair, 1);

    /* Scalar tail: the last blockSize % 4 sample pairs */
    for (n = blockSize & 3; n > 0; n--)
    {
        pA = *pCurA++;
        pB = *pCurB++;
        total += pA * logf(pB/pA);
    }

    /* The sum uses ln(B / A); negating it gives sum of A * ln(A / B) */
    return(-total);
}
#else
/* Portable build: accumulate a * ln(b / a) one sample pair at a time. */
float32_t arm_kullback_leibler_f32(const float32_t * pSrcA,const float32_t * pSrcB,uint32_t blockSize)
{
    float32_t total = 0.0f;
    float32_t pA,pB;
    uint32_t i;

    for (i = 0; i < blockSize; i++)
    {
        pA = pSrcA[i];
        pB = pSrcB[i];
        total += pA * logf(pB / pA);
    }

    /* The sum uses ln(B / A); negating it gives sum of A * ln(A / B) */
    return(-total);
}
#endif
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of Kullback-Leibler group
*/

View File

@@ -0,0 +1,75 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_kullback_leibler_f64.c
* Description: Kullback-Leibler divergence
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup Kullback-Leibler
* @{
*/
/**
* @brief Kullback-Leibler
*
* @param[in] *pSrcA points to an array of input values for probability distribution A.
* @param[in] *pSrcB points to an array of input values for probability distribution B.
* @param[in] blockSize number of samples in the input array.
* @return Kullback-Leibler divergence D(A || B)
*
*/
float64_t arm_kullback_leibler_f64(const float64_t * pSrcA, const float64_t * pSrcB, uint32_t blockSize)
{
    float64_t total = 0.0f;
    float64_t pA,pB;
    uint32_t i;

    /* Accumulate a * ln(b / a) over all sample pairs */
    for (i = 0; i < blockSize; i++)
    {
        pA = pSrcA[i];
        pB = pSrcB[i];
        total += pA * log(pB / pA);
    }

    /* The sum uses ln(B / A); negating it gives sum of A * ln(A / B) */
    return(-total);
}
/**
* @} end of Kullback-Leibler group
*/

View File

@@ -0,0 +1,84 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_dot_prod_f16.c
* Description: Dot product with log arithmetic
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
@ingroup groupStats
*/
/**
@defgroup LogSumExp LogSumExp
LogSumExp optimizations to compute sum of probabilities with Gaussian distributions
*/
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Dot product with log arithmetic
*
* Vectors are containing the log of the samples
*
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[in] *pTmpBuffer temporary buffer of length blockSize
* @return The log of the dot product.
*
*/
float16_t arm_logsumexp_dot_prod_f16(const float16_t * pSrcA,
const float16_t * pSrcB,
uint32_t blockSize,
float16_t *pTmpBuffer)
{
    /* The inputs are logs, so the element-wise products become sums, written to pTmpBuffer */
    arm_add_f16((float16_t*)pSrcA, (float16_t*)pSrcB, pTmpBuffer, blockSize);

    /* LogSumExp over those sums is the log of the dot product */
    return(arm_logsumexp_f16(pTmpBuffer, blockSize));
}
/**
* @} end of LogSumExp group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,68 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_dot_prod_f32.c
* Description: Dot product with log arithmetic
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Dot product with log arithmetic
*
* Vectors are containing the log of the samples
*
* @param[in] *pSrcA points to the first input vector
* @param[in] *pSrcB points to the second input vector
* @param[in] blockSize number of samples in each vector
* @param[in] *pTmpBuffer temporary buffer of length blockSize
* @return The log of the dot product.
*
*/
float32_t arm_logsumexp_dot_prod_f32(const float32_t * pSrcA,
const float32_t * pSrcB,
uint32_t blockSize,
float32_t *pTmpBuffer)
{
    /* The inputs are logs, so the element-wise products become sums, written to pTmpBuffer */
    arm_add_f32((float32_t*)pSrcA, (float32_t*)pSrcB, pTmpBuffer, blockSize);

    /* LogSumExp over those sums is the log of the dot product */
    return(arm_logsumexp_f32(pTmpBuffer, blockSize));
}
/**
* @} end of LogSumExp group
*/

View File

@@ -0,0 +1,172 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f16.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include <limits.h>
#include <math.h>
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Computation of the LogSumExp
*
* In probabilistic computations, the dynamic of the probability values can be very
* wide because they come from gaussian functions.
* To avoid underflow and overflow issues, the values are represented by their log.
* In this representation, multiplying the original exp values is easy : their logs are added.
* But adding the original exp values is requiring some special handling and it is the
* goal of the LogSumExp function.
*
* If the values are x1...xn, the function is computing:
*
* ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
* rounding issues are minimised.
*
* The max xm of the values is extracted and the function is computing:
* xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
*
* @param[in] *in Pointer to an array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return LogSumExp
*
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math_f16.h"
/* Helium (MVE) build: eight half-precision samples per vector iteration. */
float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
{
    float16_t maxVal;                       /* reference value subtracted before exp() */
    const float16_t *pCur = in;
    f16x8_t vAcc = vdupq_n_f16(0.0f16);     /* per-lane accumulator of exp(x - maxVal) */
    _Float16 total = 0.0f16;
    _Float16 sample;
    int32_t n;

    /* maxVal is filled in by arm_max_no_idx_f16 */
    arm_max_no_idx_f16((float16_t *) in, blockSize, &maxVal);

    /* Vector part: 8 samples at a time */
    for (n = blockSize >> 3; n > 0; n--)
    {
        f16x8_t vX = vld1q(pCur);
        f16x8_t vE = vexpq_f16(vsubq_n_f16(vX, maxVal));

        vAcc = vaddq_f16(vAcc, vE);
        pCur += 8;
    }

    /* Reduce the lane accumulator to a scalar */
    total = vecAddAcrossF16Mve(vAcc);

    /* Scalar tail: the last blockSize % 8 samples */
    for (n = blockSize & 0x7; n > 0; n--)
    {
        sample = *pCur++;
        total += expf(sample - maxVal);
    }

    /* Add the reference back after taking the log */
    total = maxVal + logf(total);
    return (total);
}
#else
/* Portable build: one pass to find the maximum, one pass to sum the exponentials. */
float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
{
    const float16_t *pCur = in;
    _Float16 peak;          /* largest input value */
    _Float16 sample;
    _Float16 total = 0;
    uint32_t n;

    /* Pass 1: the first sample is the initial maximum */
    peak = *pCur++;
    for (n = blockSize - 1U; n > 0; n--)
    {
        sample = *pCur++;
        if (sample > peak)
        {
            peak = sample;
        }
    }

    /* Pass 2: sum exp(x - peak) over the whole input */
    pCur = in;
    for (n = blockSize; n > 0; n--)
    {
        sample = *pCur++;
        total += expf(sample - peak);
    }

    /* Add the maximum back after taking the log */
    total = peak + logf(total);
    return(total);
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of LogSumExp group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,277 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_logsumexp_f32.c
* Description: LogSumExp
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#include <limits.h>
#include <math.h>
/**
* @addtogroup LogSumExp
* @{
*/
/**
* @brief Computation of the LogSumExp
*
* In probabilistic computations, the dynamic of the probability values can be very
* wide because they come from gaussian functions.
* To avoid underflow and overflow issues, the values are represented by their log.
* In this representation, multiplying the original exp values is easy : their logs are added.
* But adding the original exp values is requiring some special handling and it is the
* goal of the LogSumExp function.
*
* If the values are x1...xn, the function is computing:
*
* ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
* rounding issues are minimised.
*
* The max xm of the values is extracted and the function is computing:
* xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
*
* @param[in] *in Pointer to an array of input values.
* @param[in] blockSize Number of samples in the input array.
* @return LogSumExp
*
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_math.h"
/*
 * Helium (MVE) build: four single-precision samples per vector iteration.
 *
 * in        : input values
 * blockSize : number of input values
 * returns   : maxVal + ln(sum of exp(x - maxVal)), where maxVal is the value
 *             produced by arm_max_no_idx_f32
 *
 * The final logarithm uses logf(), as the Neon and portable builds of this
 * function do, so the whole computation stays in single precision.
 */
float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
{
    float32_t maxVal;               /* reference value subtracted before exp() */
    const float32_t *pIn;
    int32_t blkCnt;
    float32_t accum=0.0f;
    float32_t tmp;

    /* maxVal is filled in by arm_max_no_idx_f32 */
    arm_max_no_idx_f32((float32_t *) in, blockSize, &maxVal);

    pIn = in;
    f32x4_t vSum = vdupq_n_f32(0.0f);

    /* Vector part: 4 samples at a time */
    blkCnt = blockSize >> 2;
    while(blkCnt > 0)
    {
        f32x4_t vecIn = vld1q(pIn);
        f32x4_t vecExp;

        vecExp = vexpq_f32(vsubq_n_f32(vecIn, maxVal));
        vSum = vaddq_f32(vSum, vecExp);
        /*
         * Advance the source pointer and decrement the loop counter
         */
        pIn += 4;
        blkCnt --;
    }

    /* Reduce the lane accumulator to a scalar */
    accum = vecAddAcrossF32Mve(vSum);

    /* Scalar tail: the last blockSize % 4 samples */
    blkCnt = blockSize & 0x3;
    while(blkCnt > 0)
    {
        tmp = *pIn++;
        accum += expf(tmp - maxVal);
        blkCnt--;
    }

    /* Single-precision log; add the reference back */
    accum = maxVal + logf(accum);
    return (accum);
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "NEMath.h"
/* Neon build: vectorised maximum search followed by a vectorised sum of exponentials. */
float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
{
    const float32_t *pCur = in;
    float32_t peak;             /* largest input value */
    float32_t sample;
    float32_t total;
    float32x4_t vPeak;
    float32x4_t vX;
    float32x4_t vAcc;
    float32x2_t vPair;
    uint32x4_t vIsGreater;
    uint32_t n;

    /* ---- Step 1: maximum of the input ---- */
    if (blockSize <= 3)
    {
        /* Too short for a vector load: plain scalar scan */
        peak = *pCur++;
        for (n = blockSize - 1U; n > 0; n--)
        {
            sample = *pCur++;
            if (sample > peak)
            {
                peak = sample;
            }
        }
    }
    else
    {
        /* The first four samples seed the per-lane maxima */
        vPeak = vld1q_f32(pCur);
        pCur += 4;

        for (n = (blockSize - 4) >> 2; n > 0; n--)
        {
            vX = vld1q_f32(pCur);
            pCur += 4;
            /* Keep the new value in the lanes where it is strictly greater */
            vIsGreater = vcgtq_f32(vX, vPeak);
            vPeak = vbslq_f32(vIsGreater, vX, vPeak );
        }

        /* Fold the four lane maxima down to lane 0 */
        vPair = vpmax_f32(vget_low_f32(vPeak),vget_high_f32(vPeak));
        vPair = vpmax_f32(vPair,vPair);
        peak = vget_lane_f32(vPair, 0) ;

        /* Samples left over after the vector scan */
        for (n = (blockSize - 4) & 3; n > 0; n--)
        {
            sample = *pCur++;
            if (sample > peak)
            {
                peak = sample;
            }
        }
    }

    /* ---- Step 2: sum of exp(x - peak) ---- */
    vPeak = vdupq_n_f32(peak);
    pCur = in;
    vAcc = vdupq_n_f32(0.0f);

    for (n = blockSize >> 2; n > 0; n--)
    {
        vX = vld1q_f32(pCur);
        pCur += 4;
        vX = vsubq_f32(vX, vPeak);
        vX = vexpq_f32(vX);
        vAcc = vaddq_f32(vAcc, vX);
    }

    /* Fold the four lanes: pairwise add, then add the two remaining lanes */
    vPair = vpadd_f32(vget_low_f32(vAcc),vget_high_f32(vAcc));
    total = vget_lane_f32(vPair, 0) + vget_lane_f32(vPair, 1);

    /* Scalar tail: the last blockSize % 4 samples */
    for (n = blockSize & 0x3; n > 0; n--)
    {
        sample = *pCur++;
        total += expf(sample - peak);
    }

    /* ---- Step 3: add the maximum back after taking the log ---- */
    total = peak + logf(total);
    return(total);
}
#else
float32_t arm_logsumexp_f32(const float32_t *in, uint32_t blockSize)
{
    /*
     * Portable implementation of log-sum-exp.
     *
     * The largest sample (peak) is subtracted from every sample before
     * exponentiation and added back after the logarithm:
     *     result = peak + logf(sum(expf(in[i] - peak)))
     *
     * The first sample is always read, so blockSize must be at least 1.
     */
    const float32_t *pSample = in;
    float32_t peak;
    float32_t sample;
    float32_t total = 0;
    uint32_t remaining;

    /* Pass 1: the first sample is the initial peak, then scan the rest. */
    peak = *pSample++;

    for (remaining = blockSize - 1U; remaining > 0; remaining--)
    {
        sample = *pSample++;
        if (sample > peak)
        {
            peak = sample;
        }
    }

    /* Pass 2: accumulate the shifted exponentials over the whole input. */
    pSample = in;

    for (remaining = blockSize; remaining > 0; remaining--)
    {
        sample = *pSample++;
        total += expf(sample - peak);
    }

    total = peak + logf(total);

    return (total);
}
#endif
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
* @} end of LogSumExp group
*/

View File

@@ -0,0 +1,246 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_f16.c
* Description: Maximum value of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
    /*
     * Helium (MVE) implementation: stores the largest sample in *pResult and
     * the index of its first occurrence in *pIndex.
     *
     * Each of the 8 lanes tracks its own running maximum together with the
     * index at which that maximum was first seen.  The lanes are merged after
     * the vector loop and the remaining (blockSize % 8) samples are handled
     * one at a time.
     *
     * The running maximum starts at F16_MIN, so with blockSize == 0 the
     * outputs are F16_MIN and index 0.
     *
     * NOTE(review): indices are tracked in 16-bit lanes (uint16x8_t), so
     * blockSize values above 65535 look like they would wrap - confirm
     * against callers.
     */
    int32_t blkCnt;
    f16x8_t vecSrc;
    f16x8_t curExtremValVec = vdupq_n_f16(F16_MIN);
    float16_t maxValue = F16_MIN;
    uint32_t idx = blockSize;
    uint16x8_t indexVec;
    uint16x8_t curExtremIdxVec;
    uint32_t curIdx = 0;
    mve_pred16_t p0;
    float16_t tmp;

    /* indexVec holds the sample indices of the vector about to be loaded */
    indexVec = vidupq_wb_u16(&curIdx, 1);
    curExtremIdxVec = vdupq_n_u16(0);

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0)
    {
        vecSrc = vldrhq_f16(pSrc);
        /*
         * Get current max per lane and current index per lane
         * when a max is selected.
         *
         * The comparison is strict so that a later sample equal to the
         * lane maximum does not replace the stored index: the reported
         * index is then the first occurrence, as in the scalar code.
         */
        p0 = vcmpgtq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        indexVec = vidupq_wb_u16(&curIdx, 1);

        pSrc += 8;
        /* Decrement the loop counter */
        blkCnt--;
    }

    /*
     * Get max value across the vector
     */
    maxValue = vmaxnmvq(maxValue, curExtremValVec);
    /*
     * Lanes that do not hold the maximum get blockSize as index so that
     * they cannot win the minimum-index reduction below
     */
    p0 = vcmpgeq(curExtremValVec, maxValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u16(blockSize), p0);
    /*
     * Get min index which is thus for a max value
     */
    idx = vminvq(idx, indexVec);

    /* Tail */
    blkCnt = blockSize & 7;

    while (blkCnt > 0)
    {
        /* Initialize tmp to the next consecutive values one by one */
        tmp = *pSrc++;

        /* compare for the maximum value */
        if (maxValue < tmp)
        {
            /* Update the maximum value and its index */
            maxValue = tmp;
            idx = blockSize - blkCnt;
        }

        /* Decrement loop counter */
        blkCnt--;
    }

    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = maxValue;
}
#else
void arm_max_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult,
        uint32_t * pIndex)
{
        /*
         * Portable implementation: stores the largest sample in *pResult and
         * the index of its first occurrence in *pIndex.
         */
        float16_t maxVal, out;                         /* Current sample and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
        uint32_t index;                                /* Index of the last sample handled by the unrolled loop */
#endif

  /*
   * Empty input: without this guard the first sample would be read anyway
   * and (blockSize - 1U) would wrap around.  The outputs match what the
   * Helium variant of this function produces for blockSize == 0.
   */
  if (blockSize == 0U)
  {
    *pResult = F16_MIN;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;

  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Initialise index of maximum value. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = index + 1U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 2U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 3U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = (blockSize - 1U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to the next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,365 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_f32.c
* Description: Maximum value of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@defgroup Max Maximum
Computes the maximum value of an array of data.
The function returns both the maximum value and its position within the array.
There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
    /*
     * Helium (MVE) implementation: stores the largest sample in *pResult and
     * the index of its first occurrence in *pIndex.
     *
     * Each of the 4 lanes tracks its own running maximum together with the
     * index at which that maximum was first seen.  The lanes are merged after
     * the vector loop and the remaining (blockSize % 4) samples are handled
     * one at a time.
     *
     * The running maximum starts at F32_MIN, so with blockSize == 0 the
     * outputs are F32_MIN and index 0.
     */
    uint32_t blkCnt;
    f32x4_t vecSrc;
    f32x4_t curExtremValVec = vdupq_n_f32(F32_MIN);
    float32_t maxValue = F32_MIN;
    uint32_t idx = blockSize;
    uint32x4_t indexVec;
    uint32x4_t curExtremIdxVec;
    uint32_t curIdx = 0;
    mve_pred16_t p0;
    float32_t tmp;

    /* indexVec holds the sample indices of the vector about to be loaded */
    indexVec = vidupq_wb_u32(&curIdx, 1);
    curExtremIdxVec = vdupq_n_u32(0);

    /* Compute 4 outputs at a time */
    blkCnt = blockSize >> 2U;
    while (blkCnt > 0U)
    {
        vecSrc = vldrwq_f32(pSrc);
        /*
         * Get current max per lane and current index per lane
         * when a max is selected.
         *
         * The comparison is strict so that a later sample equal to the
         * lane maximum does not replace the stored index: the reported
         * index is then the first occurrence, as in the scalar and NEON
         * code.
         */
        p0 = vcmpgtq(vecSrc, curExtremValVec);
        curExtremValVec = vpselq(vecSrc, curExtremValVec, p0);
        curExtremIdxVec = vpselq(indexVec, curExtremIdxVec, p0);

        indexVec = vidupq_wb_u32(&curIdx, 1);

        pSrc += 4;
        /* Decrement the loop counter */
        blkCnt--;
    }

    /*
     * Get max value across the vector
     */
    maxValue = vmaxnmvq(maxValue, curExtremValVec);
    /*
     * Lanes that do not hold the maximum get blockSize as index so that
     * they cannot win the minimum-index reduction below
     */
    p0 = vcmpgeq(curExtremValVec, maxValue);
    indexVec = vpselq(curExtremIdxVec, vdupq_n_u32(blockSize), p0);
    /*
     * Get min index which is thus for a max value
     */
    idx = vminvq(idx, indexVec);

    /* Tail */
    blkCnt = blockSize & 0x3;

    while (blkCnt > 0U)
    {
        /* Initialize tmp to the next consecutive values one by one */
        tmp = *pSrc++;

        /* compare for the maximum value */
        if (maxValue < tmp)
        {
            /* Update the maximum value and its index */
            maxValue = tmp;
            idx = blockSize - blkCnt;
        }

        /* Decrement loop counter */
        blkCnt--;
    }

    /*
     * Save result
     */
    *pIndex = idx;
    *pResult = maxValue;
}
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
  /*
   * NEON implementation: stores the largest sample in *pResult and the index
   * of its first occurrence in *pIndex.
   *
   * Inputs of up to 3 samples are scanned with scalar code.  Longer inputs
   * keep four per-lane maxima with their indices, merge them after the
   * vector loop and finish the leftover samples with scalar code.
   */
  float32_t maxVal1, out;               /* Current sample and running maximum */
  uint32_t blkCnt, outIndex;            /* Loop counter and index of the running maximum */

  float32x4_t outV, srcV;
  float32x2_t outV2;

  uint32x4_t idxV;
  uint32x4_t maxIdx;
  static const uint32_t indexInit[4]={4,5,6,7};
  static const uint32_t countVInit[4]={0,1,2,3};

  uint32x4_t index;
  uint32x4_t delta;
  uint32x4_t countV;
  uint32x2_t countV2;

  /*
   * Empty input: without this guard the first sample would be read anyway
   * and (blockSize - 1) would wrap around.  The outputs match what the
   * Helium variant of this function produces for blockSize == 0.
   */
  if (blockSize == 0U)
  {
    *pResult = F32_MIN;
    *pIndex = 0U;
    return;
  }

  /* Index given to lanes that do not hold the maximum, so they lose the min-index reduction */
  maxIdx = vdupq_n_u32(ULONG_MAX);
  delta = vdupq_n_u32(4);
  /* index: sample indices of the next vector; countV: index of each lane maximum */
  index = vld1q_u32(indexInit);
  countV = vld1q_u32(countVInit);

  /* Initialise the index value to zero. */
  outIndex = 0U;

  /* Load first input value that acts as reference value for comparison */
  if (blockSize <= 3)
  {
      out = *pSrc++;

      blkCnt = blockSize - 1;

      while (blkCnt > 0U)
      {
        /* Initialize maxVal to the next consecutive values one by one */
        maxVal1 = *pSrc++;

        /* compare for the maximum value */
        if (out < maxVal1)
        {
          /* Update the maximum value and its index */
          out = maxVal1;
          outIndex = blockSize - blkCnt;
        }

        /* Decrement the loop counter */
        blkCnt--;
      }
  }
  else
  {
      /* The first four samples seed the per-lane maxima */
      outV = vld1q_f32(pSrc);
      pSrc += 4;

      /* Compute 4 outputs at a time */
      blkCnt = (blockSize - 4 ) >> 2U;

      while (blkCnt > 0U)
      {
        srcV = vld1q_f32(pSrc);
        pSrc += 4;

        /* Strict comparison: an equal later sample keeps the earlier index */
        idxV = vcgtq_f32(srcV, outV);
        outV = vbslq_f32(idxV, srcV, outV );
        countV = vbslq_u32(idxV, index,countV );

        index = vaddq_u32(index,delta);

        /* Decrement the loop counter */
        blkCnt--;
      }

      /* Reduce the four lane maxima to a scalar */
      outV2 = vpmax_f32(vget_low_f32(outV),vget_high_f32(outV));
      outV2 = vpmax_f32(outV2,outV2);
      out = vget_lane_f32(outV2, 0);

      /* Among the lanes equal to the maximum, keep the smallest index */
      idxV = vceqq_f32(outV, vdupq_n_f32(out));
      countV = vbslq_u32(idxV, countV,maxIdx);

      countV2 = vpmin_u32(vget_low_u32(countV),vget_high_u32(countV));
      countV2 = vpmin_u32(countV2,countV2);
      outIndex = vget_lane_u32(countV2,0);

      /* Samples left over after the vector loop: (blockSize - 4) % 4 of them */
      blkCnt = (blockSize - 4 ) % 4U;

      while (blkCnt > 0U)
      {
        /* Initialize maxVal to the next consecutive values one by one */
        maxVal1 = *pSrc++;

        /* compare for the maximum value */
        if (out < maxVal1)
        {
          /* Update the maximum value and its index */
          out = maxVal1;
          outIndex = blockSize - blkCnt ;
        }

        /* Decrement the loop counter */
        blkCnt--;
      }
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
#else
void arm_max_f32(
  const float32_t * pSrc,
        uint32_t blockSize,
        float32_t * pResult,
        uint32_t * pIndex)
{
        /*
         * Portable implementation: stores the largest sample in *pResult and
         * the index of its first occurrence in *pIndex.
         */
        float32_t maxVal, out;                         /* Current sample and running maximum */
        uint32_t blkCnt, outIndex;                     /* Loop counter and index of the running maximum */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
        uint32_t index;                                /* Index of the last sample handled by the unrolled loop */
#endif

  /*
   * Empty input: without this guard the first sample would be read anyway
   * and (blockSize - 1U) would wrap around.  The outputs match what the
   * Helium variant of this function produces for blockSize == 0.
   */
  if (blockSize == 0U)
  {
    *pResult = F32_MIN;
    *pIndex = 0U;
    return;
  }

  /* Initialise index value to zero. */
  outIndex = 0U;

  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
  /* Initialise index of maximum value. */
  index = 0U;

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = (blockSize - 1U) >> 2U;

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = index + 1U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 2U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 3U;
    }

    maxVal = *pSrc++;
    if (out < maxVal)
    {
      out = maxVal;
      outIndex = index + 4U;
    }

    index += 4U;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = (blockSize - 1U) % 4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = (blockSize - 1U);

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to the next consecutive values one by one */
    maxVal = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal)
    {
      /* Update the maximum value and its index */
      out = maxVal;
      outIndex = blockSize - blkCnt;
    }

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Store the maximum value and its index into destination pointers */
  *pResult = out;
  *pIndex = outIndex;
}
#endif /* #if defined(ARM_MATH_NEON) */
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,90 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_f64.c
* Description: Maximum value of a floating-point vector
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
void arm_max_f64(
  const float64_t * pSrc,
        uint32_t blockSize,
        float64_t * pResult,
        uint32_t * pIndex)
{
    /*
     * Stores the largest sample in *pResult and the index of its first
     * occurrence in *pIndex.
     */
    float64_t maxVal, out;               /* Current sample and running maximum */
    uint32_t blkCnt, outIndex;           /* Loop counter and index of the running maximum */

    /*
     * Empty input: without this guard the first sample would be read anyway
     * and (blockSize - 1U) would wrap around.  Report the type minimum and
     * index 0 instead.
     */
    if (blockSize == 0U)
    {
        *pResult = F64_MIN;
        *pIndex = 0U;
        return;
    }

    /* Initialise index value to zero. */
    outIndex = 0U;

    /* Load first input value that acts as reference value for comparison */
    out = *pSrc++;

    /* Initialize blkCnt with the number of samples still to inspect */
    blkCnt = (blockSize - 1U);

    while (blkCnt > 0U)
    {
        /* Initialize maxVal to the next consecutive values one by one */
        maxVal = *pSrc++;

        /* compare for the maximum value */
        if (out < maxVal)
        {
            /* Update the maximum value and its index */
            out = maxVal;
            outIndex = blockSize - blkCnt;
        }

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Store the maximum value and its index into destination pointers */
    *pResult = out;
    *pIndex = outIndex;
}
/**
@} end of Max group
*/

View File

@@ -0,0 +1,144 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_f16.c
* Description: Maximum value of a floating-point vector without returning the index
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_no_idx_f16(
    const float16_t *pSrc,
    uint32_t   blockSize,
    float16_t *pResult)
{
    /*
     * Helium (MVE) implementation: stores the largest sample in *pResult.
     * The running maximum starts at F16_MIN, which is therefore the result
     * when no sample compares greater than it (including blockSize == 0).
     */
    f16x8_t   laneMax = vdupq_n_f16(F16_MIN);
    float16_t best = F16_MIN;
    float16_t sample;
    uint32_t  count;

    /* Vector part: 8 samples per iteration, one running maximum per lane. */
    for (count = blockSize >> 3U; count > 0U; count--)
    {
        laneMax = vmaxnmq(vldrhq_f16(pSrc), laneMax);
        pSrc += 8;
    }

    /* Collapse the eight lane maxima into the scalar maximum. */
    best = vmaxnmvq(best, laneMax);

    /* Scalar part: the last (blockSize % 8) samples. */
    for (count = blockSize & 7; count > 0U; count--)
    {
        sample = *pSrc++;

        if (best < sample)
        {
            best = sample;
        }
    }

    *pResult = best;
}
#else
void arm_max_no_idx_f16(
    const float16_t *pSrc,
    uint32_t   blockSize,
    float16_t *pResult)
{
    /*
     * Portable implementation: stores the largest sample in *pResult.
     * The running maximum starts at F16_MIN, which is therefore the result
     * when no sample compares greater than it (including blockSize == 0).
     */
    float16_t best = F16_MIN;
    float16_t sample;
    uint32_t  i;

    for (i = 0U; i < blockSize; i++)
    {
        sample = pSrc[i];

        /* Strict comparison: only a larger sample replaces the maximum. */
        if (best < sample)
        {
            best = sample;
        }
    }

    *pResult = best;
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View File

@@ -0,0 +1,138 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_f32.c
* Description: Maximum value of a floating-point vector without returning the index
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_MVEF)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include <limits.h>
#endif
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
void arm_max_no_idx_f32(
    const float32_t *pSrc,
    uint32_t   blockSize,
    float32_t *pResult)
{
    /*
     * Helium (MVE) implementation: stores the largest sample in *pResult.
     * The running maximum starts at F32_MIN, which is therefore the result
     * when no sample compares greater than it (including blockSize == 0).
     */
    f32x4_t   laneMax = vdupq_n_f32(F32_MIN);
    float32_t best = F32_MIN;
    float32_t sample;
    uint32_t  count;

    /* Vector part: 4 samples per iteration, one running maximum per lane. */
    for (count = blockSize >> 2U; count > 0U; count--)
    {
        laneMax = vmaxnmq(vldrwq_f32(pSrc), laneMax);
        pSrc += 4;
    }

    /* Collapse the four lane maxima into the scalar maximum. */
    best = vmaxnmvq(best, laneMax);

    /* Scalar part: the last (blockSize % 4) samples. */
    for (count = blockSize & 3; count > 0U; count--)
    {
        sample = *pSrc++;

        if (best < sample)
        {
            best = sample;
        }
    }

    *pResult = best;
}
#else
void arm_max_no_idx_f32(
    const float32_t *pSrc,
    uint32_t   blockSize,
    float32_t *pResult)
{
    /*
     * Portable implementation: stores the largest sample in *pResult.
     * The running maximum starts at F32_MIN, which is therefore the result
     * when no sample compares greater than it (including blockSize == 0).
     */
    float32_t best = F32_MIN;
    float32_t sample;
    uint32_t  i;

    for (i = 0U; i < blockSize; i++)
    {
        sample = pSrc[i];

        /* Strict comparison: only a larger sample replaces the maximum. */
        if (best < sample)
        {
            best = sample;
        }
    }

    *pResult = best;
}
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,75 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_f64.c
* Description: Maximum value of a floating-point vector without returning the index
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a floating-point vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
void arm_max_no_idx_f64(
    const float64_t *pSrc,
    uint32_t   blockSize,
    float64_t *pResult)
{
    /*
     * Stores the largest sample in *pResult.
     * The running maximum starts at F64_MIN, which is therefore the result
     * when no sample compares greater than it (including blockSize == 0).
     */
    float64_t best = F64_MIN;
    float64_t sample;
    uint32_t  i;

    for (i = 0U; i < blockSize; i++)
    {
        sample = pSrc[i];

        /* Strict comparison: only a larger sample replaces the maximum. */
        if (best < sample)
        {
            best = sample;
        }
    }

    *pResult = best;
}
/**
@} end of Max group
*/

View File

@@ -0,0 +1,142 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_q15.c
* Description: Maximum value of a q15 vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a q15 vector without index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_max_no_idx_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult)
{
    /*
     * Helium (MVE) implementation: stores the largest sample in *pResult.
     * The running maximum starts at Q15_MIN, which is therefore the result
     * for blockSize == 0.
     */
    int32_t  blkCnt;           /* loop counters */
    q15x8_t vecSrc;
    q15_t const *pSrcVec;
    q15x8_t curExtremValVec = vdupq_n_s16(Q15_MIN);
    q15_t maxValue = Q15_MIN;
    mve_pred16_t p0;

    pSrcVec = (q15_t const *) pSrc;

    /* Full vectors: 8 samples per iteration */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 8;
        /*
         * update per-lane max.
         */
        curExtremValVec = vmaxq(vecSrc, curExtremValVec);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }
    /*
     * tail
     * (will be merged thru tail predication)
     */
    blkCnt = blockSize & 7;
    if (blkCnt > 0)
    {
        /* Predicate enabling only the blkCnt lanes that hold real samples */
        p0 = vctp16q(blkCnt);
        /*
         * Predicated load: only the enabled lanes are read, so the
         * access does not run past the end of the input buffer.
         */
        vecSrc = vld1q_z(pSrcVec, p0);
        /*
         * Update the max of the enabled lanes; the other lanes keep
         * their current value
         */
        curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
    }
    /*
     * Get max value across the vector
     */
    maxValue = vmaxvq(maxValue, curExtremValVec);
    *pResult = maxValue;
}
#else
void arm_max_no_idx_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult)
{
  /*
   * Portable implementation: stores the largest sample in *pResult.
   */
  q15_t maxVal1, out;               /* Current sample and running maximum */
  uint32_t blkCnt;                  /* loop counter */

  /*
   * Empty input: without this guard the first sample would be read anyway
   * and (blockSize - 1U) would wrap around.  Q15_MIN is what the Helium
   * variant of this function stores for blockSize == 0.
   */
  if (blockSize == 0U)
  {
    *pResult = Q15_MIN;
    return;
  }

  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

  blkCnt = (blockSize - 1U);

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to the next consecutive values one by one */
    maxVal1 = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal1)
    {
      /* Update the maximum value */
      out = maxVal1;
    }

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Store the maximum value into destination pointer */
  *pResult = out;
}
#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,142 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_q31.c
* Description: Maximum value of a q31 vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a q31 vector without index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_max_no_idx_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult)
{
    /*
     * Helium (MVE) implementation: stores the largest sample in *pResult.
     * The running maximum starts at Q31_MIN, which is therefore the result
     * for blockSize == 0.
     */
    int32_t  blkCnt;           /* loop counters */
    q31x4_t vecSrc;
    q31_t const *pSrcVec;
    q31x4_t curExtremValVec = vdupq_n_s32(Q31_MIN);
    q31_t maxValue = Q31_MIN;
    mve_pred16_t p0;

    pSrcVec = (q31_t const *) pSrc;

    /* Full vectors: 4 samples per iteration */
    blkCnt = blockSize >> 2;
    while (blkCnt > 0)
    {
        vecSrc = vldrwq_s32(pSrcVec);
        pSrcVec += 4;
        /*
         * update per-lane max.
         */
        curExtremValVec = vmaxq(vecSrc, curExtremValVec);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }
    /*
     * tail
     * (will be merged thru tail predication)
     */
    blkCnt = blockSize & 3;
    if (blkCnt > 0)
    {
        /* Predicate enabling only the blkCnt lanes that hold real samples */
        p0 = vctp32q(blkCnt);
        /*
         * Predicated load: only the enabled lanes are read, so the
         * access does not run past the end of the input buffer.
         */
        vecSrc = vldrwq_z_s32(pSrcVec, p0);
        /*
         * Update the max of the enabled lanes; the other lanes keep
         * their current value
         */
        curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
    }
    /*
     * Get max value across the vector
     */
    maxValue = vmaxvq(maxValue, curExtremValVec);
    *pResult = maxValue;
}
#else
void arm_max_no_idx_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult)
{
  /*
   * Portable implementation: stores the largest sample in *pResult.
   */
  q31_t maxVal1, out;               /* Current sample and running maximum */
  uint32_t blkCnt;                  /* loop counter */

  /*
   * Empty input: without this guard the first sample would be read anyway
   * and (blockSize - 1U) would wrap around.  Q31_MIN is what the Helium
   * variant of this function stores for blockSize == 0.
   */
  if (blockSize == 0U)
  {
    *pResult = Q31_MIN;
    return;
  }

  /* Load first input value that acts as reference value for comparison */
  out = *pSrc++;

  blkCnt = (blockSize - 1U);

  while (blkCnt > 0U)
  {
    /* Initialize maxVal to the next consecutive values one by one */
    maxVal1 = *pSrc++;

    /* compare for the maximum value */
    if (out < maxVal1)
    {
      /* Update the maximum value */
      out = maxVal1;
    }

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Store the maximum value into destination pointer */
  *pResult = out;
}
#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,143 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_no_idx_q7.c
* Description: Maximum value of a q7 vector without returning the index
*
* $Date: 16 November 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a q7 vector without index.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
void arm_max_no_idx_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult)
{
    /*
     * Helium (MVE) implementation: stores the largest sample in *pResult.
     * The running maximum starts at Q7_MIN, which is therefore the result
     * for blockSize == 0.
     */
    int32_t  blkCnt;           /* loop counters */
    q7x16_t vecSrc;
    q7_t const *pSrcVec;
    q7x16_t curExtremValVec = vdupq_n_s8(Q7_MIN);
    q7_t maxValue = Q7_MIN;
    mve_pred16_t p0;

    pSrcVec = (q7_t const *) pSrc;

    /* Full vectors: 16 samples per iteration */
    blkCnt = blockSize >> 4;
    while (blkCnt > 0)
    {
        vecSrc = vld1q(pSrcVec);
        pSrcVec += 16;
        /*
         * update per-lane max.
         */
        curExtremValVec = vmaxq(vecSrc, curExtremValVec);
        /*
         * Decrement the blockSize loop counter
         */
        blkCnt--;
    }
    /*
     * tail
     * (will be merged thru tail predication)
     */
    blkCnt = blockSize & 0xF;
    if (blkCnt > 0)
    {
        /* Predicate enabling only the blkCnt lanes that hold real samples */
        p0 = vctp8q(blkCnt);
        /*
         * Predicated load: only the enabled lanes are read, so the
         * access does not run past the end of the input buffer.
         */
        vecSrc = vld1q_z(pSrcVec, p0);
        /*
         * Update the max of the enabled lanes; the other lanes keep
         * their current value
         */
        curExtremValVec = vmaxq_m(curExtremValVec, vecSrc, curExtremValVec, p0);
    }
    /*
     * Get max value across the vector
     */
    maxValue = vmaxvq(maxValue, curExtremValVec);
    *pResult = maxValue;
}
#else
/*
 * Portable C implementation.
 *
 * The first sample seeds the running maximum; every following sample
 * replaces it when strictly greater.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the maximum value
 */
void arm_max_no_idx_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult)
{
    const q7_t *cursor = pSrc;   /* read position in the input vector */
    q7_t        best;            /* largest value seen so far */
    uint32_t    remaining;       /* samples still to be compared */

    /* The first input value acts as the reference for the comparisons */
    best = *cursor++;

    for (remaining = blockSize - 1U; remaining != 0U; remaining--)
    {
        const q7_t candidate = *cursor++;

        /* Keep the larger of the two; equal values leave the result unchanged */
        best = (candidate > best) ? candidate : best;
    }

    /* Store the maximum value into the destination pointer */
    *pResult = best;
}
#endif /* #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,201 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_q15.c
* Description: Maximum value of a Q15 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a Q15 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation.
 *
 * Tracks, for each of the 8 lanes, the largest value seen so far and the
 * position at which it was seen. After the loop the lanes are reduced to
 * one maximum, and the smallest recorded position among the lanes holding
 * that maximum is reported.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the maximum value
 * pIndex    : receives the position of the maximum value
 *
 * NOTE(review): positions are tracked in 16-bit lanes (uint16x8_t), so
 * positions of 65536 and above cannot be represented here.
 * NOTE(review): the per-lane compare is "greater or equal", so a later
 * equal value in the same lane replaces the recorded position; on ties the
 * reported index is not necessarily the first occurrence.
 */
void arm_max_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult,
        uint32_t * pIndex)
{
    int32_t         blkCnt;                                /* loop counter */
    q15x8_t         extremValVec = vdupq_n_s16(Q15_MIN);   /* running per-lane maximum */
    q15_t           maxValue = Q15_MIN;
    uint16x8_t      indexVec;                              /* positions of the samples being loaded */
    uint16x8_t      extremIdxVec;
    mve_pred16_t    p0;
    /*
     * Per-lane position of the current maximum. It is only written through
     * predicated stores and later read in full, so it is zero-initialised
     * to keep never-selected lanes (e.g. blockSize < 8) well defined.
     */
    uint16_t        extremIdxArr[8] = { 0 };

    indexVec = vidupq_u16(0U, 1);

    blkCnt = blockSize;
    do {
        mve_pred16_t    p = vctp16q(blkCnt);               /* lanes holding valid samples */
        q15x8_t         extremIdxVal = vld1q_z_s16(pSrc, p);

        /*
         * Get current max per lane and current index per lane
         * when a max is selected
         */
        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);

        /* x | x == x: copies the new sample into the selected lanes only */
        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);

        /* store per-lane extrema indexes */
        vst1q_p_u16(extremIdxArr, indexVec, p0);

        indexVec += 8;
        pSrc += 8;
        blkCnt -= 8;
    }
    while (blkCnt > 0);

    /* Get max value across the vector */
    maxValue = vmaxvq(maxValue, extremValVec);

    /* set index for lower values to max possible index */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u16(extremIdxArr);
    indexVec = vpselq(extremIdxVec, vdupq_n_u16(blockSize - 1), p0);

    /* smallest recorded position among the lanes that hold the maximum */
    *pIndex = vminvq(blockSize - 1, indexVec);
    *pResult = maxValue;
}
#else
/*
 * Portable C implementation.
 *
 * The first sample seeds the running maximum at position 0. Each later
 * sample replaces it only when strictly greater, so the reported position
 * is that of the first occurrence of the maximum.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the maximum value
 * pIndex    : receives the position of the maximum value
 */
void arm_max_q15(
  const q15_t * pSrc,
        uint32_t blockSize,
        q15_t * pResult,
        uint32_t * pIndex)
{
    const q15_t *cursor = pSrc;   /* read position in the input vector */
    q15_t        best;            /* largest value seen so far */
    q15_t        candidate;       /* sample currently being compared */
    uint32_t     bestPos = 0U;    /* position of 'best' */
    uint32_t     remaining;       /* loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
    uint32_t     base = 0U;       /* position of the last sample consumed before the current group of four */
#endif

    /* The first input value acts as the reference for the comparisons */
    best = *cursor++;

#if defined (ARM_MATH_LOOPUNROLL)
    /* Unrolled part: four comparisons per iteration */
    for (remaining = (blockSize - 1U) >> 2U; remaining != 0U; remaining--)
    {
        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 1U;
        }

        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 2U;
        }

        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 3U;
        }

        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 4U;
        }

        base += 4U;
    }

    /* Samples left over after the unrolled part */
    remaining = (blockSize - 1U) % 4U;
#else
    /* Every sample after the first one is handled by the loop below */
    remaining = blockSize - 1U;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining != 0U; remaining--)
    {
        candidate = *cursor++;
        if (candidate > best)
        {
            /* 'remaining' counts down, so this is the position of 'candidate' */
            best = candidate;
            bestPos = blockSize - remaining;
        }
    }

    /* Store the maximum value and its index into the destination pointers */
    *pResult = best;
    *pIndex = bestPos;
}
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,202 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_q31.c
* Description: Maximum value of a Q31 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a Q31 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation.
 *
 * Tracks, for each of the 4 lanes, the largest value seen so far and the
 * position at which it was seen. After the loop the lanes are reduced to
 * one maximum, and the smallest recorded position among the lanes holding
 * that maximum is reported.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the maximum value
 * pIndex    : receives the position of the maximum value
 *
 * NOTE(review): the per-lane compare is "greater or equal", so a later
 * equal value in the same lane replaces the recorded position; on ties the
 * reported index is not necessarily the first occurrence.
 */
void arm_max_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult,
        uint32_t * pIndex)
{
    int32_t         blkCnt;                                /* loop counter */
    q31x4_t         extremValVec = vdupq_n_s32(Q31_MIN);   /* running per-lane maximum */
    q31_t           maxValue = Q31_MIN;
    uint32x4_t      indexVec;                              /* positions of the samples being loaded */
    uint32x4_t      extremIdxVec;
    mve_pred16_t    p0;
    /*
     * Per-lane position of the current maximum. It is only written through
     * predicated stores and later read in full, so it is zero-initialised
     * to keep never-selected lanes (e.g. blockSize < 4) well defined.
     */
    uint32_t        extremIdxArr[4] = { 0 };

    indexVec = vidupq_u32(0U, 1);

    blkCnt = blockSize;
    do {
        mve_pred16_t    p = vctp32q(blkCnt);               /* lanes holding valid samples */
        q31x4_t         extremIdxVal = vld1q_z_s32(pSrc, p);

        /*
         * Get current max per lane and current index per lane
         * when a max is selected
         */
        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);

        /* x | x == x: copies the new sample into the selected lanes only */
        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);

        /* store per-lane extrema indexes */
        vst1q_p_u32(extremIdxArr, indexVec, p0);

        indexVec += 4;
        pSrc += 4;
        blkCnt -= 4;
    }
    while (blkCnt > 0);

    /* Get max value across the vector */
    maxValue = vmaxvq(maxValue, extremValVec);

    /* set index for lower values to max possible index */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u32(extremIdxArr);
    indexVec = vpselq(extremIdxVec, vdupq_n_u32(blockSize - 1), p0);

    /* smallest recorded position among the lanes that hold the maximum */
    *pIndex = vminvq(blockSize - 1, indexVec);
    *pResult = maxValue;
}
#else
/*
 * Portable C implementation.
 *
 * The first sample seeds the running maximum at position 0. Each later
 * sample replaces it only when strictly greater, so the reported position
 * is that of the first occurrence of the maximum.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the maximum value
 * pIndex    : receives the position of the maximum value
 */
void arm_max_q31(
  const q31_t * pSrc,
        uint32_t blockSize,
        q31_t * pResult,
        uint32_t * pIndex)
{
    const q31_t *cursor = pSrc;   /* read position in the input vector */
    q31_t        best;            /* largest value seen so far */
    q31_t        candidate;       /* sample currently being compared */
    uint32_t     bestPos = 0U;    /* position of 'best' */
    uint32_t     remaining;       /* loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
    uint32_t     base = 0U;       /* position of the last sample consumed before the current group of four */
#endif

    /* The first input value acts as the reference for the comparisons */
    best = *cursor++;

#if defined (ARM_MATH_LOOPUNROLL)
    /* Unrolled part: four comparisons per iteration */
    for (remaining = (blockSize - 1U) >> 2U; remaining != 0U; remaining--)
    {
        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 1U;
        }

        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 2U;
        }

        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 3U;
        }

        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 4U;
        }

        base += 4U;
    }

    /* Samples left over after the unrolled part */
    remaining = (blockSize - 1U) % 4U;
#else
    /* Every sample after the first one is handled by the loop below */
    remaining = blockSize - 1U;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining != 0U; remaining--)
    {
        candidate = *cursor++;
        if (candidate > best)
        {
            /* 'remaining' counts down, so this is the position of 'candidate' */
            best = candidate;
            bestPos = blockSize - remaining;
        }
    }

    /* Store the maximum value and its index into the destination pointers */
    *pResult = best;
    *pIndex = bestPos;
}
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,256 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_max_q7.c
* Description: Maximum value of a Q7 vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions.h"
/**
@ingroup groupStats
*/
/**
@addtogroup Max
@{
*/
/**
@brief Maximum value of a Q7 vector.
@param[in] pSrc points to the input vector
@param[in] blockSize number of samples in input vector
@param[out] pResult maximum value returned here
@param[out] pIndex index of maximum value returned here
@return none
*/
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) helper: maximum and its position for a short q7 block.
 *
 * Tracks, for each of the 16 lanes, the largest value seen so far and the
 * position at which it was seen, then reduces the lanes to one maximum and
 * reports the smallest recorded position among the lanes holding it.
 *
 * pSrc      : input block
 * blockSize : number of samples in the block
 * pResult   : receives the maximum value
 * pIndex    : receives the position of the maximum inside the block
 *
 * NOTE(review): positions are tracked in 8-bit lanes (uint8x16_t), so this
 * helper can only represent positions 0..255; callers are expected to pass
 * at most 256 samples.
 * NOTE(review): the per-lane compare is "greater or equal", so a later
 * equal value in the same lane replaces the recorded position; on ties the
 * reported index is not necessarily the first occurrence.
 */
static void arm_small_blk_max_q7(
    const q7_t * pSrc,
    uint16_t blockSize,
    q7_t * pResult,
    uint32_t * pIndex)
{
    int32_t        blkCnt;                               /* loop counter */
    q7x16_t        extremValVec = vdupq_n_s8(Q7_MIN);    /* running per-lane maximum */
    q7_t           maxValue = Q7_MIN;
    uint8x16_t     indexVec;                             /* positions of the samples being loaded */
    uint8x16_t     extremIdxVec;
    mve_pred16_t   p0;
    /*
     * Per-lane position of the current maximum. It is only written through
     * predicated stores and later read in full, so it is zero-initialised
     * to keep never-selected lanes (blockSize < 16, including an empty
     * block) well defined.
     */
    uint8_t        extremIdxArr[16] = { 0 };

    indexVec = vidupq_u8(0U, 1);

    blkCnt = blockSize;
    do {
        mve_pred16_t    p = vctp8q(blkCnt);              /* lanes holding valid samples */
        q7x16_t         extremIdxVal = vld1q_z_s8(pSrc, p);

        /*
         * Get current max per lane and current index per lane
         * when a max is selected
         */
        p0 = vcmpgeq_m(extremIdxVal, extremValVec, p);

        /* x | x == x: copies the new sample into the selected lanes only */
        extremValVec = vorrq_m(extremValVec, extremIdxVal, extremIdxVal, p0);

        /* store per-lane extrema indexes */
        vst1q_p_u8(extremIdxArr, indexVec, p0);

        indexVec += 16;
        pSrc += 16;
        blkCnt -= 16;
    }
    while (blkCnt > 0);

    /* Get max value across the vector */
    maxValue = vmaxvq(maxValue, extremValVec);

    /* set index for lower values to max possible index */
    p0 = vcmpgeq(extremValVec, maxValue);
    extremIdxVec = vld1q_u8(extremIdxArr);
    indexVec = vpselq(extremIdxVec, vdupq_n_u8(blockSize - 1), p0);

    /* smallest recorded position among the lanes that hold the maximum */
    *pIndex = vminvq_u8(blockSize - 1, indexVec);
    *pResult = maxValue;
}
/*
 * Helium (MVE) implementation.
 *
 * The input is processed in sub-blocks of 256 samples by
 * arm_small_blk_max_q7(), which reports positions relative to the start of
 * the sub-block. This function keeps the best sub-block result and rebuilds
 * the position in the whole vector from it.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the maximum value
 * pIndex    : receives the position of the maximum value
 */
void arm_max_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
    int32_t   totalSize = blockSize;              /* samples not processed yet */
    const uint16_t sub_blk_sz = UINT8_MAX + 1;    /* 256 samples per sub-block */

    if (totalSize <= sub_blk_sz)
    {
        /* Small input: a single call covers everything */
        arm_small_blk_max_q7(pSrc, blockSize, pResult, pIndex);
    }
    else
    {
        uint32_t  curIdx = 0;             /* number of the sub-block being processed */
        q7_t      curBlkExtr = Q7_MIN;    /* best maximum found so far */
        uint32_t  curBlkPos = 0;          /* its position inside its sub-block */
        uint32_t  curBlkIdx = 0;          /* number of the sub-block it belongs to */

        /*
         * process blocks of 256 elts
         */
        while (totalSize >= sub_blk_sz)
        {
            const q7_t *curSrc = pSrc;

            arm_small_blk_max_q7(curSrc, sub_blk_sz, pResult, pIndex);
            if (*pResult > curBlkExtr)
            {
                /*
                 * update partial extrema
                 */
                curBlkExtr = *pResult;
                curBlkPos = *pIndex;
                curBlkIdx = curIdx;
            }
            curIdx++;
            pSrc += sub_blk_sz;
            totalSize -= sub_blk_sz;
        }

        /*
         * remainder: skipped when blockSize is an exact multiple of the
         * sub-block size, so the helper is never run on an empty block
         */
        if (totalSize > 0)
        {
            arm_small_blk_max_q7(pSrc, totalSize, pResult, pIndex);
            if (*pResult > curBlkExtr)
            {
                curBlkExtr = *pResult;
                curBlkPos = *pIndex;
                curBlkIdx = curIdx;
            }
        }

        /* position in the whole vector = sub-block offset + position inside it */
        *pIndex = curBlkIdx * sub_blk_sz + curBlkPos;
        *pResult = curBlkExtr;
    }
}
#else
/*
 * Portable C implementation.
 *
 * The first sample seeds the running maximum at position 0. Each later
 * sample replaces it only when strictly greater, so the reported position
 * is that of the first occurrence of the maximum.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the maximum value
 * pIndex    : receives the position of the maximum value
 */
void arm_max_q7(
  const q7_t * pSrc,
        uint32_t blockSize,
        q7_t * pResult,
        uint32_t * pIndex)
{
    const q7_t *cursor = pSrc;    /* read position in the input vector */
    q7_t        best;             /* largest value seen so far */
    q7_t        candidate;        /* sample currently being compared */
    uint32_t    bestPos = 0U;     /* position of 'best' */
    uint32_t    remaining;        /* loop counter */
#if defined (ARM_MATH_LOOPUNROLL)
    uint32_t    base = 0U;        /* position of the last sample consumed before the current group of four */
#endif

    /* The first input value acts as the reference for the comparisons */
    best = *cursor++;

#if defined (ARM_MATH_LOOPUNROLL)
    /* Unrolled part: four comparisons per iteration */
    for (remaining = (blockSize - 1U) >> 2U; remaining != 0U; remaining--)
    {
        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 1U;
        }

        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 2U;
        }

        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 3U;
        }

        candidate = *cursor++;
        if (candidate > best)
        {
            best = candidate;
            bestPos = base + 4U;
        }

        base += 4U;
    }

    /* Samples left over after the unrolled part */
    remaining = (blockSize - 1U) % 4U;
#else
    /* Every sample after the first one is handled by the loop below */
    remaining = blockSize - 1U;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining != 0U; remaining--)
    {
        candidate = *cursor++;
        if (candidate > best)
        {
            /* 'remaining' counts down, so this is the position of 'candidate' */
            best = candidate;
            bestPos = blockSize - remaining;
        }
    }

    /* Store the maximum value and its index into the destination pointers */
    *pResult = best;
    *pIndex = bestPos;
}
#endif /* defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of Max group
*/

View File

@@ -0,0 +1,152 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_mean_f16.c
* Description: Mean value of a floating-point vector
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/statistics_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/**
@ingroup groupStats
*/
/**
@defgroup mean Mean
Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
The following algorithm is used:
<pre>
Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
</pre>
There are separate functions for floating-point, Q31, Q15, and Q7 data types.
*/
/**
@addtogroup mean
@{
*/
/**
@brief Mean value of a floating-point vector.
@param[in] pSrc points to the input vector.
@param[in] blockSize number of samples in input vector.
@param[out] pResult mean value returned here.
@return none
*/
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
/*
 * Helium (MVE) implementation.
 *
 * Accumulates the samples into 8 per-lane partial sums using tail
 * predication, adds the lanes together and divides by blockSize.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the mean value
 */
void arm_mean_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    const float16_t *cursor = pSrc;                     /* read position in the input vector */
    f16x8_t          laneSums = vdupq_n_f16(0.0f16);    /* per-lane partial sums */
    int32_t          remaining = blockSize;             /* samples still to be accumulated */

    do {
        /* Lanes that still hold valid samples in this step */
        const mve_pred16_t active = vctp16q(remaining);
        const f16x8_t      chunk = vldrhq_z_f16(cursor, active);

        /* Add the active lanes only; the others keep their partial sum */
        laneSums = vaddq_m_f16(laneSums, laneSums, chunk, active);

        cursor += 8;
        remaining -= 8;
    }
    while (remaining > 0);

    /* Sum across the lanes, then divide by the number of samples */
    *pResult = vecAddAcrossF16Mve(laneSums) / (float16_t) blockSize;
}
#else
/*
 * Portable C implementation.
 *
 * Adds all samples one after the other into a float16_t accumulator and
 * divides the total by blockSize.
 *
 * pSrc      : input vector
 * blockSize : number of samples in the input vector
 * pResult   : receives the mean value
 */
void arm_mean_f16(
  const float16_t * pSrc,
        uint32_t blockSize,
        float16_t * pResult)
{
    const float16_t *cursor = pSrc;   /* read position in the input vector */
    float16_t        acc = 0.0f;      /* running sum of the samples */
    uint32_t         remaining;       /* loop counter */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    /* Unrolled part: four additions per iteration, in input order */
    for (remaining = blockSize >> 2U; remaining != 0U; remaining--)
    {
        acc += *cursor++;
        acc += *cursor++;
        acc += *cursor++;
        acc += *cursor++;
    }

    /* Samples left over after the unrolled part */
    remaining = blockSize % 0x4U;
#else
    /* Every sample is handled by the loop below */
    remaining = blockSize;
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    for (; remaining != 0U; remaining--)
    {
        acc += *cursor++;
    }

    /* mean = (A[0] + A[1] + ... + A[blockSize-1]) / blockSize */
    *pResult = (acc / (float16_t)blockSize);
}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
/**
@} end of mean group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

Some files were not shown because too many files have changed in this diff Show More