1 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
2 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
/*
 * Multiply each float in inputVector by scalar and write the product to
 * outputVector as a 32-bit signed integer, processing eight elements per
 * iteration with 256-bit AVX intrinsics and finishing the remainder in
 * plain C.
 *
 * outputVector: destination array; the scalar tail writes indices up to
 *               num_points - 1
 * inputVector:  source floats, read with _mm256_load_ps (the aligned-load
 *               intrinsic)
 * scalar:       factor applied to every element before conversion
 * num_points:   number of elements to convert
 *
 * NOTE(review): inputVal1 and intInputVal1 are used below but no declaration
 * is visible in this excerpt, and the closing braces of the loops and of the
 * function are not visible either -- confirm against the complete file.
 */
17 static inline void volk_32f_s32f_convert_32i_a_avx(
int32_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
18 unsigned int number = 0;
/* Number of full 8-element groups handled by the vector loop. */
20 const unsigned int eighthPoints = num_points / 8;
22 const float* inputVectorPtr = (
const float*)inputVector;
23 int32_t* outputVectorPtr = outputVector;
/* Broadcast the scale factor into all eight lanes once, outside the loop. */
24 __m256 vScalar = _mm256_set1_ps(scalar);
28 for(;number < eighthPoints; number++){
/* Load eight floats and step the input pointer past them. */
29 inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
/* Scale, then convert packed floats to packed int32 in one step. */
31 intInputVal1 = _mm256_cvtps_epi32(_mm256_mul_ps(inputVal1, vScalar));
/*
 * NOTE(review): no statement advancing outputVectorPtr is visible after this
 * store; as shown, every iteration would write to the same eight slots.
 * Verify that the increment exists in the complete file.
 */
33 _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
/* Resume at the first element not covered by a full 8-element group. */
37 number = eighthPoints * 8;
38 for(; number < num_points; number++){
/*
 * Tail elements use a C cast, which truncates toward zero.
 * NOTE(review): _mm256_cvtps_epi32 rounds according to the current SSE
 * rounding mode per Intel's documentation, so the vector and tail paths may
 * disagree on fractional values -- confirm whether that is intended.
 */
39 outputVector[number] = (
int32_t)(inputVector[number] * scalar);
45 #include <emmintrin.h>
/*
 * Multiply each float in inputVector by scalar and write the product to
 * outputVector as a 32-bit signed integer, processing four elements per
 * iteration with SSE2 intrinsics and finishing the remainder in plain C.
 *
 * outputVector: destination array; the scalar tail writes indices up to
 *               num_points - 1
 * inputVector:  source floats, read with _mm_load_ps (the aligned-load
 *               intrinsic)
 * scalar:       factor applied to every element before conversion
 * num_points:   number of elements to convert
 *
 * NOTE(review): inputVal1 and intInputVal1 are used below but no declaration
 * is visible in this excerpt, and the closing braces of the loops and of the
 * function are not visible either -- confirm against the complete file.
 */
53 static inline void volk_32f_s32f_convert_32i_a_sse2(
int32_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
54 unsigned int number = 0;
/* Number of full 4-element groups handled by the vector loop. */
56 const unsigned int quarterPoints = num_points / 4;
58 const float* inputVectorPtr = (
const float*)inputVector;
59 int32_t* outputVectorPtr = outputVector;
/* Broadcast the scale factor into all four lanes once, outside the loop. */
60 __m128 vScalar = _mm_set_ps1(scalar);
64 for(;number < quarterPoints; number++){
/* Load four floats and step the input pointer past them. */
65 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
/* Scale, then convert packed floats to packed int32 in one step. */
67 intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
/*
 * NOTE(review): no statement advancing outputVectorPtr is visible after this
 * store; as shown, every iteration would write to the same four slots.
 * Verify that the increment exists in the complete file.
 */
69 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
/* Resume at the first element not covered by a full 4-element group. */
73 number = quarterPoints * 4;
74 for(; number < num_points; number++){
/*
 * Tail elements use a C cast, which truncates toward zero.
 * NOTE(review): _mm_cvtps_epi32 rounds according to the current SSE rounding
 * mode per Intel's documentation, so the vector and tail paths may disagree
 * on fractional values -- confirm whether that is intended.
 */
75 outputVector[number] = (
int32_t)(inputVector[number] * scalar);
81 #include <xmmintrin.h>
/*
 * Multiply each float in inputVector by scalar and write the product to
 * outputVector as a 32-bit signed integer. The multiplication is done four
 * elements at a time with SSE intrinsics; the float-to-integer conversion is
 * done element by element with C casts after spilling the products to a
 * float buffer.
 *
 * outputVector: destination array; receives one int32_t per input element
 * inputVector:  source floats, read with _mm_load_ps (the aligned-load
 *               intrinsic)
 * scalar:       factor applied to every element before conversion
 * num_points:   number of elements to convert
 *
 * NOTE(review): ret and outputFloatBuffer are used below but no declaration
 * is visible in this excerpt; _mm_store_ps is the aligned-store intrinsic, so
 * outputFloatBuffer presumably needs 16-byte alignment -- confirm. The
 * closing braces of the loops and of the function are not visible either.
 */
89 static inline void volk_32f_s32f_convert_32i_a_sse(
int32_t* outputVector,
const float* inputVector,
const float scalar,
unsigned int num_points){
90 unsigned int number = 0;
/* Number of full 4-element groups handled by the vector loop. */
92 const unsigned int quarterPoints = num_points / 4;
94 const float* inputVectorPtr = (
const float*)inputVector;
95 int32_t* outputVectorPtr = outputVector;
/* Broadcast the scale factor into all four lanes once, outside the loop. */
96 __m128 vScalar = _mm_set_ps1(scalar);
101 for(;number < quarterPoints; number++){
/*
 * NOTE(review): no statement advancing inputVectorPtr is visible in this
 * loop; as shown, every iteration would reload the same four floats.
 * Verify that the increment exists in the complete file.
 */
102 ret = _mm_load_ps(inputVectorPtr);
105 ret = _mm_mul_ps(ret, vScalar);
/* Spill the four scaled floats so each can be converted with a C cast. */
107 _mm_store_ps(outputFloatBuffer, ret);
/* Each cast truncates toward zero; the output pointer advances per element. */
108 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[0]);
109 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[1]);
110 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[2]);
111 *outputVectorPtr++ = (
int32_t)(outputFloatBuffer[3]);
/* Resume at the first element not covered by a full 4-element group. */
114 number = quarterPoints * 4;
115 for(; number < num_points; number++){
/* Same truncating conversion as the vector loop above. */
116 outputVector[number] = (
int32_t)(inputVector[number] * scalar);
121 #ifdef LV_HAVE_GENERIC
/*
 * Portable C implementation: multiply each float in inputVector by scalar
 * and store the product in outputVector as a 32-bit signed integer.
 *
 * In-range products are truncated toward zero (plain C cast). Products that
 * do not fit in an int32_t saturate to INT32_MAX / INT32_MIN, and a NaN
 * product is stored as 0; casting such values directly would be undefined
 * behaviour in C.
 *
 * outputVector: destination array, at least num_points elements
 * inputVector:  source array, at least num_points elements
 * scalar:       factor applied to every element before conversion
 * num_points:   number of elements to convert; 0 leaves outputVector untouched
 */
static inline void volk_32f_s32f_convert_32i_a_generic(
    int32_t* outputVector,
    const float* inputVector,
    const float scalar,
    unsigned int num_points)
{
    /* 2^31 is exactly representable as a float, whereas INT32_MAX is not. */
    const float upperLimit = 2147483648.0f;
    const float lowerLimit = -2147483648.0f;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        const float scaled = inputVector[number] * scalar;

        if (scaled != scaled) {
            /* NaN compares unequal to itself; it has no integer equivalent. */
            outputVector[number] = 0;
        } else if (scaled >= upperLimit) {
            outputVector[number] = INT32_MAX;
        } else if (scaled <= lowerLimit) {
            outputVector[number] = INT32_MIN;
        } else {
            outputVector[number] = (int32_t)scaled;
        }
    }
}