56 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
57 #define INCLUDED_volk_32f_s32f_multiply_32f_u_H
62 #ifdef LV_HAVE_GENERIC
/*
 * Portable reference kernel: writes aVector[i] * scalar into cVector[i]
 * for every i in [0, num_points).
 *
 * cVector    - output buffer, must hold at least num_points floats
 * aVector    - input buffer of num_points floats
 * scalar     - factor applied to every input element
 * num_points - number of elements to process (0 is a no-op)
 */
static inline void volk_32f_s32f_multiply_32f_generic(float* cVector,
                                                      const float* aVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    for (unsigned int number = 0; number < num_points; number++) {
        *cVector++ = (*aVector++) * scalar;
    }
}
75 #include <xmmintrin.h>
/*
 * SSE kernel for unaligned buffers: cVector[i] = aVector[i] * scalar.
 *
 * Processes four floats per iteration with unaligned load/store, then
 * finishes the remaining (num_points % 4) elements one at a time.
 *
 * cVector    - output buffer, must hold at least num_points floats
 * aVector    - input buffer of num_points floats
 * scalar     - factor applied to every input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    /* Broadcast the scalar into all four lanes once, outside the loop. */
    const __m128 bVal = _mm_set_ps1(scalar);
    for (unsigned int number = 0; number < quarterPoints; number++) {
        __m128 aVal = _mm_loadu_ps(aPtr);
        __m128 cVal = _mm_mul_ps(aVal, bVal);
        _mm_storeu_ps(cPtr, cVal);

        /* Step to the next group of four; the scalar tail below relies on
         * both pointers sitting just past the last vectorized element. */
        aPtr += 4;
        cPtr += 4;
    }

    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
106 #include <immintrin.h>
/*
 * AVX kernel for unaligned buffers: cVector[i] = aVector[i] * scalar.
 *
 * Processes eight floats per iteration with unaligned load/store, then
 * finishes the remaining (num_points % 8) elements one at a time.
 *
 * cVector    - output buffer, must hold at least num_points floats
 * aVector    - input buffer of num_points floats
 * scalar     - factor applied to every input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    /* Broadcast the scalar into all eight lanes once, outside the loop. */
    const __m256 bVal = _mm256_set1_ps(scalar);
    for (unsigned int number = 0; number < eighthPoints; number++) {
        __m256 aVal = _mm256_loadu_ps(aPtr);
        __m256 cVal = _mm256_mul_ps(aVal, bVal);
        _mm256_storeu_ps(cPtr, cVal);

        /* Step to the next group of eight; the scalar tail below relies on
         * both pointers sitting just past the last vectorized element. */
        aPtr += 8;
        cPtr += 8;
    }

    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
136 #ifdef LV_HAVE_RISCV64
/*
 * Prototype for the SiFive U74 kernel, which is defined outside this header.
 * The parameter list mirrors the other kernels in this file:
 * (output, input, scalar factor, element count).
 */
extern void volk_32f_s32f_multiply_32f_sifive_u74(float* cVector,
                                                  const float* aVector,
                                                  const float scalar,
                                                  unsigned int num_points);
147 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
148 #define INCLUDED_volk_32f_s32f_multiply_32f_a_H
150 #include <inttypes.h>
154 #include <xmmintrin.h>
/*
 * SSE kernel for aligned buffers: cVector[i] = aVector[i] * scalar.
 *
 * Uses the aligned load/store intrinsics (_mm_load_ps / _mm_store_ps), so
 * both cVector and aVector must be 16-byte aligned. Processes four floats
 * per iteration, then finishes the remaining (num_points % 4) elements one
 * at a time.
 *
 * cVector    - 16-byte aligned output buffer, at least num_points floats
 * aVector    - 16-byte aligned input buffer of num_points floats
 * scalar     - factor applied to every input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    /* Broadcast the scalar into all four lanes once, outside the loop. */
    const __m128 bVal = _mm_set_ps1(scalar);
    for (unsigned int number = 0; number < quarterPoints; number++) {
        __m128 aVal = _mm_load_ps(aPtr);
        __m128 cVal = _mm_mul_ps(aVal, bVal);
        _mm_store_ps(cPtr, cVal);

        /* Advancing by four floats (16 bytes) keeps both pointers aligned
         * and leaves them positioned for the scalar tail below. */
        aPtr += 4;
        cPtr += 4;
    }

    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
185 #include <immintrin.h>
/*
 * AVX kernel for aligned buffers: cVector[i] = aVector[i] * scalar.
 *
 * Uses the aligned load/store intrinsics (_mm256_load_ps / _mm256_store_ps),
 * so both cVector and aVector must be 32-byte aligned. Processes eight
 * floats per iteration, then finishes the remaining (num_points % 8)
 * elements one at a time.
 *
 * cVector    - 32-byte aligned output buffer, at least num_points floats
 * aVector    - 32-byte aligned input buffer of num_points floats
 * scalar     - factor applied to every input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    /* Broadcast the scalar into all eight lanes once, outside the loop. */
    const __m256 bVal = _mm256_set1_ps(scalar);
    for (unsigned int number = 0; number < eighthPoints; number++) {
        __m256 aVal = _mm256_load_ps(aPtr);
        __m256 cVal = _mm256_mul_ps(aVal, bVal);
        _mm256_store_ps(cPtr, cVal);

        /* Advancing by eight floats (32 bytes) keeps both pointers aligned
         * and leaves them positioned for the scalar tail below. */
        aPtr += 8;
        cPtr += 8;
    }

    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
216 #include <arm_neon.h>
/*
 * NEON kernel: cVector[i] = aVector[i] * scalar.
 *
 * Processes four floats per iteration with vld1q_f32 / vmulq_n_f32 /
 * vst1q_f32, then finishes the remaining (num_points % 4) elements one at
 * a time.
 *
 * cVector    - output buffer, must hold at least num_points floats
 * aVector    - input buffer of num_points floats
 * scalar     - factor applied to every input element
 * num_points - number of elements to process
 */
static inline void volk_32f_s32f_multiply_32f_u_neon(float* cVector,
                                                     const float* aVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    const float* inputPtr = aVector;
    float* outputPtr = cVector;

    for (unsigned int number = 0; number < quarterPoints; number++) {
        float32x4_t aVal = vld1q_f32(inputPtr);
        float32x4_t cVal = vmulq_n_f32(aVal, scalar);
        vst1q_f32(outputPtr, cVal);

        /* Step to the next group of four; the scalar tail below relies on
         * both pointers sitting just past the last vectorized element. */
        inputPtr += 4;
        outputPtr += 4;
    }

    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        *outputPtr++ = (*inputPtr++) * scalar;
    }
}
/* Declaration of the ORC kernel, which is defined outside this header.
 * NOTE(review): only the first parameter (dst) is visible here; the rest of
 * the parameter list is cut off. The call site in
 * volk_32f_s32f_multiply_32f_u_orc passes (cVector, aVector, scalar,
 * num_points) — confirm the remaining parameter types against the actual
 * definition before restoring this prototype. */
245 extern void volk_32f_s32f_multiply_32f_a_orc_impl(
float* dst,
/*
 * ORC entry point: forwards all arguments unchanged to
 * volk_32f_s32f_multiply_32f_a_orc_impl, which does the actual work.
 *
 * cVector    - output buffer
 * aVector    - input buffer
 * scalar     - factor passed through to the ORC implementation
 * num_points - element count passed through to the ORC implementation
 */
static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
static void volk_32f_s32f_multiply_32f_a_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:187
static void volk_32f_s32f_multiply_32f_u_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:77
static void volk_32f_s32f_multiply_32f_u_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:108
static void volk_32f_s32f_multiply_32f_a_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:156
static void volk_32f_s32f_multiply_32f_generic(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:63
static void volk_32f_s32f_multiply_32f_u_neon(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_multiply_32f.h:218