Vector Optimized Library of Kernels  3.1.2
Architecture-tuned implementations of math kernels
volk_32f_x2_powpuppet_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2023 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
10 
11 #ifndef INCLUDED_volk_32f_x2_powpuppet_32f_H
12 #define INCLUDED_volk_32f_x2_powpuppet_32f_H
13 
14 #include <math.h>
15 #include <volk/volk.h>
17 
/*
 * Build a freshly allocated copy of `input` in which every element is
 * replaced by its absolute value, and every element whose absolute value
 * compares equal to zero is replaced by 2.0f.
 *
 * input      - array of at least num_points floats; it is only read.
 * num_points - number of elements to process.
 *
 * Returns a buffer obtained from volk_malloc() using volk_get_alignment(),
 * or a null pointer if that allocation fails. The caller owns the buffer
 * and releases it with volk_free().
 */
static inline float* make_positive(const float* input, unsigned int num_points)
{
    float* output =
        (float*)volk_malloc(num_points * sizeof(float), volk_get_alignment());
    if (!output) {
        /* Allocation failed: report it instead of writing through a null pointer. */
        return output;
    }

    for (unsigned int i = 0; i < num_points; i++) {
        const float magnitude = fabsf(input[i]);
        /* NaN compares unequal to zero, so it is stored unchanged. */
        output[i] = (magnitude == 0) ? 2.0f : magnitude;
    }
    return output;
}
29 
30 #if LV_HAVE_AVX2 && LV_HAVE_FMA
/*
 * Puppet for volk_32f_x2_pow_32f_a_avx2_fma: aVector is first run through
 * make_positive() and the resulting temporary buffer is handed to the kernel
 * in its place. cVector, bVector and num_points are forwarded untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_a_avx2_fma(float* cVector,
                                                        const float* bVector,
                                                        const float* aVector,
                                                        unsigned int num_points)
{
    /* Temporary, owned by this function for the duration of the call. */
    float* const sanitizedA = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_a_avx2_fma(cVector, bVector, sanitizedA, num_points);

    volk_free(sanitizedA);
}
40 #endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for aligned */
41 
42 #ifdef LV_HAVE_AVX2
/*
 * Puppet for volk_32f_x2_pow_32f_a_avx2: aVector is first run through
 * make_positive() and the resulting temporary buffer is handed to the kernel
 * in its place. cVector, bVector and num_points are forwarded untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_a_avx2(float* cVector,
                                                    const float* bVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    /* Temporary, owned by this function for the duration of the call. */
    float* const sanitizedA = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_a_avx2(cVector, bVector, sanitizedA, num_points);

    volk_free(sanitizedA);
}
52 #endif /* LV_HAVE_AVX2 for aligned */
53 
54 #ifdef LV_HAVE_SSE4_1
/*
 * Puppet for volk_32f_x2_pow_32f_a_sse4_1: aVector is first run through
 * make_positive() and the resulting temporary buffer is handed to the kernel
 * in its place. cVector, bVector and num_points are forwarded untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_a_sse4_1(float* cVector,
                                                      const float* bVector,
                                                      const float* aVector,
                                                      unsigned int num_points)
{
    /* Temporary, owned by this function for the duration of the call. */
    float* const sanitizedA = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_a_sse4_1(cVector, bVector, sanitizedA, num_points);

    volk_free(sanitizedA);
}
64 #endif /* LV_HAVE_SSE4_1 for aligned */
65 
66 #ifdef LV_HAVE_GENERIC
/*
 * Puppet for volk_32f_x2_pow_32f_generic: aVector is first run through
 * make_positive() and the resulting temporary buffer is handed to the kernel
 * in its place. cVector, bVector and num_points are forwarded untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_generic(float* cVector,
                                                     const float* bVector,
                                                     const float* aVector,
                                                     unsigned int num_points)
{
    /* Temporary, owned by this function for the duration of the call. */
    float* const sanitizedA = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_generic(cVector, bVector, sanitizedA, num_points);

    volk_free(sanitizedA);
}
76 #endif /* LV_HAVE_GENERIC */
77 
78 #ifdef LV_HAVE_SSE4_1
/*
 * Puppet for volk_32f_x2_pow_32f_u_sse4_1: aVector is first run through
 * make_positive() and the resulting temporary buffer is handed to the kernel
 * in its place. cVector, bVector and num_points are forwarded untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_u_sse4_1(float* cVector,
                                                      const float* bVector,
                                                      const float* aVector,
                                                      unsigned int num_points)
{
    /* Temporary, owned by this function for the duration of the call. */
    float* const sanitizedA = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_u_sse4_1(cVector, bVector, sanitizedA, num_points);

    volk_free(sanitizedA);
}
88 #endif /* LV_HAVE_SSE4_1 for unaligned */
89 
90 #if LV_HAVE_AVX2 && LV_HAVE_FMA
/*
 * Puppet for volk_32f_x2_pow_32f_u_avx2_fma: aVector is first run through
 * make_positive() and the resulting temporary buffer is handed to the kernel
 * in its place. cVector, bVector and num_points are forwarded untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_u_avx2_fma(float* cVector,
                                                        const float* bVector,
                                                        const float* aVector,
                                                        unsigned int num_points)
{
    /* Temporary, owned by this function for the duration of the call. */
    float* const sanitizedA = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_u_avx2_fma(cVector, bVector, sanitizedA, num_points);

    volk_free(sanitizedA);
}
100 #endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for unaligned */
101 
102 #ifdef LV_HAVE_AVX2
/*
 * Puppet for volk_32f_x2_pow_32f_u_avx2: aVector is first run through
 * make_positive() and the resulting temporary buffer is handed to the kernel
 * in its place. cVector, bVector and num_points are forwarded untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_u_avx2(float* cVector,
                                                    const float* bVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    /* Temporary, owned by this function for the duration of the call. */
    float* const sanitizedA = make_positive(aVector, num_points);

    volk_32f_x2_pow_32f_u_avx2(cVector, bVector, sanitizedA, num_points);

    volk_free(sanitizedA);
}
112 #endif /* LV_HAVE_AVX2 for unaligned */
113 
114 #endif /* INCLUDED_volk_32f_x2_powpuppet_32f_H */
size_t volk_get_alignment(void)
Get the machine alignment in bytes.
Definition: volk.tmpl.c:90
static void volk_32f_x2_pow_32f_generic(float *cVector, const float *bVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_x2_pow_32f.h:523
static void volk_32f_x2_powpuppet_32f_generic(float *cVector, const float *bVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_x2_powpuppet_32f.h:67
static float * make_positive(const float *input, unsigned int num_points)
Definition: volk_32f_x2_powpuppet_32f.h:18
for i
Definition: volk_config_fixed.tmpl.h:13
__VOLK_DECL_BEGIN VOLK_API void * volk_malloc(size_t size, size_t alignment)
Allocate size bytes of data aligned to alignment.
Definition: volk_malloc.c:38
VOLK_API void volk_free(void *aptr)
Frees memory allocated by volk_malloc.
Definition: volk_malloc.c:70