40 #ifndef INCLUDED_volk_16i_max_star_16i_a_H
41 #define INCLUDED_volk_16i_max_star_16i_a_H
48 #include <emmintrin.h>
49 #include <tmmintrin.h>
50 #include <xmmintrin.h>
55 const unsigned int num_bytes = num_points * 2;
57 short candidate = src0[0];
59 __m128i xmm0, xmm1, xmm3, xmm4, xmm5, xmm6;
63 p_src0 = (__m128i*)src0;
65 int bound = num_bytes >> 4;
66 int leftovers = (num_bytes >> 1) & 7;
70 xmm1 = _mm_setzero_si128();
71 xmm0 = _mm_setzero_si128();
74 xmm0 = _mm_shuffle_epi8(xmm0, xmm1);
76 for (
i = 0;
i < bound; ++
i) {
77 xmm1 = _mm_load_si128(p_src0);
81 xmm3 = _mm_cmpgt_epi16(xmm0, xmm1);
82 xmm4 = _mm_cmpeq_epi16(xmm0, xmm1);
83 xmm5 = _mm_cmpgt_epi16(xmm1, xmm0);
85 xmm6 = _mm_xor_si128(xmm4, xmm5);
87 xmm3 = _mm_and_si128(xmm3, xmm0);
88 xmm4 = _mm_and_si128(xmm6, xmm1);
90 xmm0 = _mm_add_epi16(xmm3, xmm4);
93 _mm_store_si128((__m128i*)cands, xmm0);
95 for (
i = 0;
i < 8; ++
i) {
96 candidate = ((short)(candidate - cands[
i]) > 0) ? candidate : cands[
i];
99 for (
i = 0;
i < leftovers; ++
i) {
100 candidate = ((short)(candidate - src0[(bound << 3) +
i]) > 0)
102 : src0[(bound << 3) +
i];
105 target[0] = candidate;
110 #ifdef LV_HAVE_GENERIC
115 const unsigned int num_bytes = num_points * 2;
119 int bound = num_bytes >> 1;
121 short candidate = src0[0];
122 for (
i = 1;
i < bound; ++
i) {
123 candidate = ((short)(candidate - src0[
i]) > 0) ? candidate : src0[
i];
125 target[0] = candidate;
static void volk_16i_max_star_16i_a_ssse3(short *target, short *src0, unsigned int num_points)
Definition: volk_16i_max_star_16i.h:53
static void volk_16i_max_star_16i_generic(short *target, short *src0, unsigned int num_points)
Definition: volk_16i_max_star_16i.h:113
for i
Definition: volk_config_fixed.tmpl.h:13