#ifndef VEC_INT16_PPC_H_
#define VEC_INT16_PPC_H_
#define vec_popcnth __builtin_vec_vpopcnth
  result = vec_absdh (vra, vrb);
#else
  __asm__(
      "vabsduh %0,%1,%2;"
      : "=v" (result)
      : "v" (vra),
        "v" (vrb)
      : );
#endif
  vmin = vec_min (vra, vrb);
  vmax = vec_max (vra, vrb);
  result = vec_sub (vmax, vmin);
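/* A minimal self-contained model of the min/max/sub identity above
   (a sketch for illustration; example_absduh is not part of this
   header's API).  Because vec_max(a,b) >= vec_min(a,b) element-wise,
   the unsigned subtraction can not wrap, so each element of the
   result is exactly |a - b|.  */
static inline __vector unsigned short
example_absduh (__vector unsigned short a, __vector unsigned short b)
{
  return vec_sub (vec_max (a, b), vec_min (a, b));
}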
#if defined (vec_vclzh)
  r = vec_vclzh (vra);
#elif defined (__clang__)
  r = vec_cntlz (vra);
  vui16_t z = { 0,0,0,0, 0,0,0,0};
  vui16_t one = { 1,1,1,1, 1,1,1,1};

  s = vec_splat_u16 (8);

  /* 8-bit window: keep the non-zero half, adjust the count.  */
  x = vec_sel (x, y, m);
  n = vec_sel (n, nt, m);

  /* 4-bit window.  */
  x = vec_sel (x, y, m);
  n = vec_sel (n, nt, m);

  /* 2-bit window.  */
  x = vec_sel (x, y, m);
  n = vec_sel (n, nt, m);

  /* 1-bit window: only the count needs updating.  */
  n = vec_sel (n, nt, m);

  /* Final select of the accumulated leading-zero count.  */
  r = vec_sel (nt, n, m);
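/* Scalar model of the branch-free binary search above (a sketch for
   illustration; example_clz16 is not part of this header's API).
   Each vec_sel pair above replaces the if-body below: m plays the
   compare mask, x/y the window values, n/nt the candidate counts.  */
static inline unsigned int
example_clz16 (unsigned short x)
{
  unsigned int n = 16, s, y;
  for (s = 8; s > 0; s >>= 1)
    {
      y = (unsigned int) x >> s;
      if (y != 0)
        {
          n -= s;
          x = (unsigned short) y;
        }
    }
  /* x is now 0 or 1; subtracting it yields clz(x), and n - x = 16
     when the input was zero.  */
  return n - x;
}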
#if defined (vec_cnttz) || defined (__clang__)
  const vui16_t ones = vec_splat_u16 (1);
  /* tzmask = !vra & (vra - 1) */
  tzmask = vec_andc (vec_sub (vra, ones), vra);
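/* Why this works (illustrative note, not from the original lines):
   for vra != 0, (vra - 1) flips the lowest set bit and every bit
   below it, so the andc leaves exactly ctz(vra) one-bits set; a
   popcount of tzmask then gives the trailing-zero count.  For
   vra == 0, tzmask is all-ones and the popcount returns 16.
   Example: vra = 0x0068 -> vra-1 = 0x0067,
            ~vra & (vra-1) = 0x0007 -> popcount = 3.  */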
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return vec_mrgahh (vec_mulo (vra, vrb), vec_mule (vra, vrb));
#else
  return vec_mrgahh (vec_mule (vra, vrb), vec_mulo (vra, vrb));
#endif
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return vec_mrgalh (vec_mulo (vra, vrb), vec_mule (vra, vrb));
#else
  return vec_mrgalh (vec_mule (vra, vrb), vec_mulo (vra, vrb));
#endif
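/* Scalar model of the merge-based multiplies above (a sketch; these
   helpers are not part of this header's API).  vec_mule/vec_mulo
   produce full 32-bit products of the even/odd halfword lanes;
   vec_mrgahh re-interleaves the high 16 bits of each product into
   element order (multiply-high), while vec_mrgalh keeps the low
   16 bits (multiply modulo).  The operand swap on little-endian
   compensates for the swapped even/odd semantics of mule/mulo.  */
static inline unsigned short
example_mulhuh (unsigned short a, unsigned short b)
{
  return (unsigned short) (((unsigned int) a * b) >> 16);
}

static inline unsigned short
example_muluhm (unsigned short a, unsigned short b)
{
  return (unsigned short) ((unsigned int) a * b);
}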
#if defined (vec_vpopcnth)
  r = vec_vpopcnth (vra);
#elif defined (__clang__)
  r = vec_popcnt (vra);
#else
  __vector unsigned short n, x1, x2, x, s;
  __vector unsigned short ones = { 1,1,1,1, 1,1,1,1};
  __vector unsigned short fives =
      {0x5555,0x5555,0x5555,0x5555, 0x5555,0x5555,0x5555,0x5555};
  __vector unsigned short threes =
      {0x3333,0x3333,0x3333,0x3333, 0x3333,0x3333,0x3333,0x3333};
  __vector unsigned short fs =
      {0x0f0f,0x0f0f,0x0f0f,0x0f0f, 0x0f0f,0x0f0f,0x0f0f,0x0f0f};
  /* SWAR popcount: add adjacent bits into 2-bit sums */
  x2 = vec_and (vec_sr (x, s), fives);
  /* add 2-bit sums into 4-bit field sums */
  x1 = vec_and (n, threes);
  x2 = vec_andc (n, threes);
  n = vec_add (x1, vec_sr (x2, s));
  /* add 4-bit sums into 8-bit field sums */
  x1 = vec_add (n, vec_sr (n, s));
  n = vec_and (x1, fs);
  /* add the two byte sums of each halfword */
  x1 = vec_add (n, vec_sl (n, s));
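/* Scalar model of the SWAR reduction above (a sketch; not part of
   this header's API).  Each step doubles the field width while the
   masks 0x5555, 0x3333, 0x0f0f confine the partial sums, matching
   the fives/threes/fs vector constants.  */
static inline unsigned short
example_popcnt16 (unsigned short x)
{
  x = (x & 0x5555) + ((x >> 1) & 0x5555); /* 2-bit field sums */
  x = (x & 0x3333) + ((x >> 2) & 0x3333); /* 4-bit field sums */
  x = (x + (x >> 4)) & 0x0f0f;            /* 8-bit field sums */
  return (x + (x >> 8)) & 0x001f;         /* final count, 0..16 */
}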
#define vec_popcnth __builtin_vec_vpopcnth
#if defined (vec_revb) || defined (__clang__)
  result = vec_revb (vra);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
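/* Illustrative note (an assumption about the elided fallback, not
   from these lines): where the vec_revb intrinsic is unavailable,
   reversing the bytes within each halfword is a fixed shuffle, e.g.
   vec_perm with the big-endian control bytes
   { 1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14 },
   which is why only the permute constant depends on endianness.  */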
#if defined (_ARCH_PWR10) && (__GNUC__ >= 10)

  const vui16_t rshift = vec_splat_u16 (15);
  result = (vb16_t) vec_sra (vra, rshift);
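/* Illustrative note: the algebraic shift right by 15 replicates each
   element's sign bit across all 16 bit positions, producing 0x0000
   for non-negative and 0xFFFF for negative elements -- exactly the
   canonical vector bool values, hence the simple cast to vb16_t.  */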
  if (__builtin_constant_p (shb))
    lshift = (vui16_t) vec_splat_s16 (shb);
  else
    lshift = vec_splats ((unsigned short) shb);

  result = vec_vslh (vra, lshift);
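/* Usage sketch (assuming these lines implement vec_slhi, the shift
   left halfword immediate operation).  For a compile-time count in
   0..15, vec_splat_s16 folds to a single splat-immediate
   instruction; the cast to vui16_t is safe because vec_vslh only
   uses the low 4 bits of each shift-count element.
     vui16_t x = { 1,2,3,4, 5,6,7,8 };
     vui16_t r = vec_slhi (x, 4);   // each element multiplied by 16
*/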
  if (__builtin_constant_p (shb))
    lshift = (vui16_t) vec_splat_s16 (shb);
  else
    lshift = vec_splats ((unsigned short) shb);

  result = vec_vsrh (vra, lshift);
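/* Illustrative note: the same splat-constant pattern serves the
   logical shift right.  vec_vsrh takes shift counts modulo 16, so a
   count of 16 or more can not be expressed this way; the enclosing
   code (not shown in these lines) has to special-case large counts,
   for which a logical shift must return zero.  */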
  if (__builtin_constant_p (shb))
    lshift = (vui16_t) vec_splat_s16 (shb);
  else
    lshift = vec_splats ((unsigned short) shb);

  result = vec_vsrah (vra, lshift);

  /* else clause: shift counts of 15 or more are clamped to 15 */
  lshift = (vui16_t) vec_splat_s16 (15);
  result = vec_vsrah (vra, lshift);
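/* Illustrative note: unlike the logical shifts, an algebraic shift
   by 15 already yields the fully sign-extended element (0 or -1),
   so clamping any count >= 15 to exactly 15 produces the
   mathematically expected result for all larger counts.  */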
  const vui16_t zero = { 0, 0, 0, 0, 0, 0, 0, 0 };
  /* Zero-extend the even halfwords of c to 32-bit elements.  */
  vui16_t c_euh = vec_mrgahh ((vui32_t) zero, (vui32_t) c);
  vui32_t res;

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  res = vec_vmulouh (a, b);
#else
  res = vec_vmuleuh (a, b);
#endif
  return vec_vadduwm (res, (vui32_t) c_euh);
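/* Why the 32-bit add can not overflow (illustrative): with
   a, b, c <= 0xFFFF, a*b + c <= 0xFFFE0001 + 0xFFFF = 0xFFFF0000,
   which still fits in 32 bits, so vec_vadduwm never wraps and the
   multiply-add is exact.  */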
  const vui16_t zero = { 0, 0, 0, 0, 0, 0, 0, 0 };
  /* Zero-extend the odd halfwords of c to 32-bit elements.  */
  vui16_t c_ouh = vec_mrgalh ((vui32_t) zero, (vui32_t) c);
  vui32_t res;

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  res = vec_vmuleuh (a, b);
#else
  res = vec_vmulouh (a, b);
#endif
  return vec_vadduwm (res, (vui32_t) c_ouh);
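/* Illustrative note: the odd-element variant differs only in taking
   vec_mrgalh (selecting the odd halfwords of c) and in swapping the
   mule/mulo selection; the same no-overflow bound applies.  */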
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  const vui16_t permute =
    { 0x0302,0x1312, 0x0706,0x1716, 0x0B0A,0x1B1A, 0x0F0E,0x1F1E };
  return vec_perm (vrb, vra, (vui8_t) permute);
#else
  const vui16_t permute =
    { 0x0001,0x1011, 0x0405,0x1415, 0x0809,0x1819, 0x0C0D,0x1C1D };
  return vec_perm (vra, vrb, (vui8_t) permute);
#endif
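/* Scalar model of the merge-even permute above (a sketch; not part
   of this header's API).  Each 16-bit permute entry packs two byte
   indexes into the 32-byte concatenation of the two source vectors
   (0x00-0x0F first operand, 0x10-0x1F second): on big-endian,
   0x0001 selects halfword 0 of vra and 0x1011 halfword 0 of vrb.
   On little-endian the byte indexes are swapped and the operands
   reversed so the result is identical.  */
static inline void
example_mrgeh (unsigned short *r, const unsigned short *a,
               const unsigned short *b)
{
  int i;
  for (i = 0; i < 4; i++)
    {
      r[2 * i] = a[2 * i];      /* even elements of vra */
      r[2 * i + 1] = b[2 * i];  /* even elements of vrb */
    }
}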
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  const vui16_t permute =
    { 0x0100,0x1110, 0x0504,0x1514, 0x0908,0x1918, 0x0D0C,0x1D1C };
  return vec_perm (vrb, vra, (vui8_t) permute);
#else
  const vui16_t permute =
    { 0x0203,0x1213, 0x0607,0x1617, 0x0A0B,0x1A1B, 0x0E0F,0x1E1F };
  return vec_perm (vra, vrb, (vui8_t) permute);
#endif
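/* Illustrative note: merge-odd is the mirror image -- the permute
   indexes select halfwords 1,3,5,7 from each operand, giving
   { vra[1], vrb[1], vra[3], vrb[3], vra[5], vrb[5], vra[7], vrb[7] }.  */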