#ifndef VEC_CHAR_PPC_H_
#define VEC_CHAR_PPC_H_
#define vec_popcntb __builtin_vec_vpopcntb
  : "v" (vra),
    "v" (vrb)
  vmin = vec_min (a, b);
  vmax = vec_max (a, b);
  result = vec_sub (vmax, vmin);
#if defined (vec_vclzb)
#elif defined (__clang__)
  __vector unsigned char n, nt, y, x, s, m;
  __vector unsigned char z = { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
  __vector unsigned char one = { 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1 };
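  /* Branchless binary search for the leading-zero count: each step
     compares a shifted copy (y) against zero with vec_cmpgt, then uses
     vec_sel to conditionally accept the shifted value into x and the
     updated count into n.  */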
  m = (__vector unsigned char) vec_cmpgt (y, z);
  x = vec_sel (x, y, m);
  n = vec_sel (n, nt, m);
  m = (__vector unsigned char) vec_cmpgt (y, z);
  x = vec_sel (x, y, m);
  n = vec_sel (n, nt, m);
  m = (__vector unsigned char) vec_cmpgt (y, z);
  n = vec_sel (n, nt, m);
  n = vec_sel (nt, n, m);
#if defined (vec_cnttz) || defined (__clang__)
  const vui8_t ones = vec_splat_u8 (1);
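  /* (vra - 1) & ~vra turns each byte's trailing zero bits into ones and
     clears everything else, so counting the ones in tzmask gives the
     trailing-zero count.  */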
  tzmask = vec_andc (vec_sub (vra, ones), vra);
  const vui8_t ones = vec_splat_u8 (1);
  const vui8_t c8s = vec_splat_u8 (8);

  term = vec_andc (vec_sub (vra, ones), vra);
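  /* With term = (vra - 1) & ~vra, the trailing-zero count of each byte
     equals 8 minus the leading-zero count of term; a zero byte
     correctly yields 8.  */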
  return vec_sub (c8s, vec_clzb (term));
  const vui8_t UC_FIRST =
    { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
      0x40, 0x40, 0x40, 0x40 };
  const vui8_t UC_LAST =
    { 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a,
      0x5a, 0x5a, 0x5a, 0x5a };
  const vui8_t LC_FIRST =
    { 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60,
      0x60, 0x60, 0x60, 0x60 };
  const vui8_t LC_LAST =
    { 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a,
      0x7a, 0x7a, 0x7a, 0x7a };
  const vui8_t DG_FIRST =
    { 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
      0x2f, 0x2f, 0x2f, 0x2f };
  const vui8_t DG_LAST =
    { 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
      0x39, 0x39, 0x39, 0x39 };
  vui8_t cmp1, cmp2, cmp3, cmp4, cmp5, cmp6, cmask1, cmask2, cmask3;
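  /* Range test per byte: vec_cmpgt against the value just below the
     range start, ANDed with the complement of vec_cmpgt against the
     range end, yields 0xFF exactly for bytes inside the range.  */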
  cmp1 = (vui8_t) vec_cmpgt (vec_str, LC_FIRST);
  cmp2 = (vui8_t) vec_cmpgt (vec_str, LC_LAST);

  cmp3 = (vui8_t) vec_cmpgt (vec_str, UC_FIRST);
  cmp4 = (vui8_t) vec_cmpgt (vec_str, UC_LAST);

  cmp5 = (vui8_t) vec_cmpgt (vec_str, DG_FIRST);
  cmp6 = (vui8_t) vec_cmpgt (vec_str, DG_LAST);
  cmask1 = vec_andc (cmp1, cmp2);
  cmask2 = vec_andc (cmp3, cmp4);
  cmask3 = vec_andc (cmp5, cmp6);

  result = vec_or (vec_or (cmask1, cmask2), cmask3);
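  /* Each result byte is 0xFF if the corresponding input byte is ASCII
     alphanumeric ([A-Za-z0-9]) and 0x00 otherwise, so the mask can feed
     vec_sel or vec_and directly; for example, vec_and (vec_str, result)
     zeroes every non-alphanumeric byte.  */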
  const vui8_t UC_FIRST =
    { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
      0x40, 0x40, 0x40, 0x40 };
  const vui8_t UC_LAST =
    { 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a,
      0x5a, 0x5a, 0x5a, 0x5a };
  const vui8_t LC_FIRST =
    { 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60,
      0x60, 0x60, 0x60, 0x60 };
  const vui8_t LC_LAST =
    { 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a,
      0x7a, 0x7a, 0x7a, 0x7a };
  vui8_t cmp1, cmp2, cmp3, cmp4, cmask1, cmask2;
  cmp1 = (vui8_t) vec_cmpgt (vec_str, LC_FIRST);
  cmp2 = (vui8_t) vec_cmpgt (vec_str, LC_LAST);

  cmp3 = (vui8_t) vec_cmpgt (vec_str, UC_FIRST);
  cmp4 = (vui8_t) vec_cmpgt (vec_str, UC_LAST);
  cmask1 = vec_andc (cmp1, cmp2);
  cmask2 = vec_andc (cmp3, cmp4);

  result = vec_or (cmask1, cmask2);
  const vui8_t DG_FIRST =
    { 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
      0x2f, 0x2f, 0x2f, 0x2f };
  const vui8_t DG_LAST =
    { 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
      0x39, 0x39, 0x39, 0x39 };
  cmp1 = (vui8_t) vec_cmpgt (vec_str, DG_FIRST);
  cmp2 = (vui8_t) vec_cmpgt (vec_str, DG_LAST);

  result = vec_andc (cmp1, cmp2);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return vec_mrgahb (vec_mulo (vra, vrb), vec_mule (vra, vrb));
#else
  return vec_mrgahb (vec_mule (vra, vrb), vec_mulo (vra, vrb));
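  /* vec_mule/vec_mulo form halfword products of the even/odd byte
     elements; merging the high-order byte of each product (vec_mrgahb)
     yields the high 8 bits of every byte multiply.  The operand order is
     swapped for little-endian so the element numbering matches.  */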
  return vec_mul (vra, vrb);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return vec_mrgalb (vec_mulo (vra, vrb), vec_mule (vra, vrb));
#else
  return vec_mrgalb (vec_mule (vra, vrb), vec_mulo (vra, vrb));
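  /* When a generic vec_mul for vector char is not available, merging the
     low-order byte of each even/odd halfword product (vec_mrgalb)
     produces the modulo-256 byte product.  */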
#if defined (vec_vpopcntb)
  r = vec_vpopcntb (vra);
#elif defined (__clang__)
  r = vec_popcnt (vra);
  vui8_t ones = { 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1 };
  vui8_t fives =
    { 0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55,
      0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55 };
  vui8_t threes =
    { 0x33,0x33,0x33,0x33, 0x33,0x33,0x33,0x33,
      0x33,0x33,0x33,0x33, 0x33,0x33,0x33,0x33 };
  vui8_t fs =
    { 0x0f,0x0f,0x0f,0x0f, 0x0f,0x0f,0x0f,0x0f,
      0x0f,0x0f,0x0f,0x0f, 0x0f,0x0f,0x0f,0x0f };
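  /* Classic SWAR population count: add adjacent bit pairs (mask 0x55),
     then 2-bit fields (mask 0x33), then nibbles (mask 0x0f), doubling
     the shift distance s between steps.  */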
  x2 = vec_and (vec_sr (x, s), fives);

  x1 = vec_and (n, threes);
  x2 = vec_andc (n, threes);
  n = vec_add (x1, vec_sr (x2, s));

  x1 = vec_add (n, vec_sr (n, s));
  n = vec_and (x1, fs);
#define vec_popcntb __builtin_vec_vpopcntb
#if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
  const vui8_t rshift = vec_splat_u8 (7);
  result = (vb8_t) vec_sra (vra, rshift);
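  /* An arithmetic shift right of 7 replicates each byte's sign bit
     across the byte, producing 0xFF for negative and 0x00 for
     non-negative elements.  */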
  if (__builtin_constant_p (shb))
    lshift = (vui8_t) vec_splat_s8 (shb);
  else
    lshift = vec_splats ((unsigned char) shb);
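  /* vec_splat_s8 requires a literal (compile-time constant) operand, so
     it is used only when shb is known at compile time; otherwise
     vec_splats builds the shift-count vector at run time.  */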
  result = vec_vslb (vra, lshift);
  if (__builtin_constant_p (shb))
    lshift = (vui8_t) vec_splat_s8 (shb);
  else
    lshift = vec_splats ((unsigned char) shb);
  result = vec_vsrab (vra, lshift);
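  /* For shift counts of 8 or more, shifting by 7 gives the same result
     as any larger arithmetic right shift: each byte collapses to a copy
     of its sign bit.  */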
  lshift = (vui8_t) vec_splat_s8 (7);
  result = vec_vsrab (vra, lshift);

  return (vi8_t) result;
  if (__builtin_constant_p (shb))
    lshift = (vui8_t) vec_splat_s8 (shb);
  else
    lshift = vec_splats ((unsigned char) shb);

  result = vec_vsrb (vra, lshift);
  vui8_t result, vt1, vt2, vt3;
  const vui8_t vzero = { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
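  /* Double-quadword shift: vrw supplies the high-order quadword and vrx
     the low-order one.  vrw is shifted left by the octet count in vrb,
     vrx is shifted right by the complementary count (0 - vrb), and the
     two parts are ORed to form the result.  */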
  vt1 = vec_slo (vrw, vrb);
  vt3 = vec_sub (vzero, vrb);
  vt2 = vec_sro (vrx, vt3);
  result = vec_or (vt1, vt2);
  const vui8_t UC_MASK =
    { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
      0x20, 0x20, 0x20, 0x20 };
  const vui8_t LC_FIRST =
    { 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60,
      0x60, 0x60, 0x60, 0x60 };
  const vui8_t LC_LAST =
    { 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a, 0x7a,
      0x7a, 0x7a, 0x7a, 0x7a };
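  /* ASCII upper- and lower-case letters differ only in the 0x20 bit.
     Clearing that bit in bytes that lie within 'a'..'z' maps them to
     'A'..'Z'; all other bytes pass through unchanged.  */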
  cmp1 = (vui8_t) vec_cmpgt (vec_str, LC_FIRST);
  cmp2 = (vui8_t) vec_cmpgt (vec_str, LC_LAST);

  cmask = vec_andc (cmp1, cmp2);
  cmask = vec_and (cmask, UC_MASK);

  result = vec_andc (vec_str, cmask);
  const vui8_t UC_MASK =
    { 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
      0x20, 0x20, 0x20, 0x20 };
  const vui8_t UC_FIRST =
    { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
      0x40, 0x40, 0x40, 0x40 };
  const vui8_t UC_LAST =
    { 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a, 0x5a,
      0x5a, 0x5a, 0x5a, 0x5a };
  vui8_t cmp1, cmp2, cmask;
  cmp1 = (vui8_t) vec_cmpgt (vec_str, UC_FIRST);
  cmp2 = (vui8_t) vec_cmpgt (vec_str, UC_LAST);

  cmask = vec_andc (cmp1, cmp2);
  cmask = vec_and (cmask, UC_MASK);

  result = vec_or (vec_str, cmask);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  const vui8_t permute =
    { 0x01, 0x11, 0x03, 0x13, 0x05, 0x15, 0x07, 0x17, 0x09, 0x19, 0x0B, 0x1B,
      0x0D, 0x1D, 0x0F, 0x1F };

  return vec_perm (vrb, vra, (vui8_t) permute);
#else
  const vui8_t permute =
    { 0x00, 0x10, 0x02, 0x12, 0x04, 0x14, 0x06, 0x16, 0x08, 0x18, 0x0A, 0x1A,
      0x0C, 0x1C, 0x0E, 0x1E };

  return vec_perm (vra, vrb, (vui8_t) permute);
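  /* The permute patterns interleave the even-numbered bytes of the two
     source vectors.  On little-endian the source order and index parity
     are swapped so the result matches big-endian element numbering.  */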
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  const vui8_t permute =
    { 0x00, 0x10, 0x02, 0x12, 0x04, 0x14, 0x06, 0x16, 0x08, 0x18, 0x0A, 0x1A,
      0x0C, 0x1C, 0x0E, 0x1E };

  return vec_perm (vrb, vra, (vui8_t) permute);
#else
  const vui8_t permute =
    { 0x01, 0x11, 0x03, 0x13, 0x05, 0x15, 0x07, 0x17, 0x09, 0x19, 0x0B, 0x1B,
      0x0D, 0x1D, 0x0F, 0x1F };

  return vec_perm (vra, vrb, (vui8_t) permute);