POWER Vector Library Manual
1.0.4
|
Go to the documentation of this file.
24 #ifndef VEC_INT128_PPC_H_
25 #define VEC_INT128_PPC_H_
2337 #ifndef PVECLIB_DISABLE_CONSTINT128
2338 #define CONST_VUINT128_QxW(__q0, __q1, __q2, __q3) ( (vui128_t) \
2339 (((unsigned __int128) __q0) << 96) \
2340 + (((unsigned __int128) __q1) << 64) \
2341 + (((unsigned __int128) __q2) << 32) \
2342 + ((unsigned __int128) __q3) )
2345 #define CONST_VUINT128_QxW(__q0, __q1, __q2, __q3) ( (vui128_t) \
2346 CONST_VINT128_W(__q0, __q1, __q2, __q3) )
2364 #define CONST_VUINT128_QxD(__q0, __q1) ( (vui128_t) \
2365 (((unsigned __int128) __q0) << 64) \
2366 + ((unsigned __int128) __q1) )
2385 #define CONST_VUINT128_Qx19d(__q0, __q1) ( (vui128_t) \
2386 (((unsigned __int128) __q0) * 10000000000000000000UL) \
2387 + ((unsigned __int128) __q1) )
2405 #define CONST_VUINT128_Qx18d(__q0, __q1) ( (vui128_t) \
2406 (((unsigned __int128) __q0) * 1000000000000000000UL) \
2407 + ((unsigned __int128) __q1) )
2425 #define CONST_VUINT128_Qx16d(__q0, __q1) ( (vui128_t) \
2426 (((unsigned __int128) __q0) * 10000000000000000UL) \
2427 + ((unsigned __int128) __q1) )
2458 const unsigned int shb);
2470 const unsigned int shb);
2572 #if defined (vec_vaddcuq)
2573 co = (
vui32_t) vec_vaddcuq (a, b);
2574 #elif defined (__clang__)
2575 co = (
vui32_t) vec_addc (a, b);
2590 c = vec_sld (co, z, 4);
2591 c2 = vec_vaddcuw (t, c);
2592 t = vec_vadduwm (t, c);
2593 co = vec_vor (co, c2);
2594 c = vec_sld (c2, z, 4);
2595 c2 = vec_vaddcuw (t, c);
2596 t = vec_vadduwm (t, c);
2597 co = vec_vor (co, c2);
2598 c = vec_sld (c2, z, 4);
2599 c2 = vec_vaddcuw (t, c);
2600 co = vec_vor (co, c2);
2601 co = vec_sld (z, co, 4);
2626 #if defined (vec_vaddcuq)
2627 co = (
vui32_t) vec_vaddecuq (a, b, ci);
2628 #elif defined (__clang__)
2629 co = (
vui32_t) vec_addec (a, b, ci);
2632 "vaddecuq %0,%1,%2,%3;"
2644 c2 = vec_and ((
vui32_t) ci, co);
2645 c2 = vec_sld ((
vui32_t) c2, z, 12);
2648 c = vec_sld (co, c2, 4);
2649 c2 = vec_vaddcuw (t, c);
2650 t = vec_vadduwm (t, c);
2651 co = vec_vor (co, c2);
2652 c = vec_sld (c2, z, 4);
2653 c2 = vec_vaddcuw (t, c);
2654 t = vec_vadduwm (t, c);
2655 co = vec_vor (co, c2);
2656 c = vec_sld (c2, z, 4);
2657 c2 = vec_vaddcuw (t, c);
2658 t = vec_vadduwm (t, c);
2659 co = vec_vor (co, c2);
2660 c = vec_sld (c2, z, 4);
2661 c2 = vec_vaddcuw (t, c);
2662 co = vec_vor (co, c2);
2663 co = vec_sld (z, co, 4);
2688 #if defined (vec_vaddeuqm)
2689 t = (
vui32_t) vec_vaddeuqm (a, b, ci);
2690 #elif defined (__clang__)
2691 t = (
vui32_t) vec_adde (a, b, ci);
2694 "vaddeuqm %0,%1,%2,%3;"
2706 c2 = vec_and ((
vui32_t)ci, co);
2707 c2 = vec_sld ((
vui32_t)ci, z, 12);
2710 c = vec_sld (co, c2, 4);
2711 c2 = vec_vaddcuw (t, c);
2712 t = vec_vadduwm (t, c);
2713 c = vec_sld (c2, z, 4);
2714 c2 = vec_vaddcuw (t, c);
2715 t = vec_vadduwm (t, c);
2716 c = vec_sld (c2, z, 4);
2717 c2 = vec_vaddcuw (t, c);
2718 t = vec_vadduwm (t, c);
2719 c = vec_sld (c2, z, 4);
2720 t = vec_vadduwm (t, c);
2743 #if defined (vec_vadduqm)
2744 t = (
vui32_t) vec_vadduqm (a, b);
2745 #elif defined (__clang__)
2761 c = vec_sld (c, z, 4);
2762 c2 = vec_vaddcuw (t, c);
2763 t = vec_vadduwm (t, c);
2764 c = vec_sld (c2, z, 4);
2765 c2 = vec_vaddcuw (t, c);
2766 t = vec_vadduwm (t, c);
2767 c = vec_sld (c2, z, 4);
2768 t = vec_vadduwm (t, c);
2792 #if defined (vec_vadduqm) && defined (vec_vaddcuq)
2793 t = (
vui32_t) vec_vadduqm (a, b);
2794 co = (
vui32_t) vec_vaddcuq (a, b);
2795 #elif defined (__clang__)
2797 co = (
vui32_t) vec_addc (a, b);
2800 "vadduqm %0,%2,%3;\n"
2801 "\tvaddcuq %1,%2,%3;"
2814 c = vec_sld (co, z, 4);
2815 c2 = vec_vaddcuw (t, c);
2816 t = vec_vadduwm (t, c);
2817 co = vec_vor (co, c2);
2818 c = vec_sld (c2, z, 4);
2819 c2 = vec_vaddcuw (t, c);
2820 t = vec_vadduwm (t, c);
2821 co = vec_vor (co, c2);
2822 c = vec_sld (c2, z, 4);
2823 c2 = vec_vaddcuw (t, c);
2824 t = vec_vadduwm (t, c);
2825 co = vec_vor (co, c2);
2826 co = vec_sld (z, co, 4);
2853 #if defined (vec_vaddeuqm) && defined (vec_vaddecuq)
2854 t = (
vui32_t) vec_vaddeuqm (a, b, ci);
2855 co = (
vui32_t) vec_vaddecuq (a, b, ci);
2856 #elif defined (__clang__)
2857 t = (
vui32_t) vec_adde (a, b, ci);
2858 co = (
vui32_t) vec_addec (a, b, ci);
2861 "vaddeuqm %0,%2,%3,%4;\n"
2862 "\tvaddecuq %1,%2,%3,%4;"
2875 c2 = vec_and ((
vui32_t)ci, co);
2876 c2 = vec_sld ((
vui32_t)c2, z, 12);
2879 c = vec_sld (co, c2, 4);
2880 c2 = vec_vaddcuw (t, c);
2881 t = vec_vadduwm (t, c);
2882 co = vec_vor (co, c2);
2883 c = vec_sld (c2, z, 4);
2884 c2 = vec_vaddcuw (t, c);
2885 t = vec_vadduwm (t, c);
2886 co = vec_vor (co, c2);
2887 c = vec_sld (c2, z, 4);
2888 c2 = vec_vaddcuw (t, c);
2889 t = vec_vadduwm (t, c);
2890 co = vec_vor (co, c2);
2891 c = vec_sld (c2, z, 4);
2892 c2 = vec_vaddcuw (t, c);
2893 t = vec_vadduwm (t, c);
2894 co = vec_vor (co, c2);
2895 co = vec_sld (z, co, 4);
2933 vui64_t vt1, vt2, vt3, h64, l64;
2934 const vui64_t vzero = { 0, 0 };
2940 l64 = vec_and (vt1, vt3);
2948 vui32_t r32, gt32, gt32sr32, gt64sr64;
2950 c0 = vec_splat_u32 (0);
2952 gt32sr32 = vec_sld (c0, gt32, 12);
2953 gt64sr64 = vec_sld (c0, gt32, 8);
2954 gt32 = vec_sld (c0, gt32, 4);
2956 gt32sr32 = vec_or (gt32sr32, gt32);
2957 gt64sr64 = vec_or (gt64sr64, (
vui32_t) vra);
2958 r32 = vec_or (gt32sr32, gt64sr64);
3045 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3046 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3047 return vec_cmpeq (vra, vrb);
3051 "vcmpequq %0,%1,%2;\n"
3053 :
"v" (vra),
"v" (vrb)
3057 #elif defined (_ARCH_PWR8)
3062 return (
vb128_t) vec_and (equd, swapd);
3067 return (
vb128_t) vec_splat_u32 (0);
3091 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3092 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3093 return vec_cmpge (vra, vrb);
3097 "vcmpgtsq %0,%2,%1;\n"
3099 :
"v" (vra),
"v" (vrb)
3107 _a = vec_xor ((
vui32_t) vra, signbit);
3108 _b = vec_xor ((
vui32_t) vrb, signbit);
3140 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3141 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3142 return vec_cmpge (vra, vrb);
3146 "vcmpgtuq %0,%2,%1;\n"
3148 :
"v" (vra),
"v" (vrb)
3180 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3181 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3182 return vec_cmpgt (vra, vrb);
3186 "vcmpgtsq %0,%1,%2;\n"
3188 :
"v" (vra),
"v" (vrb)
3196 _a = vec_xor ((
vui32_t) vra, signbit);
3197 _b = vec_xor ((
vui32_t) vrb, signbit);
3229 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3230 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3231 return vec_cmpgt (vra, vrb);
3235 "vcmpgtuq %0,%1,%2;\n"
3237 :
"v" (vra),
"v" (vrb)
3269 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3270 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3271 return vec_cmple (vra, vrb);
3275 "vcmpgtsq %0,%1,%2;\n"
3277 :
"v" (vra),
"v" (vrb)
3285 _a = vec_xor ((
vui32_t) vra, signbit);
3286 _b = vec_xor ((
vui32_t) vrb, signbit);
3318 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3319 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3320 return vec_cmple (vra, vrb);
3324 "vcmpgtuq %0,%1,%2;\n"
3326 :
"v" (vra),
"v" (vrb)
3359 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3360 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3361 return vec_cmplt (vra, vrb);
3365 "vcmpgtsq %0,%2,%1;\n"
3367 :
"v" (vra),
"v" (vrb)
3375 _a = vec_xor ((
vui32_t) vra, signbit);
3376 _b = vec_xor ((
vui32_t) vrb, signbit);
3408 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3409 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3410 return vec_cmplt (vra, vrb);
3414 "vcmpgtuq %0,%2,%1;\n"
3416 :
"v" (vra),
"v" (vrb)
3477 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3478 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3479 return vec_cmpne (vra, vrb);
3483 "vcmpequq %0,%1,%2;\n"
3485 :
"v" (vra),
"v" (vrb)
3489 #elif defined (_ARCH_PWR8)
3490 __vector
unsigned long long equd, swapd;
3494 return (
vb128_t) vec_nand (equd, swapd);
3497 return (
vb128_t) vec_splat_s32 (-1);
3499 return (
vb128_t) vec_splat_u32 (0);
3525 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3526 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3527 return vec_all_eq (vra, vrb);
3532 "vcmpequq. %0,%3,%4;\n"
3535 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3536 :
"v" (vra),
"v" (vrb)
3540 #elif defined (_ARCH_PWR8) && (__GNUC__ >= 6) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
3568 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3569 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3570 return vec_all_ge (vra, vrb);
3575 "vcmpgtsq. %0,%4,%3;\n"
3578 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3579 :
"v" (vra),
"v" (vrb)
3592 return vec_all_eq((
vui32_t)a_b, carry128);
3616 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3617 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3618 return vec_all_ge (vra, vrb);
3623 "vcmpgtsq. %0,%3,%4;\n"
3626 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3627 :
"v" (vra),
"v" (vrb)
3640 return vec_all_eq((
vui32_t)b_a, ncarry128);
3664 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3665 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3666 return vec_all_le (vra, vrb);
3671 "vcmpgtsq. %0,%3,%4;\n"
3674 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3675 :
"v" (vra),
"v" (vrb)
3688 return vec_all_eq((
vui32_t)b_a, carry128);
3712 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3713 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3714 return vec_all_lt (vra, vrb);
3719 "vcmpgtsq. %0,%4,%3;\n"
3722 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3723 :
"v" (vra),
"v" (vrb)
3736 return vec_all_eq((
vui32_t)a_b, ncarry128);
3762 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3763 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3764 return vec_all_ne (vra, vrb);
3769 "vcmpequq. %0,%3,%4;\n"
3772 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3773 :
"v" (vra),
"v" (vrb)
3777 #elif defined (_ARCH_PWR8) && (__GNUC__ >= 6) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
3807 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3808 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3809 return vec_all_eq (vra, vrb);
3814 "vcmpequq. %0,%3,%4;\n"
3817 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3818 :
"v" (vra),
"v" (vrb)
3822 #elif defined (_ARCH_PWR8) && (__GNUC__ >= 6) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
3850 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3851 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3852 return vec_all_ge (vra, vrb);
3857 "vcmpgtuq. %0,%4,%3;\n"
3860 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3861 :
"v" (vra),
"v" (vrb)
3870 return vec_all_eq ((
vui32_t) a_b, carry128);
3894 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3895 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3896 return vec_all_ge (vra, vrb);
3901 "vcmpgtuq. %0,%3,%4;\n"
3904 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3905 :
"v" (vra),
"v" (vrb)
3914 return vec_all_eq ((
vui32_t) b_a, ncarry128);
3938 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3939 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3940 return vec_all_le (vra, vrb);
3945 "vcmpgtuq. %0,%3,%4;\n"
3948 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3949 :
"v" (vra),
"v" (vrb)
3958 return vec_all_eq ((
vui32_t) b_a, carry128);
3982 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
3983 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
3984 return vec_all_lt (vra, vrb);
3989 "vcmpgtuq. %0,%4,%3;\n"
3992 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
3993 :
"v" (vra),
"v" (vrb)
4002 return vec_all_eq ((
vui32_t) a_b, ncarry128);
4028 #if defined (_ARCH_PWR10) && defined (__VSX__) && (__GNUC__ >= 10)
4029 #if (__GNUC__ > 11) || ((__GNUC__ == 11) && (__GNUC_MINOR__ >= 2))
4030 return vec_all_ne (vra, vrb);
4035 "vcmpequq. %0,%3,%4;\n"
4038 :
"=v" (vrt),
"=&r" (u),
"=r" (r)
4039 :
"v" (vra),
"v" (vrb)
4043 #elif defined (_ARCH_PWR8) && (__GNUC__ >= 6) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
4073 "vmul10ecuq %0,%2,%3;\n"
4074 "vmul10euq %1,%2,%3;\n"
4084 vui32_t t_odd, t_even, t_high;
4086 t10 = vec_splat_u16(10);
4087 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4088 t_even = vec_vmulouh (ts, t10);
4089 t_odd = vec_vmuleuh (ts, t10);
4091 t_even = vec_vmuleuh (ts, t10);
4092 t_odd = vec_vmulouh (ts, t10);
4095 t_high = vec_sld (z, t_even, 2);
4097 tc = vec_sld ((
vui32_t) cin, z, 14);
4099 t_even = vec_sld (t_even, tc, 2);
4136 "vmul10cuq %0,%2;\n"
4145 vui32_t t_odd, t_even, t_high;
4147 t10 = vec_splat_u16(10);
4148 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4149 t_even = vec_vmulouh (ts, t10);
4150 t_odd = vec_vmuleuh (ts, t10);
4152 t_even = vec_vmuleuh(ts, t10);
4153 t_odd = vec_vmulouh(ts, t10);
4156 t_high = vec_sld (z, t_even, 2);
4158 t_even = vec_sld (t_even, z, 2);
4198 { (__int128) 1000000000000000UL * (__int128) 10000000000000000UL };
4203 0x039d66589687f9e9UL, 0x01d59f290ee19dafUL);
4204 const int shift_ten31 = 103;
4221 result =
vec_srqi (t, shift_ten31-1);
4260 { (__int128) 1000000000000000UL * (__int128) 10000000000000000UL };
4266 0x039d66589687f9e9UL, 0x01d59f290ee19dafUL);
4267 const int shift_ten31 = 103;
4268 vui128_t result, r2, t, q, q1, q2, c;
4286 r2 =
vec_sldqi (q2, q1, (128 - shift_ten31));
4287 result =
vec_sldqi (q1, q, (128 - shift_ten31));
4331 { (__int128) 10000000000000000UL * (__int128) 10000000000000000UL };
4337 0x9f623d5a8a732974UL, 0xcfbc31db4b0295e5UL);
4338 const int shift_ten32 = 107;
4339 vui128_t result, r2, t, q, q1, q2, c;
4357 r2 =
vec_sldqi (q2, q1, (128 - shift_ten32));
4358 result =
vec_sldqi (q1, q, (128 - shift_ten32));
4395 { (__int128) 1000000000000000UL * (__int128) 10000000000000000UL };
4400 0x039d66589687f9e9UL, 0x01d59f290ee19dafUL);
4401 const int shift_ten31 = 103;
4412 result =
vec_srqi (t, shift_ten31-1);
4415 result = (
vui128_t) { (__int128) 0 };
4443 { (__int128) 10000000000000000UL * (__int128) 10000000000000000UL };
4448 0x9f623d5a8a732974UL, 0xcfbc31db4b0295e5UL);
4449 const int shift_ten32 = 107;
4460 result =
vec_srqi (t, shift_ten32-1);
4463 result = (
vui128_t) { (__int128) 0 };
4583 { (__int128) 1000000000000000UL * (__int128) 10000000000000000UL };
4624 { (__int128) 1000000000000000UL * (__int128) 10000000000000000UL };
4677 { (__int128) 10000000000000000UL * (__int128) 10000000000000000UL };
4722 { (__int128) 1000000000000000UL
4723 * (__int128) 10000000000000000UL };
4755 { (__int128) 10000000000000000UL * (__int128) 10000000000000000UL };
4790 "vmul10cuq %0,%1;\n"
4797 vui32_t t_even, t_odd, t_high;
4799 t10 = vec_splat_u16(10);
4800 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4801 t_even = vec_vmulouh (ts, t10);
4802 t_odd = vec_vmuleuh (ts, t10);
4804 t_even = vec_vmuleuh(ts, t10);
4805 t_odd = vec_vmulouh(ts, t10);
4808 t_high = vec_sld (z, t_even, 2);
4810 t_even = vec_sld (t_even, z, 2);
4820 t_carry = vec_vadduwm (t_carry, t_high);
4847 "vmul10ecuq %0,%1,%2;\n"
4859 t10 = vec_splat_u16(10);
4860 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4861 t_even = vec_vmulouh (ts, t10);
4862 t_odd = vec_vmuleuh (ts, t10);
4864 t_even = vec_vmuleuh(ts, t10);
4865 t_odd = vec_vmulouh(ts, t10);
4868 t_high = vec_sld (z, t_even, 2);
4870 tc = vec_sld ((
vui32_t) cin, z, 14);
4872 t_even = vec_sld (t_even, tc, 2);
4882 t_carry = vec_vadduwm (t_carry, t_high);
4908 "vmul10euq %0,%1,%2;\n"
4919 t10 = vec_splat_u16(10);
4920 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4921 t_even = vec_vmulouh (ts, t10);
4922 t_odd = vec_vmuleuh (ts, t10);
4924 t_even = vec_vmuleuh(ts, t10);
4925 t_odd = vec_vmulouh(ts, t10);
4928 tc = vec_sld ((
vui32_t) cin, z, 14);
4930 t_even = vec_sld (t_even, tc, 2);
4970 t10 = vec_splat_u16(10);
4971 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4972 t_even = vec_vmulouh (ts, t10);
4973 t_odd = vec_vmuleuh (ts, t10);
4975 t_even = vec_vmuleuh(ts, t10);
4976 t_odd = vec_vmulouh(ts, t10);
4979 t_even = vec_sld (t_even, z, 2);
5023 vui16_t t100 = (
vui16_t ) { 100, 100, 100, 100, 100, 100, 100, 100 };
5024 vui32_t t_odd, t_even, t_high;
5027 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
5028 t_even = vec_vmulouh (ts, t100);
5029 t_odd = vec_vmuleuh (ts, t100);
5031 t_even = vec_vmuleuh(ts, t100);
5032 t_odd = vec_vmulouh(ts, t100);
5035 t_high = vec_sld (z, t_even, 2);
5037 t_even = vec_sld (t_even, z, 2);
5091 vui16_t t100 = (
vui16_t ) { 100, 100, 100, 100, 100, 100, 100, 100 };
5092 vui32_t t_odd, t_even, t_high;
5095 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
5096 t_even = vec_vmulouh (ts, t100);
5097 t_odd = vec_vmuleuh (ts, t100);
5099 t_even = vec_vmuleuh (ts, t100);
5100 t_odd = vec_vmulouh (ts, t100);
5103 t_high = vec_sld (z, t_even, 2);
5105 tc = vec_sld ((
vui32_t) cin, z, 14);
5107 t_even = vec_sld (t_even, tc, 2);
5148 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
5150 "vmsumcud %0,%1,%2,%3;\n"
5152 :
"v" (a),
"v" (b),
"v" (c)
5155 vui128_t p_even, p_odd, p_sum1, p_cry1, p_cry2;
5164 p_sum1 =
vec_addcq (&p_cry1, p_even, p_odd);
5205 #if defined (_ARCH_PWR9) && ((__GNUC__ >= 6) || (__clang_major__ >= 11))
5207 "vmsumudm %0,%1,%2,%3;\n"
5209 :
"v" (a),
"v" (b),
"v" (c)
5246 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
5279 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
5282 "vmulhud %0,%1,%2;\n"
5284 :
"v" (vra),
"v" (vrb)
5315 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
5346 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
5349 "vmulld %0,%1,%2;\n"
5351 :
"v" (vra),
"v" (vrb)
5354 #elif defined (_ARCH_PWR9)
5356 #elif defined (_ARCH_PWR8)
5395 const vui64_t zero = { 0, 0 };
5397 vui128_t tmh, tab, tba, tb0, tc1, tc2, tmq;
5434 t_odd = vec_sld (z, t_odd, 12);
5442 t_odd = vec_sld (z, t_odd, 12);
5450 t_odd = vec_sld (z, t_odd, 12);
5458 t_odd = vec_sld (z, t_odd, 12);
5461 #else // _ARCH_PWR7 or earlier and Big Endian only. */
5469 vui16_t z = { 0, 0, 0, 0, 0, 0, 0, 0 };
5471 tsw = vec_splat ((
vui16_t) b, 7);
5476 t_odd = vec_sld (z, t_odd, 14);
5480 tsw = vec_splat ((
vui16_t) b, 6);
5484 t_odd = vec_sld (z, t_odd, 14);
5488 tsw = vec_splat ((
vui16_t) b, 5);
5492 t_odd = vec_sld (z, t_odd, 14);
5496 tsw = vec_splat ((
vui16_t) b, 4);
5500 t_odd = vec_sld (z, t_odd, 14);
5504 tsw = vec_splat ((
vui16_t) b, 3);
5508 t_odd = vec_sld (z, t_odd, 14);
5512 tsw = vec_splat ((
vui16_t) b, 2);
5516 t_odd = vec_sld (z, t_odd, 14);
5520 tsw = vec_splat ((
vui16_t) b, 1);
5524 t_odd = vec_sld (z, t_odd, 14);
5528 tsw = vec_splat ((
vui16_t) b, 0);
5532 t_odd = vec_sld (z, t_odd, 14);
5564 const vui64_t zero = { 0, 0 };
5593 tmq = vec_sld (t_odd, z, 12);
5595 t_odd = vec_sld (z, t_odd, 12);
5603 tmq = vec_sld (t_odd, tmq, 12);
5605 t_odd = vec_sld (z, t_odd, 12);
5613 tmq = vec_sld (t_odd, tmq, 12);
5615 t_odd = vec_sld (z, t_odd, 12);
5622 tmq = vec_sld (t_odd, tmq, 12);
5633 vui16_t z = { 0, 0, 0, 0, 0, 0, 0, 0 };
5635 tsw = vec_splat ((
vui16_t) b, 7);
5641 tmq = (
vui32_t) vec_sld (t_odd, z, 14);
5643 t_odd = vec_sld (z, t_odd, 14);
5647 tsw = vec_splat ((
vui16_t) b, 6);
5653 t_odd = vec_sld (z, t_odd, 14);
5657 tsw = vec_splat ((
vui16_t) b, 5);
5663 t_odd = vec_sld (z, t_odd, 14);
5667 tsw = vec_splat ((
vui16_t) b, 4);
5673 t_odd = vec_sld (z, t_odd, 14);
5677 tsw = vec_splat ((
vui16_t) b, 3);
5683 t_odd = vec_sld (z, t_odd, 14);
5687 tsw = vec_splat ((
vui16_t) b, 2);
5693 t_odd = vec_sld (z, t_odd, 14);
5697 tsw = vec_splat ((
vui16_t) b, 1);
5703 t_odd = vec_sld (z, t_odd, 14);
5707 tsw = vec_splat ((
vui16_t) b, 0);
5744 const vui64_t zero = { 0, 0 };
5801 tmq = vec_sld (t_odd, z, 12);
5803 t_odd = vec_sld (z, t_odd, 12);
5811 tmq = vec_sld (t_odd, tmq, 12);
5813 t_odd = vec_sld (z, t_odd, 12);
5821 tmq = vec_sld (t_odd, tmq, 12);
5823 t_odd = vec_sld (z, t_odd, 12);
5831 tmq = vec_sld (t_odd, tmq, 12);
5833 t_odd = vec_sld (z, t_odd, 12);
5836 #else // _ARCH_PWR7 or earlier and Big Endian only. */
5844 vui16_t z = { 0, 0, 0, 0, 0, 0, 0, 0 };
5846 tsw = vec_splat ((
vui16_t) b, 7);
5852 tmq = (
vui32_t)vec_sld (t_odd, z, 14);
5854 t_odd = vec_sld (z, t_odd, 14);
5858 tsw = vec_splat ((
vui16_t) b, 6);
5864 t_odd = vec_sld (z, t_odd, 14);
5868 tsw = vec_splat ((
vui16_t) b, 5);
5874 t_odd = vec_sld (z, t_odd, 14);
5878 tsw = vec_splat ((
vui16_t) b, 4);
5884 t_odd = vec_sld (z, t_odd, 14);
5888 tsw = vec_splat ((
vui16_t) b, 3);
5894 t_odd = vec_sld (z, t_odd, 14);
5898 tsw = vec_splat ((
vui16_t) b, 2);
5904 t_odd = vec_sld (z, t_odd, 14);
5908 tsw = vec_splat ((
vui16_t) b, 1);
5914 t_odd = vec_sld (z, t_odd, 14);
5918 tsw = vec_splat ((
vui16_t) b, 0);
5924 t_odd = vec_sld (z, t_odd, 14);
6024 tmq = vec_sld (t_odd, z, 12);
6026 t_odd = vec_sld (z, t_odd, 12);
6034 tmq = vec_sld (t_odd, tmq, 12);
6036 t_odd = vec_sld (z, t_odd, 12);
6044 tmq = vec_sld (t_odd, tmq, 12);
6046 t_odd = vec_sld (z, t_odd, 12);
6054 tmq = vec_sld (t_odd, tmq, 12);
6056 t_odd = vec_sld (z, t_odd, 12);
6061 #else // _ARCH_PWR7 or earlier and Big Endian only. */
6070 vui16_t z = { 0, 0, 0, 0, 0, 0, 0, 0 };
6072 tsw = vec_splat ((
vui16_t) b, 7);
6077 tmq = (
vui32_t)vec_sld (t_odd, z, 14);
6079 t_odd = vec_sld (z, t_odd, 14);
6083 tsw = vec_splat ((
vui16_t) b, 6);
6089 t_odd = vec_sld (z, t_odd, 14);
6093 tsw = vec_splat ((
vui16_t) b, 5);
6099 t_odd = vec_sld (z, t_odd, 14);
6103 tsw = vec_splat ((
vui16_t) b, 4);
6109 t_odd = vec_sld (z, t_odd, 14);
6113 tsw = vec_splat ((
vui16_t) b, 3);
6119 t_odd = vec_sld (z, t_odd, 14);
6123 tsw = vec_splat ((
vui16_t) b, 2);
6129 t_odd = vec_sld (z, t_odd, 14);
6133 tsw = vec_splat ((
vui16_t) b, 1);
6139 t_odd = vec_sld (z, t_odd, 14);
6143 tsw = vec_splat ((
vui16_t) b, 0);
6149 t_odd = vec_sld (z, t_odd, 14);
6289 const vui64_t vzero = { 0, 0 };
6295 #elif defined(_ARCH_PWR8)
6304 const vui64_t vzero = { 0, 0 };
6337 #if defined (vec_revb) || defined (__clang__)
6338 result = vec_revb (vra);
6347 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
6379 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
6385 :
"v" (vra),
"v" (vrb)
6412 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
6413 if (__builtin_constant_p (shb) && (shb < 8))
6421 vui32_t lshift = vec_splats((
unsigned int) shb);
6425 :
"v" (vra),
"v" (lshift)
6429 if (__builtin_constant_p (shb) && ((shb % 8) == 0))
6435 result = vec_sld ((
vui8_t) vra, (
vui8_t) vra, ((shb / 8) & 15));
6514 return (
vb128_t) vec_vsubuqm (zero, vcy);
6516 const vui32_t ones = vec_splat_u32(1);
6520 return (
vb128_t) vec_cmpeq (rcy, ones);
6551 return (
vb128_t) vec_vsubeuqm (zero, zero, vcy);
6557 return (
vb128_t) vec_cmpeq (rcy, zero);
6580 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
6587 const vui8_t shift = vec_splat_u8 (7);
6590 result = (
vb128_t) vec_sra (splat, shift);
6615 vui8_t result, vt1, vt2, vt3, vbs;
6616 const vui8_t vzero = vec_splat_u8 (0);
6622 vt1 = vec_sll (vt1, vbs);
6623 vt3 = vec_sub (vzero, vbs);
6624 vt2 = vec_sro ((
vui8_t) vrx, vt3);
6625 vt2 = vec_srl (vt2, vt3);
6626 result = vec_or (vt1, vt2);
6653 if (__builtin_constant_p(shb))
6669 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
6677 #else // Load shb as vector and use general vec_sldq case.
6678 const vui8_t vrb = vec_splats ((
unsigned char) shb);
6685 const vui8_t vrb = vec_splats ((
unsigned char) shb);
6711 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
6717 :
"v" (vra),
"v" (vrb)
6726 result = vec_sll (result, vshift_splat);
6755 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
6756 lshift = (
vui8_t) vec_splats((
unsigned int) shb);
6760 :
"v" (vra),
"v" (lshift)
6763 if (__builtin_constant_p (shb) && ((shb % 8) == 0))
6770 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
6772 result = vec_sld ((
vui8_t) vra, zero, (shb / 8));
6781 if (__builtin_constant_p (shb) && (shb < 16))
6782 lshift = (
vui8_t) vec_splat_s8(shb);
6784 lshift = vec_splats ((
unsigned char) shb);
6788 result = vec_slo ((
vui8_t) vra, lshift);
6793 result = vec_sll (result, lshift);
6835 if (__builtin_constant_p (sim) && ((sim >= -128) && (sim < 128)))
6838 vi8_t vbi = vec_splats ((
signed char) sim);
6840 if (__builtin_constant_p (sim) && ((sim == 0) || (sim == -1)))
6847 if (__builtin_constant_p (sim) && (sim > 0))
6849 const vui32_t q_zero = {0, 0, 0, 0};
6854 const vui32_t q_ones = {-1, -1, -1, -1};
6860 result = vec_splats ((
signed __int128) sim);
6862 if (__builtin_constant_p (sim) && ((sim >= -16) && (sim < 16)))
6866 if (__builtin_constant_p (sim) && ((sim == 0) || (sim == -1)))
6873 if (__builtin_constant_p (sim) && (sim > 0))
6875 const vui32_t q_zero = {0, 0, 0, 0};
6876 result = (
vi128_t) vec_sld (q_zero, vwi, 4);
6880 const vui32_t q_ones = {-1, -1, -1, -1};
6881 result = (
vi128_t) vec_sld (q_ones, vwi, 4);
6886 result = vec_splats ((
signed __int128) sim);
6920 if (__builtin_constant_p (sim) && ((sim >= 0) && (sim < 256)))
6923 vui8_t vbi = vec_splats ((
unsigned char) sim);
6925 if (__builtin_constant_p (sim) && (sim == 0))
6932 if (__builtin_constant_p (sim) && (sim < 256))
6934 const vui32_t q_zero = {0, 0, 0, 0};
6938 result = vec_splats ((
unsigned __int128) sim);
6942 result = vec_splats ((
unsigned __int128) sim);
6944 if (__builtin_constant_p (sim) && ((sim >= 0) && (sim < 16)))
6946 const vui32_t q_zero = {0, 0, 0, 0};
6947 vui32_t vwi = vec_splat_u32 (sim);
6949 if (__builtin_constant_p (sim) && (sim == 0))
6954 result = (
vui128_t) vec_sld (q_zero, vwi, 4);
6957 else if (__builtin_constant_p (sim) && (sim == 128))
6960 vui8_t vbi = vec_splats ((
unsigned char) 128);
6962 const vui32_t q_zero = {0, 0, 0, 0};
6966 result = vec_splats ((
unsigned __int128) sim);
6989 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
6995 :
"v" (vra),
"v" (vrb)
7000 const vui8_t zero = vec_splat_u8 (0);
7006 vsht = vec_sub (zero, (
vui8_t) vrb);
7042 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
7043 vui32_t rshift = vec_splats((
unsigned int) shb);
7047 :
"v" (vra),
"v" (rshift)
7052 if (__builtin_constant_p (shb) && ((shb % 8) == 0))
7057 result = vec_sld ((
vui8_t) vsgn, (
vui8_t) vra, 16 - (shb / 8));
7076 const unsigned int lshb = 128 - shb;
7077 if (__builtin_constant_p (shb) && (lshb < 16))
7078 lshift = (
vui8_t) vec_splat_s8(shb);
7080 lshift = vec_splats ((
unsigned char) lshb);
7117 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
7123 :
"v" (vra),
"v" (vrb)
7132 result = vec_srl (result, vsht_splat);
7160 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
7161 vui32_t rshift = vec_splats((
unsigned int) shb);
7165 :
"v" (vra),
"v" (rshift)
7169 if (__builtin_constant_p (shb) && ((shb % 8)) == 0)
7176 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
7182 result = vec_sld (zero, (
vui8_t) vra, (16 - (shb / 8)));
7191 if ((__builtin_constant_p (shb) && (shb < 16)))
7192 lshift = (
vui8_t) vec_splat_s8(shb);
7194 lshift = vec_splats ((
unsigned char) shb);
7199 result = vec_sro ((
vui8_t) vra, lshift);
7204 result = vec_srl (result, lshift);
7227 __vector
unsigned char result, vsht_splat;
7231 vsht_splat = vec_splat_u8(4);
7232 result = vec_sll ((__vector
unsigned char) vra, vsht_splat);
7249 __vector
unsigned char result, vsht_splat;
7253 vsht_splat = vec_splat_u8(5);
7254 result = vec_sll ((__vector
unsigned char) vra, vsht_splat);
7271 __vector
unsigned char result, vsht_splat;
7275 vsht_splat = vec_splat_u8(4);
7276 result = vec_srl ((__vector
unsigned char) vra, vsht_splat);
7293 __vector
unsigned char result, vsht_splat;
7297 vsht_splat = vec_splat_u8(5);
7298 result = vec_srl ((__vector
unsigned char) vra, vsht_splat);
7321 #if defined (vec_vsubcuq)
7322 t = (
vui32_t) vec_vsubcuq (vra, vrb);
7323 #elif defined (__clang__)
7324 t = (
vui32_t) vec_subc (vra, vrb);
7336 const vui32_t ci= { 0,0,0,1 };
7362 #if defined (vec_vsubecuq)
7363 t = (
vui32_t) vec_vsubecuq (vra, vrb, vrc);
7364 #elif defined (__clang__)
7365 t = (
vui32_t) vec_subec (vra, vrb, vrc);
7368 "vsubecuq %0,%1,%2,%3;"
7403 #if defined (vec_vsubeuqm)
7404 t = (
vui32_t) vec_vsubeuqm (vra, vrb, vrc);
7405 #elif defined (__clang__)
7406 t = (
vui32_t) vec_sube (vra, vrb, vrc);
7409 "vsubeuqm %0,%1,%2,%3;"
7443 #if defined (vec_vsubuqm)
7444 t = (
vui32_t) vec_vsubuqm (vra, vrb);
7445 #elif defined (__clang__)
7446 t = (
vui32_t) vec_sub (vra, vrb);
7457 const vui32_t ci= { 0,0,0,1 };
7491 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
7493 "vmuleud %0,%1,%2;\n"
7497 #elif defined (_ARCH_PWR9) && ((__GNUC__ >= 6) || (__clang_major__ >= 11))
7498 const vui64_t zero = { 0, 0 };
7501 "vmsumudm %0,%1,%2,%3;\n"
7503 :
"v" (a),
"v" (b_eud),
"v" (zero)
7505 #elif defined (_ARCH_PWR8)
7506 const vui64_t zero = { 0, 0 };
7511 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7538 const vui32_t zero = {0,0,0,0};
7546 m1 = vec_splat (mm, 3);
7548 p0 = vec_vmuleuh (m0, m1);
7549 p1 = vec_vmulouh (m0, m1);
7551 resw = vec_sld (zero, p1, 14);
7554 c = vec_vaddcuw (resw, p0);
7555 resw = vec_vadduwm (resw, p0);
7556 c = vec_sld (c, c, 4);
7557 resw = vec_vadduwm (resw, c);
7560 m1 = vec_splat (mm, 2);
7561 p0 = vec_vmuleuh (m0, m1);
7562 p1 = vec_vmulouh (m0, m1);
7566 c = vec_vaddcuw (resw, p1);
7567 resw = vec_vadduwm (resw, p1);
7568 c = vec_sld (c, c, 4);
7569 resw = vec_vadduwm (resw, c);
7570 resw = vec_sld (c, resw, 14);
7575 c = vec_vaddcuw (resw, p0);
7576 resw = vec_vadduwm (resw, p0);
7577 c = vec_sld (c, c, 4);
7578 resw = vec_vadduwm (resw, c);
7581 m1 = vec_splat (mm, 1);
7582 p0 = vec_vmuleuh (m0, m1);
7583 p1 = vec_vmulouh (m0, m1);
7587 c = vec_vaddcuw (resw, p1);
7588 resw = vec_vadduwm (resw, p1);
7589 c = vec_sld (c, c, 4);
7590 resw = vec_vadduwm (resw, c);
7591 resw = vec_sld (c, resw, 14);
7596 c = vec_vaddcuw (resw, p0);
7597 resw = vec_vadduwm (resw, p0);
7598 c = vec_sld (c, c, 4);
7599 resw = vec_vadduwm (resw, c);
7602 m1 = vec_splat (mm, 0);
7603 p0 = vec_vmuleuh (m0, m1);
7604 p1 = vec_vmulouh (m0, m1);
7608 c = vec_vaddcuw (resw, p1);
7609 resw = vec_vadduwm (resw, p1);
7610 c = vec_sld (c, c, 4);
7611 resw = vec_vadduwm (resw, c);
7612 resw = vec_sld (c, resw, 14);
7617 c = vec_vaddcuw (resw, p0);
7618 resw = vec_vadduwm (resw, p0);
7619 c = vec_sld (c, c, 4);
7620 resw = vec_vadduwm (resw, c);
7653 const vui64_t zero = { 0, 0 };
7692 const vui64_t zero = { 0, 0 };
7737 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
7739 "vmuloud %0,%1,%2;\n"
7743 #elif defined (_ARCH_PWR9) && ((__GNUC__ >= 6) || (__clang_major__ >= 11))
7744 const vui64_t zero = { 0, 0 };
7747 "vmsumudm %0,%1,%2,%3;\n"
7749 :
"v" (a),
"v" (b_oud),
"v" (zero)
7751 #elif defined (_ARCH_PWR8)
7752 const vui64_t zero = { 0, 0 };
7757 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
7786 const vui32_t zero = {0,0,0,0};
7794 m1 = vec_splat (mm, 3);
7796 p0 = vec_vmuleuh (m0, m1);
7797 p1 = vec_vmulouh (m0, m1);
7799 resw = vec_sld (zero, p1, 14);
7803 c = vec_vaddcuw (resw, p0);
7804 resw = vec_vadduwm (resw, p0);
7805 c = vec_sld (c, c, 4);
7806 resw = vec_vadduwm (resw, c);
7809 m1 = vec_splat (mm, 2);
7811 p0 = vec_vmuleuh (m0, m1);
7812 p1 = vec_vmulouh (m0, m1);
7815 c = vec_vaddcuw (resw, p1);
7816 resw = vec_vadduwm (resw, p1);
7818 c = vec_sld (c, c, 4);
7819 resw = vec_vadduwm (resw, c);
7820 resw = vec_sld (c, resw, 14);
7825 c = vec_vaddcuw (resw, p0);
7826 resw = vec_vadduwm (resw, p0);
7827 c = vec_sld (c, c, 4);
7828 resw = vec_vadduwm (resw, c);
7831 m1 = vec_splat (mm, 1);
7833 p0 = vec_vmuleuh (m0, m1);
7834 p1 = vec_vmulouh (m0, m1);
7838 c = vec_vaddcuw (resw, p1);
7839 resw = vec_vadduwm (resw, p1);
7841 c = vec_sld (c, c, 4);
7842 resw = vec_vadduwm (resw, c);
7843 resw = vec_sld (c, resw, 14);
7848 c = vec_vaddcuw (resw, p0);
7849 resw = vec_vadduwm (resw, p0);
7850 c = vec_sld (c, c, 4);
7851 resw = vec_vadduwm (resw, c);
7854 m1 = vec_splat (mm, 0);
7856 p0 = vec_vmuleuh (m0, m1);
7857 p1 = vec_vmulouh (m0, m1);
7861 c = vec_vaddcuw (resw, p1);
7862 resw = vec_vadduwm (resw, p1);
7864 c = vec_sld (c, c, 4);
7865 resw = vec_vadduwm (resw, c);
7866 resw = vec_sld (c, resw, 14);
7871 c = vec_vaddcuw (resw, p0);
7872 resw = vec_vadduwm (resw, p0);
7873 c = vec_sld (c, c, 4);
7874 resw = vec_vadduwm (resw, c);
7907 const vui64_t zero = { 0, 0 };
7946 const vui64_t zero = { 0, 0 };
7990 const vui64_t zero = { 0, 0 };
8026 const vui64_t zero = { 0, 0 };
8057 if (__builtin_constant_p (shb) && (shb < 8))
8059 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
8061 "vsldbi %0,%1,%2,%3;\n"
8063 :
"v" (vra),
"v" (vrb),
"K" (shb)
8073 const vui8_t vshl = vec_splat_u8 (shb);
8074 const vui8_t vshr = vec_splat_u8 (8 - shb);
8075 const vui8_t zero = vec_splat_u8 (0);
8076 vui8_t lowbits, highbits;
8082 lowbits = vec_sld (zero, (
vui8_t) vrb, 1);
8083 lowbits = vec_vsrb (lowbits, vshr);
8085 highbits = vec_sll ((
vui8_t) vra, vshl);
8087 result = (
vui128_t) vec_or (highbits, lowbits);
8095 result =
vec_sldqi (vra, vrb, (shb & 7));
8122 if (__builtin_constant_p (shb) && (shb < 8))
8124 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
8126 "vsrdbi %0,%1,%2,%3;\n"
8128 :
"v" (vra),
"v" (vrb),
"K" (shb)
8138 const vui8_t vshl = vec_splat_u8 (8 - shb);
8139 const vui8_t vshr = vec_splat_u8 (shb);
8140 const vui8_t zero = vec_splat_u8 (0);
8141 vui8_t lowbits, highbits;
8147 highbits = vec_sld ((
vui8_t) vra, zero, 15);
8148 highbits = vec_vslb (highbits, vshl);
8150 lowbits = vec_srl ((
vui8_t) vrb, vshr);
8152 result = (
vui128_t) vec_or (highbits, lowbits);
8160 #if defined (__clang__) && (__clang_major__ < 6)
8166 result =
vec_sldqi (vra, vrb, (128 - (shb & 7)));
static vui128_t vec_vmaddeud(vui64_t a, vui64_t b, vui64_t c)
Vector Multiply-Add Even Unsigned Doublewords.
Definition: vec_int128_ppc.h:7651
static vi128_t vec_sraq(vi128_t vra, vui128_t vrb)
Vector Shift Right Algebraic Quadword.
Definition: vec_int128_ppc.h:6986
#define VEC_WE_2
Element index for vector splat word 2.
Definition: vec_common_ppc.h:334
static vui128_t vec_subeuqm(vui128_t vra, vui128_t vrb, vui128_t vrc)
Vector Subtract Extended Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:7399
static int vec_cmpsq_all_eq(vi128_t vra, vi128_t vrb)
Vector Compare all Equal Signed Quadword.
Definition: vec_int128_ppc.h:3522
static vb128_t vec_cmpltuq(vui128_t vra, vui128_t vrb)
Vector Compare Less Than Unsigned Quadword.
Definition: vec_int128_ppc.h:3406
static vi128_t vec_sraqi(vi128_t vra, const unsigned int shb)
Vector Shift Right Algebraic Quadword Immediate.
Definition: vec_int128_ppc.h:7036
static vi128_t vec_minsq(vi128_t vra, vi128_t vrb)
Vector Minimum Signed Quadword.
Definition: vec_int128_ppc.h:4532
static vi128_t vec_modsq_10e31(vi128_t vra, vi128_t q)
Vector Modulo by const 10e31 Signed Quadword.
Definition: vec_int128_ppc.h:4578
#define VEC_BYTE_L
Element index for lowest order byte.
Definition: vec_common_ppc.h:344
static vui128_t vec_divuq_10e32(vui128_t vra)
Vector Divide by const 10e32 Unsigned Quadword.
Definition: vec_int128_ppc.h:4439
static vb128_t vec_cmplesq(vi128_t vra, vi128_t vrb)
Vector Compare Less Than or Equal Signed Quadword.
Definition: vec_int128_ppc.h:3267
static vui128_t vec_subecuq(vui128_t vra, vui128_t vrb, vui128_t vrc)
Vector Subtract Extended and Write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:7358
static vb128_t vec_cmpgtuq(vui128_t vra, vui128_t vrb)
Vector Compare Greater Than Unsigned Quadword.
Definition: vec_int128_ppc.h:3227
static vui128_t vec_addecuq(vui128_t a, vui128_t b, vui128_t ci)
Vector Add Extended & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:2622
static vui64_t vec_vmaddouw(vui32_t a, vui32_t b, vui32_t c)
Vector Multiply-Add Odd Unsigned Words.
Definition: vec_int64_ppc.h:4757
static vui128_t vec_mul10cuq(vui128_t a)
Vector Multiply by 10 & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:4785
static vui128_t vec_subuqm(vui128_t vra, vui128_t vrb)
Vector Subtract Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:7439
static vui128_t vec_cmul10ecuq(vui128_t *cout, vui128_t a, vui128_t cin)
Vector combined Multiply by 10 Extended & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:4067
static vi128_t vec_negsq(vi128_t int128)
Vector Negate Signed Quadword.
Definition: vec_int128_ppc.h:6234
static int vec_cmpuq_all_lt(vui128_t vra, vui128_t vrb)
Vector Compare any Less Than Unsigned Quadword.
Definition: vec_int128_ppc.h:3980
__vector __bool int vb32_t
vector of 32-bit bool int elements.
Definition: vec_common_ppc.h:228
static vui64_t vec_xxspltd(vui64_t vra, const int ctl)
Vector splat doubleword. Duplicate the selected doubleword element across the doubleword elements of ...
Definition: vec_int64_ppc.h:4647
static vb128_t vec_cmpgtsq(vi128_t vra, vi128_t vrb)
Vector Compare Greater Than Signed Quadword.
Definition: vec_int128_ppc.h:3178
static vui64_t vec_vmaddeuw(vui32_t a, vui32_t b, vui32_t c)
Vector Multiply-Add Even Unsigned Words.
Definition: vec_int64_ppc.h:4688
#define VEC_BYTE_H
Element index for highest order byte.
Definition: vec_common_ppc.h:350
__vector __bool __int128 vb128_t
vector of one 128-bit bool __int128 element.
Definition: vec_common_ppc.h:240
static vui128_t vec_sldq(vui128_t vrw, vui128_t vrx, vui128_t vrb)
Vector Shift Left Double Quadword.
Definition: vec_int128_ppc.h:6613
static vui128_t vec_slq4(vui128_t vra)
Definition: vec_int128_ppc.h:7225
static vui64_t vec_muludm(vui64_t vra, vui64_t vrb)
Vector Multiply Unsigned Doubleword Modulo.
Definition: vec_int128_ppc.h:5344
static vb128_t vec_cmpneuq(vui128_t vra, vui128_t vrb)
Vector Compare Not Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3475
static vi128_t vec_splat_s128(const int sim)
Vector Splat Immediate Signed Quadword. Extend a signed integer constant across the quadword element ...
Definition: vec_int128_ppc.h:6829
static vui128_t vec_vsrdbi(vui128_t vra, vui128_t vrb, const unsigned int shb)
Vector Shift Right Double Quadword by Bit Immediate.
Definition: vec_int128_ppc.h:8118
static int vec_cmpuq_all_ge(vui128_t vra, vui128_t vrb)
Vector Compare any Greater Than or Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3848
static vui128_t vec_vmaddoud(vui64_t a, vui64_t b, vui64_t c)
Vector Multiply-Add Odd Unsigned Doublewords.
Definition: vec_int128_ppc.h:7905
static vui128_t vec_cmul100ecuq(vui128_t *cout, vui128_t a, vui128_t cin)
Vector combined Multiply by 100 Extended & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:5071
static vui128_t vec_divudq_10e32(vui128_t *qh, vui128_t vra, vui128_t vrb)
Vector Divide Unsigned Double Quadword by const 10e32.
Definition: vec_int128_ppc.h:4327
static vui128_t vec_rlq(vui128_t vra, vui128_t vrb)
Vector Rotate Left Quadword.
Definition: vec_int128_ppc.h:6375
static vi32_t vec_vsumsw(vi32_t vra, vi32_t vrb)
Vector Sum-across Signed Word Saturate.
Definition: vec_int32_ppc.h:2907
#define CONST_VINT128_W(__w0, __w1, __w2, __w3)
Arrange word elements of an unsigned int initializer in high->low order. May require an explicit cast.
Definition: vec_common_ppc.h:304
static int vec_cmpsq_all_lt(vi128_t vra, vi128_t vrb)
Vector Compare any Less Than Signed Quadword.
Definition: vec_int128_ppc.h:3710
static vui128_t vec_cmul100cuq(vui128_t *cout, vui128_t a)
Vector combined Multiply by 100 & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:5006
static vui128_t vec_mulluq(vui128_t a, vui128_t b)
Vector Multiply Low Unsigned Quadword.
Definition: vec_int128_ppc.h:5555
static vui128_t vec_addeuqm(vui128_t a, vui128_t b, vui128_t ci)
Vector Add Extended Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:2684
static vui128_t vec_vmsumeud(vui64_t a, vui64_t b, vui128_t c)
Vector Multiply-Sum Even Unsigned Doublewords.
Definition: vec_int128_ppc.h:7987
static vui128_t vec_maxuq(vui128_t vra, vui128_t vrb)
Vector Maximum Unsigned Quadword.
Definition: vec_int128_ppc.h:4508
static int vec_cmpsq_all_le(vi128_t vra, vi128_t vrb)
Vector Compare any Less Than or Equal Signed Quadword.
Definition: vec_int128_ppc.h:3662
static int vec_cmpuq_all_ne(vui128_t vra, vui128_t vrb)
Vector Compare all Not Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:4025
#define CONST_VINT64_DW(__dw0, __dw1)
Arrange elements of dword initializer in high->low order.
Definition: vec_common_ppc.h:295
static int vec_cmpsq_all_gt(vi128_t vra, vi128_t vrb)
Vector Compare any Greater Than Signed Quadword.
Definition: vec_int128_ppc.h:3614
__vector unsigned short vui16_t
vector of 16-bit unsigned short elements.
Definition: vec_common_ppc.h:204
static vui128_t vec_minuq(vui128_t vra, vui128_t vrb)
Vector Minimum Unsigned Quadword.
Definition: vec_int128_ppc.h:4556
static vui128_t vec_vmuleud(vui64_t a, vui64_t b)
Vector Multiply Even Unsigned Doublewords.
Definition: vec_int128_ppc.h:7487
static int vec_cmpuq_all_eq(vui128_t vra, vui128_t vrb)
Vector Compare all Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3804
static vui32_t vec_popcntw(vui32_t vra)
Vector Population Count word.
Definition: vec_int32_ppc.h:1184
__vector __int128 vi128_t
vector of one 128-bit signed __int128 element.
Definition: vec_common_ppc.h:235
static vi128_t vec_abssq(vi128_t vra)
Vector Absolute Value Signed Quadword.
Definition: vec_int128_ppc.h:2516
static vui128_t vec_madd2uq(vui128_t *mulu, vui128_t a, vui128_t b, vui128_t c1, vui128_t c2)
Vector Multiply-Add2 Unsigned Quadword.
Definition: vec_int128_ppc.h:6184
static vi128_t vec_divsq_10e31(vi128_t vra)
Vector Divide by const 10e31 Signed Quadword.
Definition: vec_int128_ppc.h:4193
__vector unsigned long long vui64_t
vector of 64-bit unsigned long long elements.
Definition: vec_common_ppc.h:208
Common definitions and typedef used by the collection of Power Vector Library (pveclib) headers.
static vui128_t vec_srq4(vui128_t vra)
Definition: vec_int128_ppc.h:7269
static vui64_t vec_splatd(vui64_t vra, const int ctl)
Vector splat doubleword. Duplicate the selected doubleword element across the doubleword elements of ...
Definition: vec_int64_ppc.h:3382
static vui64_t vec_vmuleuw(vui32_t vra, vui32_t vrb)
Vector Multiply Even Unsigned words.
Definition: vec_int32_ppc.h:2237
__vector unsigned char vui8_t
vector of 8-bit unsigned char elements.
Definition: vec_common_ppc.h:202
static vui128_t vec_popcntq(vui128_t vra)
Vector Population Count Quadword for unsigned __int128 elements.
Definition: vec_int128_ppc.h:6277
__vector int vi32_t
vector of 32-bit signed int elements.
Definition: vec_common_ppc.h:215
static vui64_t vec_vmulouw(vui32_t vra, vui32_t vrb)
Vector Multiply Odd Unsigned Words.
Definition: vec_int32_ppc.h:2340
static vui128_t vec_rlqi(vui128_t vra, const unsigned int shb)
Vector Rotate Left Quadword Immediate.
Definition: vec_int128_ppc.h:6408
static int vec_cmpsq_all_ne(vi128_t vra, vi128_t vrb)
Vector Compare all Not Equal Signed Quadword.
Definition: vec_int128_ppc.h:3759
static vui64_t vec_vrld(vui64_t vra, vui64_t vrb)
Vector Rotate Left Doubleword.
Definition: vec_int64_ppc.h:4185
static vui128_t vec_modudq_10e31(vui128_t vra, vui128_t vrb, vui128_t *ql)
Vector Modulo Unsigned Double Quadword by const 10e31.
Definition: vec_int128_ppc.h:4620
static vui64_t vec_vsld(vui64_t vra, vui64_t vrb)
Vector Shift Left Doubleword.
Definition: vec_int64_ppc.h:4238
static vui128_t vec_srqi(vui128_t vra, const unsigned int shb)
Vector Shift Right Quadword Immediate.
Definition: vec_int128_ppc.h:7154
static vui128_t vec_moduq_10e32(vui128_t vra, vui128_t q)
Vector Modulo by const 10e32 Unsigned Quadword.
Definition: vec_int128_ppc.h:4751
static vui128_t vec_splat_u128(const int sim)
Vector Splat Immediate Unsigned Quadword. Extend an unsigned integer constant across the quadword elem...
Definition: vec_int128_ppc.h:6914
static vui128_t vec_clzq(vui128_t vra)
Vector Count Leading Zeros Quadword for unsigned __int128 elements.
Definition: vec_int128_ppc.h:2918
static vui128_t vec_madduq(vui128_t *mulu, vui128_t a, vui128_t b, vui128_t c)
Vector Multiply-Add Unsigned Quadword.
Definition: vec_int128_ppc.h:5956
static vui128_t vec_cmul10cuq(vui128_t *cout, vui128_t a)
Vector combined Multiply by 10 & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:4130
static vui128_t vec_muludq(vui128_t *mulu, vui128_t a, vui128_t b)
Vector Multiply Unsigned Double Quadword.
Definition: vec_int128_ppc.h:5734
#define VEC_WE_3
Element index for vector splat word 3.
Definition: vec_common_ppc.h:336
static vui128_t vec_srq5(vui128_t vra)
Definition: vec_int128_ppc.h:7291
static vui128_t vec_addcuq(vui128_t a, vui128_t b)
Vector Add & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:2568
static int vec_cmpuq_all_le(vui128_t vra, vui128_t vrb)
Vector Compare any Less Than or Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3936
__vector unsigned __int128 vui128_t
vector of one 128-bit unsigned __int128 element.
Definition: vec_common_ppc.h:237
static vi128_t vec_maxsq(vi128_t vra, vi128_t vrb)
Vector Maximum Signed Quadword.
Definition: vec_int128_ppc.h:4484
static vui128_t vec_msumudm(vui64_t a, vui64_t b, vui128_t c)
Vector Multiply-Sum Unsigned Doubleword Modulo.
Definition: vec_int128_ppc.h:5202
static vui128_t vec_slq5(vui128_t vra)
Definition: vec_int128_ppc.h:7247
static vui128_t vec_vmuloud(vui64_t a, vui64_t b)
Vector Multiply Odd Unsigned Doublewords.
Definition: vec_int128_ppc.h:7733
static vui64_t vec_permdi(vui64_t vra, vui64_t vrb, const int ctl)
Vector Permute Doubleword Immediate. Combine a doubleword selected from the 1st (vra) vector with a d...
Definition: vec_int64_ppc.h:2983
static vui128_t vec_vmadd2oud(vui64_t a, vui64_t b, vui64_t c, vui64_t d)
Vector Multiply-Add2 Odd Unsigned Doublewords.
Definition: vec_int128_ppc.h:7944
static vui128_t vec_addcq(vui128_t *cout, vui128_t a, vui128_t b)
Vector Add with carry Unsigned Quadword.
Definition: vec_int128_ppc.h:2788
static vui64_t vec_clzd(vui64_t vra)
Vector Count Leading Zeros Doubleword for unsigned long long elements.
Definition: vec_int64_ppc.h:1313
static vui128_t vec_avguq(vui128_t vra, vui128_t vrb)
Vector Average Unsigned Quadword.
Definition: vec_int128_ppc.h:2541
static vb128_t vec_cmpeqsq(vi128_t vra, vi128_t vrb)
Vector Compare Equal Signed Quadword.
Definition: vec_int128_ppc.h:3013
static vb128_t vec_cmpltsq(vi128_t vra, vi128_t vrb)
Vector Compare Less Than Signed Quadword.
Definition: vec_int128_ppc.h:3357
static vb128_t vec_setb_cyq(vui128_t vcy)
Vector Set Bool from Quadword Carry.
Definition: vec_int128_ppc.h:6509
__vector long long vi64_t
vector of 64-bit signed long long elements.
Definition: vec_common_ppc.h:217
static vui128_t vec_vmsumoud(vui64_t a, vui64_t b, vui128_t c)
Vector Multiply-Sum Odd Unsigned Doublewords.
Definition: vec_int128_ppc.h:8023
#define CONST_VINT128_DW(__dw0, __dw1)
Initializer for 128-bits vector, as two unsigned long long elements in high->low order....
Definition: vec_common_ppc.h:298
Header package containing a collection of 128-bit SIMD operations over 64-bit integer elements.
__vector signed char vi8_t
vector of 8-bit signed char elements.
Definition: vec_common_ppc.h:211
static vui128_t vec_revbq(vui128_t vra)
Vector Byte Reverse Quadword.
Definition: vec_int128_ppc.h:6332
static vui128_t vec_subcuq(vui128_t vra, vui128_t vrb)
Vector Subtract and Write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:7317
static vui128_t vec_divudq_10e31(vui128_t *qh, vui128_t vra, vui128_t vrb)
Vector Divide Unsigned Double Quadword by const 10e31.
Definition: vec_int128_ppc.h:4257
static int vec_cmpuq_all_gt(vui128_t vra, vui128_t vrb)
Vector Compare any Greater Than Unsigned Quadword.
Definition: vec_int128_ppc.h:3892
static vui128_t vec_modudq_10e32(vui128_t vra, vui128_t vrb, vui128_t *ql)
Vector Modulo Unsigned Double Quadword by const 10e32.
Definition: vec_int128_ppc.h:4673
static vb128_t vec_setb_sq(vi128_t vra)
Vector Set Bool from Signed Quadword.
Definition: vec_int128_ppc.h:6576
#define VEC_DW_L
Element index for low order dword.
Definition: vec_common_ppc.h:324
static vb128_t vec_cmpnesq(vi128_t vra, vi128_t vrb)
Vector Compare Not Equal Signed Quadword.
Definition: vec_int128_ppc.h:3445
static vui128_t vec_vmadd2eud(vui64_t a, vui64_t b, vui64_t c, vui64_t d)
Vector Multiply-Add2 Even Unsigned Doublewords.
Definition: vec_int128_ppc.h:7690
#define VEC_WE_1
Element index for vector splat word 1.
Definition: vec_common_ppc.h:332
static vui128_t vec_neguq(vui128_t int128)
Vector Negate Unsigned Quadword.
Definition: vec_int128_ppc.h:6254
#define VEC_WE_0
Element index for vector splat word 0.
Definition: vec_common_ppc.h:330
static vb128_t vec_cmpequq(vui128_t vra, vui128_t vrb)
Vector Compare Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3043
static vui128_t vec_absduq(vui128_t vra, vui128_t vrb)
Vector Absolute Difference Unsigned Quadword.
Definition: vec_int128_ppc.h:2489
static vb128_t vec_cmpleuq(vui128_t vra, vui128_t vrb)
Vector Compare Less Than or Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3316
static vui64_t vec_pasted(vui64_t __VH, vui64_t __VL)
Vector doubleword paste. Concatenate the high doubleword of the 1st vector with the low double word o...
Definition: vec_int64_ppc.h:2937
static vui64_t vec_mrgald(vui128_t vra, vui128_t vrb)
Vector Merge Algebraic Low Doublewords.
Definition: vec_int64_ppc.h:2736
static vui128_t vec_ctzq(vui128_t vra)
Vector Count Trailing Zeros Quadword for unsigned __int128 elements.
Definition: vec_int128_ppc.h:2984
static vui64_t vec_mrgahd(vui128_t vra, vui128_t vrb)
Vector Merge Algebraic High Doublewords.
Definition: vec_int64_ppc.h:2710
static vui128_t vec_divuq_10e31(vui128_t vra)
Vector Divide by const 10e31 Unsigned Quadword.
Definition: vec_int128_ppc.h:4391
__vector unsigned int vui32_t
vector of 32-bit unsigned int elements.
Definition: vec_common_ppc.h:206
static vui64_t vec_mulhud(vui64_t vra, vui64_t vrb)
Vector Multiply High Unsigned Doubleword.
Definition: vec_int128_ppc.h:5277
static vui128_t vec_sldqi(vui128_t vrw, vui128_t vrx, const unsigned int shb)
Vector Shift Left Double Quadword Immediate.
Definition: vec_int128_ppc.h:6649
static vui128_t vec_mul10uq(vui128_t a)
Vector Multiply by 10 Unsigned Quadword.
Definition: vec_int128_ppc.h:4956
static vui128_t vec_adduqm(vui128_t a, vui128_t b)
Vector Add Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:2739
static vui128_t vec_mul10ecuq(vui128_t a, vui128_t cin)
Vector Multiply by 10 Extended & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:4841
static vui64_t vec_vmsumuwm(vui32_t vra, vui32_t vrb, vui64_t vrc)
Vector Multiply-Sum Unsigned Word Modulo.
Definition: vec_int64_ppc.h:4829
static vui64_t vec_addudm(vui64_t a, vui64_t b)
Vector Add Unsigned Doubleword Modulo.
Definition: vec_int64_ppc.h:1261
static vui32_t vec_vmaddouh(vui16_t a, vui16_t b, vui16_t c)
Vector Multiply-Add Odd Unsigned Halfwords.
Definition: vec_int16_ppc.h:1229
static vui128_t vec_addeq(vui128_t *cout, vui128_t a, vui128_t b, vui128_t ci)
Vector Add Extend with carry Unsigned Quadword.
Definition: vec_int128_ppc.h:2849
static vui128_t vec_srq(vui128_t vra, vui128_t vrb)
Vector Shift Right Quadword.
Definition: vec_int128_ppc.h:7114
static vui64_t vec_popcntd(vui64_t vra)
Vector Population Count doubleword.
Definition: vec_int64_ppc.h:3068
static vui128_t vec_muleud(vui64_t a, vui64_t b)
Vector Multiply Even Unsigned Doublewords.
Definition: vec_int128_ppc.h:5244
static vui128_t vec_moduq_10e31(vui128_t vra, vui128_t q)
Vector Modulo by const 10e31 Unsigned Quadword.
Definition: vec_int128_ppc.h:4718
static vi128_t vec_selsq(vi128_t vra, vi128_t vrb, vb128_t vrc)
Vector Select Signed Quadword.
Definition: vec_int128_ppc.h:6462
static vui128_t vec_mulhuq(vui128_t a, vui128_t b)
Vector Multiply High Unsigned Quadword.
Definition: vec_int128_ppc.h:5387
static vui64_t vec_swapd(vui64_t vra)
Vector doubleword swap. Exchange the high and low doubleword elements of a vector.
Definition: vec_int64_ppc.h:3789
static vi64_t vec_sradi(vi64_t vra, const unsigned int shb)
Vector Shift Right Algebraic Doubleword Immediate.
Definition: vec_int64_ppc.h:3692
static vui128_t vec_muloud(vui64_t a, vui64_t b)
Vector Multiply Odd Unsigned Doublewords.
Definition: vec_int128_ppc.h:5313
static vui128_t vec_vsldbi(vui128_t vra, vui128_t vrb, const unsigned int shb)
Vector Shift Left Double Quadword by Bit Immediate.
Definition: vec_int128_ppc.h:8053
static vui128_t vec_slqi(vui128_t vra, const unsigned int shb)
Vector Shift Left Quadword Immediate.
Definition: vec_int128_ppc.h:6748
static vui32_t vec_clzw(vui32_t vra)
Vector Count Leading Zeros word.
Definition: vec_int32_ppc.h:503
static vb128_t vec_cmpgesq(vi128_t vra, vi128_t vrb)
Vector Compare Greater Than or Equal Signed Quadword.
Definition: vec_int128_ppc.h:3089
static vui128_t vec_slq(vui128_t vra, vui128_t vrb)
Vector Shift Left Quadword.
Definition: vec_int128_ppc.h:6707
static vui32_t vec_vmaddeuh(vui16_t a, vui16_t b, vui16_t c)
Vector Multiply-Add Even Unsigned Halfwords.
Definition: vec_int16_ppc.h:1193
#define VEC_W_L
Element index for lowest order word.
Definition: vec_common_ppc.h:328
static vb64_t vec_cmpequd(vui64_t a, vui64_t b)
Vector Compare Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:1451
static vui128_t vec_mul10euq(vui128_t a, vui128_t cin)
Vector Multiply by 10 Extended Unsigned Quadword.
Definition: vec_int128_ppc.h:4903
static vb128_t vec_setb_ncq(vui128_t vcy)
Vector Set Bool from Quadword not Carry.
Definition: vec_int128_ppc.h:6546
static vb128_t vec_cmpgeuq(vui128_t vra, vui128_t vrb)
Vector Compare Greater Than or Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3138
static int vec_cmpsq_all_ge(vi128_t vra, vi128_t vrb)
Vector Compare any Greater Than or Equal Signed Quadword.
Definition: vec_int128_ppc.h:3566
static vui128_t vec_msumcud(vui64_t a, vui64_t b, vui128_t c)
Vector Multiply-Sum and Write Carryout Unsigned Doubleword.
Definition: vec_int128_ppc.h:5145
static vui128_t vec_seluq(vui128_t vra, vui128_t vrb, vb128_t vrc)
Vector Select Unsigned Quadword.
Definition: vec_int128_ppc.h:6482