#ifndef VEC_F32_PPC_H_
#define VEC_F32_PPC_H_
vec_vglfsso (float *array, const long long offset0,
             const long long offset1);
vec_vlxsspx (const signed long long ra, const float *rb);
             const long long offset0, const long long offset1);
  return vec_abs (vf32x);
      0x80000000, 0x80000000);
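/* The 7-bit data-class mask (DCMX) passed to vec_test_data_class /
   xvtstdcsp in the predicates below selects IEEE-754 classes:
   0x40 NaN, 0x20 +Infinity, 0x10 -Infinity, 0x08 +Zero, 0x04 -Zero,
   0x02 +Denormal, 0x01 -Denormal.  So 0x70 matches NaN or +-Infinity
   (i.e. not finite), 0x30 matches +-Infinity, 0x40 matches NaN,
   0x7f matches every non-normal class, 0x03 matches +-Denormal, and
   0x0c matches +-Zero, which is how each predicate picks its mask.  */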
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x70);
      "xvtstdcsp %x0,%x1,0x70;\n"
  return vec_all_eq (tmp, vec_zero);
  tmp = vec_and ((vui32_t) vf32, expmask);
  return !vec_any_eq (tmp, expmask);
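  /* A minimal usage sketch (illustrative only, assuming this predicate is
     pveclib's vec_all_isfinitef32): the all_/any_ forms return a scalar
     truth value, so they work as a cheap guard before any element-wise
     fix-up.

       vf32_t checked_add (vf32_t a, vf32_t b)
       {
         vf32_t r = vec_add (a, b);
         if (!vec_all_isfinitef32 (r))
           {
             // handle overflow / NaN lanes here
           }
         return r;
       }
  */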
#if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x30);
      "xvtstdcsp %x0,%x1,0x30;\n"
  return vec_all_eq (tmp, vec_ones);
  tmp = vec_andc ((vui32_t) vf32, signmask);
  return vec_all_eq (tmp, expmask);
#if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x40);
      "xvtstdcsp %x0,%x1,0x40;\n"
  return vec_all_eq (tmp, vec_ones);
  tmp = vec_andc ((vui32_t) vf32, signmask);
  return vec_all_gt (tmp, expmask);
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x7f);
      "xvtstdcsp %x0,%x1,0x7f;\n"
  return vec_all_eq (tmp, vec_zero);
  tmp = vec_and ((vui32_t) vf32, expmask);
  return !(vec_any_eq (tmp, expmask) || vec_any_eq (tmp, vec_zero));
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x03);
      "xvtstdcsp %x0,%x1,0x03;\n"
  return vec_all_eq (tmp, vec_ones);
  tmp = vec_andc ((vui32_t) vf32, signmask);
  return vec_all_lt (tmp, explow) && vec_all_ne (tmp, vec_zero);
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x0c);
      "xvtstdcsp %x0,%x1,0x0c;\n"
  return vec_all_eq (tmp, vec_ones);
  tmp = vec_andc ((vui32_t) vf32, signmask);
  return vec_all_eq (tmp, vec_zero);
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x70);
      "xvtstdcsp %x0,%x1,0x70;\n"
  return vec_any_eq (tmp, vec_zero);
  tmp = vec_and ((vui32_t) vf32, expmask);
  return !vec_all_eq (tmp, expmask);
#if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x30);
      "xvtstdcsp %x0,%x1,0x30;\n"
  return vec_any_eq (tmp, vec_ones);
  tmp = vec_andc ((vui32_t) vf32, signmask);
  return vec_any_eq (tmp, expmask);
#if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x40);
      "xvtstdcsp %x0,%x1,0x40;\n"
  return vec_any_eq (tmp, vec_ones);
  tmp = vec_andc ((vui32_t) vf32, signmask);
  return vec_any_gt (tmp, expmask);
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x7f);
      "xvtstdcsp %x0,%x1,0x7f;\n"
  return vec_any_eq (tmp, vec_zero);
  tmp = vec_and ((vui32_t) vf32, expmask);
  res = (vui32_t) vec_nor (vec_cmpeq (tmp, expmask), vec_cmpeq (tmp, vec_zero));
  return vec_any_gt (res, vec_zero);
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x03);
      "xvtstdcsp %x0,%x1,0x03;\n"
  return vec_any_eq (tmp, vec_ones);
  tmp2 = vec_andc ((vui32_t) vf32, signmask);
  tmp = (vui32_t) vec_cmplt (tmp2, explow);
  tmpz = (vui32_t) vec_cmpeq (tmp2, vec_zero);
  vsubnorm = (vb32_t) vec_andc (tmp, tmpz);
  return vec_any_ne (vsubnorm, vec_zero);
#ifdef vec_test_data_class
  tmp = (vui32_t) vec_test_data_class (vf32, 0x0c);
      "xvtstdcsp %x0,%x1,0x0c;\n"
  return vec_any_eq (tmp, vec_ones);
  tmp = vec_andc ((vui32_t) vf32, signmask);
  return vec_any_eq (tmp, vec_zero);
#ifdef PVECLIB_CPSGN_FIXED
  return (vec_cpsgn (vf32x, vf32y));
      "xvcpsgnsp %x0,%x1,%x2;\n"
      : "wa" (vf32x), "wa" (vf32y)
      0x80000000, 0x80000000);
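  /* The PVECLIB_CPSGN_FIXED path uses the vec_cpsgn built-in directly;
     otherwise the operand order is pinned by issuing xvcpsgnsp inline
     (or, without the instruction, by selecting the sign bits with the
     0x80000000 mask), presumably to guard against built-in versions that
     disagree on which operand supplies the sign.  */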
#if defined (_ARCH_PWR9)
#ifdef vec_test_data_class
  tmp2 = vec_test_data_class (vf32, 0x70);
      "xvtstdcsp %x0,%x1,0x70;\n"
  return vec_nor (tmp2, tmp2);
  tmp = vec_and ((vui32_t) vf32, expmask);
  tmp2 = vec_cmpeq (tmp, expmask);
  return vec_nor (tmp2, tmp2);
#if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
#ifdef vec_test_data_class
  result = vec_test_data_class (vf32, 0x30);
      "xvtstdcsp %x0,%x1,0x30;\n"
  tmp = vec_andc ((vui32_t) vf32, signmask);
  result = vec_cmpeq (tmp, expmask);
#if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
#ifdef vec_test_data_class
  result = vec_test_data_class (vf32, 0x40);
      "xvtstdcsp %x0,%x1,0x40;\n"
  tmp2 = vec_andc ((vui32_t) vf32, signmask);
  result = vec_cmpgt (tmp2, expmask);
#ifdef vec_test_data_class
  tmp2 = vec_test_data_class (vf32, 0x7f);
      "xvtstdcsp %x0,%x1,0x7f;\n"
  return vec_nor (tmp2, tmp2);
  tmp = vec_and ((vui32_t) vf32, expmask);
  return vec_nor (vec_cmpeq (tmp, expmask), vec_cmpeq (tmp, veczero));
#ifdef vec_test_data_class
  result = vec_test_data_class (vf32, 0x03);
      "xvtstdcsp %x0,%x1,0x03;\n"
  tmp2 = vec_andc ((vui32_t) vf32, signmask);
  tmp = (vui32_t) vec_cmplt (tmp2, explow);
  tmpz = (vui32_t) vec_cmpeq (tmp2, vec_zero);
  result = (vb32_t) vec_andc (tmp, tmpz);
#ifdef vec_test_data_class
  result = vec_test_data_class (vf32, 0x0c);
      "xvtstdcsp %x0,%x1,0x0c;\n"
  tmp2 = vec_andc ((vui32_t) vf32, signmask);
  result = vec_cmpeq (tmp2, vec_zero);
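  /* Unlike the scalar all_/any_ predicates earlier, these variants return
     a per-element boolean mask (vb32_t) that can feed vec_sel directly.
     A minimal usage sketch (illustrative only, assuming this is pveclib's
     vec_isnanf32), replacing NaN lanes with zero:

       vf32_t squash_nan (vf32_t x)
       {
         const vf32_t zero = { 0.0f, 0.0f, 0.0f, 0.0f };
         vb32_t nan_mask = vec_isnanf32 (x);
         return vec_sel (x, zero, nan_mask);
       }
  */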
               const long long offset1, const long long offset2,
               const long long offset3)
#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  re0 = vec_xxpermdi (re0, re2, 3);
  re1 = vec_xxpermdi (re1, re3, 3);
  re0 = vec_xxpermdi (re0, re2, 0);
  re1 = vec_xxpermdi (re1, re3, 0);
  vf32_t xte0, xte1, xte2, xte3;
  vui8_t perm0, perm1, perm2, perm3;
  perm0 = vec_lvsl (offset0, array);
  xte0 = vec_lde (offset0, array);
  xte0 = vec_perm (xte0, xte0, perm0);
  perm1 = vec_lvsl (offset1, array);
  xte1 = vec_lde (offset1, array);
  xte1 = vec_perm (xte1, xte1, perm1);
  perm2 = vec_lvsl (offset2, array);
  xte2 = vec_lde (offset2, array);
  xte2 = vec_perm (xte2, xte2, perm2);
  perm3 = vec_lvsl (offset3, array);
  xte3 = vec_lde (offset3, array);
  xte3 = vec_perm (xte3, xte3, perm3);
  xte0 = vec_mergeh (xte0, xte2);
  xte1 = vec_mergeh (xte1, xte3);
  result = vec_mergeh (xte0, xte1);
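  /* A minimal usage sketch (illustrative only): the gather offsets are byte
     offsets into the float array, so loading elements 0, 5, 9, and 14 means
     scaling each element index by sizeof (float):

       vf32_t pick4 (float *array)
       {
         return vec_vgl4fsso (array, 0 * 4, 5 * 4, 9 * 4, 14 * 4);
       }

     The word-offset and scaled-index wrappers below compute these byte
     offsets from a vector of element indexes instead.  */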
  r = vec_vgl4fsso (array, off01[0], off01[1], off23[0], off23[1]);
  signed int off0, off1, off2, off3;
                const unsigned char scale)
  vi64_t lshift = vec_splats ((long long) (2 + scale));
  r = vec_vgl4fsso (array, off01[0], off01[1], off23[0], off23[1]);
  signed long long off0, off1, off2, off3;
  vi64_t lshift = vec_splats ((long long) (2));
  r = vec_vgl4fsso (array, off01[0], off01[1], off23[0], off23[1]);
  signed long long off0, off1, off2, off3;
                const unsigned char scale)
  vi64_t lshift = vec_splats ((long long) (2 + scale));
  long long offset0, offset1;
  vi64_t lshift = vec_splats ((long long) 2);
  long long offset0, offset1;
                const long long offset1)
#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  result = vec_xxpermdi (re0, re1, 3);
  result = vec_xxpermdi (re0, re1, 0);
  re0 = (vi64_t) vec_sld (re0, re0, 8);
  result = (vi64_t) vec_sld (re0, re1, 8);
#if (defined(__clang__) && __clang_major__ < 8)
  float *p = (float *)((char *)rb + ra);
  t.vf2[0] = t.vf2[1] = *p;
  if (__builtin_constant_p (ra) && (ra < 32760) && (ra >= -32768)
#if defined (_ARCH_PWR9)
      : "m" (*(float*)((char *)rb + ra))
  unsigned long long rt;
      : "Z" (*(float *)((char *)rb+rt))
      : "Z" (*(float *)((char *)rb+ra))
  float *p = (float *)((char *)rb + ra);
  t.vf2[0] = t.vf2[1] = *p;
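  /* Note on the load above: when the byte offset ra is a compile-time
     constant that fits a signed 16-bit displacement, the "m" operand lets
     the compiler fold it into the load's displacement field; otherwise the
     offset is materialized in a register (rt) and the "Z" operand forces
     indexed-form addressing.  */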
                const long long offset0, const long long offset1,
                const long long offset2, const long long offset3)
  vf32_t xs0, xs1, xs2, xs3;
  xs0 = vec_splat (xs, 0);
  xs1 = vec_splat (xs, 1);
  xs2 = vec_splat (xs, 2);
  xs3 = vec_splat (xs, 3);
  vec_ste (xs0, offset0, array);
  vec_ste (xs1, offset1, array);
  vec_ste (xs2, offset2, array);
  vec_ste (xs3, offset3, array);
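  /* A minimal usage sketch (illustrative only): the scatter splats each
     element of xs and stores it at the matching byte offset, so writing
     the four lanes to elements 3, 1, 4, and 0 of a float array looks like:

       void scatter4 (vf32_t xs, float *array)
       {
         vec_vsst4fsso (xs, array, 3 * 4, 1 * 4, 4 * 4, 0 * 4);
       }
  */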
  vec_vsst4fsso (xs, array, off01[0], off01[1], off23[0], off23[1]);
  signed int off0, off1, off2, off3;
                vi32_t vra, const unsigned char scale)
  vui64_t lshift = vec_splats ((unsigned long long) (2 + scale));
  vec_vsst4fsso (xs, array, off01[0], off01[1], off23[0], off23[1]);
  signed int off0, off1, off2, off3;
  vui64_t lshift = vec_splats ((unsigned long long) 2);
  vec_vsst4fsso (xs, array, off01[0], off01[1], off23[0], off23[1]);
  signed int off0, off1, off2, off3;
                const unsigned char scale)
  vui64_t lshift = vec_splats ((unsigned long long) (2 + scale));
  long long offset0, offset1;
  vui64_t lshift = vec_splats ((unsigned long long) 2);
  long long offset0, offset1;
                const long long offset0, const long long offset1)
  xs1 = vec_xxpermdi (xs, xs, 2);
  xs1 = vec_sld (xs0, xs0, 8);
#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#if (defined(__clang__) && __clang_major__ < 8)
  float *p = (float *)((char *)rb + ra);
  if (__builtin_constant_p (ra) && (ra < 32760) && (ra >= -32768)
#if defined (_ARCH_PWR9)
      : "=m" (*(float*)((char *)rb + ra))
  unsigned long long rt;
      : "=Z" (*(float *)((char *)rb+rt))
      : "=Z" (*(float *)((char *)rb+ra))
  float *p = (float *)((char *)rb + ra);
#if defined (_ARCH_PWR9) && defined (__VSX__) && (__GNUC__ > 7)
#if defined (vec_insert_exp)
  result = vec_insert_exp (sig, exp);
      "xviexpsp %x0,%x1,%x2"
      : "wa" (sig), "wa" (exp)
      0x7f800000, 0x7f800000);
#if defined (_ARCH_PWR9) && defined (__VSX__) && (__GNUC__ > 7)
#if defined (vec_extract_exp)
  result = vec_extract_exp (vrb);
      0x7f800000, 0x7f800000);
  tmp = vec_and ((vui32_t) vrb, expmask);
#if defined (_ARCH_PWR9) && defined (__VSX__) && (__GNUC__ > 7)
#if defined (vec_extract_sig)
  result = vec_extract_sig (vrb);
      0x007fffff, 0x007fffff);
      0x7f800000, 0x7f800000);
      0x00800000, 0x00800000);
  tmp = vec_and ((vui32_t) vrb, expmask);
  normal = vec_nor ((vui32_t) vec_cmpeq (tmp, expmask),
                    (vui32_t) vec_cmpeq (tmp, zero));
  t128 = vec_and ((vui32_t) vrb, sigmask);
  result = (vui32_t) vec_sel (t128, normal, hidden);
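  /* The significand extract above masks the fraction bits, tests the
     exponent field for the normal case (neither all ones nor all zeros),
     and uses that mask to restore the implicit leading bit.  A hedged
     usage sketch, assuming the pveclib names vec_xvxexpsp and vec_xvxsigsp
     for the exponent and significand extracts defined here:

       vui32_t exp = vec_xvxexpsp (x);  // biased 8-bit exponent, per word
       vui32_t sig = vec_xvxsigsp (x);  // significand; implicit bit set for normals
  */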