POWER Vector Library Manual  1.0.4
vec_f32_ppc.h
1 /*
2  Copyright (c) [2017] IBM Corporation.
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  vec_f32_ppc.h
17 
18  Contributors:
19  IBM Corporation, Steven Munroe
20  Created on: Apr 13, 2016
21  */
22 
23 #ifndef VEC_F32_PPC_H_
24 #define VEC_F32_PPC_H_
25 
202 #include <pveclib/vec_common_ppc.h>
203 #include <pveclib/vec_int128_ppc.h>
204 
206 static inline vf64_t
207 vec_vglfsso (float *array, const long long offset0,
208  const long long offset1);
209 static inline vf64_t
210 vec_vlxsspx (const signed long long ra, const float *rb);
211 static inline void
212 vec_vsstfsso (vf64_t xs, float *array,
213  const long long offset0, const long long offset1);
214 static inline void
215 vec_vstxsspx (vf64_t xs, const signed long long ra, float *rb);
217 
219 typedef vf32_t __vbinary32;
220 
231 static inline vf32_t
232 vec_absf32 (vf32_t vf32x)
233 {
234 #if _ARCH_PWR7
235  /* Requires VSX but eliminates a const load. */
236  return vec_abs (vf32x);
237 #else
238  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000,
239  0x80000000, 0x80000000);
240  return (vf32_t)vec_andc ((vui32_t)vf32x, signmask);
241 #endif
242 }
243 
263 static inline int
264 vec_all_isfinitef32 (vf32_t vf32)
265 {
266  vui32_t tmp;
267 #if _ARCH_PWR9
268  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
269 #ifdef vec_test_data_class
270  tmp = (vui32_t)vec_test_data_class (vf32, 0x70);
271 #else
272  __asm__(
273  "xvtstdcsp %x0,%x1,0x70;\n"
274  : "=wa" (tmp)
275  : "wa" (vf32)
276  :);
277 #endif
278  return vec_all_eq(tmp, vec_zero);
279 #else
280  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
281  0x7f800000);
282  tmp = vec_and ((vui32_t)vf32, expmask);
283  return !vec_any_eq(tmp, expmask);
284 #endif
285 }
286 
305 static inline int
306 vec_all_isinff32 (vf32_t vf32)
307 {
308  vui32_t tmp;
309 
310 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
311  const vui32_t vec_ones = CONST_VINT128_W(-1, -1, -1, -1);
312 #ifdef vec_test_data_class
313  tmp = (vui32_t)vec_test_data_class (vf32, 0x30);
314 #else
315  __asm__(
316  "xvtstdcsp %x0,%x1,0x30;\n"
317  : "=wa" (tmp)
318  : "wa" (vf32)
319  :);
320 #endif
321  return vec_all_eq(tmp, vec_ones);
322 #else
323  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
324  0x7f800000);
325  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
326  0x80000000);
327  tmp = vec_andc ((vui32_t)vf32, signmask);
328  return vec_all_eq(tmp, expmask);
329 #endif
330 }
331 
351 static inline int
352 vec_all_isnanf32 (vf32_t vf32)
353 {
354  vui32_t tmp;
355 
356 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
357  const vui32_t vec_ones = CONST_VINT128_W(-1, -1, -1, -1);
358 #ifdef vec_test_data_class
359  tmp = (vui32_t)vec_test_data_class (vf32, 0x40);
360 #else
361  __asm__(
362  "xvtstdcsp %x0,%x1,0x40;\n"
363  : "=wa" (tmp)
364  : "wa" (vf32)
365  :);
366 #endif
367  return vec_all_eq(tmp, vec_ones);
368 #else
369  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
370  0x7f800000);
371  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
372  0x80000000);
373  tmp = vec_andc ((vui32_t)vf32, signmask);
374  return vec_all_gt(tmp, expmask);
375 #endif
376 }
377 
398 static inline int
399 vec_all_isnormalf32 (vf32_t vf32)
400 {
401  vui32_t tmp;
402  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
403 #if _ARCH_PWR9
404 #ifdef vec_test_data_class
405  tmp = (vui32_t)vec_test_data_class (vf32, 0x7f);
406 #else
407  __asm__(
408  "xvtstdcsp %x0,%x1,0x7f;\n"
409  : "=wa" (tmp)
410  : "wa" (vf32)
411  :);
412 #endif
413  return vec_all_eq(tmp, vec_zero);
414 #else
415  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
416  0x7f800000);
417  tmp = vec_and ((vui32_t) vf32, expmask);
418  return !(vec_any_eq (tmp, expmask) || vec_any_eq(tmp, vec_zero));
419 #endif
420 }
421 
441 static inline int
442 vec_all_issubnormalf32 (vf32_t vf32)
443 {
444  vui32_t tmp;
445 
446 #if _ARCH_PWR9
447  const vui32_t vec_ones = CONST_VINT128_W(-1, -1, -1, -1);
448 #ifdef vec_test_data_class
449  tmp = (vui32_t)vec_test_data_class (vf32, 0x03);
450 #else
451  __asm__(
452  "xvtstdcsp %x0,%x1,0x03;\n"
453  : "=wa" (tmp)
454  : "wa" (vf32)
455  :);
456 #endif
457  return vec_all_eq(tmp, vec_ones);
458 #else
459  const vui32_t explow = CONST_VINT128_W(0x00800000, 0x00800000, 0x00800000,
460  0x00800000);
461  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
462  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
463  0x80000000);
464  tmp = vec_andc ((vui32_t)vf32, signmask);
465  return vec_all_lt (tmp, explow) && vec_all_ne (tmp, vec_zero);
466 #endif
467 }
468 
488 static inline int
489 vec_all_iszerof32 (vf32_t vf32)
490 {
491  vui32_t tmp;
492 
493 #if _ARCH_PWR9
494  const vui32_t vec_ones = CONST_VINT128_W(-1, -1, -1, -1);
495 #ifdef vec_test_data_class
496  tmp = (vui32_t)vec_test_data_class (vf32, 0x0c);
497 #else
498  __asm__(
499  "xvtstdcsp %x0,%x1,0x0c;\n"
500  : "=wa" (tmp)
501  : "wa" (vf32)
502  :);
503 #endif
504  return vec_all_eq(tmp, vec_ones);
505 #else
506  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
507  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
508  0x80000000);
509  tmp = vec_andc ((vui32_t)vf32, signmask);
510  return vec_all_eq(tmp, vec_zero);
511 #endif
512 }
513 
533 static inline int
534 vec_any_isfinitef32 (vf32_t vf32)
535 {
536  vui32_t tmp;
537 #if _ARCH_PWR9
538  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
539 #ifdef vec_test_data_class
540  tmp = (vui32_t)vec_test_data_class (vf32, 0x70);
541 #else
542  __asm__(
543  "xvtstdcsp %x0,%x1,0x70;\n"
544  : "=wa" (tmp)
545  : "wa" (vf32)
546  :);
547 #endif
548  return vec_any_eq(tmp, vec_zero);
549 #else
550  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
551  0x7f800000);
552  tmp = vec_and ((vui32_t)vf32, expmask);
553  return !vec_all_eq(tmp, expmask);
554 #endif
555 }
556 
574 static inline int
575 vec_any_isinff32 (vf32_t vf32)
576 {
577  vui32_t tmp;
578 
579 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
580  const vui32_t vec_ones = CONST_VINT128_W(-1, -1, -1, -1);
581 #ifdef vec_test_data_class
582  tmp = (vui32_t)vec_test_data_class (vf32, 0x30);
583 #else
584  __asm__(
585  "xvtstdcsp %x0,%x1,0x30;\n"
586  : "=wa" (tmp)
587  : "wa" (vf32)
588  :);
589 #endif
590  return vec_any_eq(tmp, vec_ones);
591 #else
592  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
593  0x7f800000);
594  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
595  0x80000000);
596  tmp = vec_andc ((vui32_t)vf32, signmask);
597  return vec_any_eq(tmp, expmask);
598 #endif
599 }
600 
620 static inline int
621 vec_any_isnanf32 (vf32_t vf32)
622 {
623  vui32_t tmp;
624 
625 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
626  const vui32_t vec_ones = CONST_VINT128_W(-1, -1, -1, -1);
627 #ifdef vec_test_data_class
628  tmp = (vui32_t)vec_test_data_class (vf32, 0x40);
629 #else
630  __asm__(
631  "xvtstdcsp %x0,%x1,0x40;\n"
632  : "=wa" (tmp)
633  : "wa" (vf32)
634  :);
635 #endif
636  return vec_any_eq(tmp, vec_ones);
637 #else
638  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
639  0x7f800000);
640  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
641  0x80000000);
642  tmp = vec_andc ((vui32_t)vf32, signmask);
643  return vec_any_gt(tmp, expmask);
644 #endif
645 }
646 
667 static inline int
668 vec_any_isnormalf32 (vf32_t vf32)
669 {
670  vui32_t tmp;
671  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
672 #if _ARCH_PWR9
673 #ifdef vec_test_data_class
674  tmp = (vui32_t)vec_test_data_class (vf32, 0x7f);
675 #else
676  __asm__(
677  "xvtstdcsp %x0,%x1,0x7f;\n"
678  : "=wa" (tmp)
679  : "wa" (vf32)
680  :);
681 #endif
682  return vec_any_eq(tmp, vec_zero);
683 #else
684  vui32_t res;
685  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
686  0x7f800000);
687  tmp = vec_and ((vui32_t) vf32, expmask);
688  res = (vui32_t) vec_nor (vec_cmpeq (tmp, expmask), vec_cmpeq (tmp, vec_zero));
689 
690  return vec_any_gt(res, vec_zero);
691 #endif
692 }
693 
712 static inline int
713 vec_any_issubnormalf32 (vf32_t vf32)
714 {
715  vui32_t tmp;
716 
717 #if _ARCH_PWR9
718  const vui32_t vec_ones = CONST_VINT128_W(-1, -1, -1, -1);
719 #ifdef vec_test_data_class
720  tmp = (vui32_t)vec_test_data_class (vf32, 0x03);
721 #else
722  __asm__(
723  "xvtstdcsp %x0,%x1,0x03;\n"
724  : "=wa" (tmp)
725  : "wa" (vf32)
726  :);
727 #endif
728  return vec_any_eq(tmp, vec_ones);
729 #else
730  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
731  0x80000000);
732  const vui32_t explow = CONST_VINT128_W(0x00800000, 0x00800000, 0x00800000,
733  0x00800000);
734  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
735  vui32_t tmpz, tmp2;
736  vb32_t vsubnorm;
737 
738  tmp2 = vec_andc ((vui32_t)vf32, signmask);
739  tmp = (vui32_t) vec_cmplt(tmp2, explow);
740  tmpz = (vui32_t) vec_cmpeq (tmp2, vec_zero);
741  vsubnorm = (vb32_t ) vec_andc (tmp, tmpz);
742  return vec_any_ne(vsubnorm, vec_zero);
743 #endif
744 }
745 
765 static inline int
766 vec_any_iszerof32 (vf32_t vf32)
767 {
768  vui32_t tmp;
769 
770 #if _ARCH_PWR9
771  const vui32_t vec_ones = CONST_VINT128_W(-1, -1, -1, -1);
772 #ifdef vec_test_data_class
773  tmp = (vui32_t)vec_test_data_class (vf32, 0x0c);
774 #else
775  __asm__(
776  "xvtstdcsp %x0,%x1,0x0c;\n"
777  : "=wa" (tmp)
778  : "wa" (vf32)
779  :);
780 #endif
781  return vec_any_eq(tmp, vec_ones);
782 #else
783  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
784  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
785  0x80000000);
786  tmp = vec_andc ((vui32_t)vf32, signmask);
787  return vec_any_eq(tmp, vec_zero);
788 #endif
789 }
790 
816 static inline vf32_t
817 vec_copysignf32 (vf32_t vf32x, vf32_t vf32y)
818 {
819 #if _ARCH_PWR7
820 #ifdef PVECLIB_CPSGN_FIXED
821  return (vec_cpsgn (vf32x, vf32y));
822 #else
823  vf32_t result;
824  __asm__(
825  "xvcpsgnsp %x0,%x1,%x2;\n"
826  : "=wa" (result)
827  : "wa" (vf32x), "wa" (vf32y)
828  :);
829  return (result);
830 #endif
831 #else
832  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000,
833  0x80000000, 0x80000000);
834  vf32_t result;
835 
836  result = (vf32_t)vec_sel ((vui32_t)vf32y, (vui32_t)vf32x, signmask);
837  return (result);
838 #endif
839 }
840 
863 static inline vb32_t
864 vec_isfinitef32 (vf32_t vf32)
865 {
866  vb32_t tmp2;
867 #if defined (_ARCH_PWR9)
868 #ifdef vec_test_data_class
869  tmp2 = vec_test_data_class (vf32, 0x70);
870 #else
871  __asm__(
872  "xvtstdcsp %x0,%x1,0x70;\n"
873  : "=wa" (tmp2)
874  : "wa" (vf32)
875  :);
876 #endif
877  return vec_nor (tmp2, tmp2); // vec_not
878 #else
879  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
880  0x7f800000);
881  vui32_t tmp;
882 
883  tmp = vec_and ((vui32_t)vf32, expmask);
884  tmp2 = vec_cmpeq (tmp, expmask);
885  return vec_nor (tmp2, tmp2); // vec_not
886 #endif
887 }
888 
907 static inline vb32_t
908 vec_isinff32 (vf32_t vf32)
909 {
910  vb32_t result;
911 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
912 #ifdef vec_test_data_class
913  result = vec_test_data_class (vf32, 0x30);
914 #else
915  __asm__(
916  "xvtstdcsp %x0,%x1,0x30;\n"
917  : "=wa" (result)
918  : "wa" (vf32)
919  :);
920 #endif
921 #else
922  vui32_t tmp;
923  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
924  0x7f800000);
925  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
926  0x80000000);
927  tmp = vec_andc ((vui32_t)vf32, signmask);
928  result = vec_cmpeq (tmp, expmask);
929 #endif
930  return (result);
931 }
932 
949 static inline vb32_t
950 vec_isnanf32 (vf32_t vf32)
951 {
952  vb32_t result;
953 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
954 #ifdef vec_test_data_class
955  result = vec_test_data_class (vf32, 0x40);
956 #else
957  __asm__(
958  "xvtstdcsp %x0,%x1,0x40;\n"
959  : "=wa" (result)
960  : "wa" (vf32)
961  :);
962 #endif
963 #else
964  vui32_t tmp2;
965  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
966  0x7f800000);
967  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
968  0x80000000);
969  tmp2 = vec_andc ((vui32_t)vf32, signmask);
970  result = vec_cmpgt (tmp2, expmask);
971 #endif
972  return (result);
973 }
974 
995 static inline vb32_t
996 vec_isnormalf32 (vf32_t vf32)
997 {
998 #if _ARCH_PWR9
999  vb32_t tmp2;
1000 #ifdef vec_test_data_class
1001  tmp2 = vec_test_data_class (vf32, 0x7f);
1002 #else
1003  __asm__(
1004  "xvtstdcsp %x0,%x1,0x7f;\n"
1005  : "=wa" (tmp2)
1006  : "wa" (vf32)
1007  :);
1008 #endif
1009  return vec_nor (tmp2, tmp2); // vec_not
1010 #else
1011  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000, 0x7f800000,
1012  0x7f800000);
1013  const vui32_t veczero = CONST_VINT128_W(0, 0, 0, 0);
1014  vui32_t tmp;
1015 
1016  tmp = vec_and ((vui32_t) vf32, expmask);
1017  return vec_nor (vec_cmpeq (tmp, expmask), vec_cmpeq (tmp, veczero));
1018 #endif
1019 }
1020 
1040 static inline vb32_t
1041 vec_issubnormalf32 (vf32_t vf32)
1042 {
1043  vb32_t result;
1044 
1045 #if _ARCH_PWR9
1046 #ifdef vec_test_data_class
1047  result = vec_test_data_class (vf32, 0x03);
1048 #else
1049  __asm__(
1050  "xvtstdcsp %x0,%x1,0x03;\n"
1051  : "=wa" (result)
1052  : "wa" (vf32)
1053  :);
1054 #endif
1055 #else
1056  vui32_t tmp, tmpz, tmp2;
1057  const vui32_t explow = CONST_VINT128_W(0x00800000, 0x00800000, 0x00800000,
1058  0x00800000);
1059  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
1060  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
1061  0x80000000);
1062  tmp2 = vec_andc ((vui32_t)vf32, signmask);
1063  tmp = (vui32_t) vec_cmplt(tmp2, explow);
1064  tmpz = (vui32_t) vec_cmpeq (tmp2, vec_zero);
1065  result = (vb32_t) vec_andc (tmp, tmpz);
1066 #endif
1067  return (result);
1068 }
1069 
1089 static inline vb32_t
1090 vec_iszerof32 (vf32_t vf32)
1091 {
1092  vb32_t result;
1093 #if _ARCH_PWR9
1094 #ifdef vec_test_data_class
1095  result = vec_test_data_class (vf32, 0x0c);
1096 #else
1097  __asm__(
1098  "xvtstdcsp %x0,%x1,0x0c;\n"
1099  : "=wa" (result)
1100  : "wa" (vf32)
1101  :);
1102 #endif
1103 #else
1104  vui32_t tmp2;
1105  const vui32_t vec_zero = CONST_VINT128_W(0, 0, 0, 0);
1106  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0x80000000, 0x80000000,
1107  0x80000000);
1108  tmp2 = vec_andc ((vui32_t)vf32, signmask);
1109  result = vec_cmpeq (tmp2, vec_zero);
1110 #endif
1111  return (result);
1112 }
1113 
1136 static inline vb32_t
1137 vec_setb_sp (vf32_t vra)
1138 {
1139  return vec_setb_sw ((vi32_t) vra);
1140 }
1141 
1162 static inline vf32_t
1163 vec_vgl4fsso (float *array, const long long offset0,
1164  const long long offset1, const long long offset2,
1165  const long long offset3)
1166 {
1167  vf32_t result;
1168 
1169 #ifdef _ARCH_PWR8
1170  vui64_t re0, re1, re2, re3;
1171  re0 = vec_vlxsiwzx (offset0, (unsigned int *) array);
1172  re1 = vec_vlxsiwzx (offset1, (unsigned int *) array);
1173  re2 = vec_vlxsiwzx (offset2, (unsigned int *) array);
1174  re3 = vec_vlxsiwzx (offset3, (unsigned int *) array);
1175  /* Need to handle endian as the vec_vlxsiwzx result is always left
1176  * justified in VR, while element [0] may be left or right. */
1177 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
1178  /* Can't use vec_mergeo here as GCC 7 (AT11) and earlier don't
1179  * support doubleword vec_merge. */
1180  re0 = vec_xxpermdi (re0, re2, 3);
1181  re1 = vec_xxpermdi (re1, re3, 3);
1182  result = (vf32_t) vec_mergee ((vui32_t) re0, (vui32_t) re1);
1183 #else
1184  re0 = vec_xxpermdi (re0, re2, 0);
1185  re1 = vec_xxpermdi (re1, re3, 0);
1186  result = (vf32_t) vec_mergeo ((vui32_t) re0, (vui32_t) re1);
1187 #endif
1188 #else // _ARCH_PWR7
1189  vf32_t xte0, xte1, xte2, xte3;
1190  vui8_t perm0, perm1, perm2, perm3;
1191 
1192  perm0 = vec_lvsl (offset0, array);
1193  xte0 = vec_lde (offset0, array);
1194  xte0 = vec_perm (xte0, xte0, perm0);
1195 
1196  perm1 = vec_lvsl (offset1, array);
1197  xte1 = vec_lde (offset1, array);
1198  xte1 = vec_perm (xte1, xte1, perm1);
1199 
1200  perm2 = vec_lvsl (offset2, array);
1201  xte2 = vec_lde (offset2, array);
1202  xte2 = vec_perm (xte2, xte2, perm2);
1203 
1204  perm3 = vec_lvsl (offset3, array);
1205  xte3 = vec_lde (offset3, array);
1206  xte3 = vec_perm (xte3, xte3, perm3);
1207 
1208  xte0 = vec_mergeh (xte0, xte2);
1209  xte1 = vec_mergeh (xte1, xte3);
1210  result = vec_mergeh (xte0, xte1);
1211 #endif
1212  return (vf32_t)result;
1213 }
1214 
1234 static inline
1235 vf32_t
1236 vec_vgl4fswo (float *array, vi32_t vra)
1237 {
1238  vf32_t r;
1239 
1240 #ifdef _ARCH_PWR8
1241  vi64_t off01, off23;
1242 
1243  off01 = vec_vupkhsw (vra);
1244  off23 = vec_vupklsw (vra);
1245 
1246  r = vec_vgl4fsso (array, off01[0], off01[1], off23[0], off23[1]);
1247 #else
1248  // Need to explicitly manage the VR/GPR xfer for PWR7
1249  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1250  signed int off0, off1, off2, off3;
1251 
1252  off0 = scalar_extract_uint64_from_high_uint128(gprp) >> 32;
1253  off1 = (int) scalar_extract_uint64_from_high_uint128(gprp);
1254  off2 = scalar_extract_uint64_from_low_uint128(gprp) >> 32;
1255  off3 = (int) scalar_extract_uint64_from_low_uint128(gprp);
1256 
1257  r = vec_vgl4fsso (array, off0, off1, off2, off3);
1258 #endif
1259  return r;
1260 }
1261 
1285 static inline
1286 vf32_t
1287 vec_vgl4fswsx (float *array, vi32_t vra,
1288  const unsigned char scale)
1289 {
1290  vf32_t r;
1291 
1292 #ifdef _ARCH_PWR8
1293  vi64_t off01, off23;
1294  vi64_t lshift = vec_splats ((long long) (2+ scale));
1295 
1296  off01 = vec_vupkhsw (vra);
1297  off23 = vec_vupklsw (vra);
1298 
1299  off01 = (vi64_t) __pvec_vsld (off01, (vui64_t) lshift);
1300  off23 = (vi64_t) __pvec_vsld (off23, (vui64_t) lshift);
1301 
1302  r = vec_vgl4fsso (array, off01[0], off01[1], off23[0], off23[1]);
1303 #else
1304  // Need to explicitly manage the VR/GPR xfer for PWR7
1305  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1306  signed long long off0, off1, off2, off3;
1307 
1308  off0 = (scalar_extract_uint64_from_high_uint128(gprp) >> 32) << (2+ scale);
1309  off1 = ((int) scalar_extract_uint64_from_high_uint128(gprp)) << (2+ scale);
1310  off2 = (scalar_extract_uint64_from_low_uint128(gprp) >> 32) << (2+ scale);
1311  off3 = ((int) scalar_extract_uint64_from_low_uint128(gprp)) << (2+ scale);
1312 
1313  r = vec_vgl4fsso (array, off0, off1, off2, off3);
1314 #endif
1315  return r;
1316 }
1317 
1338 static inline
1339 vf32_t
1340 vec_vgl4fswx (float *array, vi32_t vra)
1341 {
1342  vf32_t r;
1343 
1344 #ifdef _ARCH_PWR8
1345  vi64_t off01, off23;
1346  vi64_t lshift = vec_splats ((long long) (2));
1347 
1348  off01 = vec_vupkhsw (vra);
1349  off23 = vec_vupklsw (vra);
1350 
1351  off01 = (vi64_t) __pvec_vsld (off01, (vui64_t) lshift);
1352  off23 = (vi64_t) __pvec_vsld (off23, (vui64_t) lshift);
1353 
1354  r = vec_vgl4fsso (array, off01[0], off01[1], off23[0], off23[1]);
1355 #else
1356  // Need to explicitly manage the VR/GPR xfer for PWR7
1357  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1358  signed long long off0, off1, off2, off3;
1359 
1360  off0 = (scalar_extract_uint64_from_high_uint128(gprp) >> 32) << 2;
1361  off1 = ((int) scalar_extract_uint64_from_high_uint128(gprp)) << 2;
1362  off2 = (scalar_extract_uint64_from_low_uint128(gprp) >> 32) << 2;
1363  off3 = ((int) scalar_extract_uint64_from_low_uint128(gprp)) << 2;
1364 
1365  r = vec_vgl4fsso (array, off0, off1, off2, off3);
1366 #endif
1367  return r;
1368 }
1369 
1387 static inline
1388 vf64_t
1389 vec_vglfsdo (float *array, vi64_t vra)
1390 {
1391  vf64_t result;
1392 
1393 #ifdef _ARCH_PWR8
1394  result = vec_vglfsso (array, vra[0], vra[1]);
1395 #else
1396  // Need to explicitly manage the VR/GPR xfer for PWR7
1397  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1398 
1399  result = vec_vglfsso (array, scalar_extract_uint64_from_high_uint128(gprp),
1400  scalar_extract_uint64_from_low_uint128(gprp));
1401 #endif
1402  return result;
1403 }
1404 
1423 static inline
1424 vf64_t
1425 vec_vglfsdsx (float *array, vi64_t vra,
1426  const unsigned char scale)
1427 {
1428  vf64_t r;
1429 
1430 #ifdef _ARCH_PWR8
1431  vi64_t lshift = vec_splats ((long long) (2 + scale));
1432  vi64_t offset;
1433 
1434  offset = (vi64_t) __pvec_vsld (vra, (vui64_t) lshift);
1435  r = vec_vglfsso (array, offset[0], offset[1]);
1436 #else
1437  long long offset0, offset1;
1438  // Need to explicitly manage the VR/GPR xfer for PWR7
1439  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1440  offset0 = scalar_extract_uint64_from_high_uint128(gprp) << (2 + scale);
1441  offset1 = scalar_extract_uint64_from_low_uint128(gprp) << (2 + scale);
1442 
1443  r = vec_vglfsso (array, offset0, offset1);
1444 #endif
1445  return r;
1446 }
1447 
1467 static inline
1468 vf64_t
1469 vec_vglfsdx (float *array, vi64_t vra)
1470 {
1471  vf64_t r;
1472 
1473 #ifdef _ARCH_PWR8
1474  vi64_t lshift = vec_splats ((long long) 2);
1475  vi64_t offset;
1476 
1477  offset = (vi64_t) __pvec_vsld (vra, (vui64_t) lshift);
1478  r = vec_vglfsso (array, offset[0], offset[1]);
1479 #else
1480  long long offset0, offset1;
1481  // Need to explicitly manage the VR/GPR xfer for PWR7
1482  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1483  offset0 = scalar_extract_uint64_from_high_uint128(gprp) << 2;
1484  offset1 = scalar_extract_uint64_from_low_uint128(gprp) << 2;
1485 
1486  r = vec_vglfsso (array, offset0, offset1);
1487 #endif
1488  return r;
1489 }
1490 
1508 static inline vf64_t
1509 vec_vglfsso (float *array, const long long offset0,
1510  const long long offset1)
1511 {
1512  vf64_t re0, re1, result;
1513 
1514  re0 = vec_vlxsspx (offset0, array);
1515  re1 = vec_vlxsspx (offset1, array);
1516  /* Need to handle endian as the vec_vlxsspx result is always left
1517  * justified in VR, while element [0] may be left or right. */
1518 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
1519  /* Can't use vec_mergeo here as GCC 7 (AT11) and earlier don't
1520  * support doubleword vec_merge. */
1521  result = vec_xxpermdi (re0, re1, 3);
1522 #else
1523 #ifdef _ARCH_PWR7
1524  result = vec_xxpermdi (re0, re1, 0);
1525 #else
1526  re0 = (vf64_t) vec_sld (re0, re0, 8);
1527  result = (vf64_t) vec_sld (re0, re1, 8);
1528 #endif
1529 #endif
1530  return result;
1531 }
1532 
1572 static inline vf64_t
1573 vec_vlxsspx (const signed long long ra, const float *rb)
1574 {
1575  vf64_t xt;
1576 
1577 #if (defined(__clang__) && __clang_major__ < 8)
1578  __VEC_U_128 t;
1579 
1580  float *p = (float *)((char *)rb + ra);
1581  // Splat the load, otherwise some compilers will treat this as dead code.
1582  t.vf2[0] = t.vf2[1] = *p;
1583  xt = t.vf2;
1584 #elif _ARCH_PWR8
1585  if (__builtin_constant_p (ra) && (ra < 32760) && (ra >= -32768)
1586  && ((ra & 3) == 0))
1587  {
1588  #if defined (_ARCH_PWR9)
1589  __asm__(
1590  "lxssp%X1 %0,%1;"
1591  : "=v" (xt)
1592  : "m" (*(float*)((char *)rb + ra))
1593  : );
1594  #else
1595  if (ra == 0)
1596  {
1597  __asm__(
1598  "lxsspx %x0,%y1;"
1599  : "=wa" (xt)
1600  : "Z" (*rb)
1601  : );
1602  } else {
1603  unsigned long long rt;
1604  __asm__(
1605  "li %0,%1;"
1606  : "=r" (rt)
1607  : "I" (ra)
1608  : );
1609  __asm__(
1610  "lxsspx %x0,%y1;"
1611  : "=wa" (xt)
1612  : "Z" (*(float *)((char *)rb+rt))
1613  : );
1614  }
1615  #endif
1616  } else {
1617  __asm__(
1618  "lxsspx %x0,%y1;"
1619  : "=wa" (xt)
1620  : "Z" (*(float *)((char *)rb+ra))
1621  : );
1622  }
1623 #else // _ARCH_PWR7
1624  __VEC_U_128 t;
1625 
1626  float *p = (float *)((char *)rb + ra);
1627  // Let the compiler generate a Load Float Single Indexed
1628  t.vf2[0] = t.vf2[1] = *p;
1629  xt = t.vf2;
1630 #endif
1631  return xt;
1632 }
1633 
1651 static inline void
1652 vec_vsst4fsso (vf32_t xs, float *array,
1653  const long long offset0, const long long offset1,
1654  const long long offset2, const long long offset3)
1655 {
1656  vf32_t xs0, xs1, xs2, xs3;
1657 
1658  xs0 = vec_splat (xs, 0);
1659  xs1 = vec_splat (xs, 1);
1660  xs2 = vec_splat (xs, 2);
1661  xs3 = vec_splat (xs, 3);
1662  vec_ste (xs0, offset0, array);
1663  vec_ste (xs1, offset1, array);
1664  vec_ste (xs2, offset2, array);
1665  vec_ste (xs3, offset3, array);
1666 }
1667 
1685 static inline void
1686 vec_vsst4fswo (vf32_t xs, float *array,
1687  vi32_t vra)
1688 {
1689 #ifdef _ARCH_PWR8
1690  vi64_t off01, off23;
1691 
1692  off01 = vec_vupkhsw (vra);
1693  off23 = vec_vupklsw (vra);
1694 
1695  vec_vsst4fsso (xs, array, off01[0], off01[1], off23[0], off23[1]);
1696 #else
1697  // Need to explicitly manage the VR/GPR xfer for PWR7
1698  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1699  signed int off0, off1, off2, off3;
1700 
1701  off0 = scalar_extract_uint64_from_high_uint128(gprp) >> 32;
1702  off1 = (int) scalar_extract_uint64_from_high_uint128(gprp);
1703  off2 = scalar_extract_uint64_from_low_uint128(gprp) >> 32;
1704  off3 = (int) scalar_extract_uint64_from_low_uint128(gprp);
1705 
1706  vec_vsst4fsso (xs, array, off0, off1, off2, off3);
1707 #endif
1708 }
1709 
1731 static inline void
1732 vec_vsst4fswsx (vf32_t xs, float *array,
1733  vi32_t vra, const unsigned char scale)
1734 {
1735 #ifdef _ARCH_PWR8
1736  vi64_t off01, off23;
1737  vui64_t lshift = vec_splats ((unsigned long long) (2 + scale));
1738 
1739  off01 = vec_vupkhsw (vra);
1740  off23 = vec_vupklsw (vra);
1741 
1742  off01 = (vi64_t) __pvec_vsld (off01, (vui64_t) lshift);
1743  off23 = (vi64_t) __pvec_vsld (off23, (vui64_t) lshift);
1744 
1745  vec_vsst4fsso (xs, array, off01[0], off01[1], off23[0], off23[1]);
1746 #else
1747  // Need to explicitly manage the VR/GPR xfer for PWR7
1748  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1749  signed int off0, off1, off2, off3;
1750 
1751  off0 = (scalar_extract_uint64_from_high_uint128(gprp) >> 32) << (2 + scale);
1752  off1 = ((int) scalar_extract_uint64_from_high_uint128(gprp)) << (2 + scale);
1753  off2 = (scalar_extract_uint64_from_low_uint128(gprp) >> 32) << (2 + scale);
1754  off3 = ((int) scalar_extract_uint64_from_low_uint128(gprp)) << (2 + scale);
1755 
1756  vec_vsst4fsso (xs, array, off0, off1, off2, off3);
1757 #endif
1758 }
1759 
1779 static inline void
1780 vec_vsst4fswx (vf32_t xs, float *array,
1781  vi32_t vra)
1782 {
1783 #ifdef _ARCH_PWR8
1784  vi64_t off01, off23;
1785  vui64_t lshift = vec_splats ((unsigned long long) 2);
1786 
1787  off01 = vec_vupkhsw (vra);
1788  off23 = vec_vupklsw (vra);
1789 
1790  off01 = (vi64_t) __pvec_vsld (off01, (vui64_t) lshift);
1791  off23 = (vi64_t) __pvec_vsld (off23, (vui64_t) lshift);
1792 
1793  vec_vsst4fsso (xs, array, off01[0], off01[1], off23[0], off23[1]);
1794 #else
1795  // Need to explicitly manage the VR/GPR xfer for PWR7
1796  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1797  signed int off0, off1, off2, off3;
1798 
1799  off0 = (scalar_extract_uint64_from_high_uint128(gprp) >> 32) << 2;
1800  off1 = ((int) scalar_extract_uint64_from_high_uint128(gprp)) << 2;
1801  off2 = (scalar_extract_uint64_from_low_uint128(gprp) >> 32) << 2;
1802  off3 = ((int) scalar_extract_uint64_from_low_uint128(gprp)) << 2;
1803 
1804  vec_vsst4fsso (xs, array, off0, off1, off2, off3);
1805 #endif
1806 }
1807 
1824 static inline void
1825 vec_vsstfsdo (vf64_t xs, float *array, vi64_t vra)
1826 {
1827 #ifdef _ARCH_PWR8
1828  vec_vsstfsso (xs, array, vra[0], vra[1]);
1829 #else
1830  // Need to explicitly manage the VR/GPR xfer for PWR7
1831  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1832 
1833  vec_vsstfsso (xs, array,
1834  scalar_extract_uint64_from_high_uint128(gprp),
1835  scalar_extract_uint64_from_low_uint128(gprp));
1836 #endif
1837 }
1838 
1857 static inline void
1858 vec_vsstfsdsx (vf64_t xs, float *array, vi64_t vra,
1859  const unsigned char scale)
1860 {
1861 #ifdef _ARCH_PWR8
1862  vui64_t lshift = vec_splats ((unsigned long long) (2 + scale));
1863  vui64_t offset;
1864 
1865  offset = (vui64_t) __pvec_vsld (vra, (vui64_t) lshift);
1866  vec_vsstfsso (xs, array, offset[0], offset[1]);
1867 #else
1868  long long offset0, offset1;
1869  // Need to explicitly manage the VR/GPR xfer for PWR7
1870  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1871  offset0 = scalar_extract_uint64_from_high_uint128(gprp) << (2 + scale);
1872  offset1 = scalar_extract_uint64_from_low_uint128(gprp) << (2 + scale);
1873 
1874  vec_vsstfsso (xs, array, offset0, offset1);
1875 #endif
1876 }
1877 
1894 static inline void
1895 vec_vsstfsdx (vf64_t xs, float *array, vi64_t vra)
1896 {
1897 #ifdef _ARCH_PWR8
1898  vui64_t lshift = vec_splats ((unsigned long long) 2);
1899  vui64_t offset;
1900 
1901  offset = (vui64_t) __pvec_vsld (vra, (vui64_t) lshift);
1902  vec_vsstfsso (xs, array, offset[0], offset[1]);
1903 #else
1904  long long offset0, offset1;
1905  // Need to explicitly manage the VR/GPR xfer for PWR7
1906  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1907  offset0 = scalar_extract_uint64_from_high_uint128(gprp) << 2;
1908  offset1 = scalar_extract_uint64_from_low_uint128(gprp) << 2;
1909 
1910  vec_vsstfsso (xs, array, offset0, offset1);
1911 #endif
1912 }
1913 
1930 static inline void
1931 vec_vsstfsso (vf64_t xs, float *array,
1932  const long long offset0, const long long offset1)
1933 {
1934  vf64_t xs0, xs1;
1935 
1936  xs0 = xs;
1937  // xs1 = vec_xxswapd (xs);
1938 #ifdef _ARCH_PWR7
1939  xs1 = vec_xxpermdi (xs, xs, 2);
1940 #else
1941  xs1 = vec_sld (xs0, xs0, 8);
1942 #endif
1943  /* Need to handle endian as vec_vstxsspx always stores the
1944  * left doubleword of the VSR, while doubleword element [0] may be in
1945  * the left or right doubleword. */
1946 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
1947  vec_vstxsspx (xs0, offset1, array);
1948  vec_vstxsspx (xs1, offset0, array);
1949 #else
1950  vec_vstxsspx (xs0, offset0, array);
1951  vec_vstxsspx (xs1, offset1, array);
1952 #endif
1953 }
1954 
1983 static inline void
1984 vec_vstxsspx (vf64_t xs, const signed long long ra, float *rb)
1985 {
1986 #if (defined(__clang__) && __clang_major__ < 8)
1987  __VEC_U_128 t;
1988  float *p = (float *)((char *)rb + ra);
1989  t.vf2 = xs;
1990  *p = t.vf2[0];
1991 #elif _ARCH_PWR8
1992  if (__builtin_constant_p (ra) && (ra < 32760) && (ra >= -32768)
1993  && ((ra & 3) == 0))
1994  {
1995 #if defined (_ARCH_PWR9)
1996  __asm__(
1997  "stxssp%X0 %1,%0;"
1998  : "=m" (*(float*)((char *)rb + ra))
1999  : "v" (xs)
2000  : );
2001 #else
2002  if (ra == 0)
2003  {
2004  __asm__(
2005  "stxsspx %x1,%y0;"
2006  : "=Z" (*rb)
2007  : "wa" (xs)
2008  : );
2009  } else {
2010  unsigned long long rt;
2011  __asm__(
2012  "li %0,%1;"
2013  : "=r" (rt)
2014  : "I" (ra)
2015  : );
2016  __asm__(
2017  "stxsspx %x1,%y0;"
2018  : "=Z" (*(float *)((char *)rb+rt))
2019  : "wa" (xs)
2020  : );
2021  }
2022 #endif
2023  } else {
2024  __asm__(
2025  "stxsspx %x1,%y0;"
2026  : "=Z" (*(float *)((char *)rb+ra))
2027  : "wa" (xs)
2028  : );
2029  }
2030 #else //_ARCH_PWR7
2031  // Let the compiler generate a Store Float Single Indexed
2032  __VEC_U_128 t;
2033  float *p = (float *)((char *)rb + ra);
2034  t.vf2 = xs;
2035  *p = t.vf2[0];
2036 #endif
2037 }
2038 
2066 static inline vf32_t
2067 vec_xviexpsp (vui32_t sig, vui32_t exp)
2068 {
2069  vf32_t result;
2070 #if defined (_ARCH_PWR9) && defined (__VSX__) && (__GNUC__ > 7)
2071 #if defined (vec_insert_exp)
2072  result = vec_insert_exp (sig, exp);
2073 #else
2074  __asm__(
2075  "xviexpsp %x0,%x1,%x2"
2076  : "=wa" (result)
2077  : "wa" (sig), "wa" (exp)
2078  : );
2079 #endif
2080 #else
2081  vui32_t tmp;
2082  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000,
2083  0x7f800000, 0x7f800000);
2084 
2085  tmp = vec_slwi (exp, 23);
2086  result = (vf32_t) vec_sel ((vui32_t) sig, tmp, expmask);
2087 #endif
2088  return result;
2089 }
2090 
2115 static inline vui32_t
2116 vec_xvxexpsp (vf32_t vrb)
2117 {
2118  vui32_t result;
2119 #if defined (_ARCH_PWR9) && defined (__VSX__) && (__GNUC__ > 7)
2120 #if defined (vec_extract_exp)
2121  result = vec_extract_exp (vrb);
2122 #else
2123  __asm__(
2124  "xvxexpsp %x0,%x1"
2125  : "=wa" (result)
2126  : "wa" (vrb)
2127  : );
2128 #endif
2129 #else
2130  vui32_t tmp;
2131  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000,
2132  0x7f800000, 0x7f800000);
2133 
2134  tmp = vec_and ((vui32_t) vrb, expmask);
2135  result = vec_srwi (tmp, 23);
2136 #endif
2137  return result;
2138 }
2139 
2165 static inline vui32_t
2166 vec_xvxsigsp (vf32_t vrb)
2167 {
2168  vui32_t result;
2169 #if defined (_ARCH_PWR9) && defined (__VSX__) && (__GNUC__ > 7)
2170 #if defined (vec_extract_sig)
2171  result = vec_extract_sig (vrb);
2172 #else
2173  __asm__(
2174  "xvxsigsp %x0,%x1"
2175  : "=wa" (result)
2176  : "wa" (vrb)
2177  : );
2178 #endif
2179 #else
2180  vui32_t t128, tmp;
2181  vui32_t normal;
2182  const vui32_t zero = CONST_VINT128_W(0, 0, 0, 0);
2183  const vui32_t sigmask = CONST_VINT128_W(0x007fffff, 0x007fffff,
2184  0x007fffff, 0x007fffff);
2185  const vui32_t expmask = CONST_VINT128_W(0x7f800000, 0x7f800000,
2186  0x7f800000, 0x7f800000);
2187  const vui32_t hidden = CONST_VINT128_W(0x00800000, 0x00800000,
2188  0x00800000, 0x00800000);
2189 
2190  // Check if vrb is normal. Normal values need the hidden bit
2191  // restored to the significand. We use a simpler sequence here as
2192  // vec_isnormalf32 does more than we need.
2193  tmp = vec_and ((vui32_t) vrb, expmask);
2194  normal = vec_nor ((vui32_t) vec_cmpeq (tmp, expmask),
2195  (vui32_t) vec_cmpeq (tmp, zero));
2196  t128 = vec_and ((vui32_t) vrb, sigmask);
2197  result = (vui32_t) vec_sel (t128, normal, hidden);
2198 #endif
2199  return result;
2200 }
2201 
2202 #endif /* VEC_F32_PPC_H_ */
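
A minimal usage sketch for the classification and magnitude helpers defined above, assuming the installed pveclib headers and GCC-style vector literals and subscripting; the test values and variable names are illustrative only:

#include <stdio.h>
#include <math.h>
#include <pveclib/vec_f32_ppc.h>

int
main (void)
{
  /* One NaN, one infinity, one subnormal, and one ordinary element.  */
  vf32_t vx = { NAN, INFINITY, 1.0e-42f, -2.5f };
  vf32_t vmag = vec_absf32 (vx);
  vui32_t nan_mask = (vui32_t) vec_isnanf32 (vx);

  printf ("any NaN:    %d\n", vec_any_isnanf32 (vx));
  printf ("all finite: %d\n", vec_all_isfinitef32 (vx));

  /* Inspect individual lanes via GCC vector subscripting.  */
  for (int i = 0; i < 4; i++)
    printf ("lane %d: |x| = %g, isnan mask = 0x%08x\n",
            i, (double) vmag[i], nan_mask[i]);
  return 0;
}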
vec_isfinitef32
static vb32_t vec_isfinitef32(vf32_t vf32)
Return 4x32-bit vector boolean true values for each float element that is Finite (Not NaN nor Inf).
Definition: vec_f32_ppc.h:864
vec_vlxsspx
static vf64_t vec_vlxsspx(const signed long long ra, const float *rb)
Vector Load Scalar Single Float Indexed.
Definition: vec_f32_ppc.h:1573
vec_xvxsigsp
static vui32_t vec_xvxsigsp(vf32_t vrb)
Vector Extract Significand Single-Precision.
Definition: vec_f32_ppc.h:2166
vec_vsstfsdo
static void vec_vsstfsdo(vf64_t xs, float *array, vi64_t vra)
Vector Scatter-Store Float Singles to Vector Doubleword Offsets.
Definition: vec_f32_ppc.h:1825
scalar_extract_uint64_from_high_uint128
static unsigned long long scalar_extract_uint64_from_high_uint128(unsigned __int128 gprp)
Extract the high doubleword from a __int128 scalar.
Definition: vec_common_ppc.h:503
vec_xviexpsp
static vf32_t vec_xviexpsp(vui32_t sig, vui32_t exp)
Vector Insert Exponent Single-Precision.
Definition: vec_f32_ppc.h:2067
vb32_t
__vector __bool int vb32_t
vector of 32-bit bool int elements.
Definition: vec_common_ppc.h:228
vf32_t
__vector float vf32_t
vector of 32-bit float elements.
Definition: vec_common_ppc.h:219
vec_setb_sw
static vb32_t vec_setb_sw(vi32_t vra)
Vector Set Bool from Signed Word.
Definition: vec_int32_ppc.h:1273
vec_all_isnormalf32
static int vec_all_isnormalf32(vf32_t vf32)
Return true if all of 4x32-bit vector float values are normal (Not NaN, Inf, denormal,...
Definition: vec_f32_ppc.h:399
vec_vstxsspx
static void vec_vstxsspx(vf64_t xs, const signed long long ra, float *rb)
Vector Store Scalar Single Float Indexed.
Definition: vec_f32_ppc.h:1984
CONST_VINT128_W
#define CONST_VINT128_W(__w0, __w1, __w2, __w3)
Arrange word elements of an unsigned int initializer in high->low order. May require an explicit cast.
Definition: vec_common_ppc.h:304
vec_transfer_vui128t_to_uint128
static unsigned __int128 vec_transfer_vui128t_to_uint128(vui128_t vra)
Transfer a vector unsigned __int128 to __int128 scalar.
Definition: vec_common_ppc.h:420
vec_all_issubnormalf32
static int vec_all_issubnormalf32(vf32_t vf32)
Return true if all of 4x32-bit vector float values is subnormal (denormal).
Definition: vec_f32_ppc.h:442
vec_all_isnanf32
static int vec_all_isnanf32(vf32_t vf32)
Return true if all of 4x32-bit vector float values are NaN.
Definition: vec_f32_ppc.h:352
vec_any_isinff32
static int vec_any_isinff32(vf32_t vf32)
Return true if any 4x32-bit vector float values are infinity.
Definition: vec_f32_ppc.h:575
vec_vglfsdsx
static vf64_t vec_vglfsdsx(float *array, vi64_t vra, const unsigned char scale)
Vector Gather-Load Single Floats from Vector Doubleword Scaled Indexes.
Definition: vec_f32_ppc.h:1425
vec_vsstfsso
static void vec_vsstfsso(vf64_t xs, float *array, const long long offset0, const long long offset1)
Vector Scatter-Store Float Singles to Scalar Offsets.
Definition: vec_f32_ppc.h:1931
vui64_t
__vector unsigned long long vui64_t
vector of 64-bit unsigned long long elements.
Definition: vec_common_ppc.h:208
vec_vsst4fswx
static void vec_vsst4fswx(vf32_t xs, float *array, vi32_t vra)
Vector Scatter-Store 4 Float Singles to Vector Word Indexes.
Definition: vec_f32_ppc.h:1780
vec_copysignf32
static vf32_t vec_copysignf32(vf32_t vf32x, vf32_t vf32y)
Copy the sign bit from vf32x merged with magnitude from vf32y and return the resulting vector float v...
Definition: vec_f32_ppc.h:817
vec_common_ppc.h
Common definitions and typedef used by the collection of Power Vector Library (pveclib) headers.
vec_xvxexpsp
static vui32_t vec_xvxexpsp(vf32_t vrb)
Vector Extract Exponent Single-Precision.
Definition: vec_f32_ppc.h:2116
vui8_t
__vector unsigned char vui8_t
vector of 8-bit unsigned char elements.
Definition: vec_common_ppc.h:202
vi32_t
__vector int vi32_t
vector of 32-bit signed int elements.
Definition: vec_common_ppc.h:215
vec_absf32
static vf32_t vec_absf32(vf32_t vf32x)
Vector float absolute value.
Definition: vec_f32_ppc.h:232
vec_vsst4fswo
static void vec_vsst4fswo(vf32_t xs, float *array, vi32_t vra)
Vector Scatter-Store 4 Float Singles to Vector Word Offsets.
Definition: vec_f32_ppc.h:1686
vec_int128_ppc.h
Header package containing a collection of 128-bit computation functions implemented with PowerISA VMX...
vec_all_isfinitef32
static int vec_all_isfinitef32(vf32_t vf32)
Return true if all 4x32-bit vector float values are Finite (Not NaN nor Inf).
Definition: vec_f32_ppc.h:264
vec_srwi
static vui32_t vec_srwi(vui32_t vra, const unsigned int shb)
Vector Shift Right Word Immediate.
Definition: vec_int32_ppc.h:1405
vec_vsst4fswsx
static void vec_vsst4fswsx(vf32_t xs, float *array, vi32_t vra, const unsigned char scale)
Vector Scatter-Store 4 Float Singles to Vector Word Scaled Indexes.
Definition: vec_f32_ppc.h:1732
vec_any_issubnormalf32
static int vec_any_issubnormalf32(vf32_t vf32)
Return true if any of 4x32-bit vector float values is subnormal (denormal).
Definition: vec_f32_ppc.h:713
vui128_t
__vector unsigned __int128 vui128_t
vector of one 128-bit unsigned __int128 element.
Definition: vec_common_ppc.h:237
vec_vupklsw
static vi64_t vec_vupklsw(vi32_t vra)
Vector Unpack Low Signed Word.
Definition: vec_int32_ppc.h:3028
vec_any_isfinitef32
static int vec_any_isfinitef32(vf32_t vf32)
Return true if any 4x32-bit vector float values are Finite (Not NaN nor Inf).
Definition: vec_f32_ppc.h:534
__VEC_U_128
Union used to transfer 128-bit data between vector and non-vector types.
Definition: vec_common_ppc.h:256
vec_iszerof32
static vb32_t vec_iszerof32(vf32_t vf32)
Return 4x32-bit vector boolean true values, for each float value that is +-0.0.
Definition: vec_f32_ppc.h:1090
vec_setb_sp
static vb32_t vec_setb_sp(vf32_t vra)
Vector Set Bool from Sign, Single Precision.
Definition: vec_f32_ppc.h:1137
vi64_t
__vector long long vi64_t
vector of 64-bit signed long long elements.
Definition: vec_common_ppc.h:217
vec_vgl4fswx
static vf32_t vec_vgl4fswx(float *array, vi32_t vra)
Vector Gather-Load 4 Words from Vector Word Indexes.
Definition: vec_f32_ppc.h:1340
vec_vglfsso
static vf64_t vec_vglfsso(float *array, const long long offset0, const long long offset1)
Vector Gather-Load Float Single from scalar Offsets.
Definition: vec_f32_ppc.h:1509
vec_vsstfsdsx
static void vec_vsstfsdsx(vf64_t xs, float *array, vi64_t vra, const unsigned char scale)
Vector Scatter-Store Words to Vector Doubleword Scaled Indexes.
Definition: vec_f32_ppc.h:1858
vec_all_isinff32
static int vec_all_isinff32(vf32_t vf32)
Return true if all 4x32-bit vector float values are infinity.
Definition: vec_f32_ppc.h:306
vec_vglfsdo
static vf64_t vec_vglfsdo(float *array, vi64_t vra)
Vector Gather-Load Single Floats from Vector Doubleword Offsets.
Definition: vec_f32_ppc.h:1389
vec_any_isnanf32
static int vec_any_isnanf32(vf32_t vf32)
Return true if any of 4x32-bit vector float values are NaN.
Definition: vec_f32_ppc.h:621
vec_vupkhsw
static vi64_t vec_vupkhsw(vi32_t vra)
Vector Unpack High Signed Word.
Definition: vec_int32_ppc.h:2950
vec_vgl4fswo
static vf32_t vec_vgl4fswo(float *array, vi32_t vra)
Vector Gather-Load 4 Words from Vector Word Offsets.
Definition: vec_f32_ppc.h:1236
vec_any_iszerof32
static int vec_any_iszerof32(vf32_t vf32)
Return true if any of 4x32-bit vector float values are +-0.0.
Definition: vec_f32_ppc.h:766
vui32_t
__vector unsigned int vui32_t
vector of 32-bit unsigned int elements.
Definition: vec_common_ppc.h:206
scalar_extract_uint64_from_low_uint128
static unsigned long long scalar_extract_uint64_from_low_uint128(unsigned __int128 gprp)
Extract the low doubleword from a __int128 scalar.
Definition: vec_common_ppc.h:490
vec_vgl4fsso
static vf32_t vec_vgl4fsso(float *array, const long long offset0, const long long offset1, const long long offset2, const long long offset3)
Vector Gather-Load 4 Words from scalar Offsets.
Definition: vec_f32_ppc.h:1163
vec_vlxsiwzx
static vui64_t vec_vlxsiwzx(const signed long long ra, const unsigned int *rb)
Vector Load Scalar Integer Word and Zero Indexed.
Definition: vec_int32_ppc.h:2109
vf64_t
__vector double vf64_t
vector of 64-bit double elements.
Definition: vec_common_ppc.h:221
vec_isnormalf32
static vb32_t vec_isnormalf32(vf32_t vf32)
Return 4x32-bit vector boolean true values, for each float value, if normal (Not NaN,...
Definition: vec_f32_ppc.h:996
vec_issubnormalf32
static vb32_t vec_issubnormalf32(vf32_t vf32)
Return 4x32-bit vector boolean true values, for each float value that is subnormal (denormal).
Definition: vec_f32_ppc.h:1041
vec_all_iszerof32
static int vec_all_iszerof32(vf32_t vf32)
Return true if all of 4x32-bit vector float values are +-0.0.
Definition: vec_f32_ppc.h:489
__VEC_U_128::vf2
vf64_t vf2
128 bit Vector of 2 double float elements.
Definition: vec_common_ppc.h:279
vec_slwi
static vui32_t vec_slwi(vui32_t vra, const unsigned int shb)
Vector Shift left Word Immediate.
Definition: vec_int32_ppc.h:1309
vec_vsst4fsso
static void vec_vsst4fsso(vf32_t xs, float *array, const long long offset0, const long long offset1, const long long offset2, const long long offset3)
Vector Scatter-Store 4 Float Singles to Scalar Offsets.
Definition: vec_f32_ppc.h:1652
vec_isinff32
static vb32_t vec_isinff32(vf32_t vf32)
Return 4x32-bit vector boolean true values for each float, if infinity.
Definition: vec_f32_ppc.h:908
vec_any_isnormalf32
static int vec_any_isnormalf32(vf32_t vf32)
Return true if any of 4x32-bit vector float values are normal (Not NaN, Inf, denormal,...
Definition: vec_f32_ppc.h:668
vec_vsstfsdx
static void vec_vsstfsdx(vf64_t xs, float *array, vi64_t vra)
Vector Scatter-Store Words to Vector Doubleword Indexes.
Definition: vec_f32_ppc.h:1895
vec_isnanf32
static vb32_t vec_isnanf32(vf32_t vf32)
Return 4x32-bit vector boolean true values, for each float NaN value.
Definition: vec_f32_ppc.h:950
__vbinary32
vf32_t __vbinary32
typedef __vbinary32 to vector of 4x float elements.
Definition: vec_f32_ppc.h:219
vec_vgl4fswsx
static vf32_t vec_vgl4fswsx(float *array, vi32_t vra, const unsigned char scale)
Vector Gather-Load 4 Words from Vector Word Scaled Indexes.
Definition: vec_f32_ppc.h:1287
vec_vglfsdx
static vf64_t vec_vglfsdx(float *array, vi64_t vra)
Vector Gather-Load Single Floats from Vector Doubleword Indexes.
Definition: vec_f32_ppc.h:1469
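
As a further hedged sketch, the word gather/scatter pair can be exercised as follows; the array contents and offsets are illustrative, and the offsets are byte offsets (word index * 4), matching the vec_vgl4fswo and vec_vsst4fswo definitions above:

#include <stdio.h>
#include <pveclib/vec_f32_ppc.h>

int
main (void)
{
  float src[8] = { 0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f };
  float dst[8] = { 0.0f };
  /* Byte offsets of word elements 1, 3, 4 and 6.  */
  vi32_t offs = { 1 * 4, 3 * 4, 4 * 4, 6 * 4 };
  vf32_t gathered;

  /* Gather four non-contiguous floats, then scatter them back out.  */
  gathered = vec_vgl4fswo (src, offs);
  vec_vsst4fswo (gathered, dst, offs);

  for (int i = 0; i < 4; i++)
    printf ("gathered[%d] = %g\n", i, (double) gathered[i]);
  for (int i = 0; i < 8; i++)
    printf ("dst[%d] = %g\n", i, (double) dst[i]);
  return 0;
}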