POWER Vector Library Manual 1.0.4
vec_f64_ppc.h
1 /*
2  Copyright (c) 2017 IBM Corporation.
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  vec_f64_ppc.h
17 
18  Contributors:
19  IBM Corporation, Steven Munroe
20  Created on: Apr 12, 2016
21  */
22 
23 #ifndef VEC_F64_PPC_H_
24 #define VEC_F64_PPC_H_
25 
26 #include <pveclib/vec_common_ppc.h>
27 #include <pveclib/vec_int128_ppc.h>
203 static inline vf64_t
205 vec_vlxsfdx (const signed long long ra, const double *rb);
206 static inline void
207 vec_vstxsfdx (vf64_t xs, const signed long long ra, double *rb);
209 
220 static inline vf64_t
221 vec_absf64 (vf64_t vf64x)
222 {
223 #if _ARCH_PWR7
224  /* Requires VSX but eliminates a const load. */
225  return vec_abs (vf64x);
226 #else
227  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0,
228  0x80000000, 0);
229  return (vf64_t)vec_andc ((vui32_t)vf64x, signmask);
230 #endif
231 }
232 
252 static inline int
253 vec_all_isfinitef64 (vf64_t vf64)
254 {
255  vui64_t tmp;
256 #if _ARCH_PWR9 && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
257  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
258 #ifdef vec_test_data_class
259  tmp = (vui64_t)vec_test_data_class (vf64, 0x70);
260 #else
261  __asm__(
262  "xvtstdcdp %x0,%x1,0x70;\n"
263  : "=wa" (tmp)
264  : "wa" (vf64)
265  :);
266 #endif
267  return vec_all_eq(tmp, vec_zero);
268 #else
269  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
270  0x7ff0000000000000UL);
271  tmp = vec_and ((vui64_t)vf64, expmask);
272  return !vec_cmpud_any_eq(tmp, expmask);
273 #endif
274 }
275 
294 static inline int
295 vec_all_isinff64 (vf64_t vf64)
296 {
297  vui64_t tmp;
298 
299 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
300  const vui64_t vec_ones = CONST_VINT128_DW (-1, -1);
301 #ifdef vec_test_data_class
302  tmp = (vui64_t)vec_test_data_class (vf64, 0x30);
303 #else
304  __asm__(
305  "xvtstdcdp %x0,%x1,0x30;\n"
306  : "=wa" (tmp)
307  : "wa" (vf64)
308  :);
309 #endif
310  return vec_all_eq(tmp, vec_ones);
311 #else
312  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
313  0x8000000000000000UL);
314  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
315  0x7ff0000000000000UL);
316  tmp = vec_andc ((vui64_t)vf64, signmask);
317  return vec_cmpud_all_eq(tmp, expmask);
318 #endif
319 }
320 
340 static inline int
341 vec_all_isnanf64 (vf64_t vf64)
342 {
343  vui64_t tmp;
344 
345 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
346  const vui64_t vec_ones = CONST_VINT128_DW (-1, -1);
347 #ifdef vec_test_data_class
348  tmp = (vui64_t)vec_test_data_class (vf64, 0x40);
349 #else
350  __asm__(
351  "xvtstdcdp %x0,%x1,0x40;\n"
352  : "=wa" (tmp)
353  : "wa" (vf64)
354  :);
355 #endif
356  return vec_all_eq(tmp, vec_ones);
357 #else
358  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
359  0x8000000000000000UL);
360  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
361  0x7ff0000000000000UL);
362  tmp = vec_andc ((vui64_t)vf64, signmask);
363  return vec_cmpud_all_gt(tmp, expmask);
364 #endif
365 }
366 
387 static inline int
388 vec_all_isnormalf64 (vf64_t vf64)
389 {
390  vui64_t tmp;
391  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
392 #if _ARCH_PWR9 && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
393 #ifdef vec_test_data_class
394  tmp = (vui64_t)vec_test_data_class (vf64, 0x7f);
395 #else
396  __asm__(
397  "xvtstdcdp %x0,%x1,0x7f;\n"
398  : "=wa" (tmp)
399  : "wa" (vf64)
400  :);
401 #endif
402  return vec_all_eq(tmp, vec_zero);
403 #else
404  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
405  0x7ff0000000000000UL);
406  tmp = vec_and ((vui64_t) vf64, expmask);
407  return !(vec_cmpud_any_eq (tmp, expmask)
408  || vec_cmpud_any_eq (tmp, vec_zero));
409 #endif
410 }
411 
431 static inline int
432 vec_all_issubnormalf64 (vf64_t vf64)
433 {
434  vui64_t tmp;
435 
436 #if _ARCH_PWR9 && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
437  const vui64_t vec_ones = CONST_VINT128_DW (-1, -1);
438 #ifdef vec_test_data_class
439  tmp = (vui64_t)vec_test_data_class (vf64, 0x03);
440 #else
441  __asm__(
442  "xvtstdcdp %x0,%x1,0x03;\n"
443  : "=wa" (tmp)
444  : "wa" (vf64)
445  :);
446 #endif
447  return vec_all_eq(tmp, vec_ones);
448 #else
449  const vui64_t explow = CONST_VINT128_DW (0x0010000000000000,
450  0x0010000000000000);
451  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
452  0x8000000000000000UL);
453  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
454 
455  tmp = vec_andc ((vui64_t)vf64, signmask);
456  return vec_cmpud_all_lt (tmp, explow) && vec_cmpud_all_ne (tmp, vec_zero);
457 #endif
458 }
459 
479 static inline int
480 vec_all_iszerof64 (vf64_t vf64)
481 {
482  vui64_t tmp;
483 
484 #if _ARCH_PWR9 && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
485  const vui64_t vec_ones = CONST_VINT128_DW (-1, -1);
486 #ifdef vec_test_data_class
487  tmp = (vui64_t)vec_test_data_class (vf64, 0x0c);
488 #else
489  __asm__(
490  "xvtstdcdp %x0,%x1,0x0c;\n"
491  : "=wa" (tmp)
492  : "wa" (vf64)
493  :);
494 #endif
495  return vec_all_eq(tmp, vec_ones);
496 #else
497  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
498  0x8000000000000000UL);
499  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
500 
501  tmp = vec_andc ((vui64_t)vf64, signmask);
502  return vec_all_eq((vui32_t)tmp, (vui32_t)vec_zero);
503 #endif
504 }
505 
525 static inline int
526 vec_any_isfinitef64 (vf64_t vf64)
527 {
528  vui64_t tmp;
529 #if _ARCH_PWR9
530  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
531 #ifdef vec_test_data_class
532  tmp = (vui64_t)vec_test_data_class (vf64, 0x70);
533 #else
534  __asm__(
535  "xvtstdcdp %x0,%x1,0x70;\n"
536  : "=wa" (tmp)
537  : "wa" (vf64)
538  :);
539 #endif
540  return vec_any_eq(tmp, vec_zero);
541 #else
542  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
543  0x7ff0000000000000UL);
544  tmp = vec_and ((vui64_t)vf64, expmask);
545  return !vec_cmpud_all_eq(tmp, expmask);
546 #endif
547 }
548 
566 static inline int
567 vec_any_isinff64 (vf64_t vf64)
568 {
569  vui64_t tmp;
570 
571 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
572  const vui64_t vec_ones = CONST_VINT128_DW (-1, -1);
573 #ifdef vec_test_data_class
574  tmp = (vui64_t)vec_test_data_class (vf64, 0x30);
575 #else
576  __asm__(
577  "xvtstdcdp %x0,%x1,0x30;\n"
578  : "=wa" (tmp)
579  : "wa" (vf64)
580  :);
581 #endif
582  return vec_any_eq(tmp, vec_ones);
583 #else
584  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
585  0x7ff0000000000000UL);
586  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
587  0x8000000000000000UL);
588  tmp = vec_andc ((vui64_t)vf64, signmask);
589  return vec_cmpud_any_eq(tmp, expmask);
590 #endif
591 }
592 
612 static inline int
613 vec_any_isnanf64 (vf64_t vf64)
614 {
615  vui64_t tmp;
616 
617 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
618  const vui64_t vec_ones = CONST_VINT128_DW (-1, -1);
619 #ifdef vec_test_data_class
620  tmp = (vui64_t)vec_test_data_class (vf64, 0x40);
621 #else
622  __asm__(
623  "xvtstdcdp %x0,%x1,0x40;\n"
624  : "=wa" (tmp)
625  : "wa" (vf64)
626  :);
627 #endif
628  return vec_any_eq(tmp, vec_ones);
629 #else
630  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
631  0x8000000000000000UL);
632  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
633  0x7ff0000000000000UL);
634  tmp = vec_andc ((vui64_t)vf64, signmask);
635  return vec_cmpud_any_gt(tmp, expmask);
636 #endif
637 }
638 
659 static inline int
660 vec_any_isnormalf64 (vf64_t vf64)
661 {
662  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
663  vui64_t tmp;
664 #if _ARCH_PWR9
665 #ifdef vec_test_data_class
666  tmp = (vui64_t)vec_test_data_class (vf64, 0x7f);
667 #else
668  __asm__(
669  "xvtstdcdp %x0,%x1,0x7f;\n"
670  : "=wa" (tmp)
671  : "wa" (vf64)
672  :);
673 #endif
674  return vec_any_eq(tmp, vec_zero);
675 #else
676  vui64_t res;
677  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
678  0x7ff0000000000000UL);
679 
680  tmp = vec_and ((vui64_t) vf64, expmask);
681  res = (vui64_t) vec_nor (vec_cmpequd (tmp, expmask),
682  vec_cmpequd (tmp, vec_zero));
683  return vec_cmpud_any_gt (res, vec_zero);
684 #endif
685 }
686 
705 static inline int
706 vec_any_issubnormalf64 (vf64_t vf64)
707 {
708  vui64_t tmp;
709 
710 #if _ARCH_PWR9
711  const vui64_t vec_ones = CONST_VINT128_DW (-1, -1);
712 #ifdef vec_test_data_class
713  tmp = (vui64_t)vec_test_data_class (vf64, 0x03);
714 #else
715  __asm__(
716  "xvtstdcdp %x0,%x1,0x03;\n"
717  : "=wa" (tmp)
718  : "wa" (vf64)
719  :);
720 #endif
721  return vec_any_eq(tmp, vec_ones);
722 #else
723  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
724  0x8000000000000000UL);
725  const vui64_t minnorm = CONST_VINT128_DW (0x0010000000000000UL,
726  0x0010000000000000UL);
727  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
728  vui64_t tmpz, tmp2, vsubnorm;
729 
730  tmp2 = vec_andc ((vui64_t)vf64, signmask);
731  tmp = (vui64_t) vec_cmpltud(tmp2, minnorm);
732  tmpz = (vui64_t) vec_cmpequd (tmp2, vec_zero);
733  vsubnorm = vec_andc (tmp, tmpz);
734  return vec_cmpud_any_ne(vsubnorm, vec_zero);
735 #endif
736 }
737 
757 static inline int
758 vec_any_iszerof64 (vf64_t vf64)
759 {
760  vui64_t tmp;
761 
762 #if _ARCH_PWR9
763  const vui64_t vec_ones = CONST_VINT128_DW (-1, -1);
764 #ifdef vec_test_data_class
765  tmp = (vui64_t)vec_test_data_class (vf64, 0x0c);
766 #else
767  __asm__(
768  "xvtstdcdp %x0,%x1,0x0c;\n"
769  : "=wa" (tmp)
770  : "wa" (vf64)
771  :);
772 #endif
773  return vec_any_eq(tmp, vec_ones);
774 #else
775  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
776  0x8000000000000000UL);
777  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
778  tmp = vec_andc ((vui64_t)vf64, signmask);
779  return vec_cmpud_any_eq(tmp, vec_zero);
780 #endif
781 }
782 
808 static inline vf64_t
809 vec_copysignf64 (vf64_t vf64x, vf64_t vf64y)
810 {
811 #if _ARCH_PWR7
812  /* P9 has a 2 cycle xvcpsgndp and eliminates a const load. */
813 #ifdef PVECLIB_CPSGN_FIXED
814  return (vec_cpsgn (vf64x, vf64y));
815 #else
816  vf64_t result;
817  __asm__(
818  "xvcpsgndp %x0,%x1,%x2;\n"
819  : "=wa" (result)
820  : "wa" (vf64x), "wa" (vf64y)
821  :);
822  return (result);
823 #endif
824 #else
825  const vui32_t signmask = CONST_VINT128_W(0x80000000, 0, 0x80000000, 0);
826  vf64_t result;
827 
828  result = (vf64_t) vec_sel ((vui32_t) vf64y, (vui32_t) vf64x, signmask);
829  return (result);
830 #endif
831 }
832 
856 static inline vb64_t
857 vec_isfinitef64 (vf64_t vf64)
858 {
859  vb64_t tmp2;
860 #if defined (_ARCH_PWR9)
861 #ifdef vec_test_data_class
862  tmp2 = vec_test_data_class (vf64, 0x70);
863 #else
864  __asm__(
865  "xvtstdcdp %x0,%x1,0x70;\n"
866  : "=wa" (tmp2)
867  : "wa" (vf64)
868  :);
869 #endif
870  return vec_nor (tmp2, tmp2); // vec_not
871 #else
872  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
873  0x7ff0000000000000UL);
874  vui64_t tmp;
875 
876  tmp = vec_and ((vui64_t)vf64, expmask);
877  tmp2 = vec_cmpequd (tmp, expmask);
878  return vec_nor (tmp2, tmp2); // vec_not
879 #endif
880 }
881 
900 static inline vb64_t
901 vec_isinff64 (vf64_t vf64)
902 {
903  vb64_t result;
904 
905 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
906 #ifdef vec_test_data_class
907  result = vec_test_data_class (vf64, 0x30);
908 #else
909  __asm__(
910  "xvtstdcdp %x0,%x1,0x30;\n"
911  : "=wa" (result)
912  : "wa" (vf64)
913  :);
914 #endif
915 #else
916  vui64_t tmp;
917  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
918  0x7ff0000000000000UL);
919  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
920  0x8000000000000000UL);
921  tmp = vec_andc ((vui64_t) vf64, signmask);
922  result = (vb64_t)vec_cmpequd (tmp, expmask);
923 #endif
924  return (result);
925 }
926 
943 static inline vb64_t
944 vec_isnanf64 (vf64_t vf64)
945 {
946  vb64_t result;
947 
948 #if _ARCH_PWR9 && !(defined(__clang__) && __clang_major__ < 9)
949 #ifdef vec_test_data_class
950  result = vec_test_data_class (vf64, 0x40);
951 #else
952  __asm__(
953  "xvtstdcdp %x0,%x1,0x40;\n"
954  : "=wa" (result)
955  : "wa" (vf64)
956  :);
957 #endif
958 #else
959  vui64_t tmp;
960  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
961  0x7ff0000000000000UL);
962  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
963  0x8000000000000000UL);
964  tmp = vec_andc ((vui64_t)vf64, signmask);
965  result = (vb64_t)vec_cmpgtud (tmp, expmask);
966 #endif
967  return (result);
968 }
969 
989 static inline vb64_t
990 vec_isnormalf64 (vf64_t vf64)
991 {
992 #if _ARCH_PWR9
993  vb64_t tmp2;
994 #ifdef vec_test_data_class
995  tmp2 = vec_test_data_class (vf64, 0x7f);
996 #else
997  __asm__(
998  "xvtstdcdp %x0,%x1,0x7f;\n"
999  : "=wa" (tmp2)
1000  : "wa" (vf64)
1001  :);
1002 #endif
1003  return vec_nor (tmp2, tmp2); // vec_not
1004 #else
1005  const vui64_t expmask = CONST_VINT128_DW (0x7ff0000000000000UL,
1006  0x7ff0000000000000UL);
1007  const vui64_t veczero = CONST_VINT128_DW (0UL, 0UL);
1008  vui64_t tmp;
1009 
1010  tmp = vec_and ((vui64_t) vf64, expmask);
1011  return (vb64_t) vec_nor (vec_cmpequd (tmp, expmask),
1012  vec_cmpequd (tmp, veczero));
1013 #endif
1014 }
1015 
1035 static inline vb64_t
1036 vec_issubnormalf64 (vf64_t vf64)
1037 {
1038  vb64_t result;
1039 
1040 #if _ARCH_PWR9
1041 #ifdef vec_test_data_class
1042  result = vec_test_data_class (vf64, 0x03);
1043 #else
1044  __asm__(
1045  "xvtstdcdp %x0,%x1,0x03;\n"
1046  : "=wa" (result)
1047  : "wa" (vf64)
1048  :);
1049 #endif
1050 #else
1051  vui64_t tmp;
1052  const vui64_t minnorm = CONST_VINT128_DW (0x0010000000000000UL,
1053  0x0010000000000000UL);
1054  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
1055  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
1056  0x8000000000000000UL);
1057  tmp = vec_andc ((vui64_t) vf64, signmask);
1058  result = vec_andc (vec_cmpltud (tmp, minnorm),
1059  vec_cmpequd (tmp, vec_zero));
1060 #endif
1061  return (result);
1062 }
1063 
1083 static inline vb64_t
1084 vec_iszerof64 (vf64_t vf64)
1085 {
1086  vb64_t result;
1087 
1088 #if _ARCH_PWR9
1089 #ifdef vec_test_data_class
1090  result = vec_test_data_class (vf64, 0x0c);
1091 #else
1092  __asm__(
1093  "xvtstdcdp %x0,%x1,0x0c;\n"
1094  : "=wa" (result)
1095  : "wa" (vf64)
1096  :);
1097 #endif
1098 #else
1099  vui64_t tmp2;
1100  const vui64_t vec_zero = CONST_VINT128_DW (0, 0);
1101  const vui64_t signmask = CONST_VINT128_DW (0x8000000000000000UL,
1102  0x8000000000000000UL);
1103  tmp2 = vec_andc ((vui64_t)vf64, signmask);
1104  result = (vb64_t)vec_cmpequd (tmp2, vec_zero);
1105 #endif
1106  return (result);
1107 }
1108 
1114 static inline long double
1115 vec_pack_longdouble (vf64_t lval)
1116 {
1117 #ifdef __clang__
1118  __VEC_U_128 t;
1119  t.vf2 = lval;
1120  return (t.ldbl128);
1121 #else
1122 #ifdef _ARCH_PWR7
1123  long double t;
1124  __asm__(
1125  "xxlor %0,%x1,%x1;\n"
1126  "\txxswapd %L0,%x1;\n"
1127  : "=f" (t)
1128  : "wa" (lval)
1129  : );
1130  return (t);
1131 #else
1132  __VEC_U_128 t;
1133  t.vf2 = lval;
1134  return (t.ldbl128);
1135 #endif
1136 #endif
1137 }
1138 
1162 static inline vb64_t
1163 vec_setb_dp (vf64_t vra)
1164 {
1165  return vec_setb_sd ((vi64_t) vra);
1166 }
1167 
1174 static inline vf64_t
1175 vec_unpack_longdouble (long double lval)
1176 {
1177 #ifdef __clang__
1178  __VEC_U_128 t;
1179  t.ldbl128 = lval;
1180  return (t.vf2);
1181 #else
1182 #ifdef _ARCH_PWR7
1183  vf64_t t;
1184  __asm__(
1185  "xxmrghd %x0,%1,%L1;\n"
1186  : "=wa" (t)
1187  : "f" (lval)
1188  : );
1189  return (t);
1190 #else
1191  __VEC_U_128 t;
1192  t.ldbl128 = lval;
1193  return (t.vf2);
1194 #endif
1195 #endif
1196 }
1197 
1215 static inline vf64_t
1216 vec_vglfdso (double *array, const long long offset0,
1217  const long long offset1)
1218 {
1219  vf64_t re0, re1, result;
1220 
1221  re0 = vec_vlxsfdx (offset0, array);
1222  re1 = vec_vlxsfdx (offset1, array);
1223  /* Need to handle endian as the vec_vlxsfdx result is always left
1224  * justified in the VR, while element [0] may be left or right. */
1225 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
1226  result = (vf64_t) vec_permdi ((vui64_t) re1, (vui64_t) re0, 0);
1227 #else
1228  result = (vf64_t) vec_permdi ((vui64_t) re0, (vui64_t) re1, 0);
1229 #endif
1230  return result;
1231 }
1232 
1252 static inline vf64_t
1253 vec_vglfddo (double *array, vi64_t vra)
1254 {
1255  vf64_t rese0, rese1;
1256 
1257 #ifdef _ARCH_PWR8
1258  rese0 = vec_vlxsfdx (vra[VEC_DW_H], array);
1259  rese1 = vec_vlxsfdx (vra[VEC_DW_L], array);
1260 #else
1261  // Need to explicitly manage the VR/GPR xfer for PWR7
1262  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1263 
1264  rese0 = vec_vlxsfdx (scalar_extract_uint64_from_high_uint128(gprp), array);
1265  rese1 = vec_vlxsfdx (scalar_extract_uint64_from_low_uint128(gprp), array);
1266 #endif
1267  return (vf64_t) vec_permdi ((vui64_t) rese0, (vui64_t) rese1, 0);
1268 }
1269 
1292 static inline vf64_t
1293 vec_vglfddsx (double *array, vi64_t vra,
1294  const unsigned char scale)
1295 {
1296  vi64_t offset;
1297 
1298  offset = (vi64_t) vec_sldi ((vui64_t) vra, (3 + scale));
1299  return vec_vglfddo (array, offset);
1300 }
1301 
1321 static inline vf64_t
1322 vec_vglfddx (double *array, vi64_t vra)
1323 {
1324  vi64_t offset;
1325 
1326  offset = (vi64_t) vec_sldi ((vui64_t) vra, 3);
1327  return vec_vglfddo (array, offset);
1328 }
1329 
1348 static inline void
1349 vec_vsstfdso (vf64_t xs, double *array,
1350  const long long offset0, const long long offset1)
1351 {
1352  vf64_t xs1;
1353 
1354  xs1 = (vf64_t) vec_xxspltd ((vui64_t) xs, 1);
1355  /* Need to handle endian as vec_vstxsfdx always stores from the left
1356  * side of the VR, while element [0] may be in the left or right. */
1357 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
1358  vec_vstxsfdx (xs, offset1, array);
1359  vec_vstxsfdx (xs1, offset0, array);
1360 #else
1361  vec_vstxsfdx (xs, offset0, array);
1362  vec_vstxsfdx (xs1, offset1, array);
1363 #endif
1364 }
1365 
1383 static inline void
1384 vec_vsstfddo (vf64_t xs, double *array,
1385  vi64_t vra)
1386 {
1387  vf64_t xs1 = (vf64_t) vec_xxspltd ((vui64_t) xs, 1);
1388 #ifdef _ARCH_PWR8
1389  vec_vstxsfdx (xs, vra[VEC_DW_H], array);
1390  vec_vstxsfdx (xs1, vra[VEC_DW_L], array);
1391 #else
1392  // Need to explicitly manage the VR/GPR xfer for PWR7
1393  unsigned __int128 gprp = vec_transfer_vui128t_to_uint128 ((vui128_t) vra);
1394  vec_vstxsfdx (xs, scalar_extract_uint64_from_high_uint128(gprp), array);
1395  vec_vstxsfdx (xs1, scalar_extract_uint64_from_low_uint128(gprp), array);
1396 #endif
1397 }
1398 
1420 static inline void
1421 vec_vsstfddsx (vf64_t xs, double *array,
1422  vi64_t vra, const unsigned char scale)
1423 {
1424  vi64_t offset;
1425 
1426  offset = (vi64_t) vec_sldi ((vui64_t) vra, (3 + scale));
1427  vec_vsstfddo (xs, array, offset);
1428 }
1429 
1449 static inline void
1450 vec_vsstfddx (vf64_t xs, double *array, vi64_t vra)
1451 {
1452  vi64_t offset;
1453 
1454  offset = (vi64_t) vec_sldi ((vui64_t) vra, 3);
1455  vec_vsstfddo (xs, array, offset);
1456 }
1457 
1493 static inline vf64_t
1494 vec_vlxsfdx (const signed long long ra, const double *rb)
1495 {
1496  vf64_t xt;
1497 
1498 #if defined (__clang__)
1499  __VEC_U_128 t;
1500  unsigned long long *p = (unsigned long long *)((char *)rb + ra);
1501  t.ulong.upper = *p;
1502  xt = t.vx1;
1503 #else
1504  if (__builtin_constant_p (ra) && (ra < 32760) && (ra >= -32768)
1505  && ((ra & 3) == 0))
1506  {
1507 #if defined (_ARCH_PWR9)
1508  __asm__(
1509  "lxsd%X1 %0,%1;"
1510  : "=v" (xt)
1511  : "m" (*(double*)((char *)rb + ra))
1512  : );
1513 #else
1514  if (ra == 0)
1515  {
1516  __asm__(
1517  "lxsdx %x0,%y1;"
1518  : "=wa" (xt)
1519  : "Z" (*rb)
1520  : );
1521  } else {
1522  unsigned long long rt;
1523 #if defined (_ARCH_PWR8)
1524  // For P8 it is better if li and lxsdx share a single asm block
1525  // (enforcing consecutive instructions).
1526  // This enables instruction fusion for P8.
1527  __asm__(
1528  "li %0,%2;"
1529  "lxsdx %x1,%3,%0;"
1530  : "=&r" (rt), "=wa" (xt)
1531  : "I" (ra), "b" (rb), "m" (*(double*)((char *)rb+ra))
1532  : );
1533 #else // _ARCH_PWR7
1534  // This generates operationally the same code, but the
1535  // compiler may rearrange/schedule the code.
1536  __asm__(
1537  "li %0,%1;"
1538  : "=r" (rt)
1539  : "I" (ra)
1540  : );
1541  __asm__(
1542  "lxsdx %x0,%y1;"
1543  : "=wa" (xt)
1544  : "Z" (*(double*)((char *)rb+rt))
1545  : );
1546 #endif
1547  }
1548 #endif
1549  } else {
1550  __asm__(
1551  "lxsdx %x0,%y1;"
1552  : "=wa" (xt)
1553  : "Z" (*(double*)((char *)rb+ra))
1554  : );
1555  }
1556 #endif
1557  return xt;
1558 }
1559 
1587 static inline void
1588 vec_vstxsfdx (vf64_t xs, const signed long long ra, double *rb)
1589 {
1590 #if defined (__clang__)
1591  __VEC_U_128 t;
1592  unsigned long long *p = (unsigned long long *)((char *)rb + ra);
1593  t.vx1 = xs;
1594  *p = t.ulong.upper;
1595 #else
1596  if (__builtin_constant_p (ra) && (ra <= 32760) && (ra >= -32768)
1597  && ((ra & 3) == 0))
1598  {
1599 #if defined (_ARCH_PWR9)
1600  __asm__(
1601  "stxsd%X0 %1,%0;"
1602  : "=m" (*(double*)((char *)rb + ra))
1603  : "v" (xs)
1604  : );
1605 #else
1606  if (ra == 0)
1607  {
1608  __asm__(
1609  "stxsdx %x1,%y0;"
1610  : "=Z" (*rb)
1611  : "wa" (xs)
1612  : );
1613  } else {
1614  unsigned long long rt;
1615  __asm__(
1616  "li %0,%1;"
1617  : "=r" (rt)
1618  : "I" (ra)
1619  : );
1620  __asm__(
1621  "stxsdx %x1,%y0;"
1622  : "=Z" (*(double*)((char *)rb+rt))
1623  : "wa" (xs)
1624  : );
1625  }
1626 #endif
1627  } else {
1628  __asm__(
1629  "stxsdx %x1,%y0;"
1630  : "=Z" (*(double*)((char *)rb+ra))
1631  : "wa" (xs)
1632  : );
1633  }
1634 #endif
1635 }
1636 
1664 static inline vf64_t
1665 vec_xviexpdp (vui64_t sig, vui64_t exp)
1666 {
1667  vf64_t result;
1668 #if defined (_ARCH_PWR9) && defined (__VSX__) && (__GNUC__ > 7)
1669 #if defined (vec_insert_exp)
1670  result = vec_insert_exp (sig, exp);
1671 #else
1672  __asm__(
1673  "xviexpdp %x0,%x1,%x2"
1674  : "=wa" (result)
1675  : "wa" (sig), "wa" (exp)
1676  : );
1677 #endif
1678 #else
1679  vui32_t tmp;
1680  const vui32_t expmask = CONST_VINT128_W(0x7ff00000, 0, 0x7ff00000, 0);
1681 
1682  tmp = (vui32_t) vec_slqi ((vui128_t) exp, 52);
1683  result = (vf64_t) vec_sel ((vui32_t) sig, tmp, expmask);
1684 #endif
1685  return result;
1686 }
1687 
1712 static inline vui64_t
1713 vec_xvxexpdp (vf64_t vrb)
1714 {
1715  vui64_t result;
1716 #if defined (_ARCH_PWR9) && defined (__VSX__) && (__GNUC__ > 7)
1717 #if defined (vec_extract_exp)
1718  result = vec_extract_exp (vrb);
1719 #else
1720  __asm__(
1721  "xvxexpdp %x0,%x1"
1722  : "=wa" (result)
1723  : "wa" (vrb)
1724  : );
1725 #endif
1726 #else
1727  vui32_t tmp;
1728  const vui32_t expmask = CONST_VINT128_W(0x7ff00000, 0, 0x7ff00000, 0);
1729 
1730  tmp = vec_and ((vui32_t) vrb, expmask);
1731  result = (vui64_t) vec_srqi ((vui128_t) tmp, 52);
1732 #endif
1733  return result;
1734 }
1735 
1761 static inline vui64_t
1762 vec_xvxsigdp (vf64_t vrb)
1763 {
1764  vui64_t result;
1765 #if defined (_ARCH_PWR9) && defined (__VSX__) && (__GNUC__ > 7)
1766 #if defined (vec_extract_sig)
1767  result = vec_extract_sig (vrb);
1768 #else
1769  __asm__(
1770  "xvxsigdp %x0,%x1"
1771  : "=wa" (result)
1772  : "wa" (vrb)
1773  : );
1774 #endif
1775 #else
1776  vui32_t t128, tmp;
1777  vui32_t normal;
1778  const vui32_t zero = CONST_VINT128_W(0, 0, 0, 0);
1779  const vui32_t sigmask = CONST_VINT128_W(0x000fffff, -1, 0x000fffff, -1);
1780  const vui32_t expmask = CONST_VINT128_W(0x7ff00000, 0, 0x7ff00000, 0);
1781  const vui32_t hidden = CONST_VINT128_W(0x00100000, 0, 0x00100000, 0);
1782 
1783  // Check if vrb is normal. Normal values need the hidden bit
1784  // restored to the significand. We use a simpler sequence here as
1785  // vec_isnormalf64 does more than we need.
1786  tmp = vec_and ((vui32_t) vrb, expmask);
1787  normal = (vui32_t) vec_nor (vec_cmpeq (tmp, expmask),
1788  vec_cmpeq (tmp, zero));
1789  t128 = vec_and ((vui32_t) vrb, sigmask);
1790  result = (vui64_t) vec_sel (t128, normal, hidden);
1791 #endif
1792  return result;
1793 }
1794 
1795 #endif /* VEC_F64_PPC_H_ */
vec_cmpud_all_ne
static int vec_cmpud_all_ne(vui64_t a, vui64_t b)
Vector Compare all Not Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:2334
vec_vsstfddx
static void vec_vsstfddx(vf64_t xs, double *array, vi64_t vra)
Vector Scatter-Store Float Double to Doubleword Indexes.
Definition: vec_f64_ppc.h:1450
vec_isnanf64
static vb64_t vec_isnanf64(vf64_t vf64)
Return 2x64-bit vector boolean true values, for each double NaN value.
Definition: vec_f64_ppc.h:944
vec_any_isnanf64
static int vec_any_isnanf64(vf64_t vf64)
Return true if any of 2x64-bit vector double values are NaN.
Definition: vec_f64_ppc.h:613
scalar_extract_uint64_from_high_uint128
static unsigned long long scalar_extract_uint64_from_high_uint128(unsigned __int128 gprp)
Extract the high doubleword from a __int128 scalar.
Definition: vec_common_ppc.h:503
vec_xxspltd
static vui64_t vec_xxspltd(vui64_t vra, const int ctl)
Vector splat doubleword. Duplicate the selected doubleword element across the doubleword elements of ...
Definition: vec_int64_ppc.h:4647
vec_xvxexpdp
static vui64_t vec_xvxexpdp(vf64_t vrb)
Vector Extract Exponent Double-Precision.
Definition: vec_f64_ppc.h:1713
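A minimal usage sketch with hypothetical values, assuming pveclib headers are installed and the compiler supports GCC-style vector literals and subscripting on a POWER target:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    vf64_t v = { 1.0, 2.0 };
    vui64_t e = vec_xvxexpdp (v);
    /* Biased exponent fields: 1.0 -> 0x3ff, 2.0 -> 0x400.  */
    printf ("%#llx %#llx\n", (unsigned long long) e[0],
            (unsigned long long) e[1]);
    return 0;
  }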
vec_any_issubnormalf64
static int vec_any_issubnormalf64(vf64_t vf64)
Return true if any of 2x64-bit vector double values is subnormal (denormal).
Definition: vec_f64_ppc.h:706
vec_any_isnormalf64
static int vec_any_isnormalf64(vf64_t vf64)
Return true if any of 2x64-bit vector double values are normal (Not NaN, Inf, denormal,...
Definition: vec_f64_ppc.h:660
vec_cmpud_all_lt
static int vec_cmpud_all_lt(vui64_t a, vui64_t b)
Vector Compare all Less than Unsigned Doubleword.
Definition: vec_int64_ppc.h:2311
CONST_VINT128_W
#define CONST_VINT128_W(__w0, __w1, __w2, __w3)
Arrange word elements of an unsigned int initializer in high->low order. May require an explicit cast.
Definition: vec_common_ppc.h:304
vec_vlxsfdx
static vf64_t vec_vlxsfdx(const signed long long ra, const double *rb)
Vector Load Scalar Float Double Indexed.
Definition: vec_f64_ppc.h:1494
vec_xviexpdp
static vf64_t vec_xviexpdp(vui64_t sig, vui64_t exp)
Vector Insert Exponent Double-Precision.
Definition: vec_f64_ppc.h:1665
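A hedged round-trip sketch using positive values only, since vec_xvxsigdp clears the sign bit and vec_xviexpdp takes the sign from its sig operand:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    vf64_t v = { 3.0, 1.25 };
    /* Split into significand and biased exponent, then reassemble.  */
    vui64_t sig = vec_xvxsigdp (v);
    vui64_t exp = vec_xvxexpdp (v);
    vf64_t r = vec_xviexpdp (sig, exp);
    printf ("%g %g\n", r[0], r[1]);  /* expect 3 1.25 */
    return 0;
  }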
vec_any_iszerof64
static int vec_any_iszerof64(vf64_t vf64)
Return true if any of 2x64-bit vector double values are +-0.0.
Definition: vec_f64_ppc.h:758
vec_transfer_vui128t_to_uint128
static unsigned __int128 vec_transfer_vui128t_to_uint128(vui128_t vra)
Transfer a vector unsigned __int128 to __int128 scalar.
Definition: vec_common_ppc.h:420
vec_all_isnanf64
static int vec_all_isnanf64(vf64_t vf64)
Return true if all 2x64-bit vector double values are NaN.
Definition: vec_f64_ppc.h:341
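A minimal sketch contrasting the all/any predicates, with hypothetical NaN test values:

  #include <stdio.h>
  #include <math.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    vf64_t v = { NAN, NAN };
    vf64_t w = { NAN, 1.0 };
    printf ("%d %d %d\n",
            vec_all_isnanf64 (v),   /* 1: both elements are NaN */
            vec_all_isnanf64 (w),   /* 0: only one element is NaN */
            vec_any_isnanf64 (w));  /* 1: at least one element is NaN */
    return 0;
  }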
vec_cmpud_all_eq
static int vec_cmpud_all_eq(vui64_t a, vui64_t b)
Vector Compare all Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:2193
vec_iszerof64
static vb64_t vec_iszerof64(vf64_t vf64)
Return 2x64-bit vector boolean true values, for each double value that is +-0.0.
Definition: vec_f64_ppc.h:1084
vec_vsstfdso
static void vec_vsstfdso(vf64_t xs, double *array, const long long offset0, const long long offset1)
Vector Scatter-Store Float Double to Scalar Offsets.
Definition: vec_f64_ppc.h:1349
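A hedged sketch with hypothetical array data; the offsets are byte offsets (they are fed to vec_vstxsfdx), and element [0] is expected to land at offset0 per the endian handling in the definition above:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    double a[4] = { 0.0, 0.0, 0.0, 0.0 };
    vf64_t v = { 11.0, 13.0 };
    /* Scatter element [0] to byte offset 8 (a[1]) and
       element [1] to byte offset 24 (a[3]).  */
    vec_vsstfdso (v, a, 1 * 8, 3 * 8);
    printf ("%g %g %g %g\n", a[0], a[1], a[2], a[3]);  /* expect 0 11 0 13 */
    return 0;
  }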
vec_vglfddsx
static vf64_t vec_vglfddsx(double *array, vi64_t vra, const unsigned char scale)
Vector Gather-Load Float Double from Doubleword Scaled Indexes.
Definition: vec_f64_ppc.h:1293
vec_vglfddo
static vf64_t vec_vglfddo(double *array, vi64_t vra)
Vector Gather-Load Float Double from Doubleword Offsets.
Definition: vec_f64_ppc.h:1253
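A similar hedged sketch for the vector-of-offsets form; the offsets are again byte offsets, here carried in a vi64_t:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    double a[4] = { 10.0, 11.0, 12.0, 13.0 };
    vi64_t off = { 2 * 8, 0 * 8 };  /* byte offsets as vector elements */
    vf64_t r = vec_vglfddo (a, off);
    printf ("%g %g\n", r[0], r[1]);  /* expect 12 10 */
    return 0;
  }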
vui64_t
__vector unsigned long long vui64_t
vector of 64-bit unsigned long long elements.
Definition: vec_common_ppc.h:208
vec_common_ppc.h
Common definitions and typedef used by the collection of Power Vector Library (pveclib) headers.
vec_sldi
static vui64_t vec_sldi(vui64_t vra, const unsigned int shb)
Vector Shift left Doubleword Immediate.
Definition: vec_int64_ppc.h:3253
vec_cmpud_all_gt
static int vec_cmpud_all_gt(vui64_t a, vui64_t b)
Vector Compare all Greater Than Unsigned Doubleword.
Definition: vec_int64_ppc.h:2255
vec_vsstfddsx
static void vec_vsstfddsx(vf64_t xs, double *array, vi64_t vra, const unsigned char scale)
Vector Scatter-Store Float Double to Doubleword Scaled Index.
Definition: vec_f64_ppc.h:1421
vec_absf64
static vf64_t vec_absf64(vf64_t vf64x)
Vector double absolute value.
Definition: vec_f64_ppc.h:221
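A minimal usage sketch with hypothetical values:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    vf64_t x = { -3.5, 0.25 };
    vf64_t r = vec_absf64 (x);
    printf ("%g %g\n", r[0], r[1]);  /* expect 3.5 0.25 */
    return 0;
  }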
vec_pack_longdouble
static long double vec_pack_longdouble(vf64_t lval)
Copy the pair of doubles from a vector to IBM long double.
Definition: vec_f64_ppc.h:1115
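A hedged round-trip sketch with vec_unpack_longdouble; this behaves as shown only under the IBM double-double long double ABI (e.g. -mabi=ibmlongdouble):

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    vf64_t pair = { 1.0, 0.0 };
    long double ld = vec_pack_longdouble (pair);
    vf64_t back = vec_unpack_longdouble (ld);
    printf ("%g %g\n", back[0], back[1]);  /* expect 1 0 */
    return 0;
  }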
vec_int128_ppc.h
Header package containing a collection of 128-bit computation functions implemented with PowerISA VMX...
vec_srqi
static vui128_t vec_srqi(vui128_t vra, const unsigned int shb)
Vector Shift Right Quadword Immediate.
Definition: vec_int128_ppc.h:7154
__VEC_U_128::ulong
struct __VEC_U_128::@0 ulong
Struct of two unsigned long int (64-bit GPR) fields.
vec_isinff64
static vb64_t vec_isinff64(vf64_t vf64)
Return 2x64-bit vector boolean true values for each double, if infinity.
Definition: vec_f64_ppc.h:901
vec_cmpud_any_eq
static int vec_cmpud_any_eq(vui64_t a, vui64_t b)
Vector Compare any Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:2365
vec_any_isfinitef64
static int vec_any_isfinitef64(vf64_t vf64)
Return true if any of 2x64-bit vector double values are Finite (Not NaN nor Inf).
Definition: vec_f64_ppc.h:526
__VEC_U_128::vx1
vui128_t vx1
128 bit Vector of 1 unsigned __int128 element.
Definition: vec_common_ppc.h:277
vui128_t
__vector unsigned __int128 vui128_t
vector of one 128-bit unsigned __int128 element.
Definition: vec_common_ppc.h:237
vb64_t
__vector __bool long long vb64_t
vector of 64-bit bool long long elements.
Definition: vec_common_ppc.h:230
vec_all_isnormalf64
static int vec_all_isnormalf64(vf64_t vf64)
Return true if all 2x64-bit vector double values are normal (Not NaN, Inf, denormal,...
Definition: vec_f64_ppc.h:388
vec_vstxsfdx
static void vec_vstxsfdx(vf64_t xs, const signed long long ra, double *rb)
Vector Store Scalar Float Double Indexed.
Definition: vec_f64_ppc.h:1588
vec_permdi
static vui64_t vec_permdi(vui64_t vra, vui64_t vrb, const int ctl)
Vector Permute Doubleword Immediate. Combine a doubleword selected from the 1st (vra) vector with a d...
Definition: vec_int64_ppc.h:2983
vec_all_iszerof64
static int vec_all_iszerof64(vf64_t vf64)
Return true if all 2x64-bit vector double values are +-0.0.
Definition: vec_f64_ppc.h:480
__VEC_U_128
Union used to transfer 128-bit data between vector and non-vector types.
Definition: vec_common_ppc.h:256
vi64_t
__vector long long vi64_t
vector of 64-bit signed long long elements.
Definition: vec_common_ppc.h:217
CONST_VINT128_DW
#define CONST_VINT128_DW(__dw0, __dw1)
Initializer for a 128-bit vector, as two unsigned long long elements in high->low order....
Definition: vec_common_ppc.h:298
vec_copysignf64
static vf64_t vec_copysignf64(vf64_t vf64x, vf64_t vf64y)
Copy the sign bit from vf64x merged with magnitude from vf64y and return the resulting vector double ...
Definition: vec_f64_ppc.h:809
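Note the operand order (sign from vf64x, magnitude from vf64y) differs from ISO C copysign; a minimal sketch with hypothetical values:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    vf64_t sign = { -0.0, 0.0 };
    vf64_t mag  = {  3.0, -4.0 };
    /* Sign bits from the 1st operand, magnitudes from the 2nd,
       so the result is { -3.0, 4.0 }.  */
    vf64_t r = vec_copysignf64 (sign, mag);
    printf ("%g %g\n", r[0], r[1]);
    return 0;
  }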
vec_xvxsigdp
static vui64_t vec_xvxsigdp(vf64_t vrb)
Vector Extract Significand Double-Precision.
Definition: vec_f64_ppc.h:1762
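A minimal sketch; the expected constants follow from the IEEE-754 double format and the hidden-bit restore in the non-PWR9 path above:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    vf64_t v = { 1.0, 0.0 };
    vui64_t s = vec_xvxsigdp (v);
    /* Normal 1.0 has only the restored hidden bit set
       (0x0010000000000000); zero is not normal, so 0x0.  */
    printf ("%#llx %#llx\n", (unsigned long long) s[0],
            (unsigned long long) s[1]);
    return 0;
  }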
vec_all_isfinitef64
static int vec_all_isfinitef64(vf64_t vf64)
Return true if all 2x64-bit vector double values are Finite (Not NaN nor Inf).
Definition: vec_f64_ppc.h:253
vec_any_isinff64
static int vec_any_isinff64(vf64_t vf64)
Return true if any of 2x64-bit vector double values are infinity.
Definition: vec_f64_ppc.h:567
VEC_DW_L
#define VEC_DW_L
Element index for low order dword.
Definition: vec_common_ppc.h:324
vec_all_isinff64
static int vec_all_isinff64(vf64_t vf64)
Return true if all 2x64-bit vector double values are infinity.
Definition: vec_f64_ppc.h:295
vec_cmpud_any_ne
static int vec_cmpud_any_ne(vui64_t a, vui64_t b)
Vector Compare any Not Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:2508
vui32_t
__vector unsigned int vui32_t
vector of 32-bit unsigned int elements.
Definition: vec_common_ppc.h:206
vec_setb_sd
static vb64_t vec_setb_sd(vi64_t vra)
Vector Set Bool from Signed Doubleword.
Definition: vec_int64_ppc.h:3164
vec_cmpltud
static vb64_t vec_cmpltud(vui64_t a, vui64_t b)
Vector Compare less Than Unsigned Doubleword.
Definition: vec_int64_ppc.h:1771
vec_all_issubnormalf64
static int vec_all_issubnormalf64(vf64_t vf64)
Return true if all 2x64-bit vector double values are subnormal (denormal).
Definition: vec_f64_ppc.h:432
scalar_extract_uint64_from_low_uint128
static unsigned long long scalar_extract_uint64_from_low_uint128(unsigned __int128 gprp)
Extract the low doubleword from a __int128 scalar.
Definition: vec_common_ppc.h:490
vec_cmpgtud
static vb64_t vec_cmpgtud(vui64_t a, vui64_t b)
Vector Compare Greater Than Unsigned Doubleword.
Definition: vec_int64_ppc.h:1622
vec_vglfdso
static vf64_t vec_vglfdso(double *array, const long long offset0, const long long offset1)
Vector Gather-Load Float Double from scalar Offsets.
Definition: vec_f64_ppc.h:1216
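A minimal sketch with a hypothetical array; the offsets are byte offsets as fed to vec_vlxsfdx:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    double a[4] = { 10.0, 11.0, 12.0, 13.0 };
    /* Gather a[1] into element [0] and a[3] into element [1].  */
    vf64_t r = vec_vglfdso (a, 1 * 8, 3 * 8);
    printf ("%g %g\n", r[0], r[1]);  /* expect 11 13 */
    return 0;
  }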
vf64_t
__vector double vf64_t
vector of 64-bit double elements.
Definition: vec_common_ppc.h:221
vec_unpack_longdouble
static vf64_t vec_unpack_longdouble(long double lval)
Copy the pair of doubles from a IBM long double to a vector double.
Definition: vec_f64_ppc.h:1175
vec_isfinitef64
static vb64_t vec_isfinitef64(vf64_t vf64)
Return 2x64-bit vector boolean true values for each double element that is Finite (Not NaN nor Inf).
Definition: vec_f64_ppc.h:857
__VEC_U_128::vf2
vf64_t vf2
128 bit Vector of 2 double float elements.
Definition: vec_common_ppc.h:279
vec_vglfddx
static vf64_t vec_vglfddx(double *array, vi64_t vra)
Vector Gather-Load Float Double from Doubleword indexes.
Definition: vec_f64_ppc.h:1322
vec_slqi
static vui128_t vec_slqi(vui128_t vra, const unsigned int shb)
Vector Shift Left Quadword Immediate.
Definition: vec_int128_ppc.h:6748
vec_setb_dp
static vb64_t vec_setb_dp(vf64_t vra)
Vector Set Bool from Sign, Double Precision.
Definition: vec_f64_ppc.h:1163
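A minimal sketch; casting the bool result to vui64_t just makes the mask printable:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    vf64_t v = { -2.0, 0.0 };
    vui64_t m = (vui64_t) vec_setb_dp (v);
    /* All-ones where the sign bit is set, zeros otherwise.  */
    printf ("%#llx %#llx\n", (unsigned long long) m[0],
            (unsigned long long) m[1]);  /* expect 0xffffffffffffffff 0 */
    return 0;
  }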
__VEC_U_128::ldbl128
long double ldbl128
IBM long double float from pair of double float registers.
Definition: vec_common_ppc.h:267
vec_cmpequd
static vb64_t vec_cmpequd(vui64_t a, vui64_t b)
Vector Compare Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:1451
vec_issubnormalf64
static vb64_t vec_issubnormalf64(vf64_t vf64)
Return 2x64-bit vector boolean true values, for each double value that is subnormal (denormal).
Definition: vec_f64_ppc.h:1036
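A minimal sketch; 0x1p-1050 is below the smallest normal double (0x1p-1022), so it is subnormal:

  #include <stdio.h>
  #include <pveclib/vec_f64_ppc.h>

  int main (void)
  {
    vf64_t v = { 0x1p-1050, 1.0 };  /* subnormal, normal */
    vui64_t m = (vui64_t) vec_issubnormalf64 (v);
    printf ("%#llx %#llx\n", (unsigned long long) m[0],
            (unsigned long long) m[1]);  /* expect 0xffffffffffffffff 0 */
    return 0;
  }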
VEC_DW_H
#define VEC_DW_H
Element index for high order dword.
Definition: vec_common_ppc.h:322
vec_isnormalf64
static vb64_t vec_isnormalf64(vf64_t vf64)
Return 2x64-bit vector boolean true values, for each double value, if normal (Not NaN,...
Definition: vec_f64_ppc.h:990
vec_cmpud_any_gt
static int vec_cmpud_any_gt(vui64_t a, vui64_t b)
Vector Compare any Greater Than Unsigned Doubleword.
Definition: vec_int64_ppc.h:2429
vec_vsstfddo
static void vec_vsstfddo(vf64_t xs, double *array, vi64_t vra)
Vector Scatter-Store Float Double to Doubleword Offsets.
Definition: vec_f64_ppc.h:1384