POWER Vector Library Manual  1.0.4
vec_f128_ppc.h
1 /*
2  Copyright (c) [2017-2018] IBM Corporation.
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  vec_f128_ppc.h
17 
18  Contributors:
19  IBM Corporation, Steven Munroe
20  Created on: Apr 11, 2016
21  */
22 
3638 #ifndef VEC_F128_PPC_H_
3639 #define VEC_F128_PPC_H_
3640 
3641 #include <pveclib/vec_common_ppc.h>
3642 #include <pveclib/vec_int128_ppc.h>
3643 #include <pveclib/vec_f64_ppc.h>
3644 
3645 
3646 /* __float128 was added in GCC 6.0. But only with -mfloat128.
3647  Later compilers typedef __float128 to __ieee128 and
3648  long double to __ibm128. The intent was to allow the switch of
3649  long double from __ibm128 to __ieee128 (someday).
3650 
3651  Clang does not define __FLOAT128__ or __float128 without both
3652  -mcpu=power9 and -mfloat128.
3653  So far clang does not support/define the __ibm128 type. */
3654 #ifdef __FLOAT128__
3655 typedef __float128 __Float128;
3656 #ifndef __clang__
3657 typedef __float128 __binary128;
3658 typedef __float128 __ieee128;
3659 typedef __ibm128 __IBM128;
3660 #else
3661 /* Clang started defining __FLOAT128__ and does not allow redefining
3662  __float128 or __ieee128. Worse, it will give errors if you try to
3663  use either type. So define __binary128 as if __FLOAT128__ is not
3664  defined. */
3665 typedef vui128_t __binary128;
3666 /* Clang does not define __ibm128 over IBM long double.
3667  So define it here. */
3668 typedef long double __IBM128;
3669 #endif
3670 #else
3671 /* Before GCC 6.0 (or without -mfloat128) we need to fake it. */
3681 #ifndef __clang__
3682 // Clang will not allow redefining __float128 even if it is not enabled
3686 #endif
3687 
3689 typedef long double __IBM128;
3690 #endif
3691 
3694 typedef union
3695  {
3696  vui8_t vx16;
3697  vui16_t vx8;
3698  vui32_t vx4;
3699  vui64_t vx2;
3700  vui128_t vx1;
3701  __binary128 vf1;
3711  unsigned __int128 ix1;
3712  } __VF_128;
3713 
3715 static inline __binary128 vec_xfer_vui32t_2_bin128 (vui32_t f128);
3716 static inline int vec_all_isnanf128 (__binary128 f128);
3717 static inline vb128_t vec_isnanf128 (__binary128 f128);
3718 static inline vb128_t vec_isunorderedf128 (__binary128 vfa, __binary128 vfb);
3719 static inline vb128_t vec_setb_qp (__binary128 f128);
3720 static inline __binary128 vec_xsiexpqp (vui128_t sig, vui64_t exp);
3721 static inline vui64_t vec_xsxexpqp (__binary128 f128);
3722 static inline vui128_t vec_xsxsigqp (__binary128 f128);
3723 static inline vui64_t vec_xxxexpqpp (__binary128 vfa, __binary128 vfb);
3725 
3738 static inline vui64_t
3739 vec_const64_f128_128 (void)
3740 {
3741  // const vui32_t dw_128 = CONST_VINT128_W(0, 0, 0, 128);
3742  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
3743 #if defined (_ARCH_PWR8)
3744  // Generate {64, 64} from count leading zeros of {0, 0}
3745  vui64_t dw64 = vec_clzd((vui64_t) q_zero);
3746  // Generate {128, 128}
3747  return vec_addudm (dw64, dw64);
3748 #else
3749  const vui32_t q_ones = CONST_VINT128_W (-1, -1, -1, -1);
3750  vui32_t signmask;
3751  signmask = vec_sl (q_ones, q_ones);
3752  signmask = vec_sld (q_zero, signmask, 1);
3753  return vec_mrgald ((vui128_t) signmask, (vui128_t) signmask);
3754 #endif
3755 }
3756 
3769 static inline vui32_t
3770 vec_const128_f128_128 (void)
3771 {
3772  // const vui32_t signmask = CONST_VINT128_W(0, 0, 0, 128);
3773  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
3774  const vui32_t q_ones = CONST_VINT128_W (-1, -1, -1, -1);
3775  vui32_t signmask;
3776  signmask = vec_sl (q_ones, q_ones);
3777  return vec_sld (q_zero, signmask, 1);
3778 }
3779 
3805 static inline vui64_t
3806 vec_mask64_f128exp (void)
3807 {
3808  //const vui32_t expmask = CONST_VINT128_W (0, 0x7fff, 0, 0x7fff);
3809  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
3810  vui32_t expmask;
3811  expmask = (vui32_t) vec_splat_u8 (-8);
3812  expmask = vec_sld (q_zero, expmask, 4);
3813  return (vui64_t) vec_packpx (expmask, expmask);
3814 }
3815 
3828 static inline vui32_t
3829 vec_mask128_f128exp (void)
3830 {
3831  // const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
3832  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
3833  vui32_t expmask;
3834 
3835  expmask = (vui32_t) vec_splat_u8 (-8);
3836  expmask = vec_sld (expmask, q_zero, 12);
3837 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
3838  return (vui32_t) vec_packpx (q_zero, expmask);
3839 #else
3840  return (vui32_t) vec_packpx (expmask, q_zero);
3841 #endif
3842 }
3843 
3844 
3857 static inline vui32_t
3858 vec_mask128_f128mag (void)
3859 {
3860  // const vui32_t magmask = CONST_VINT128_W (0x7fffffff, -1, -1, -1);
3861  const vui32_t q_ones = CONST_VINT128_W (-1, -1, -1, -1);
3862  return (vui32_t) vec_srqi ((vui128_t) q_ones, 1);
3863 }
3864 
3877 static inline vui32_t
3878 vec_mask128_f128sig (void)
3879 {
3880  // const vui32_t sigmask = CONST_VINT128_W (0x0000ffff, -1, -1, -1);
3881  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
3882  const vui32_t q_ones = CONST_VINT128_W (-1, -1, -1, -1);
3883  return vec_sld (q_zero, q_ones, 14);
3884 }
3885 
3898 static inline vui32_t
3899 vec_mask128_f128sign (void)
3900 {
3901  // const vui32_t signmask = CONST_VINT128_W(0x80000000, 0, 0, 0);
3902  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
3903  const vui32_t q_ones = CONST_VINT128_W (-1, -1, -1, -1);
3904  vui32_t signmask;
3905  signmask = vec_sl (q_ones, q_ones);
3906  return vec_sld (signmask, q_zero, 12);
3907 }
3908 
3921 static inline vui32_t
3922 vec_mask128_f128Cbit (void)
3923 {
3924  // const vui32_t carry = CONST_VINT128_W (0x00020000, 0, 0, 0);
3925  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
3926  vui32_t carry = vec_splat_u32 (2);
3927  return vec_sld (carry, q_zero, 14);
3928 }
3929 
3942 static inline vui32_t
3943 vec_mask128_f128Lbit (void)
3944 {
3945  // const vui32_t hidden = CONST_VINT128_W (0x00010000, 0, 0, 0);
3946  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
3947  vui32_t hidden = vec_splat_u32 (1);
3948  return vec_sld (hidden, q_zero, 14);
3949 }
3950 
3963 static inline vui32_t
3964 vec_mask128_f128Qbit (void)
3965 {
3966  // const vui32_t QNaNbit = CONST_VINT128_W (0x00008000, 0, 0, 0);
3967  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
3968  const vui32_t q_ones = CONST_VINT128_W (-1, -1, -1, -1);
3969  vui32_t QNaNbit;
3970  QNaNbit = vec_sl (q_ones, q_ones);
3971  return vec_sld (QNaNbit, q_zero, 10);
3972 }
3973 
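/* A minimal usage sketch (illustrative, not from the pveclib source),
   assuming the generator names restored above (vec_mask128_f128sign,
   vec_mask128_f128exp, vec_mask128_f128sig); the helper name is
   hypothetical. The three masks are disjoint and together cover all
   128 bits of the IEEE binary128 layout. */
static inline int
example_f128_masks_cover_format (void)
{
  const vui32_t q_ones = CONST_VINT128_W (-1, -1, -1, -1);
  vui32_t signmask = vec_mask128_f128sign ();
  vui32_t expmask = vec_mask128_f128exp ();
  vui32_t sigmask = vec_mask128_f128sig ();
  // sign-bit | exponent | significand == all ones
  vui32_t all = vec_or (vec_or (signmask, expmask), sigmask);
  return vec_all_eq (all, q_ones);
}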
 3991  static inline __binary128
 3992  vec_sel_bin128_2_bin128 (__binary128 vfa, __binary128 vfb, vb128_t mask)
 3993  {
3994  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
3995  && !defined (_ARCH_PWR9)
3996  // Work around for GCC PR 100085
3997  __binary128 result;
3998  #ifdef __VSX__
3999  __asm__(
4000  "xxsel %x0,%x1,%x2,%x3"
4001  : "=wa" (result)
4002  : "wa" (vfa), "wa" (vfb), "wa" (mask)
4003  : );
4004  #else
4005  __asm__(
4006  "vsel %0,%1,%2,%3"
4007  : "=v" (result)
4008  : "v" (vfa), "v" (vfb), "v" (mask)
4009  : );
4010  #endif
4011  return result;
4012  #else
4013  __VF_128 ua, ub;
4014  vui32_t result;
4015 
4016  ua.vf1 = vfa;
4017  ub.vf1 = vfb;
4018 
4019  result = vec_sel (ua.vx4, ub.vx4, (vb32_t) mask);
4020  return vec_xfer_vui32t_2_bin128 (result);
4021  #endif
4022  }
4023 
 4039  static inline vui32_t
 4040  vec_and_bin128_2_vui32t (__binary128 f128, vui32_t mask)
 4041  {
4042  vui32_t result;
4043  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4044  && !defined (_ARCH_PWR9)
4045  // Work around for GCC PR 100085
4046  #ifdef __VSX__
4047  __asm__(
4048  "xxland %x0,%x1,%x2"
4049  : "=wa" (result)
4050  : "wa" (f128), "wa" (mask)
4051  : );
4052  #else
4053  __asm__(
4054  "vand %0,%1,%2"
4055  : "=v" (result)
4056  : "v" (f128), "v" (mask)
4057  : );
4058  #endif
4059  #else
4060  __VF_128 vunion;
4061 
4062  vunion.vf1 = f128;
4063 
4064  result = (vec_and (vunion.vx4, mask));
4065  #endif
4066  return result;
4067  }
4068 
 4084  static inline vui32_t
 4085  vec_andc_bin128_2_vui32t (__binary128 f128, vui32_t mask)
 4086  {
4087  vui32_t result;
4088  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4089  && !defined (_ARCH_PWR9)
4090  // Work around for GCC PR 100085
4091  #ifdef __VSX__
4092  __asm__(
4093  "xxlandc %x0,%x1,%x2"
4094  : "=wa" (result)
4095  : "wa" (f128), "wa" (mask)
4096  : );
4097  #else
4098  __asm__(
4099  "vandc %0,%1,%2"
4100  : "=v" (result)
4101  : "v" (f128), "v" (mask)
4102  : );
4103  #endif
4104  #else
4105  __VF_128 vunion;
4106 
4107  vunion.vf1 = f128;
4108 
4109  result = (vec_andc (vunion.vx4, mask));
4110  #endif
4111  return result;
4112  }
4113 
 4129  static inline vui32_t
 4130  vec_or_bin128_2_vui32t (__binary128 f128, vui32_t mask)
 4131  {
4132  vui32_t result;
4133  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4134  && !defined (_ARCH_PWR9)
4135  // Work around for GCC PR 100085
4136  #ifdef __VSX__
4137  __asm__(
4138  "xxlor %x0,%x1,%x2"
4139  : "=wa" (result)
4140  : "wa" (f128), "wa" (mask)
4141  : );
4142  #else
4143  __asm__(
4144  "vor %0,%1,%2"
4145  : "=v" (result)
4146  : "v" (f128), "v" (mask)
4147  : );
4148  #endif
4149  #else
4150  __VF_128 vunion;
4151 
4152  vunion.vf1 = f128;
4153 
4154  result = (vec_or (vunion.vx4, mask));
4155  #endif
4156  return result;
4157  }
4158 
 4174  static inline vui32_t
 4175  vec_xor_bin128_2_vui32t (__binary128 f128, vui32_t mask)
 4176  {
4177  vui32_t result;
4178  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4179  && !defined (_ARCH_PWR9)
4180  // Work around for GCC PR 100085
4181  #ifdef __VSX__
4182  __asm__(
4183  "xxlxor %x0,%x1,%x2"
4184  : "=wa" (result)
4185  : "wa" (f128), "wa" (mask)
4186  : );
4187  #else
4188  __asm__(
4189  "vxor %0,%1,%2"
4190  : "=v" (result)
4191  : "v" (f128), "v" (mask)
4192  : );
4193  #endif
4194  #else
4195  __VF_128 vunion;
4196 
4197  vunion.vf1 = f128;
4198 
4199  result = (vec_xor (vunion.vx4, mask));
4200  #endif
4201  return result;
4202  }
4203 
 4219  static inline vui128_t
 4220  vec_andc_bin128_2_vui128t (__binary128 f128, vui128_t mask)
 4221  {
4222  vui128_t result;
4223  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4224  && !defined (_ARCH_PWR9)
4225  // Work around for GCC PR 100085
4226  #ifdef __VSX__
4227  __asm__(
4228  "xxlandc %x0,%x1,%x2"
4229  : "=wa" (result)
4230  : "wa" (f128), "wa" (mask)
4231  : );
4232  #else
4233  __asm__(
4234  "vandc %0,%1,%2"
4235  : "=v" (result)
4236  : "v" (f128), "v" (mask)
4237  : );
4238  #endif
4239  #else
4240  __VF_128 vunion;
4241 
4242  vunion.vf1 = f128;
4243  // vec_andc does not accept vector __int128 type
4244  result = (vui128_t) vec_andc (vunion.vx4, (vui32_t) mask);
4245  #endif
4246  return result;
4247  }
4248 
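/* A minimal usage sketch (illustrative, not from the pveclib source),
   assuming the helper names restored above; example_f128_is_neg is
   hypothetical. The logical helpers operate on the raw bits of a
   __binary128 while it stays in a vector register, so a sign-bit test
   needs no __float128 arithmetic support. Note this reports true for
   -0.0 and negative NaNs as well. */
static inline int
example_f128_is_neg (__binary128 f128)
{
  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
  vui32_t tmp = vec_and_bin128_2_vui32t (f128, signmask);
  return !vec_all_eq (tmp, q_zero);
}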
4262 static inline vui8_t
4263 vec_xfer_bin128_2_vui8t (__binary128 f128)
4264 {
4265  vui8_t result;
4266 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4267  && !defined (_ARCH_PWR9)
4268  // Work around for GCC PR 100085
4269 #ifdef __VSX__
4270  __asm__(
4271  "xxlor %x0,%x1,%x1"
4272  : "=wa" (result)
4273  : "wa" (f128)
4274  : );
4275 #else
4276  __asm__(
4277  "vor %0,%1,%1"
4278  : "=v" (result)
4279  : "v" (f128)
4280  : );
4281 #endif
4282 #else
4283  __VF_128 vunion;
4284 
4285  vunion.vf1 = f128;
4286 
4287  result = (vunion.vx16);
4288 #endif
4289  return result;
4290 }
4291 
4305 static inline vui16_t
4306 vec_xfer_bin128_2_vui16t (__binary128 f128)
4307 {
4308  __VF_128 vunion;
4309 
4310  vunion.vf1 = f128;
4311 
4312  return (vunion.vx8);
4313 }
4314 
4328 static inline vui32_t
4329 vec_xfer_bin128_2_vui32t (__binary128 f128)
4330 {
4331  vui32_t result;
4332 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4333  && !defined (_ARCH_PWR9)
4334  // Work around for GCC PR 100085
4335 #ifdef __VSX__
4336  __asm__(
4337  "xxlor %x0,%x1,%x1"
4338  : "=wa" (result)
4339  : "wa" (f128)
4340  : );
4341 #else
4342  __asm__(
4343  "vor %0,%1,%1"
4344  : "=v" (result)
4345  : "v" (f128)
4346  : );
4347 #endif
4348 #else
4349  __VF_128 vunion;
4350 
4351  vunion.vf1 = f128;
4352 
4353  result = (vunion.vx4);
4354 #endif
4355  return result;
4356 }
4357 
4373 static inline vui64_t
4374 vec_mrgh_bin128_2_vui64t (__binary128 vfa, __binary128 vfb)
4375 {
4376  vui64_t result;
4377 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4378  && !defined (_ARCH_PWR9) && defined (__VSX__)
4379  // Work around for GCC PR 100085
4380  __asm__(
4381  "xxmrghd %x0,%x1,%x2"
4382  : "=wa" (result)
4383  : "wa" (vfa), "wa" (vfb)
4384  : );
4385 #else
4386  __VF_128 vunion_a, vunion_b;
4387 
4388  vunion_a.vf1 = vfa;
4389  vunion_b.vf1 = vfb;
4390 
4391  result = vec_mrgahd (vunion_a.vx1, vunion_b.vx1);
4392 #endif
4393  return result;
4394 }
4395 
4411 static inline vui64_t
4412 vec_mrgl_bin128_2_vui64t (__binary128 vfa, __binary128 vfb)
4413 {
4414  vui64_t result;
4415 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4416  && !defined (_ARCH_PWR9) && defined (__VSX__)
4417  // Work around for GCC PR 100085
4418  __asm__(
4419  "xxmrgld %x0,%x1,%x2"
4420  : "=wa" (result)
4421  : "wa" (vfa), "wa" (vfb)
4422  : );
4423 #else
4424  __VF_128 vunion_a, vunion_b;
4425 
4426  vunion_a.vf1 = vfa;
4427  vunion_b.vf1 = vfb;
4428 
4429  result = vec_mrgald (vunion_a.vx1, vunion_b.vx1);
4430 #endif
4431  return result;
4432 }
4433 
4447 static inline vui64_t
4448 vec_xfer_bin128_2_vui64t (__binary128 f128)
4449 {
4450  vui64_t result;
4451 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4452  && !defined (_ARCH_PWR9)
4453  // Work around for GCC PR 100085
4454 #ifdef __VSX__
4455  __asm__(
4456  "xxlor %x0,%x1,%x1"
4457  : "=wa" (result)
4458  : "wa" (f128)
4459  : );
4460 #else
4461  __asm__(
4462  "vor %0,%1,%1"
4463  : "=v" (result)
4464  : "v" (f128)
4465  : );
4466 #endif
4467 #else
4468  __VF_128 vunion;
4469 
4470  vunion.vf1 = f128;
4471 
4472  result = (vunion.vx2);
4473 #endif
4474  return result;
4475 }
4476 
4490 static inline vui128_t
4491 vec_xfer_bin128_2_vui128t (__binary128 f128)
4492 {
4493  vui128_t result;
4494 #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && (__GNUC__ > 7) \
4495  && !defined (_ARCH_PWR9)
4496  // Work around for GCC PR 100085
4497 #ifdef __VSX__
4498  __asm__(
4499  "xxlor %x0,%x1,%x1"
4500  : "=wa" (result)
4501  : "wa" (f128)
4502  : );
4503 #else
4504  __asm__(
4505  "vor %0,%1,%1"
4506  : "=v" (result)
4507  : "v" (f128)
4508  : );
4509 #endif
4510 #else
4511  __VF_128 vunion;
4512 
4513  vunion.vf1 = f128;
4514 
4515  result = (vunion.vx1);
4516 #endif
4517  return result;
4518 }
4519 
4533 static inline __binary128
4534 vec_xfer_vui8t_2_bin128 (vui8_t f128)
4535 {
4536  __VF_128 vunion;
4537 
4538  vunion.vx16 = f128;
4539 
4540  return (vunion.vf1);
4541 }
4542 
4556 static inline __binary128
4557 vec_xfer_vui16t_2_bin128 (vui16_t f128)
4558 {
4559  __VF_128 vunion;
4560 
4561  vunion.vx8 = f128;
4562 
4563  return (vunion.vf1);
4564 }
4565 
4579 static inline __binary128
4580 vec_xfer_vui32t_2_bin128 (vui32_t f128)
4581 {
4582  __VF_128 vunion;
4583 
4584  vunion.vx4 = f128;
4585 
4586  return (vunion.vf1);
4587 }
4588 
4602 static inline __binary128
4603 vec_xfer_vui64t_2_bin128 (vui64_t f128)
4604 {
4605  __VF_128 vunion;
4606 
4607  vunion.vx2 = f128;
4608 
4609  return (vunion.vf1);
4610 }
4611 
4625 static inline __binary128
4626 vec_xfer_vui128t_2_bin128 (vui128_t f128)
4627 {
4628  __VF_128 vunion;
4629 
4630  vunion.vx1 = f128;
4631 
4632  return (vunion.vf1);
4633 }
4634 
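/* A minimal usage sketch (illustrative, not from the pveclib source),
   assuming the xfer names restored above; example_f128_flip_sign is
   hypothetical. The xfer functions round-trip a __binary128 through the
   vector integer types so quadword integer/logical operations can be
   applied to its bit pattern and the result moved back. */
static inline __binary128
example_f128_flip_sign (__binary128 f128)
{
  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
  vui32_t tmp = vec_xfer_bin128_2_vui32t (f128);
  tmp = vec_xor (tmp, signmask);
  return vec_xfer_vui32t_2_bin128 (tmp);
}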
4649 static inline __binary128
4650 vec_absf128 (__binary128 f128)
4651 {
4652  __binary128 result;
4653 #if _ARCH_PWR9
4654  __asm__(
4655  "xsabsqp %0,%1;\n"
4656  : "=v" (result)
4657  : "v" (f128)
4658  :);
4659 #else
4660  vui32_t tmp;
4661  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
4662 
4663  tmp = vec_andc_bin128_2_vui32t (f128, signmask);
4664  result = vec_xfer_vui32t_2_bin128 (tmp);
4665 #endif
4666  return (result);
4667 }
4668 
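/* A minimal usage sketch (illustrative, not from the pveclib source),
   assuming the names restored above; example_f128_abs_clears_sign is
   hypothetical. Quad-precision absolute value is a pure sign-bit
   operation, so it works even when the compiler provides no __float128
   arithmetic. */
static inline int
example_f128_abs_clears_sign (__binary128 vfa)
{
  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
  vui32_t tmp = vec_and_bin128_2_vui32t (vec_absf128 (vfa), signmask);
  return vec_all_eq (tmp, q_zero);
}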
4690 static inline int
4691 vec_all_isfinitef128 (__binary128 f128)
4692 {
4693 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
4694  return !scalar_test_data_class (f128, 0x70);
4695 #else
4696  vui32_t tmp;
4697  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
4698 
4699  tmp = vec_and_bin128_2_vui32t (f128, expmask);
4700  return !vec_all_eq(tmp, expmask);
4701 #endif
4702 }
4703 
4722 static inline int
4723 vec_all_isinff128 (__binary128 f128)
4724 {
4725 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
4726  return scalar_test_data_class (f128, 0x30);
4727 #else
4728  vui32_t tmp;
4729 #if 0
4730  const vui32_t magmask = CONST_VINT128_W (0x7fffffff, -1, -1, -1);
4731  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
4732  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
4733 #else
4734  vui32_t magmask = vec_mask128_f128mag ();
4735  vui32_t expmask = vec_mask128_f128exp ();
4736 #endif
4737 
4738  tmp = vec_and_bin128_2_vui32t (f128, magmask);
4739  return vec_all_eq(tmp, expmask);
4740 #endif
4741 }
4742 
4762 static inline int
4763 vec_all_isnanf128 (__binary128 f128)
4764 {
4765 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
4766  return scalar_test_data_class (f128, 0x40);
4767 #elif defined (_ARCH_PWR8)
4768  vui32_t tmp;
4769  vui32_t magmask = vec_mask128_f128mag ();
4770  vui32_t expmask = vec_mask128_f128exp ();
4771 
4772  tmp = vec_and_bin128_2_vui32t (f128, magmask);
4773  return vec_cmpuq_all_gt ((vui128_t) tmp, (vui128_t) expmask);
4774 #else
4775  vui32_t tmp, tmp2;
4776  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
4777  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
4778 
4779  tmp = vec_andc_bin128_2_vui32t (f128, signmask);
4780  tmp2 = vec_and_bin128_2_vui32t (f128, expmask);
4781  return (vec_all_eq (tmp2, expmask) && vec_any_gt(tmp, expmask));
4782 #endif
4783 }
4784 
4806 static inline int
4807 vec_all_isnormalf128 (__binary128 f128)
4808 {
4809 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
4810  return !scalar_test_data_class (f128, 0x7f);
4811 #else
4812  vui32_t tmp;
4813  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
4814  const vui32_t vec_zero = CONST_VINT128_W (0, 0, 0, 0);
4815 
4816  tmp = vec_and_bin128_2_vui32t (f128, expmask);
4817  return !(vec_all_eq (tmp, expmask) || vec_all_eq(tmp, vec_zero));
4818 #endif
4819 }
4820 
4840 static inline int
4841 vec_all_issubnormalf128 (__binary128 f128)
4842 {
4843 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
4844  return scalar_test_data_class (f128, 0x03);
4845 #else
4846  const vui64_t minnorm = CONST_VINT128_DW(0x0001000000000000UL, 0UL);
4847  const vui64_t vec_zero = CONST_VINT128_DW(0, 0);
4848  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
4849  vui128_t tmp1;
4850 
4851  // Equivalent to vec_absf128 (f128)
4852  tmp1 = (vui128_t) vec_andc_bin128_2_vui32t (f128, signmask);
4853 
4854  return vec_cmpuq_all_gt ((vui128_t) minnorm, tmp1)
4855  && !vec_cmpuq_all_eq (tmp1, (vui128_t) vec_zero);
4856 #endif
4857 }
4858 
4880 static inline int
4881 vec_all_isunorderedf128 (__binary128 vfa, __binary128 vfb)
4882 {
4883  return (vec_all_isnanf128 (vfa) || vec_all_isnanf128 (vfb));
4884 }
4885 
4905 static inline int
4906 vec_all_iszerof128 (__binary128 f128)
4907 {
4908 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
4909  return scalar_test_data_class (f128, 0x0c);
4910 #else
4911  vui64_t tmp2;
4912  const vui64_t vec_zero = CONST_VINT128_DW(0, 0);
4913  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
4914 
4915  // Equivalent to vec_absf128 (f128)
4916  tmp2 = (vui64_t) vec_andc_bin128_2_vui32t (f128, signmask);
4917 #if _ARCH_PWR8
4918  return vec_all_eq(tmp2, vec_zero);
4919 #else
4920  return vec_all_eq((vui32_t)tmp2, (vui32_t)vec_zero);
4921 #endif
4922 #endif
4923 }
4924 
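/* A minimal usage sketch (illustrative, not from the pveclib source),
   assuming the predicate names restored above; example_f128_classify is
   hypothetical. The vec_all_is*f128 predicates return a scalar int and
   can drive ordinary branches, mirroring the C99 classification macros
   for __binary128. */
static inline const char *
example_f128_classify (__binary128 f128)
{
  if (vec_all_isnanf128 (f128))
    return "nan";
  if (vec_all_isinff128 (f128))
    return "inf";
  if (vec_all_iszerof128 (f128))
    return "zero";
  if (vec_all_issubnormalf128 (f128))
    return "subnormal";
  return "normal";
}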
4949 static inline __binary128
4950 vec_copysignf128 (__binary128 f128x, __binary128 f128y)
4951 {
4952  __binary128 result;
4953 #if _ARCH_PWR9
4954  __asm__(
4955  "xscpsgnqp %0,%1,%2;\n"
4956  : "=v" (result)
4957  : "v" (f128x), "v" (f128y)
4958  :);
4959 #else
4960  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
4961  vui32_t tmpx, tmpy, tmp;
4962  tmpx = vec_xfer_bin128_2_vui32t (f128x);
4963  tmpy = vec_xfer_bin128_2_vui32t (f128y);
4964 
4965  tmp = vec_sel (tmpy, tmpx, signmask);
4966  result = vec_xfer_vui32t_2_bin128 (tmp);
4967 #endif
4968  return (result);
4969 }
4970 
4975 static inline __binary128
4976 vec_const_huge_valf128 (void)
4977 {
4978  const vui32_t posinf = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
4979 
4980  return vec_xfer_vui32t_2_bin128 (posinf);
4981 }
4982 
4987 static inline __binary128
4988 vec_const_inff128 (void)
4989 {
4990  const vui32_t posinf = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
4991 
4992  return vec_xfer_vui32t_2_bin128 (posinf);
4993 }
4994 
4999 static inline __binary128
5000 vec_const_nanf128 (void)
5001 {
5002  const vui32_t posnan = CONST_VINT128_W (0x7fff8000, 0, 0, 0);
5003 
5004  return vec_xfer_vui32t_2_bin128 (posnan);
5005 }
5006 
5011 static inline __binary128
5012 vec_const_nansf128 (void)
5013 {
5014  const vui32_t signan = CONST_VINT128_W (0x7fff4000, 0, 0, 0);
5015 
5016  return vec_xfer_vui32t_2_bin128 (signan);
5017 }
5018 
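/* A minimal usage sketch (illustrative, not from the pveclib source),
   assuming the constant and predicate names restored above;
   example_f128_specials is hypothetical. The constant generators build
   special quad-precision values directly from their bit patterns, so no
   __float128 literal support is required. */
static inline int
example_f128_specials (void)
{
  __binary128 inf = vec_const_inff128 ();
  __binary128 qnan = vec_const_nanf128 ();
  // infinity is not finite; the default NaN reports as NaN
  return vec_all_isinff128 (inf) && !vec_all_isfinitef128 (inf)
      && vec_all_isnanf128 (qnan);
}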
5050 static inline vb128_t
5051 vec_cmpeqtoqp (__binary128 vfa, __binary128 vfb)
5052 {
5053  vb128_t result;
5054 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5055  __asm__(
5056  "xscmpeqqp %0,%1,%2;\n"
5057  : "=v" (result)
5058  : "v" (vfa), "v" (vfb)
5059  : );
5060 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5061  result= (vb128_t) vec_splat_u32 (0);
5062  if (vfa == vfb)
5063  result= (vb128_t) vec_splat_s32 (-1);
5064 #else // defined( _ARCH_PWR8 )
5065  vui128_t vra, vrb;
5066  vra = vec_xfer_bin128_2_vui128t (vfa);
5067  vrb = vec_xfer_bin128_2_vui128t (vfb);
5068  result = vec_cmpequq ( vra, vrb );
5069 #endif
5070  return result;
5071 }
5072 
5105 static inline vb128_t
5106 vec_cmpequzqp (__binary128 vfa, __binary128 vfb)
5107 {
5108  vb128_t result;
5109 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5110  __asm__(
5111  "xscmpeqqp %0,%1,%2;\n"
5112  : "=v" (result)
5113  : "v" (vfa), "v" (vfb)
5114  : );
5115 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5116  result = (vb128_t) vec_splat_u32 (0);
5117  if (vfa == vfb)
5118  result = (vb128_t) vec_splat_s32 (-1);
5119 #else // defined( _ARCH_PWR8 )
5120  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
5121  vb128_t cmps, or_ab, eq_s;
5122  vui64_t vra, vrb;
5123 
5124  vra = vec_xfer_bin128_2_vui64t (vfa);
5125  vrb = vec_xfer_bin128_2_vui64t (vfb);
5126 
5127  or_ab = (vb128_t) vec_or ( vra, vrb );
5128  eq_s = vec_cmpequq ((vui128_t) or_ab, (vui128_t) signmask);
5129  cmps = vec_cmpequq ((vui128_t) vra, (vui128_t)vrb);
5130  result = (vb128_t) vec_or ((vui32_t) cmps, (vui32_t) eq_s);
5131 #endif
5132  return result;
5133 }
5134 
5168 static inline vb128_t
5169 vec_cmpequqp (__binary128 vfa, __binary128 vfb)
5170 {
5171  vb128_t result;
5172 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5173  __asm__(
5174  "xscmpeqqp %0,%1,%2;\n"
5175  : "=v" (result)
5176  : "v" (vfa), "v" (vfb)
5177  : );
5178 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5179  result = (vb128_t) vec_splat_u32 (0);
5180  if (vfa == vfb)
5181  result = (vb128_t) vec_splat_s32 (-1);
5182 #else // defined( _ARCH_PWR8 )
5183  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
5184  vb128_t cmps, or_ab, eq_s;
5185  vui64_t vra, vrb;
5186  vb128_t unordered;
5187 
5188  unordered = vec_isunorderedf128 (vfa, vfb);
5189  vra = vec_xfer_bin128_2_vui64t (vfa);
5190  vrb = vec_xfer_bin128_2_vui64t (vfb);
5191 
5192  or_ab = (vb128_t) vec_or ( vra, vrb );
5193  eq_s = vec_cmpequq ((vui128_t) or_ab, (vui128_t) signmask);
5194  cmps = vec_cmpequq ((vui128_t) vra, (vui128_t) vrb);
5195  result = (vb128_t) vec_or ((vui32_t) cmps, (vui32_t) eq_s);
5196  result = (vb128_t) vec_andc ((vui32_t) result, (vui32_t) unordered);
5197 #endif
5198  return result;
5199 }
5200 
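/* A minimal usage sketch (illustrative, not from the pveclib source),
   assuming the names and the vb128_t mask parameter restored above;
   example_f128_select_if_eq is hypothetical. The vec_cmp*qp compares
   return a 128-bit all-ones/all-zeros mask, which combines with
   vec_sel_bin128_2_bin128 for branch-free selection. */
static inline __binary128
example_f128_select_if_eq (__binary128 vfa, __binary128 vfb, __binary128 vfc)
{
  // if (vfa == vfb) return vfc; else return vfa; without a branch
  vb128_t eqmask = vec_cmpequqp (vfa, vfb);
  return vec_sel_bin128_2_bin128 (vfa, vfc, eqmask);
}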
5234 static inline vb128_t
5235 vec_cmpgetoqp (__binary128 vfa, __binary128 vfb)
5236 {
5237  vb128_t result;
5238 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5239  __asm__(
5240  "xscmpgeqp %0,%1,%2;\n"
5241  : "=v" (result)
5242  : "v" (vfa), "v" (vfb)
5243  : );
5244 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5245  result= (vb128_t) vec_splat_u32 (0);
5246  if (vfa >= vfb)
5247  result= (vb128_t) vec_splat_s32 (-1);
5248 #else // defined( _ARCH_PWR8 )
5249  vui128_t vfa128, vfb128;
5250  vb128_t altb, agtb;
5251  vb128_t signbool;
5252  const vui8_t shift = vec_splat_u8 (7);
5253  vui8_t splatvfa;
5254 
5255  vfa128 = vec_xfer_bin128_2_vui128t (vfa);
5256  vfb128 = vec_xfer_bin128_2_vui128t (vfb);
5257 
5258  // Replace (vfa >= 0) with (vfa < 0) == vec_setb_qp (vfa)
5259  splatvfa = vec_splat ((vui8_t) vfa128, VEC_BYTE_H);
5260  signbool = (vb128_t) vec_sra (splatvfa, shift);
5261 
5262  agtb = vec_cmpgesq ((vi128_t) vfa128, (vi128_t) vfb128);
5263  altb = vec_cmpleuq ((vui128_t) vfa128, (vui128_t) vfb128);
5264  result = (vb128_t) vec_sel ((vui32_t)agtb, (vui32_t)altb, (vui32_t)signbool);
5265 #endif
5266  return result;
5267 }
5268 
5303 static inline vb128_t
5304 vec_cmpgeuzqp (__binary128 vfa, __binary128 vfb)
5305 {
5306  vb128_t result;
5307 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5308  __asm__(
5309  "xscmpgeqp %0,%1,%2;\n"
5310  : "=v" (result)
5311  : "v" (vfa), "v" (vfb)
5312  : );
5313 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5314  result = (vb128_t) vec_splat_u32 (0);
5315  if (vfa >= vfb)
5316  result = (vb128_t) vec_splat_s32 (-1);
5317 #else // defined( _ARCH_PWR8 )
5318  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
5319  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
5320  vui128_t vra, vrb;
5321  vb128_t age0, bge0;
5322  vui128_t vrap, vran;
5323  vui128_t vrbp, vrbn;
5324 
5325  vra = vec_xfer_bin128_2_vui128t (vfa);
5326  vrb = vec_xfer_bin128_2_vui128t (vfb);
5327 
5328  age0 = vec_setb_qp (vfa);
5329  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
5330  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
5331  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
5332 
5333  bge0 = vec_setb_qp (vfb);
5334  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
5335  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
5336  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
5337 
5338  result = vec_cmpgeuq ((vui128_t) vra, (vui128_t) vrb);
5339 #endif
5340  return result;
5341 }
5342 
5377 static inline vb128_t
5378 vec_cmpgeuqp (__binary128 vfa, __binary128 vfb)
5379 {
5380  vb128_t result;
5381 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5382  __asm__(
5383  "xscmpgeqp %0,%1,%2;\n"
5384  : "=v" (result)
5385  : "v" (vfa), "v" (vfb)
5386  : );
5387 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5388  result= (vb128_t) vec_splat_u32 (0);
5389  if (vfa >= vfb)
5390  result= (vb128_t) vec_splat_s32 (-1);
5391 #else // defined( _ARCH_PWR8 )
5392  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
5393  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
5394  vui128_t vra, vrb;
5395  vb128_t age0, bge0;
5396  vui128_t vrap, vran;
5397  vui128_t vrbp, vrbn;
5398  vb128_t unordered;
5399 
5400  unordered = vec_isunorderedf128 (vfa, vfb);
5401 
5402  vra = vec_xfer_bin128_2_vui128t (vfa);
5403  vrb = vec_xfer_bin128_2_vui128t (vfb);
5404 
5405  age0 = vec_setb_qp (vfa);
5406  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
5407  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
5408  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
5409 
5410  bge0 = vec_setb_qp (vfb);
5411  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
5412  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
5413  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
5414 
5415  result = vec_cmpgeuq ((vui128_t) vra, (vui128_t) vrb);
5416  result = (vb128_t) vec_andc ((vui32_t) result, (vui32_t) unordered);
5417 #endif
5418  return result;
5419 }
5420 
5454 static inline vb128_t
5455 vec_cmpgttoqp (__binary128 vfa, __binary128 vfb)
5456 {
5457  vb128_t result;
5458 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5459  __asm__(
5460  "xscmpgtqp %0,%1,%2;\n"
5461  : "=v" (result)
5462  : "v" (vfa), "v" (vfb)
5463  : );
5464 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5465  result= (vb128_t) vec_splat_u32 (0);
5466  if (vfa > vfb)
5467  result= (vb128_t) vec_splat_s32 (-1);
5468 #else // defined( _ARCH_PWR8 )
5469  vui128_t vfa128, vfb128;
5470  vb128_t altb, agtb;
5471  vb128_t signbool;
5472  const vui8_t shift = vec_splat_u8 (7);
5473  vui8_t splatvfa;
5474 
5475  vfa128 = vec_xfer_bin128_2_vui128t (vfa);
5476  vfb128 = vec_xfer_bin128_2_vui128t (vfb);
5477 
5478  // Replace (vfa >= 0) with (vfa < 0) == vec_setb_qp (vfa)
5479  splatvfa = vec_splat ((vui8_t) vfa128, VEC_BYTE_H);
5480  signbool = (vb128_t) vec_sra (splatvfa, shift);
5481 
5482  agtb = vec_cmpgtsq ((vi128_t) vfa128, (vi128_t) vfb128);
5483  altb = vec_cmpltuq ((vui128_t) vfa128, (vui128_t) vfb128);
5484  result = (vb128_t) vec_sel ((vui32_t)agtb, (vui32_t)altb, (vui32_t)signbool);
5485 #endif
5486  return result;
5487 }
5488 
5523 static inline vb128_t
5524 vec_cmpgtuzqp (__binary128 vfa, __binary128 vfb)
5525 {
5526  vb128_t result;
5527 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5528  __asm__(
5529  "xscmpgtqp %0,%1,%2;\n"
5530  : "=v" (result)
5531  : "v" (vfa), "v" (vfb)
5532  : );
5533 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5534  result = (vb128_t) vec_splat_u32 (0);
5535  if (vfa > vfb)
5536  result = (vb128_t) vec_splat_s32 (-1);
5537 #else // defined( _ARCH_PWR8 )
5538  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
5539  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
5540  vui128_t vra, vrb;
5541  vb128_t age0, bge0;
5542  vui128_t vrap, vran;
5543  vui128_t vrbp, vrbn;
5544 
5545  vra = vec_xfer_bin128_2_vui128t (vfa);
5546  vrb = vec_xfer_bin128_2_vui128t (vfb);
5547 
5548  age0 = vec_setb_qp (vfa);
5549  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
5550  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
5551  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
5552 
5553  bge0 = vec_setb_qp (vfb);
5554  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
5555  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
5556  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
5557 
5558  result = vec_cmpgtuq ((vui128_t) vra, (vui128_t) vrb);
5559 #endif
5560  return result;
5561 }
5562 
5597 static inline vb128_t
5598 vec_cmpgtuqp (__binary128 vfa, __binary128 vfb)
5599 {
5600  vb128_t result;
5601 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5602  __asm__(
5603  "xscmpgtqp %0,%1,%2;\n"
5604  : "=v" (result)
5605  : "v" (vfa), "v" (vfb)
5606  : );
5607 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5608  result= (vb128_t) vec_splat_u32 (0);
5609  if (vfa > vfb)
5610  result= (vb128_t) vec_splat_s32 (-1);
5611 #else // defined( _ARCH_PWR8 )
5612  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
5613  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
5614  vui128_t vra, vrb;
5615  vb128_t age0, bge0;
5616  vui128_t vrap, vran;
5617  vui128_t vrbp, vrbn;
5618  vb128_t unordered;
5619 
5620  unordered = vec_isunorderedf128 (vfa, vfb);
5621 
5622  vra = vec_xfer_bin128_2_vui128t (vfa);
5623  vrb = vec_xfer_bin128_2_vui128t (vfb);
5624 
5625  age0 = vec_setb_qp (vfa);
5626  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
5627  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
5628  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
5629 
5630  bge0 = vec_setb_qp (vfb);
5631  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
5632  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
5633  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
5634 
5635  result = vec_cmpgtuq ((vui128_t) vra, (vui128_t) vrb);
5636  result = (vb128_t) vec_andc ((vui32_t) result, (vui32_t) unordered);
5637 #endif
5638  return result;
5639 }
5640 
5674 static inline vb128_t
5675 vec_cmpletoqp (__binary128 vfa, __binary128 vfb)
5676 {
5677  vb128_t result;
5678 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5679  __asm__(
5680  "xscmpgeqp %0,%2,%1;\n"
5681  : "=v" (result)
5682  : "v" (vfa), "v" (vfb)
5683  : );
5684 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5685  result= (vb128_t) vec_splat_u32 (0);
5686  if (vfa <= vfb)
5687  result= (vb128_t) vec_splat_s32 (-1);
5688 #else // defined( _ARCH_PWR8 )
5689  vui128_t vfa128, vfb128;
5690  vb128_t altb, agtb;
5691  vb128_t signbool;
5692  const vui8_t shift = vec_splat_u8 (7);
5693  vui8_t splatvfa;
5694 
5695  vfa128 = vec_xfer_bin128_2_vui128t (vfa);
5696  vfb128 = vec_xfer_bin128_2_vui128t (vfb);
5697 
5698  // Replace (vfa >= 0) with (vfa < 0) == vec_setb_qp (vfa)
5699  splatvfa = vec_splat ((vui8_t) vfa128, VEC_BYTE_H);
5700  signbool = (vb128_t) vec_sra (splatvfa, shift);
5701 
5702  altb = vec_cmplesq ((vi128_t) vfa128, (vi128_t) vfb128);
5703  agtb = vec_cmpgeuq ((vui128_t) vfa128, (vui128_t) vfb128);
5704  result = (vb128_t) vec_sel ((vui32_t)altb, (vui32_t)agtb, (vui32_t)signbool);
5705 #endif
5706  return result;
5707 }
5708 
5743 static inline vb128_t
5744 vec_cmpleuzqp (__binary128 vfa, __binary128 vfb)
5745 {
5746  vb128_t result;
5747 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5748  __asm__(
5749  "xscmpgeqp %0,%2,%1;\n"
5750  : "=v" (result)
5751  : "v" (vfa), "v" (vfb)
5752  : );
5753 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5754  result = (vb128_t) vec_splat_u32 (0);
5755  if (vfa <= vfb)
5756  result = (vb128_t) vec_splat_s32 (-1);
5757 #else // defined( _ARCH_PWR8 )
5758  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
5759  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
5760  vui128_t vra, vrb;
5761  vb128_t age0, bge0;
5762  vui128_t vrap, vran;
5763  vui128_t vrbp, vrbn;
5764 
5765  vra = vec_xfer_bin128_2_vui128t (vfa);
5766  vrb = vec_xfer_bin128_2_vui128t (vfb);
5767 
5768  age0 = vec_setb_qp (vfa);
5769  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
5770  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
5771  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
5772 
5773  bge0 = vec_setb_qp (vfb);
5774  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
5775  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
5776  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
5777 
5778  result = vec_cmpleuq ((vui128_t) vra, (vui128_t) vrb);
5779 #endif
5780  return result;
5781 }
5782 
5817 static inline vb128_t
5818 vec_cmpleuqp (__binary128 vfa, __binary128 vfb)
5819 {
5820  vb128_t result;
5821 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5822  __asm__(
5823  "xscmpgeqp %0,%2,%1;\n"
5824  : "=v" (result)
5825  : "v" (vfa), "v" (vfb)
5826  : );
5827 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5828  result= (vb128_t) vec_splat_u32 (0);
5829  if (vfa <= vfb)
5830  result= (vb128_t) vec_splat_s32 (-1);
5831 #else // defined( _ARCH_PWR8 )
5832  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
5833  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
5834  vui128_t vra, vrb;
5835  vb128_t age0, bge0;
5836  vui128_t vrap, vran;
5837  vui128_t vrbp, vrbn;
5838  vb128_t unordered;
5839 
5840  unordered = vec_isunorderedf128 (vfa, vfb);
5841 
5842  vra = vec_xfer_bin128_2_vui128t (vfa);
5843  vrb = vec_xfer_bin128_2_vui128t (vfb);
5844 
5845  age0 = vec_setb_qp (vfa);
5846  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
5847  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
5848  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
5849 
5850  bge0 = vec_setb_qp (vfb);
5851  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
5852  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
5853  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
5854 
5855  result = vec_cmpleuq ((vui128_t) vra, (vui128_t) vrb);
5856  result = (vb128_t) vec_andc ((vui32_t) result, (vui32_t) unordered);
5857 #endif
5858  return result;
5859 }
5860 
5894 static inline vb128_t
5895 vec_cmplttoqp (__binary128 vfa, __binary128 vfb)
5896 {
5897  vb128_t result;
5898 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5899  __asm__(
5900  "xscmpgtqp %0,%2,%1;\n"
5901  : "=v" (result)
5902  : "v" (vfa), "v" (vfb)
5903  : );
5904 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5905  result= (vb128_t) vec_splat_u32 (0);
5906  if (vfa < vfb)
5907  result= (vb128_t) vec_splat_s32 (-1);
5908 #else // defined( _ARCH_PWR8 )
5909  vui128_t vfa128, vfb128;
5910  vb128_t altb, agtb;
5911  vb128_t signbool;
5912  const vui8_t shift = vec_splat_u8 (7);
5913  vui8_t splatvfa;
5914 
5915  vfa128 = vec_xfer_bin128_2_vui128t (vfa);
5916  vfb128 = vec_xfer_bin128_2_vui128t (vfb);
5917 
5918  // Replace (vfa >= 0) with (vfa < 0) == vec_setb_qp (vfa)
5919  splatvfa = vec_splat ((vui8_t) vfa128, VEC_BYTE_H);
5920  signbool = (vb128_t) vec_sra (splatvfa, shift);
5921 
5922  altb = vec_cmpltsq ((vi128_t) vfa128, (vi128_t) vfb128);
5923  agtb = vec_cmpgtuq ((vui128_t) vfa128, (vui128_t) vfb128);
5924  result = (vb128_t) vec_sel ((vui32_t)altb, (vui32_t)agtb, (vui32_t)signbool);
5925 #endif
5926  return result;
5927 }
5928 
5963 static inline vb128_t
5964 vec_cmpltuzqp (__binary128 vfa, __binary128 vfb)
5965 {
5966  vb128_t result;
5967 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
5968  __asm__(
5969  "xscmpgtqp %0,%2,%1;\n"
5970  : "=v" (result)
5971  : "v" (vfa), "v" (vfb)
5972  : );
5973 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
5974  result = (vb128_t) vec_splat_u32 (0);
5975  if (vfa < vfb)
5976  result = (vb128_t) vec_splat_s32 (-1);
5977 #else // defined( _ARCH_PWR8 )
5978  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
5979  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
5980  vui128_t vra, vrb;
5981  vb128_t age0, bge0;
5982  vui128_t vrap, vran;
5983  vui128_t vrbp, vrbn;
5984 
5985  vra = vec_xfer_bin128_2_vui128t (vfa);
5986  vrb = vec_xfer_bin128_2_vui128t (vfb);
5987 
5988  age0 = vec_setb_qp (vfa);
5989  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
5990  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
5991  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
5992 
5993  bge0 = vec_setb_qp (vfb);
5994  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
5995  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
5996  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
5997 
5998  result = vec_cmpltuq ((vui128_t) vra, (vui128_t) vrb);
5999 #endif
6000  return result;
6001 }
6002 
6037 static inline vb128_t
6038 vec_cmpltuqp (__binary128 vfa, __binary128 vfb)
6039 {
6040  vb128_t result;
6041 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
6042  __asm__(
6043  "xscmpgtqp %0,%2,%1;\n"
6044  : "=v" (result)
6045  : "v" (vfa), "v" (vfb)
6046  : );
6047 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6048  result= (vb128_t) vec_splat_u32 (0);
6049  if (vfa < vfb)
6050  result= (vb128_t) vec_splat_s32 (-1);
6051 #else // defined( _ARCH_PWR8 )
6052  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
6053  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6054  vui128_t vra, vrb;
6055  vb128_t age0, bge0;
6056  vui128_t vrap, vran;
6057  vui128_t vrbp, vrbn;
6058  vb128_t unordered;
6059 
6060  unordered = vec_isunorderedf128 (vfa, vfb);
6061 
6062  vra = vec_xfer_bin128_2_vui128t (vfa);
6063  vrb = vec_xfer_bin128_2_vui128t (vfb);
6064 
6065  age0 = vec_setb_qp (vfa);
6066  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
6067  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
6068  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
6069 
6070  bge0 = vec_setb_qp (vfb);
6071  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
6072  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
6073  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
6074 
6075  result = vec_cmpltuq ((vui128_t) vra, (vui128_t) vrb);
6076  result = (vb128_t) vec_andc ((vui32_t) result, (vui32_t) unordered);
6077 #endif
6078  return result;
6079 }
6080 
6112 static inline vb128_t
6113 vec_cmpnetoqp (__binary128 vfa, __binary128 vfb)
6114 {
6115  vb128_t result;
6116 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
6117  __asm__(
6118  "xscmpeqqp %0,%1,%2;\n"
6119  : "=v" (result)
6120  : "v" (vfa), "v" (vfb)
6121  : );
6122  result = (vb128_t) vec_nor ((vui32_t) result, (vui32_t) result);
6123 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6124  result= (vb128_t) vec_splat_u32 (0);
6125  if (vfa != vfb)
6126  result= (vb128_t) vec_splat_s32 (-1);
6127 #else // defined( _ARCH_PWR8 )
6128  vui128_t vra, vrb;
6129  vra = vec_xfer_bin128_2_vui128t (vfa);
6130  vrb = vec_xfer_bin128_2_vui128t (vfb);
6131  result = vec_cmpneuq ( vra, vrb );
6132 #endif
6133  return result;
6134 }
6135 
6168 static inline vb128_t
6169 vec_cmpneuzqp (__binary128 vfa, __binary128 vfb)
6170 {
6171  vb128_t result;
6172 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
6173  __asm__(
6174  "xscmpeqqp %0,%1,%2;\n"
6175  : "=v" (result)
6176  : "v" (vfa), "v" (vfb)
6177  : );
6178  result = (vb128_t) vec_nor ((vui32_t) result, (vui32_t) result);
6179 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6180  result = (vb128_t) vec_splat_u32 (0);
6181  if (vfa != vfb)
6182  result = (vb128_t) vec_splat_s32 (-1);
6183 #else // defined( _ARCH_PWR8 )
6184  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6185  vb128_t cmps, or_ab, eq_s;
6186  vui64_t vra, vrb;
6187 
6188  vra = vec_xfer_bin128_2_vui64t (vfa);
6189  vrb = vec_xfer_bin128_2_vui64t (vfb);
6190 
6191  or_ab = (vb128_t) vec_or ( vra, vrb );
6192  eq_s = vec_cmpequq ((vui128_t) or_ab, (vui128_t) signmask);
6193  cmps = vec_cmpequq ((vui128_t) vra, (vui128_t)vrb);
6194  result = (vb128_t) vec_nor ((vui32_t) cmps, (vui32_t) eq_s);
6195 #endif
6196  return result;
6197 }
6198 
6232 static inline vb128_t
6233 vec_cmpneuqp (__binary128 vfa, __binary128 vfb)
6234 {
6235  vb128_t result;
6236 #if defined (_ARCH_PWR10) && defined (__FLOAT128__) && (__GNUC__ >= 10)
6237  __asm__(
6238  "xscmpeqqp %0,%1,%2;\n"
6239  : "=v" (result)
6240  : "v" (vfa), "v" (vfb)
6241  : );
6242  result = (vb128_t) vec_nor ((vui32_t) result, (vui32_t) result);
6243 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6244  result = (vb128_t) vec_splat_u32 (0);
6245  if (vfa != vfb)
6246  result = (vb128_t) vec_splat_s32 (-1);
6247 #else // defined( _ARCH_PWR8 )
6248  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6249  vb128_t cmps, or_ab, eq_s;
6250  vui64_t vra, vrb;
6251  vb128_t unordered;
6252 
6253  unordered = vec_isunorderedf128 (vfa, vfb);
6254 
6255  vra = vec_xfer_bin128_2_vui64t (vfa);
6256  vrb = vec_xfer_bin128_2_vui64t (vfb);
6257 
6258  or_ab = (vb128_t) vec_or ( vra, vrb );
6259  eq_s = vec_cmpequq ((vui128_t) or_ab, (vui128_t) signmask);
6260  cmps = vec_cmpequq ((vui128_t) vra, (vui128_t) vrb);
6261  result = (vb128_t) vec_nor ((vui32_t) cmps, (vui32_t) eq_s);
6262  result = (vb128_t) vec_or ((vui32_t) result, (vui32_t) unordered);
6263 #endif
6264  return result;
6265 }
6266 
6296 static inline int
6297 vec_cmpqp_all_toeq (__binary128 vfa, __binary128 vfb)
6298 {
6299  int result;
6300 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6301  result= (vfa == vfb);
6302 #else // defined( _ARCH_PWR8 )
6303  vui128_t vra, vrb;
6304  vra = vec_xfer_bin128_2_vui128t (vfa);
6305  vrb = vec_xfer_bin128_2_vui128t (vfb);
6306  result = vec_cmpuq_all_eq ( vra, vrb );
6307 #endif
6308  return result;
6309 }
6310 
6341 static inline int
6342 vec_cmpqp_all_uzeq (__binary128 vfa, __binary128 vfb)
6343 {
6344  int result;
6345 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6346  result = (vfa == vfb);
6347 #else // defined( _ARCH_PWR8 )
6348  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6349  vb128_t or_ab;
6350  vui64_t vra, vrb;
6351 
6352  vra = vec_xfer_bin128_2_vui64t (vfa);
6353  vrb = vec_xfer_bin128_2_vui64t (vfb);
6354 
6355  or_ab = (vb128_t) vec_or ( vra, vrb );
6356  result = vec_cmpuq_all_eq ((vui128_t) or_ab, (vui128_t) signmask)
6357  || vec_cmpuq_all_eq ((vui128_t) vra, (vui128_t)vrb);
6358 #endif
6359  return result;
6360 }
6361 
6393 static inline int
6394 vec_cmpqp_all_eq (__binary128 vfa, __binary128 vfb)
6395 {
6396  int result;
6397 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6398  result = (vfa == vfb);
6399 #else // defined( _ARCH_PWR8 )
6400  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6401  vb128_t or_ab;
6402  vui64_t vra, vrb;
6403 
6404  vra = vec_xfer_bin128_2_vui64t (vfa);
6405  vrb = vec_xfer_bin128_2_vui64t (vfb);
6406 
6407  or_ab = (vb128_t) vec_or ( vra, vrb );
6408  result = (vec_cmpuq_all_eq ((vui128_t) or_ab, (vui128_t) signmask)
6409  || vec_cmpuq_all_eq ((vui128_t) vra, (vui128_t)vrb))
6410  && !vec_all_isunorderedf128 (vfa, vfb);
6411 #endif
6412  return result;
6413 }
6414 
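/* A minimal usage sketch (illustrative, not from the pveclib source),
   assuming the predicate names restored above; example_f128_equivalent
   is hypothetical. The vec_cmpqp_all_* predicates collapse the quadword
   compare to a scalar int for use in if statements; the _eq form treats
   +0.0 and -0.0 as equal and returns false for unordered operands. */
static inline int
example_f128_equivalent (__binary128 vfa, __binary128 vfb)
{
  // equal as values, or both NaN (payloads ignored)
  return vec_cmpqp_all_eq (vfa, vfb)
      || (vec_all_isnanf128 (vfa) && vec_all_isnanf128 (vfb));
}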
6446 static inline int
6447 vec_cmpqp_all_toge (__binary128 vfa, __binary128 vfb)
6448 {
6449  int result;
6450 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6451  result = (vfa >= vfb);
6452 #else // defined( _ARCH_PWR8 )
6453  vui128_t vfa128, vfb128;
6454  vb128_t altb, agtb;
6455  vb128_t signbool;
6456  const vui8_t shift = vec_splat_u8 (7);
6457  vui8_t splatvfa;
6458  vui32_t togt;
6459  const vui32_t zeros = (vui32_t) vec_splat_u32 (0);
6460 
6461  vfa128 = vec_xfer_bin128_2_vui128t (vfa);
6462  vfb128 = vec_xfer_bin128_2_vui128t (vfb);
6463 
6464  // Replace (vfa >= 0) with (vfa < 0) == vec_setb_qp (vfa)
6465  splatvfa = vec_splat ((vui8_t) vfa128, VEC_BYTE_H);
6466  signbool = (vb128_t) vec_sra (splatvfa, shift);
6467 
6468  agtb = vec_cmpgesq ((vi128_t) vfa128, (vi128_t) vfb128);
6469  altb = vec_cmpleuq ((vui128_t) vfa128, (vui128_t) vfb128);
6470  togt = vec_sel ((vui32_t)agtb, (vui32_t)altb, (vui32_t)signbool);
6471  result = vec_all_ne (togt, zeros);
6472 #endif
6473  return result;
6474 }
6475 
6508 static inline int
6509 vec_cmpqp_all_uzge (__binary128 vfa, __binary128 vfb)
6510 {
6511  int result;
6512 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6513  result = (vfa >= vfb);
6514 #else // defined( _ARCH_PWR8 )
6515  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
6516  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6517  vui128_t vra, vrb;
6518  vb128_t age0, bge0;
6519  vui128_t vrap, vran;
6520  vui128_t vrbp, vrbn;
6521 
6522  vra = vec_xfer_bin128_2_vui128t (vfa);
6523  vrb = vec_xfer_bin128_2_vui128t (vfb);
6524 
6525  age0 = vec_setb_qp (vfa);
6526  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
6527  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
6528  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
6529 
6530  bge0 = vec_setb_qp (vfb);
6531  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
6532  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
6533  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
6534 
6535  result = vec_cmpuq_all_ge ((vui128_t) vra, (vui128_t) vrb);
6536 #endif
6537  return result;
6538 }
6539 
6572 static inline int
6573 vec_cmpqp_all_ge (__binary128 vfa, __binary128 vfb)
6574 {
6575  int result;
6576 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6577  result = (vfa >= vfb);
6578 #else // defined( _ARCH_PWR8 )
6579  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
6580  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6581  vui128_t vra, vrb;
6582  vb128_t age0, bge0;
6583  vui128_t vrap, vran;
6584  vui128_t vrbp, vrbn;
6585 
6586  vra = vec_xfer_bin128_2_vui128t (vfa);
6587  vrb = vec_xfer_bin128_2_vui128t (vfb);
6588 
6589  age0 = vec_setb_qp (vfa);
6590  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
6591  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
6592  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
6593 
6594  bge0 = vec_setb_qp (vfb);
6595  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
6596  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
6597  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
6598 
6599  result = vec_cmpuq_all_ge ((vui128_t) vra, (vui128_t) vrb)
6600  && !vec_all_isunorderedf128 (vfa, vfb);
6601 #endif
6602  return result;
6603 }
6604 
6636 static inline int
6637 vec_cmpqp_all_togt (__binary128 vfa, __binary128 vfb)
6638 {
6639  int result;
6640 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6641  result = (vfa > vfb);
6642 #else // defined( _ARCH_PWR8 )
6643  vui128_t vfa128, vfb128;
6644  vb128_t altb, agtb;
6645  vb128_t signbool;
6646  const vui8_t shift = vec_splat_u8 (7);
6647  vui8_t splatvfa;
6648  vui32_t togt;
6649  const vui32_t zeros = (vui32_t) vec_splat_u32 (0);
6650 
6651  vfa128 = vec_xfer_bin128_2_vui128t (vfa);
6652  vfb128 = vec_xfer_bin128_2_vui128t (vfb);
6653 
6654  // Replace (vfa >= 0) with (vfa < 0) == vec_setb_qp (vfa)
6655  splatvfa = vec_splat ((vui8_t) vfa128, VEC_BYTE_H);
6656  signbool = (vb128_t) vec_sra (splatvfa, shift);
6657 
6658  agtb = vec_cmpgtsq ((vi128_t) vfa128, (vi128_t) vfb128);
6659  altb = vec_cmpltuq ((vui128_t) vfa128, (vui128_t) vfb128);
6660  togt = vec_sel ((vui32_t)agtb, (vui32_t)altb, (vui32_t)signbool);
6661  result = vec_all_ne (togt, zeros);
6662 #endif
6663  return result;
6664 }
6665 
6698 static inline int
6699 vec_cmpqp_all_uzgt (__binary128 vfa, __binary128 vfb)
6700 {
6701  int result;
6702 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6703  result = (vfa > vfb);
6704 #else // defined( _ARCH_PWR8 )
6705  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
6706  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6707  vui128_t vra, vrb;
6708  vb128_t age0, bge0;
6709  vui128_t vrap, vran;
6710  vui128_t vrbp, vrbn;
6711 
6712  vra = vec_xfer_bin128_2_vui128t (vfa);
6713  vrb = vec_xfer_bin128_2_vui128t (vfb);
6714 
6715  age0 = vec_setb_qp (vfa);
6716  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
6717  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
6718  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
6719 
6720  bge0 = vec_setb_qp (vfb);
6721  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
6722  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
6723  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
6724 
6725  result = vec_cmpuq_all_gt ((vui128_t) vra, (vui128_t) vrb);
6726 #endif
6727  return result;
6728 }
6729 
6762 static inline int
6763 vec_cmpqp_all_gt (__binary128 vfa, __binary128 vfb)
6764 {
6765  int result;
6766 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6767  result = (vfa > vfb);
6768 #else // defined( _ARCH_PWR8 )
6769  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
6770  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6771  vui128_t vra, vrb;
6772  vb128_t age0, bge0;
6773  vui128_t vrap, vran;
6774  vui128_t vrbp, vrbn;
6775 
6776  vra = vec_xfer_bin128_2_vui128t (vfa);
6777  vrb = vec_xfer_bin128_2_vui128t (vfb);
6778 
6779  age0 = vec_setb_qp (vfa);
6780  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
6781  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
6782  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
6783 
6784  bge0 = vec_setb_qp (vfb);
6785  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
6786  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
6787  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
6788 
6789  result = vec_cmpuq_all_gt ((vui128_t) vra, (vui128_t) vrb)
6790  && !vec_all_isunorderedf128 (vfa, vfb);
6791 #endif
6792  return result;
6793 }
6825 static inline int
6826 vec_cmpqp_all_tole (__binary128 vfa, __binary128 vfb)
6827 {
6828  int result;
6829 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6830  result = (vfa <= vfb);
6831 #else // defined( _ARCH_PWR8 )
6832  vui128_t vfa128, vfb128;
6833  vb128_t altb, agtb;
6834  vb128_t signbool;
6835  const vui8_t shift = vec_splat_u8 (7);
6836  vui8_t splatvfa;
6837  vui32_t tolt;
6838  const vui32_t zeros = (vui32_t) vec_splat_u32 (0);
6839 
6840  vfa128 = vec_xfer_bin128_2_vui128t (vfa);
6841  vfb128 = vec_xfer_bin128_2_vui128t (vfb);
6842 
6843  // Replace (vfa >= 0) with (vfa < 0) == vec_setb_qp (vfa)
6844  splatvfa = vec_splat ((vui8_t) vfa128, VEC_BYTE_H);
6845  signbool = (vb128_t) vec_sra (splatvfa, shift);
6846 
6847  altb = vec_cmplesq ((vi128_t) vfa128, (vi128_t) vfb128);
6848  agtb = vec_cmpgeuq ((vui128_t) vfa128, (vui128_t) vfb128);
6849  tolt = vec_sel ((vui32_t)altb, (vui32_t)agtb, (vui32_t)signbool);
6850  result = vec_all_ne (tolt, zeros);
6851 #endif
6852  return result;
6853 }
6854 
6887 static inline int
6888 vec_cmpqp_all_uzle (__binary128 vfa, __binary128 vfb)
6889 {
6890  int result;
6891 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6892  result = (vfa <= vfb);
6893 #else // defined( _ARCH_PWR8 )
6894  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
6895  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6896  vui128_t vra, vrb;
6897  vb128_t age0, bge0;
6898  vui128_t vrap, vran;
6899  vui128_t vrbp, vrbn;
6900 
6901  vra = vec_xfer_bin128_2_vui128t (vfa);
6902  vrb = vec_xfer_bin128_2_vui128t (vfb);
6903 
6904  age0 = vec_setb_qp (vfa);
6905  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
6906  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
6907  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
6908 
6909  bge0 = vec_setb_qp (vfb);
6910  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
6911  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
6912  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
6913 
6914  result = vec_cmpuq_all_le ((vui128_t) vra, (vui128_t) vrb);
6915 #endif
6916  return result;
6917 }
6918 
6951 static inline int
6952 vec_cmpqp_all_le (__binary128 vfa, __binary128 vfb)
6953 {
6954  int result;
6955 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
6956  result = (vfa <= vfb);
6957 #else // defined( _ARCH_PWR8 )
6958  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
6959  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
6960  vui128_t vra, vrb;
6961  vb128_t age0, bge0;
6962  vui128_t vrap, vran;
6963  vui128_t vrbp, vrbn;
6964 
6965  vra = vec_xfer_bin128_2_vui128t (vfa);
6966  vrb = vec_xfer_bin128_2_vui128t (vfb);
6967 
6968  age0 = vec_setb_qp (vfa);
6969  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
6970  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
6971  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
6972 
6973  bge0 = vec_setb_qp (vfb);
6974  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
6975  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
6976  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
6977 
6978  result = vec_cmpuq_all_le ((vui128_t) vra, (vui128_t) vrb)
6979  && !vec_all_isunorderedf128 (vfa, vfb);
6980 #endif
6981  return result;
6982 }
6983 
7015 static inline int
7016 vec_cmpqp_all_tolt (__binary128 vfa, __binary128 vfb)
7017 {
7018  int result;
7019 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
7020  result = (vfa < vfb);
7021 #else // defined( _ARCH_PWR8 )
7022  vui128_t vfa128, vfb128;
7023  vb128_t altb, agtb;
7024  vb128_t signbool;
7025  const vui8_t shift = vec_splat_u8 (7);
7026  vui8_t splatvfa;
7027  vui32_t tolt;
7028  const vui32_t zeros = (vui32_t) vec_splat_u32 (0);
7029 
7030  vfa128 = vec_xfer_bin128_2_vui128t (vfa);
7031  vfb128 = vec_xfer_bin128_2_vui128t (vfb);
7032 
7033  // Replace (vfa >= 0) with (vfa < 0) == vec_setb_qp (vfa)
7034  splatvfa = vec_splat ((vui8_t) vfa128, VEC_BYTE_H);
7035  signbool = (vb128_t) vec_sra (splatvfa, shift);
7036 
7037  altb = vec_cmpltsq ((vi128_t) vfa128, (vi128_t) vfb128);
7038  agtb = vec_cmpgtuq ((vui128_t) vfa128, (vui128_t) vfb128);
7039  tolt = vec_sel ((vui32_t)altb, (vui32_t)agtb, (vui32_t)signbool);
7040  result = vec_all_ne (tolt, zeros);
7041 #endif
7042  return result;
7043 }
7044 
7077 static inline int
7078 vec_cmpqp_all_uzlt (__binary128 vfa, __binary128 vfb)
7079 {
7080  int result;
7081 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
7082  result = (vfa < vfb);
7083 #else // defined( _ARCH_PWR8 )
7084  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
7085  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7086  vui128_t vra, vrb;
7087  vb128_t age0, bge0;
7088  vui128_t vrap, vran;
7089  vui128_t vrbp, vrbn;
7090 
7091  vra = vec_xfer_bin128_2_vui128t (vfa);
7092  vrb = vec_xfer_bin128_2_vui128t (vfb);
7093 
7094  age0 = vec_setb_qp (vfa);
7095  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
7096  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
7097  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
7098 
7099  bge0 = vec_setb_qp (vfb);
7100  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
7101  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
7102  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
7103 
7104  result = vec_cmpuq_all_lt ((vui128_t) vra, (vui128_t) vrb);
7105 #endif
7106  return result;
7107 }
7108 
7141 static inline int
7142 vec_cmpqp_all_lt (__binary128 vfa, __binary128 vfb)
7143 {
7144  int result;
7145 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
7146  result = (vfa < vfb);
7147 #else // defined( _ARCH_PWR8 )
7148  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
7149  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7150  vui128_t vra, vrb;
7151  vb128_t age0, bge0;
7152  vui128_t vrap, vran;
7153  vui128_t vrbp, vrbn;
7154 
7155  vra = vec_xfer_bin128_2_vui128t (vfa);
7156  vrb = vec_xfer_bin128_2_vui128t (vfb);
7157 
7158  age0 = vec_setb_qp (vfa);
7159  vrap = (vui128_t) vec_xor ((vui32_t) vra, signmask);
7160  vran = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vra);
7161  vra = (vui128_t) vec_sel ((vui32_t)vrap, (vui32_t)vran, (vui32_t)age0);
7162 
7163  bge0 = vec_setb_qp (vfb);
7164  vrbp = (vui128_t) vec_xor ((vui32_t) vrb, signmask);
7165  vrbn = (vui128_t) vec_subuqm ((vui128_t) zero, (vui128_t) vrb);
7166  vrb = (vui128_t) vec_sel ((vui32_t)vrbp, (vui32_t)vrbn, (vui32_t)bge0);
7167 
7168  result = vec_cmpuq_all_lt ((vui128_t) vra, (vui128_t) vrb)
7169  && !vec_all_isunorderedf128 (vfa, vfb);
7170 #endif
7171  return result;
7172 }
7173 
7203 static inline int
7204 vec_cmpqp_all_tone (__binary128 vfa, __binary128 vfb)
7205 {
7206  int result;
7207 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
7208  result= (vfa != vfb);
7209 #else // defined( _ARCH_PWR8 )
7210  vui128_t vra, vrb;
7211  vra = vec_xfer_bin128_2_vui128t (vfa);
7212  vrb = vec_xfer_bin128_2_vui128t (vfb);
7213  result = vec_cmpuq_all_ne ( vra, vrb );
7214 #endif
7215  return result;
7216 }
7217 
7248 static inline int
7249 vec_cmpqp_all_uzne (__binary128 vfa, __binary128 vfb)
7250 {
7251  int result;
7252 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
7253  result = (vfa != vfb);
7254 #else // defined( _ARCH_PWR8 )
7255  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7256  vb128_t or_ab;
7257  vui64_t vra, vrb;
7258 
7259  vra = vec_xfer_bin128_2_vui64t (vfa);
7260  vrb = vec_xfer_bin128_2_vui64t (vfb);
7261 
7262  or_ab = (vb128_t) vec_or ( vra, vrb );
7263  result = vec_cmpuq_all_ne ((vui128_t) or_ab, (vui128_t) signmask)
7264  && vec_cmpuq_all_ne ((vui128_t) vra, (vui128_t)vrb);
7265 #endif
7266  return result;
7267 }
7268 
7300 static inline int
7301 vec_cmpqp_all_ne (__binary128 vfa, __binary128 vfb)
7302 {
7303  int result;
7304 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
7305  result = (vfa != vfb);
7306 #else // defined( _ARCH_PWR8 )
7307  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7308  vb128_t or_ab;
7309  vui64_t vra, vrb;
7310 
7311  vra = vec_xfer_bin128_2_vui64t (vfa);
7312  vrb = vec_xfer_bin128_2_vui64t (vfb);
7313 
7314  or_ab = (vb128_t) vec_or ( vra, vrb );
7315  result = (vec_cmpuq_all_ne ((vui128_t) or_ab, (vui128_t) signmask)
7316  && vec_cmpuq_all_ne ((vui128_t) vra, (vui128_t)vrb))
7317  || vec_all_isunorderedf128 (vfa, vfb);
7318 #endif
7319  return result;
7320 }
7321 
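The compare predicates above return a scalar int, so they drop directly into ordinary C control flow. A minimal usage sketch (hypothetical caller, not part of this header), assuming the vec_cmpqp_all_ne() defined above:

// Usage sketch (hypothetical): only copy when the value actually changed.
static inline __binary128
example_latch_if_changed (__binary128 new_val, __binary128 old_val)
{
  if (vec_cmpqp_all_ne (new_val, old_val))
    old_val = new_val;
  return old_val;
}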
7352 static inline int
7353 vec_cmpqp_exp_eq (__binary128 vfa, __binary128 vfb)
7354 {
7355 #if defined (_ARCH_PWR9) && defined (scalar_cmp_exp_gt) \
7356  && defined (__FLOAT128__) && (__GNUC__ >= 9)
7357  return scalar_cmp_exp_eq (vfa, vfb);
7358 #else
7359  vui32_t vra, vrb;
7360  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
7361 
7362  vra = vec_and_bin128_2_vui32t (vfa, expmask);
7363  vrb = vec_and_bin128_2_vui32t (vfb, expmask);
7364  return vec_cmpuq_all_eq ((vui128_t) vra, (vui128_t) vrb);
7365 #endif
7366 }
7367 
7399 static inline int
7400 vec_cmpqp_exp_gt (__binary128 vfa, __binary128 vfb)
7401 {
7402 #if defined (_ARCH_PWR9) && defined (scalar_cmp_exp_gt) \
7403  && defined (__FLOAT128__) && (__GNUC__ >= 9)
7404  return scalar_cmp_exp_gt (vfa, vfb);
7405 #else
7406  vui32_t vra, vrb;
7407  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
7408 
7409  vra = vec_and_bin128_2_vui32t (vfa, expmask);
7410  vrb = vec_and_bin128_2_vui32t (vfb, expmask);
7411  return vec_cmpuq_all_gt ((vui128_t) vra, (vui128_t) vrb);
7412 #endif
7413 }
7414 
7415 
7446 static inline int
7447 vec_cmpqp_exp_lt (__binary128 vfa, __binary128 vfb)
7448 {
7449 #if defined (_ARCH_PWR9) && defined (scalar_cmp_exp_gt) \
7450  && defined (__FLOAT128__) && (__GNUC__ >= 9)
7451  return scalar_cmp_exp_lt (vfa, vfb);
7452 #else
7453  vui32_t vra, vrb;
7454  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
7455 
7456  vra = vec_and_bin128_2_vui32t (vfa, expmask);
7457  vrb = vec_and_bin128_2_vui32t (vfb, expmask);
7458  return vec_cmpuq_all_lt ((vui128_t) vra, (vui128_t) vrb);
7459 #endif
7460 }
7461 
7493 static inline int
7494 vec_cmpqp_exp_unordered (__binary128 vfa, __binary128 vfb)
7495 {
7496 #if defined (_ARCH_PWR9) && defined (scalar_cmp_exp_gt) \
7497  && defined (__FLOAT128__) && (__GNUC__ >= 9)
7498  return scalar_cmp_exp_unordered (vfa, vfb);
7499 #else
7500  // The masked exponent compare used by exp_eq/gt/lt above can not
7501  // distinguish NaN from Infinity, so it can not implement an
7502  // unordered test. Test the operands for NaN directly instead.
7503  return vec_all_isunorderedf128 (vfa, vfb);
7506 #endif
7507 }
7508 
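The exponent-only compares above never examine the significand, so they make cheap pre-checks. A hedged sketch (hypothetical helper), assuming the vec_cmpqp_exp_lt() and vec_cmpqp_exp_unordered() defined above:

// Usage sketch (hypothetical): return the operand with the larger
// biased exponent; fall back to vfa when either operand is NaN.
static inline __binary128
example_max_by_exp (__binary128 vfa, __binary128 vfb)
{
  if (vec_cmpqp_exp_unordered (vfa, vfb))
    return vfa;
  return vec_cmpqp_exp_lt (vfa, vfb) ? vfb : vfa;
}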
7530 static inline vb128_t
7531 vec_isfinitef128 (__binary128 f128)
7532 {
7533 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
7534  vui32_t result = CONST_VINT128_W (-1, -1, -1, -1);
7535 
7536  if (scalar_test_data_class (f128, 0x70))
7537  result = CONST_VINT128_W (0, 0, 0, 0);
7538 
7539  return (vb128_t)result;
7540 #else
7541  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
7542  vui32_t tmp;
7543  vb128_t tmp2, tmp3;
7544 
7545  tmp = vec_and_bin128_2_vui32t (f128, expmask);
7546  tmp2 = (vb128_t) vec_cmpeq (tmp, expmask);
7547  tmp3 = (vb128_t) vec_splat ((vui32_t) tmp2, VEC_W_H);
7548  return (vb128_t) vec_nor ((vui32_t) tmp3, (vui32_t) tmp3); // vec_not
7549 #endif
7550 }
7551 
7575 static inline int
7576 vec_isinf_signf128 (__binary128 f128)
7577 {
7578  int result;
7579 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
7580  if (scalar_test_data_class (f128, 0x20))
7581  result = 1;
7582  else if (scalar_test_data_class (f128, 0x10))
7583  result = -1;
7584  else
7585  result = 0;
7586 #else
7587  vui32_t tmp, t128;
7588  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7589  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
7590 
7591  t128 = vec_xfer_bin128_2_vui32t (f128);
7592  tmp = vec_andc_bin128_2_vui32t (f128, signmask);
7593 
7594  if (vec_all_eq(tmp, expmask))
7595  {
7596  if (vec_any_gt(t128, expmask))
7597  result = -1;
7598  else
7599  result = 1;
7600  }
7601  else
7602  result = 0;
7603 #endif
7604  return (result);
7605 }
7606 
7625 static inline vb128_t
7626 vec_isinff128 (__binary128 f128)
7627 {
7628 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
7629  vui32_t result = CONST_VINT128_W (0, 0, 0, 0);
7630 
7631  if (scalar_test_data_class (f128, 0x30))
7632  result = CONST_VINT128_W (-1, -1, -1, -1);
7633 
7634  return (vb128_t)result;
7635 #else
7636  vui32_t tmp;
7637  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7638  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
7639 
7640  tmp = vec_andc_bin128_2_vui32t (f128, signmask);
7641  return vec_cmpequq ((vui128_t)tmp , (vui128_t)expmask);
7642 #endif
7643 }
7644 
7665 static inline vb128_t
7666 vec_isnanf128 (__binary128 f128)
7667 {
7668 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
7669  vui32_t result = CONST_VINT128_W (0, 0, 0, 0);
7670 
7671  if (scalar_test_data_class (f128, 0x40))
7672  result = CONST_VINT128_W (-1, -1, -1, -1);
7673 
7674  return (vb128_t)result;
7675 #else
7676  vui32_t tmp;
7677  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7678  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
7679 
7680  tmp = vec_andc_bin128_2_vui32t (f128, signmask);
7681  return vec_cmpgtuq ((vui128_t)tmp , (vui128_t)expmask);
7682 #endif
7683 }
7684 
7704 static inline vb128_t
7705 vec_isnormalf128 (__binary128 f128)
7706 {
7707 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
7708  vui32_t result = CONST_VINT128_W (-1, -1, -1, -1);
7709 
7710  if (scalar_test_data_class (f128, 0x7f))
7711  result = CONST_VINT128_W (0, 0, 0, 0);
7712 
7713  return (vb128_t)result;
7714 #else
7715  vui32_t tmp;
7716  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
7717  const vui32_t vec_zero = CONST_VINT128_W (0, 0, 0, 0);
7718  vb128_t result;
7719 
7720  tmp = vec_and_bin128_2_vui32t (f128, expmask);
7721  result = (vb128_t) vec_nor (vec_cmpeq (tmp, expmask),
7722  vec_cmpeq (tmp, vec_zero));
7723  return (vb128_t) vec_splat ((vui32_t) result, VEC_W_H);
7724 #endif
7725 }
7726 
7746 static inline vb128_t
7747 vec_issubnormalf128 (__binary128 f128)
7748 {
7749 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
7750  vui32_t result = CONST_VINT128_W (0, 0, 0, 0);
7751 
7752  if (scalar_test_data_class (f128, 0x03))
7753  result = CONST_VINT128_W (-1, -1, -1, -1);
7754 
7755  return (vb128_t)result;
7756 #else
7757  vui32_t tmp, tmpz, tmp2;
7758  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7759  const vui32_t vec_zero = CONST_VINT128_W (0, 0, 0, 0);
7760  const vui32_t minnorm = CONST_VINT128_W (0x00010000, 0, 0, 0);
7761 
7762  // Equivalent to vec_absf128 (f128)
7763  tmp = vec_andc_bin128_2_vui32t (f128, signmask);
7764 
7765  tmp2 = (vui32_t) vec_cmpltuq ((vui128_t)tmp, (vui128_t)minnorm);
7766  tmpz = (vui32_t) vec_cmpequq ((vui128_t)tmp, (vui128_t)vec_zero);
7767  return (vb128_t) vec_andc (tmp2, tmpz);
7768 #endif
7769 }
7770 
7792 static inline vb128_t
7793 vec_isunorderedf128 (__binary128 vfa, __binary128 vfb)
7794 {
7795  return (vb128_t) vec_or ((vui32_t) vec_isnanf128 (vfa),
7796  (vui32_t) vec_isnanf128 (vfb));
7797 }
7798 
7818 static inline vb128_t
7819 vec_iszerof128 (__binary128 f128)
7820 {
7821 #if defined (_ARCH_PWR9) && defined (scalar_test_data_class) && defined (__FLOAT128__) && (__GNUC__ > 7)
7822  vui32_t result = CONST_VINT128_W (0, 0, 0, 0);
7823 
7824  if (scalar_test_data_class (f128, 0x0c))
7825  result = CONST_VINT128_W (-1, -1, -1, -1);
7826 
7827  return (vb128_t)result;
7828 #else
7829  vui128_t t128;
7830  const vui64_t vec_zero = CONST_VINT128_DW(0, 0);
7831  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7832 
7833  // Equivalent to vec_absf128 (f128)
7834  t128 = (vui128_t) vec_andc_bin128_2_vui32t (f128, signmask);
7835  return (vb128_t)vec_cmpequq (t128, (vui128_t)vec_zero);
7836 #endif
7837 }
7838 
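The classification functions above return full-width vb128_t masks rather than scalars, so they compose with the VMX logical operations. A minimal sketch (hypothetical helper), assuming the vec_isfinitef128() and vec_iszerof128() defined above:

// Usage sketch (hypothetical): all-ones when the value is finite and
// nonzero, all-zeros otherwise, with no scalar branch.
static inline vb128_t
example_is_finite_nonzero (__binary128 vf)
{
  vb128_t finite  = vec_isfinitef128 (vf);
  vb128_t is_zero = vec_iszerof128 (vf);
  return (vb128_t) vec_andc ((vui32_t) finite, (vui32_t) is_zero);
}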
7853 static inline __binary128
7854 vec_nabsf128 (__binary128 f128)
7855 {
7856  __binary128 result;
7857 #if _ARCH_PWR9
7858  __asm__(
7859  "xsnabsqp %0,%1;\n"
7860  : "=v" (result)
7861  : "v" (f128)
7862  :);
7863 #else
7864  vui32_t tmp;
7865  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7866 
7867  tmp = vec_andc_bin128_2_vui32t (f128, signmask);
7868  result = vec_xfer_vui32t_2_bin128 (tmp);
7869 #endif
7870  return (result);
7871 }
7872 
7885 static inline __binary128
7886 vec_negf128 (__binary128 f128)
7887 {
7888  __binary128 result;
7889 #if defined (_ARCH_PWR9) && (__GNUC__ > 6)
7890 #if defined (__FLOAT128__) && (__GNUC__ > 7)
7891  // Let the compilers generate and optimize code.
7892  result = -f128;
7893 #else
7894  // If the compiler supports _ARCH_PWR9, must support mnemonics.
7895  __asm__(
7896  "xsnegqp %0,%1"
7897  : "=v" (result)
7898  : "v" (f128)
7899  : );
7900 #endif
7901 #else
7902  vui32_t tmp;
7903  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
7904 
7905  tmp = vec_xor_bin128_2_vui32t (f128, signmask);
7906  result = vec_xfer_vui32t_2_bin128 (tmp);
7907 #endif
7908  return (result);
7909 }
7910 
7925 static inline __binary128
7926 vec_self128 (__binary128 vfa, __binary128 vfb, vb128_t mask)
7927 {
7928  return vec_sel_bin128_2_bin128 (vfa, vfb, mask);
7929 }
7930 
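vec_self128() pairs naturally with the mask-returning predicates above for branch-free selection. A minimal sketch (hypothetical helper), assuming vec_isnanf128() and vec_xfer_vui32t_2_bin128() from this header:

// Usage sketch (hypothetical): squash NaN inputs to +0.0; the NaN mask
// selects the zero pattern, otherwise vf passes through unchanged.
static inline __binary128
example_nan_to_zero (__binary128 vf)
{
  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
  __binary128 f_zero = vec_xfer_vui32t_2_bin128 (q_zero);
  return vec_self128 (vf, f_zero, vec_isnanf128 (vf));
}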
7952 static inline vb128_t
7953 vec_setb_qp (__binary128 f128)
7954 {
7955  vb128_t result;
7956 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
7957  __asm__(
7958  "vexpandqm %0,%1"
7959  : "=v" (result)
7960  : "v" (f128)
7961  : );
7962 #elif defined (_ARCH_PWR9) && defined (scalar_test_neg) && (__GNUC__ > 7)
7963  result = (vb128_t) {(__int128) 0};
7964 
7965  if (scalar_test_neg (f128))
7966  result = (vb128_t) {(__int128)-1};
7967 
7968  return (vb128_t)result;
7969 #else
7970  const vui8_t shift = vec_splat_u8 (7);
7971  vui8_t t128 = vec_xfer_bin128_2_vui8t (f128);
7972  vui8_t splat = vec_splat (t128, VEC_BYTE_H);
7973 
7974  result = (vb128_t) vec_sra (splat, shift);
7975 #endif
7976  return result;
7977 }
7978 
7995 static inline int
7996 vec_signbitf128 (__binary128 f128)
7997 {
7998 #if defined (_ARCH_PWR9) && defined (scalar_test_neg) && (__GNUC__ > 7)
7999  return scalar_test_neg (f128);
8000 #else
8001  vui32_t tmp;
8002  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
8003 
8004  tmp = vec_and_bin128_2_vui32t (f128, signmask);
8005  return vec_all_eq(tmp, signmask);
8006 #endif
8007 }
8008 
8034 static inline __binary128
8035 vec_xsaddqpo (__binary128 vfa, __binary128 vfb)
8036 {
8037  __binary128 result;
8038 #if defined (_ARCH_PWR9) && (__GNUC__ > 7)
8039 #if defined (__FLOAT128__) && (__GNUC__ > 8)
8040  // earlier GCC versions generate extra data moves for this.
8041  result = __builtin_addf128_round_to_odd (vfa, vfb);
8042 #else
8043  // No extra data moves here.
8044  __asm__(
8045  "xsaddqpo %0,%1,%2"
8046  : "=v" (result)
8047  : "v" (vfa), "v" (vfb)
8048  : );
8049 #endif
8050  return result;
8051 #else // defined (_ARCH_PWR7)
8052  vui64_t q_exp, a_exp, b_exp, x_exp;
8053  vui128_t q_sig, a_sig, b_sig, p_tmp, p_odd;
8054  vui128_t a_mag, b_mag;
8055  vui32_t q_sign, a_sign, b_sign;
8056  vb128_t a_lt_b;
8057  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
8058  const vui32_t q_ones = CONST_VINT128_W (-1, -1, -1, -1);
8059  const vui32_t magmask = vec_mask128_f128mag();
8060  const vui64_t exp_naninf = vec_mask64_f128exp();
8061  // Vector extract the exponents from vfa, vfb
8062  x_exp = vec_xxxexpqpp (vfa, vfb);
8063  // Mask off sign bits so can use integers for magnitude compare.
8064  a_mag = (vui128_t) vec_and_bin128_2_vui32t (vfa, magmask);
8065  b_mag = (vui128_t) vec_and_bin128_2_vui32t (vfb, magmask);
8066  a_sign = vec_andc_bin128_2_vui32t (vfa, magmask);
8067  b_sign = vec_andc_bin128_2_vui32t (vfb, magmask);
8068 // if (vec_all_isfinitef128 (vfa) && vec_all_isfinitef128 (vfb))
8069 // The above can be optimized to the following
8070  if (__builtin_expect (vec_cmpud_all_lt (x_exp, exp_naninf), 1))
8071  {
8072  const vui128_t xbitmask = vec_splat_u128 (1);
8073  const vui128_t grx_mask = vec_splat_u128 (7);
8074  const vui64_t exp_min = vec_splat_u64 (1);
8075  const vui8_t t_sig_L = vec_splat_u8 (7);
8076  const vui8_t t_sig_C = vec_splat_u8 (15);
8077  const vui64_t exp_one = exp_min;
8078  const vui64_t exp_dnrm = (vui64_t) q_zero;
8079  vui128_t add_sig, sub_sig;
8080  vui128_t s_sig, x_bits;
8081  vui32_t diff_sign;
8082  vui32_t sigmask = vec_mask128_f128sig();
8083  vui32_t hidden = vec_mask128_f128Lbit();
8084  vui32_t a_norm, b_norm, x_norm;
8085  vui32_t a_s32, b_s32;
8086 
8087  // Extract the significand
8088  // Assume that the sign-bit is already masked off
8089  // Mask off the significands
8090  a_s32 = vec_and ((vui32_t) a_mag, sigmask);
8091  b_s32 = vec_and ((vui32_t) b_mag, sigmask);
8092  // Assume that exponents are already extracted and merged
8093  // Compare exponents for denormal, assume finite
8094  x_norm = (vui32_t) vec_cmpgt ((vui32_t) x_exp, q_zero);
8095  a_norm = vec_splat (x_norm, VEC_WE_1);
8096  b_norm = vec_splat (x_norm, VEC_WE_3);
8097  // For Normal QP insert (hidden) L-bit into significand
8098  a_sig = (vui128_t) vec_sel (a_s32, a_norm, hidden);
8099  b_sig = (vui128_t) vec_sel (b_s32, b_norm, hidden);
8100  // Correct exponent for zeros or denormals to E_min
8101  // will force 0 exponents for zero/denormal results later
8102  //exp_mask = vec_cmpequd (x_exp, exp_dnrm);
8103  x_exp = vec_selud ( exp_min, x_exp, (vb64_t) x_norm);
8104  // Generate the sign difference for signed 0.0
8105  q_sign = vec_xor (a_sign, b_sign);
8106  // Precondition the significands before add so the GRX bits
8107  // are in the least significant 3 bits.
8108  a_sig = vec_slqi (a_sig, 3);
8109  b_sig = vec_slqi (b_sig, 3);
8110 
8111  // If sign(vfa) != sign(vfb) will need to:
8112  // 1) Subtract instead of add significands
8113  // 2) Generate signed zeros
8114  diff_sign = (vui32_t) vec_setb_sq ((vi128_t) q_sign);
8115  // If magnitude(b) > magnitude(a) will need to swap a/b, later
8116  a_lt_b = vec_cmpltuq (a_mag, b_mag);
8117 
8118  // Now swap operands a/b if necessary so a has greater magnitude.
8119  {
8120  vui128_t a_tmp = a_sig;
8121  vui128_t b_tmp = b_sig;
8122  vui64_t x_tmp = vec_swapd (x_exp);
8123 
8124  q_sign = vec_sel (a_sign, b_sign, (vui32_t) a_lt_b);
8125 
8126  x_exp = vec_selud (x_exp, x_tmp, (vb64_t) a_lt_b);
8127  a_exp = vec_splatd (x_exp, VEC_DW_H);
8128  b_exp = vec_splatd (x_exp, VEC_DW_L);
8129  q_exp = a_exp;
8130 
8131  a_sig = vec_seluq (a_tmp, b_tmp, (vb128_t) a_lt_b);
8132  b_sig = vec_seluq (b_tmp, a_tmp, (vb128_t) a_lt_b);
8133  }
8134  // At this point we can assume that:
8135  // The magnitude (vfa) >= magnitude (vfb)
8136  // 1) Exponents (a_exp, b_exp) in the range E_min -> E_max
8137  // 2) a_exp >= b_exp
8138  // 2a) If a_exp == b_exp then a_sig >= b_sig
8139  // 2b) If a_exp > b_exp then
8140  // shift (b_sig) right by (a_exp - b_exp)
8141  // any bits shifted out of b_sig are ORed into the X-bit
8142  if (vec_cmpud_all_lt (b_exp, a_exp))
8143  {
8144  vui64_t d_exp, l_exp;
8145  vui128_t t_sig;
8146  vb128_t exp_mask;
8147  const vui64_t exp_128 = vec_const64_f128_128();
8148 
8149  d_exp = vec_subudm (a_exp, b_exp);
8150  exp_mask = (vb128_t) vec_cmpltud (d_exp, exp_128);
8151  l_exp = vec_subudm (exp_128, d_exp);
8152  t_sig = vec_srq (b_sig, (vui128_t) d_exp);
8153  x_bits = vec_slq (b_sig, (vui128_t) l_exp);
8154  t_sig = vec_seluq ((vui128_t) q_zero, t_sig, exp_mask);
8155  x_bits = vec_seluq (b_sig, x_bits, exp_mask);
8156  p_odd = vec_addcuq (x_bits, (vui128_t) q_ones);
8157  b_sig = (vui128_t) vec_or ((vui32_t) t_sig, (vui32_t) p_odd);
8158  }
8159 
8160  // If operands have the same sign then s_sig = a_sig + b_sig
8161  // Otherwise s_sig = a_sig - b_sig
8162  add_sig = vec_adduqm (a_sig, b_sig);
8163  sub_sig = vec_subuqm (a_sig, b_sig);
8164  s_sig = vec_seluq (add_sig, sub_sig, (vb128_t) diff_sign);
8165 
8166  if (__builtin_expect (vec_cmpuq_all_eq (s_sig, (vui128_t) q_zero), 0))
8167  { // Special case of both zero with different sign
8168  q_sign = vec_sel (a_sign, (vui32_t) q_zero, diff_sign);
8169  return vec_xfer_vui32t_2_bin128 (q_sign);
8170  }
8171 
8172  // Isolate the CL bits from the significand to simplify the compare
8173 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
8174  vui8_t t_sig = vec_splat ((vui8_t) s_sig, 14);
8175 #else
8176  vui8_t t_sig = vec_splat ((vui8_t) s_sig, 1);
8177 #endif
8178 // if (vec_cmpuq_all_gt (s_sig, (vui128_t) sigov))
8179  if (vec_all_gt (t_sig, t_sig_C))
8180  { // Check for carry and adjust
8181  p_odd = (vui128_t) vec_and ((vui32_t) s_sig, (vui32_t) xbitmask);
8182  s_sig = vec_srqi (s_sig, 1);
8183  s_sig = (vui128_t) vec_or ((vui32_t) s_sig, (vui32_t) p_odd);
8184  q_exp = vec_addudm (q_exp, exp_one);
8185  }
8186  else // if (vec_cmpuq_all_le (s_sig, (vui128_t) sigovt))
8187  if (vec_all_le (t_sig, t_sig_L))
8188  {
8189  // Or the significand is below normal range.
8190  // This can happen with subtraction.
8191  vui64_t c_exp, d_exp;
8192  vui128_t c_sig;
8193  const vui64_t exp_12 = vec_splat_u64 (12);
8194 
8195  c_sig = vec_clzq (s_sig);
8196  c_exp = vec_splatd ((vui64_t) c_sig, VEC_DW_L);
8197  // The IR has 12 leading zeros that should not affect the shift count.
8198  c_exp = vec_subudm (c_exp, exp_12);
8199  d_exp = vec_subudm (q_exp, (vui64_t) exp_min);
8200  d_exp = vec_minud (c_exp, d_exp);
8201  {
8202  vb64_t nrm_mask = vec_cmpgtsd ((vi64_t) q_exp, (vi64_t) exp_min);
8203  vb64_t exp_mask = vec_cmpgtud (q_exp, c_exp);
8204 
8205  c_sig = vec_slq (s_sig, (vui128_t) d_exp);
8206  q_exp = vec_subudm (q_exp, d_exp);
8207  exp_mask = (vb64_t) vec_and ((vui32_t) exp_mask, (vui32_t) nrm_mask);
8208  q_exp = vec_selud (exp_dnrm, q_exp, exp_mask);
8209  s_sig = vec_seluq (s_sig, c_sig, (vb128_t) nrm_mask);
8210  }
8211  }
8212  // Round to odd from low order GRX-bits
8213  p_tmp = (vui128_t) vec_and ((vui32_t) s_sig, (vui32_t) grx_mask);
8214  p_odd = vec_addcuq (p_tmp, (vui128_t) q_ones);
8215  q_sig = vec_srqi (s_sig, 3);
8216  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, (vui32_t) p_odd);
8217  // Check for exponent overflow -> __FLT128_MAX__
8218  if (__builtin_expect ((vec_cmpud_all_ge ( q_exp, exp_naninf)), 0))
8219  {
8220  // return maximum finite exponent and significand
8221  const vui32_t f128_max = CONST_VINT128_W(0x7ffeffff, -1, -1, -1);
8222  vui32_t f128_smax = vec_or ((vui32_t) f128_max, q_sign);
8223  return vec_xfer_vui32t_2_bin128 (f128_smax);
8224  }
8225  // Merge sign, significand, and exponent into final result
8226  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, q_sign);
8227  result = vec_xsiexpqp (q_sig, q_exp);
8228  return result;
8229  }
8230  else // One or both operands are NaN or Infinity
8231  {
8232  //const vui32_t q_nan = CONST_VINT128_W(0x00008000, 0, 0, 0);
8233  vui32_t q_nan = vec_mask128_f128Qbit ();
8234  // One or both operands are NaN
8235  if (vec_all_isnanf128 (vfa))
8236  {
8237  // vfa is NaN, Convert vfa to QNaN and return
8238  vui32_t vf128 = vec_or_bin128_2_vui32t (vfa, q_nan);
8239  return vec_xfer_vui32t_2_bin128 (vf128);
8240  }
8241  else if (vec_all_isnanf128 (vfb))
8242  {
8243  // vfb is NaN, Convert vfb to QNaN and return
8244  vui32_t vf128 = vec_or_bin128_2_vui32t (vfb, q_nan);
8245  return vec_xfer_vui32t_2_bin128 (vf128);
8246  }
8247  else // Or one or both operands are Infinity
8248  {
8249  a_exp = vec_splatd (x_exp, VEC_DW_H);
8250  // b_exp = vec_splatd (x_exp, VEC_DW_L);
8251  if (vec_cmpud_all_eq (x_exp, exp_naninf)
8252  && vec_cmpud_any_ne ((vui64_t) a_sign, (vui64_t) b_sign))
8253  { // Both operands infinity and opposite sign
8254  // Infinity + Infinity (opposite sign) is Default Quiet NaN
8255  return vec_const_nanf128 ();
8256  }
8257  else
8258  { // Either both operands infinity and same sign
8259  // Or one infinity and one finite
8260  if (vec_cmpud_any_eq (a_exp, exp_naninf))
8261  {
8262  // return infinity
8263  return vfa;
8264  }
8265  else
8266  {
8267  // return infinity
8268  return vfb;
8269  }
8270  }
8271  }
8272  }
8273 #endif
8274  return result;
8275 }
8276 
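Round-to-odd keeps the sticky information in the least significant bit, so an odd-rounded quad-precision intermediate can be rounded again to a narrower format without a double-rounding error. A hedged usage sketch (hypothetical helper), assuming the vec_xscvqpdpo() defined later in this header:

// Usage sketch (hypothetical): quad-precision sum as an intermediate,
// then narrow to vector double; both steps use round-to-odd.
static inline vf64_t
example_add_then_narrow (__binary128 vfa, __binary128 vfb)
{
  __binary128 sum = vec_xsaddqpo (vfa, vfb);
  return vec_xscvqpdpo (sum);
}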
8302 static inline __binary128
8303 vec_xssubqpo (__binary128 vfa, __binary128 vfb)
8304 {
8305  __binary128 result;
8306 #if defined (_ARCH_PWR9) && (__GNUC__ > 7)
8307 #if defined (__FLOAT128__) && (__GNUC__ > 8)
8308  // earlier GCC versions generate extra data moves for this.
8309  result = __builtin_subf128_round_to_odd (vfa, vfb);
8310 #else
8311  // No extra data moves here.
8312  __asm__(
8313  "xssubqpo %0,%1,%2"
8314  : "=v" (result)
8315  : "v" (vfa), "v" (vfb)
8316  : );
8317 #endif
8318  return result;
8319 #else // defined (_ARCH_PWR7)
8320  vui64_t q_exp, a_exp, b_exp, x_exp;
8321  vui128_t q_sig, a_sig, b_sig, p_tmp, p_odd;
8322  vui128_t a_mag, b_mag;
8323  vui32_t q_sign, a_sign, b_sign;
8324  vb128_t a_lt_b;
8325  const vui32_t q_zero = CONST_VINT128_W (0, 0, 0, 0);
8326  const vui32_t q_ones = CONST_VINT128_W (-1, -1, -1, -1);
8327  const vui32_t magmask = vec_mask128_f128mag();
8328  const vui64_t exp_naninf = vec_mask64_f128exp();
8329  // Vector extract the exponents from vfa, vfb
8330  x_exp = vec_xxxexpqpp (vfa, vfb);
8331  // Mask off sign bits so can use integers for magnitude compare.
8332  a_mag = (vui128_t) vec_and_bin128_2_vui32t (vfa, magmask);
8333  b_mag = (vui128_t) vec_and_bin128_2_vui32t (vfb, magmask);
8334  a_sign = vec_andc_bin128_2_vui32t (vfa, magmask);
8335  b_sign = vec_andc_bin128_2_vui32t (vfb, magmask);
8336 // if (vec_all_isfinitef128 (vfa) && vec_all_isfinitef128 (vfb))
8337 // The above can be optimized to the following
8338  if (__builtin_expect (vec_cmpud_all_lt (x_exp, exp_naninf), 1))
8339  {
8340  const vui128_t xbitmask = vec_splat_u128 (1);
8341  const vui128_t grx_mask = vec_splat_u128 (7);
8342  const vui64_t exp_min = vec_splat_u64 (1);
8343  const vui8_t t_sig_L = vec_splat_u8 (7);
8344  const vui8_t t_sig_C = vec_splat_u8 (15);
8345  const vui64_t exp_one = exp_min;
8346  const vui64_t exp_dnrm = (vui64_t) q_zero;
8347  // signmask is the complement of the magmask
8348  const vui32_t signmask = vec_nor(magmask, magmask);
8349  vui128_t add_sig, sub_sig;
8350  vui128_t s_sig, x_bits;
8351  vui32_t diff_sign;
8352  vui32_t sigmask = vec_mask128_f128sig();
8353  vui32_t hidden = vec_mask128_f128Lbit();
8354  vui32_t a_norm, b_norm, x_norm;
8355  vui32_t a_s32, b_s32;
8356 
8357  // Extract the significand
8358  // Assume that the sign-bit is already masked off
8359  // Mask off the significands
8360  a_s32 = vec_and ((vui32_t) a_mag, sigmask);
8361  b_s32 = vec_and ((vui32_t) b_mag, sigmask);
8362  // Assume that exponents are already extracted and merged
8363  // Compare exponents for denormal, assume finite
8364  x_norm = (vui32_t) vec_cmpgt ((vui32_t) x_exp, q_zero);
8365  a_norm = vec_splat (x_norm, VEC_WE_1);
8366  b_norm = vec_splat (x_norm, VEC_WE_3);
8367  // For Normal QP insert (hidden) L-bit into significand
8368  a_sig = (vui128_t) vec_sel (a_s32, a_norm, hidden);
8369  b_sig = (vui128_t) vec_sel (b_s32, b_norm, hidden);
8370  // Correct exponent for zeros or denormals to E_min
8371  // will force 0 exponents for zero/denormal results later
8372  //exp_mask = vec_cmpequd (x_exp, exp_dnrm);
8373  x_exp = vec_selud ( exp_min, x_exp, (vb64_t) x_norm);
8374  // Negate sign for subtract, then use add logic
8375  b_sign = vec_xor (signmask, b_sign);
8376  // Generate the sign difference for signed 0.0
8377  q_sign = vec_xor (a_sign, b_sign);
8378  // Precondition the significands before add so the GRX bits
8379  // are in the least significant 3 bits.
8380  a_sig = vec_slqi (a_sig, 3);
8381  b_sig = vec_slqi (b_sig, 3);
8382 
8383  // If sign(vfa) != sign(vfb) will need to:
8384  // 1) Subtract instead of add significands
8385  // 2) Generate signed zeros
8386  diff_sign = (vui32_t) vec_setb_sq ((vi128_t) q_sign);
8387  // If magnitude(b) > magnitude(a) will need to swap a/b, later
8388  a_lt_b = vec_cmpltuq (a_mag, b_mag);
8389 
8390  // Now swap operands a/b if necessary so a has greater magnitude.
8391  {
8392  vui128_t a_tmp = a_sig;
8393  vui128_t b_tmp = b_sig;
8394  vui64_t x_tmp = vec_swapd (x_exp);
8395 
8396  q_sign = vec_sel (a_sign, b_sign, (vui32_t) a_lt_b);
8397 
8398  x_exp = vec_selud (x_exp, x_tmp, (vb64_t) a_lt_b);
8399  a_exp = vec_splatd (x_exp, VEC_DW_H);
8400  b_exp = vec_splatd (x_exp, VEC_DW_L);
8401  q_exp = a_exp;
8402 
8403  a_sig = vec_seluq (a_tmp, b_tmp, (vb128_t) a_lt_b);
8404  b_sig = vec_seluq (b_tmp, a_tmp, (vb128_t) a_lt_b);
8405  }
8406  // At this point we can assume that:
8407  // The magnitude (vfa) >= magnitude (vfb)
8408  // 1) Exponents (a_exp, b_exp) in the range E_min -> E_max
8409  // 2) a_exp >= b_exp
8410  // 2a) If a_exp == b_exp then a_sig >= b_sig
8411  // 2b) If a_exp > b_exp then
8412  // shift (b_sig) right by (a_exp - b_exp)
8413  // any bits shifted out of b_sig are ORed into the X-bit
8414  if (vec_cmpud_all_lt (b_exp, a_exp))
8415  {
8416  vui64_t d_exp, l_exp;
8417  vui128_t t_sig;
8418  vb128_t exp_mask;
8419  const vui64_t exp_128 = vec_const64_f128_128();
8420 
8421  d_exp = vec_subudm (a_exp, b_exp);
8422  exp_mask = (vb128_t) vec_cmpltud (d_exp, exp_128);
8423  l_exp = vec_subudm (exp_128, d_exp);
8424  t_sig = vec_srq (b_sig, (vui128_t) d_exp);
8425  x_bits = vec_slq (b_sig, (vui128_t) l_exp);
8426  t_sig = vec_seluq ((vui128_t) q_zero, t_sig, exp_mask);
8427  x_bits = vec_seluq (b_sig, x_bits, exp_mask);
8428  p_odd = vec_addcuq (x_bits, (vui128_t) q_ones);
8429  b_sig = (vui128_t) vec_or ((vui32_t) t_sig, (vui32_t) p_odd);
8430  }
8431 
8432  // If operands have the same sign then s_sig = a_sig + b_sig
8433  // Otherwise s_sig = a_sig - b_sig
8434  add_sig = vec_adduqm (a_sig, b_sig);
8435  sub_sig = vec_subuqm (a_sig, b_sig);
8436  s_sig = vec_seluq (add_sig, sub_sig, (vb128_t) diff_sign);
8437 
8438  if (__builtin_expect (vec_cmpuq_all_eq (s_sig, (vui128_t) q_zero), 0))
8439  { // Special case of both zero with different sign
8440  q_sign = vec_sel (a_sign, (vui32_t) q_zero, diff_sign);
8441  return vec_xfer_vui32t_2_bin128 (q_sign);
8442  }
8443 
8444  // Isolate the CL bits from the significand to simplify the compare
8445 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
8446  vui8_t t_sig = vec_splat ((vui8_t) s_sig, 14);
8447 #else
8448  vui8_t t_sig = vec_splat ((vui8_t) s_sig, 1);
8449 #endif
8450 // if (vec_cmpuq_all_gt (s_sig, (vui128_t) sigov))
8451  if (vec_all_gt (t_sig, t_sig_C))
8452  { // Check for carry and adjust
8453  p_odd = (vui128_t) vec_and ((vui32_t) s_sig, (vui32_t) xbitmask);
8454  s_sig = vec_srqi (s_sig, 1);
8455  s_sig = (vui128_t) vec_or ((vui32_t) s_sig, (vui32_t) p_odd);
8456  q_exp = vec_addudm (q_exp, exp_one);
8457  }
8458  else // if (vec_cmpuq_all_le (s_sig, (vui128_t) sigovt))
8459  if (vec_all_le (t_sig, t_sig_L))
8460  {
8461  // Or the significand is below normal range.
8462  // This can happen with subtraction.
8463  vui64_t c_exp, d_exp;
8464  vui128_t c_sig;
8465  const vui64_t exp_12 = vec_splat_u64 (12);
8466 
8467  c_sig = vec_clzq (s_sig);
8468  c_exp = vec_splatd ((vui64_t) c_sig, VEC_DW_L);
8469  // The IR has 12 leading zeros that should not affect the shift count.
8470  c_exp = vec_subudm (c_exp, exp_12);
8471  d_exp = vec_subudm (q_exp, (vui64_t) exp_min);
8472  d_exp = vec_minud (c_exp, d_exp);
8473  {
8474  vb64_t nrm_mask = vec_cmpgtsd ((vi64_t) q_exp, (vi64_t) exp_min);
8475  vb64_t exp_mask = vec_cmpgtud (q_exp, c_exp);
8476 
8477  c_sig = vec_slq (s_sig, (vui128_t) d_exp);
8478  q_exp = vec_subudm (q_exp, d_exp);
8479  exp_mask = (vb64_t) vec_and ((vui32_t) exp_mask, (vui32_t) nrm_mask);
8480  q_exp = vec_selud (exp_dnrm, q_exp, exp_mask);
8481  s_sig = vec_seluq (s_sig, c_sig, (vb128_t) nrm_mask);
8482  }
8483  }
8484  // Round to odd from low order GRX-bits
8485  p_tmp = (vui128_t) vec_and ((vui32_t) s_sig, (vui32_t) grx_mask);
8486  p_odd = vec_addcuq (p_tmp, (vui128_t) q_ones);
8487  q_sig = vec_srqi (s_sig, 3);
8488  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, (vui32_t) p_odd);
8489  // Check for exponent overflow -> __FLT128_MAX__
8490  if (__builtin_expect ((vec_cmpud_all_ge ( q_exp, exp_naninf)), 0))
8491  {
8492  // return maximum finite exponent and significand
8493  const vui32_t f128_max = CONST_VINT128_W(0x7ffeffff, -1, -1, -1);
8494  vui32_t f128_smax = vec_or ((vui32_t) f128_max, q_sign);
8495  return vec_xfer_vui32t_2_bin128 (f128_smax);
8496  }
8497  // Merge sign, significand, and exponent into final result
8498  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, q_sign);
8499  result = vec_xsiexpqp (q_sig, q_exp);
8500  return result;
8501  }
8502  else // One or both operands are NaN or Infinity
8503  {
8504  //const vui32_t q_nan = CONST_VINT128_W(0x00008000, 0, 0, 0);
8505  vui32_t q_nan = vec_mask128_f128Qbit ();
8506  // One or both operands are NaN
8507  if (vec_all_isnanf128 (vfa))
8508  {
8509  // vfa is NaN, Convert vfa to QNaN and return
8510  vui32_t vf128 = vec_or_bin128_2_vui32t (vfa, q_nan);
8511  return vec_xfer_vui32t_2_bin128 (vf128);
8512  }
8513  else if (vec_all_isnanf128 (vfb))
8514  {
8515  // vfb is NaN, Convert vfb to QNaN and return
8516  vui32_t vf128 = vec_or_bin128_2_vui32t (vfb, q_nan);
8517  return vec_xfer_vui32t_2_bin128 (vf128);
8518  }
8519  else // Or one or both operands are Infinity
8520  {
8521  a_exp = vec_splatd (x_exp, VEC_DW_H);
8522  // b_exp = vec_splatd (x_exp, VEC_DW_L);
8523  if (vec_cmpud_all_eq (x_exp, exp_naninf)
8524  && vec_cmpud_all_eq ((vui64_t) a_sign, (vui64_t) b_sign))
8525  { // Both operands infinity and same sign
8526  // Infinity - Infinity (same sign) is Default Quiet NaN
8527  return vec_const_nanf128 ();
8528  }
8529  else
8530  { // Either both operands infinity and opposite sign
8531  // Or one infinity and one finite
8532  if (vec_cmpud_any_eq (a_exp, exp_naninf))
8533  {
8534  // return infinity
8535  return vfa;
8536  }
8537  else
8538  {
8539  // return infinity
8540  return vec_negf128(vfb);
8541  }
8542  }
8543  }
8544  }
8545 #endif
8546  return result;
8547 }
8548 
8572 static inline __binary128 vec_xscvdpqp (vf64_t f64)
8573 {
8574  __binary128 result;
8575 #if defined (_ARCH_PWR9) && (__GNUC__ > 7)
8576 #if defined (__FLOAT128__) && (__GNUC__ > 9)
8577  // earlier GCC versions generate extra data moves for this.
8578  result = f64[VEC_DW_H];
8579 #else
8580  // No extra data moves here.
8581  __asm__(
8582  "xscvdpqp %0,%1"
8583  : "=v" (result)
8584  : "v" (f64)
8585  : );
8586 #endif
8587 #elif defined (_ARCH_PWR8)
8588  vui64_t d_exp, d_sig, q_exp;
8589  vui128_t q_sig;
8590  vui32_t q_sign;
8591  const vui64_t exp_delta = (vui64_t) CONST_VINT64_DW ( (0x3fff - 0x3ff), 0 );
8592  const vui64_t d_naninf = (vui64_t) CONST_VINT64_DW ( 0x7ff, 0 );
8593  const vui64_t d_denorm = (vui64_t) CONST_VINT64_DW ( 0, 0 );
8594  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
8595 
8596 
8597  f64[VEC_DW_L] = 0.0; // clear the right most element to zero.
8598  // Extract the exponent, significand, and sign bit.
8599  d_exp = vec_xvxexpdp (f64);
8600  d_sig = vec_xvxsigdp (f64);
8601  q_sign = vec_and ((vui32_t) f64, signmask);
8602  // The extract sig operation has already tested for finite/subnormal.
8603  // So avoid testing isfinite/issubnormal again by simply testing
8604  // the extracted exponent.
8605  if (__builtin_expect (!vec_cmpud_all_eq (d_exp, d_naninf), 1))
8606  {
8607  if (__builtin_expect (!vec_cmpud_all_eq (d_exp, d_denorm), 1))
8608  {
8609  q_sig = vec_srqi ((vui128_t) d_sig, 4);
8610  q_exp = vec_addudm (d_exp, exp_delta);
8611  }
8612  else
8613  {
8614  if (vec_cmpud_all_eq (d_sig, d_denorm))
8615  {
8616  q_sig = (vui128_t) d_sig;
8617  q_exp = (vui64_t) d_exp;
8618  }
8619  else
8620  { // Must be subnormal but we need to produce a normal QP.
8621  // So need to adjust the quad exponent by the f64 denormal
8622  // exponent (-1023) and any leading '0's in the f64 sig.
8623  // There will be at least 12.
8624  vui64_t q_denorm = (vui64_t) CONST_VINT64_DW ( (0x3fff - (1023 - 12)), 0 );
8625  vui64_t f64_clz;
8626  f64_clz = vec_clzd (d_sig);
8627  d_sig = vec_vsld (d_sig, f64_clz);
8628  q_exp = vec_subudm (q_denorm, f64_clz);
8629  q_sig = vec_srqi ((vui128_t) d_sig, 15);
8630  }
8631  }
8632  }
8633  else
8634  { // isinf or isnan.
8635  q_sig = vec_srqi ((vui128_t) d_sig, 4);
8636  q_exp = (vui64_t) CONST_VINT64_DW (0x7fff, 0);
8637  }
8638  // Copy Sign-bit to QP significand before insert.
8639  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, q_sign);
8640  // Insert exponent into significand to complete conversion to QP
8641  result = vec_xsiexpqp (q_sig, q_exp);
8642 #else
8643  result = f64[VEC_DW_H];
8644 #endif
8645  return result;
8646 }
8647 
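A hedged round-trip sketch (hypothetical helper): widen the high double element to quad precision, operate there, then narrow back with round-to-odd, assuming vec_xsaddqpo() above and vec_xscvqpdpo() below:

// Usage sketch (hypothetical): compute 2*x for the VEC_DW_H double
// element via quad precision.
static inline vf64_t
example_double_via_quad (vf64_t vfd)
{
  __binary128 q = vec_xscvdpqp (vfd);   // converts element VEC_DW_H
  q = vec_xsaddqpo (q, q);              // any quad-precision operation
  return vec_xscvqpdpo (q);             // back to vector double
}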
8674 static inline vf64_t
8675 vec_xscvqpdpo (__binary128 f128)
8676 {
8677  vf64_t result;
8678 #if defined (_ARCH_PWR9) && (__GNUC__ > 7)
8679 #if defined (__FLOAT128__) && (__GNUC__ > 9)
8680  // The GCC runtime does not convert/round directly from __float128 to
8681  // vector double. So convert to scalar double, then copy to vector double.
8682  result = (vf64_t) { 0.0, 0.0 };
8683  result [VEC_DW_H] = __builtin_truncf128_round_to_odd (f128);
8684 #else
8685  // No extra data moves here.
8686  __asm__(
8687  "xscvqpdpo %0,%1"
8688  : "=v" (result)
8689  : "v" (f128)
8690  : );
8691 #endif
8692 #else // defined (_ARCH_PWR8)
8693  vui64_t d_exp, d_sig, x_exp;
8694  vui64_t q_exp;
8695  vui128_t q_sig;
8696  vui32_t q_sign;
8697  const vui128_t q_zero = { 0 };
8698  const vui128_t q_ones = (vui128_t) vec_splat_s32 (-1);
8699  const vui64_t qpdp_delta = (vui64_t) CONST_VINT64_DW ( (0x3fff - 0x3ff), 0 );
8700  const vui64_t exp_tiny = (vui64_t) CONST_VINT64_DW ( (0x3fff - 1022), (0x3fff - 1022) );
8701  const vui64_t exp_high = (vui64_t) CONST_VINT64_DW ( (0x3fff + 1023), (0x3fff + 1023));
8702  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
8703  const vui64_t q_naninf = (vui64_t) CONST_VINT64_DW ( 0x7fff, 0x7fff );
8704  const vui64_t d_naninf = (vui64_t) CONST_VINT64_DW ( 0x7ff, 0 );
8705 
8706  q_exp = vec_xsxexpqp (f128);
8707  x_exp = vec_splatd (q_exp, VEC_DW_H);
8708  q_sig = vec_xsxsigqp (f128);
8709  q_sign = vec_and_bin128_2_vui32t (f128, signmask);
8710  if (__builtin_expect (!vec_cmpud_all_eq (x_exp, q_naninf), 1))
8711  {
8712  if (vec_cmpud_all_ge (x_exp, exp_tiny))
8713  { // Greater than or equal to 2**-1022
8714  if (vec_cmpud_all_le (x_exp, exp_high))
8715  { // Less than or equal to 2**+1023
8716  vui64_t d_X;
8717  // Convert the significand to double with left shift 4
8718  q_sig = vec_slqi ((vui128_t) q_sig, 4);
8719  // The GRX round bits are now in bits 64-127 (DW element 1)
8720  // For round-to-odd just test for any GRX bits nonzero
8721  d_X = (vui64_t) vec_cmpgtud ((vui64_t) q_sig, (vui64_t) q_zero);
8722  d_X = vec_mrgald (q_zero, (vui128_t) d_X);
8723  d_X = (vui64_t) vec_slqi ((vui128_t) d_X, 1);
8724  d_sig = (vui64_t) vec_or ((vui32_t) q_sig, (vui32_t) d_X);
8725  d_exp = vec_subudm (q_exp, qpdp_delta);
8726  }
8727  else
8728  { // Too high to represent as double; round to odd returns __DBL_MAX__
8729  d_sig = (vui64_t) CONST_VINT64_DW (0x001fffffffffffff, 0);
8730  d_exp = (vui64_t) CONST_VINT64_DW (0x7fe, 0);
8731  }
8732  }
8733  else
8734  { // tiny
8735  vui64_t d_X;
8736  vui64_t q_delta;
8737  const vui64_t exp_tinyr = (vui64_t)
8738  CONST_VINT64_DW ( (0x3fff-(1022+53)), (0x3fff-(1022+53)));
8739  q_delta = vec_subudm (exp_tiny, x_exp);
8740  // Set double exp to denormal
8741  d_exp = (vui64_t) q_zero;
8742  if (vec_cmpud_all_gt (x_exp, exp_tinyr))
8743  {
8744  // Convert the significand to double with left shift 4
8745  // The GRX round bits are now in bits 64-127 (DW element 1)
8746  q_sig = vec_slqi ((vui128_t) q_sig, 4);
8747  d_sig = (vui64_t) vec_srq (q_sig, (vui128_t) q_delta);
8748  // For round-to-odd just test for any nonzero GRX bits.
8749  d_X = (vui64_t) vec_cmpgtud ((vui64_t) d_sig, (vui64_t) q_zero);
8750  // Generate a low order 0b1 in DW[0]
8751  d_X = vec_mrgald (q_zero, (vui128_t) d_X);
8752  d_X = (vui64_t) vec_slqi ((vui128_t) d_X, 1);
8753  d_sig = (vui64_t) vec_or ((vui32_t) d_sig, (vui32_t) d_X);
8754  }
8755  else
8756  { // tinyr
8757  // For round-to-odd just test for any nonzero GRX bits.
8758  d_X = (vui64_t) vec_addcuq (q_sig, q_ones);
8759  d_sig = (vui64_t) vec_swapd (d_X);
8760  }
8761  }
8762  }
8763  else
8764  { // isinf or isnan.
8765  const vui64_t q_quiet = CONST_VINT64_DW (0x0000800000000000, 0);
8766  vb128_t is_inf;
8767  vui128_t x_sig;
8768  is_inf = vec_cmpequq ((vui128_t) q_sig, (vui128_t) q_zero);
8769  x_sig = (vui128_t) vec_or ((vui32_t) q_sig, (vui32_t) q_quiet);
8770  q_sig = (vui128_t) vec_sel ((vui32_t)x_sig, (vui32_t)q_sig, (vui32_t)is_inf);
8771  d_sig = (vui64_t)vec_slqi (q_sig, 4);
8772  d_exp = d_naninf;
8773  }
8774 
8775  d_sig [VEC_DW_L] = 0UL;
8776  d_sig = (vui64_t) vec_or ((vui32_t) d_sig, q_sign);
8777  result = vec_xviexpdp (d_sig, d_exp);
8778 #endif
8779  return result;
8780 }
8781 
8806 static inline vui64_t
8807 vec_xscvqpudz (__binary128 f128)
8808 {
8809  vui64_t result;
8810 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
8811  __asm__(
8812  "xscvqpudz %0,%1"
8813  : "=v" (result)
8814  : "v" (f128)
8815  : );
8816 #else
8817  vui64_t q_exp, q_delta, x_exp;
8818  vui128_t q_sig;
8819  vb128_t b_sign;
8820  const vui64_t q_zero = { 0, 0 };
8821  const vui64_t q_ones = { -1, -1 };
8822  const vui64_t exp_low = (vui64_t) CONST_VINT64_DW ( 0x3fff, 0x3fff );
8823  const vui64_t exp_high = (vui64_t) CONST_VINT64_DW ( (0x3fff+64), (0x3fff+64) );
8824  const vui64_t exp_63 = (vui64_t) CONST_VINT64_DW ( (0x3fff+63), (0x3fff+63) );
8825  const vui64_t q_naninf = (vui64_t) CONST_VINT64_DW ( 0x7fff, 0x7fff );
8826 
8827  result = q_zero;
8828  q_exp = vec_xsxexpqp (f128);
8829  q_sig = vec_xsxsigqp (f128);
8830  x_exp = vec_splatd (q_exp, VEC_DW_H);
8831  b_sign = vec_setb_qp (f128);
8832  if (__builtin_expect (!vec_cmpud_all_eq (x_exp, q_naninf), 1))
8833  {
8834  if (vec_cmpud_all_ge (x_exp, exp_low)
8835  && vec_cmpud_all_eq ((vui64_t)b_sign, (vui64_t)q_zero))
8836  { // Greater than or equal to 1.0
8837  if (vec_cmpud_all_lt (x_exp, exp_high))
8838  { // Less than 2**64
8839  q_sig = vec_slqi (q_sig, 15);
8840  q_delta = vec_subudm (exp_63, x_exp);
8841  result = vec_vsrd ((vui64_t) q_sig, q_delta);
8842  }
8843  else
8844  { // set result to 2**64-1
8845  result = q_ones;
8846  }
8847  }
8848  else
8849  { // less than 1.0 or negative
8850  result = q_zero;
8851  }
8852  }
8853  else
8854  { // isinf or isnan.
8855  vb128_t is_inf;
8856  // Positive Inf returns all ones
8857  // else NaN or -Infinity returns zero
8858  is_inf = vec_cmpequq (q_sig, (vui128_t) q_zero);
8859  // result = ~NaN | (pos & Inf) -> Inf & (pos & Inf) -> pos & Inf
8860  result = (vui64_t) vec_andc ((vui32_t) is_inf, (vui32_t) b_sign);
8861  }
8862  result = vec_mrgahd ((vui128_t) result, (vui128_t) q_zero);
8863 #endif
8864  return result;
8865 }
8866 
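A minimal usage sketch (hypothetical helper, using GCC vector element access): the converted value lands in the high doubleword element, with saturation handled by the paths above:

// Usage sketch (hypothetical): truncate toward zero to unsigned 64-bit;
// negative or NaN inputs give 0, +Infinity or values >= 2**64 give all
// ones, per the code above.
static inline unsigned long long
example_qp_to_u64 (__binary128 f128)
{
  vui64_t v = vec_xscvqpudz (f128);
  return v[VEC_DW_H];
}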
8890 static inline vui128_t
8891 vec_xscvqpuqz (__binary128 f128)
8892 {
8893  vui128_t result;
8894 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
8895  __asm__(
8896  "xscvqpuqz %0,%1"
8897  : "=v" (result)
8898  : "v" (f128)
8899  : );
8900 #else
8901  vui64_t q_exp, q_delta, x_exp;
8902  vui128_t q_sig;
8903  vb128_t b_sign;
8904  const vui128_t q_zero = { 0 };
8905  const vui128_t q_ones = (vui128_t) vec_splat_s32 (-1);
8906  const vui64_t exp_low = (vui64_t) CONST_VINT64_DW ( 0x3fff, 0x3fff );
8907  const vui64_t exp_high = (vui64_t) CONST_VINT64_DW ( (0x3fff+128), (0x3fff+128) );
8908  const vui64_t exp_127 = (vui64_t) CONST_VINT64_DW ( (0x3fff+127), (0x3fff+127) );
8909  const vui64_t q_naninf = (vui64_t) CONST_VINT64_DW ( 0x7fff, 0x7fff );
8910 
8911  result = q_zero;
8912  q_exp = vec_xsxexpqp (f128);
8913  q_sig = vec_xsxsigqp (f128);
8914  x_exp = vec_splatd (q_exp, VEC_DW_H);
8915  b_sign = vec_setb_qp (f128);
8916  if (__builtin_expect (!vec_cmpud_all_eq (x_exp, q_naninf), 1))
8917  {
8918  if (vec_cmpud_all_ge (x_exp, exp_low)
8919  && vec_cmpud_all_eq ((vui64_t)b_sign, (vui64_t)q_zero))
8920  { // Greater than or equal to 1.0
8921  if (vec_cmpud_all_lt (x_exp, exp_high))
8922  { // Less than 2**128
8923  q_sig = vec_slqi (q_sig, 15);
8924  q_delta = vec_subudm (exp_127, x_exp);
8925  result = vec_srq (q_sig, (vui128_t) q_delta);
8926  }
8927  else
8928  { // set result to 2**128-1
8929  result = (vui128_t) q_ones;
8930  }
8931  }
8932  else
8933  { // less than 1.0 or negative
8934  result = (vui128_t) q_zero;
8935  }
8936  }
8937  else
8938  { // isinf or isnan.
8939  vb128_t is_inf;
8940  // Positive Inf returns all ones
8941  // else NaN or -Infinity returns zero
8942  is_inf = vec_cmpequq (q_sig, (vui128_t) q_zero);
8943  // result = ~NaN | (pos & Inf) -> Inf & (pos & Inf) -> pos & Inf
8944  result = (vui128_t) vec_andc ((vui32_t) is_inf, (vui32_t) b_sign);
8945  }
8946 #endif
8947  return result;
8948 }
8949 
8974 static inline __binary128 vec_xscvsdqp (vi64_t int64)
8975 {
8976  __binary128 result;
8977 #if defined (_ARCH_PWR9) && (__GNUC__ > 7)
8978 #if defined (__FLOAT128__) && (__GNUC__ > 9)
8979  // earlier GCC versions generate extra data moves for this.
8980  result = int64[VEC_DW_H];
8981 #else
8982  // No extra data moves here.
8983  __asm__(
8984  "xscvsdqp %0,%1"
8985  : "=v" (result)
8986  : "v" (int64)
8987  : );
8988 #endif
8989 #elif defined (_ARCH_PWR8)
8990  vui64_t d_sig, q_exp, d_sign, d_neg;
8991  vui128_t q_sig;
8992  vui32_t q_sign;
8993  const vui64_t d_zero = (vui64_t) CONST_VINT64_DW ( 0, 0 );
8994  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
8995 
8996  int64[VEC_DW_L] = 0UL; // clear the right most element to zero.
8997 
8998  if (vec_cmpud_all_eq ((vui64_t) int64, d_zero))
8999  {
9000  result = vec_xfer_vui64t_2_bin128 (d_zero);
9001  }
9002  else
9003  {
9004  // We need to produce a normal QP, so we treat the integer like a
9005  // denormal, then normalize it.
9006  // Start with the quad exponent bias + 63 then subtract the count of
9007  // leading '0's. The 64-bit magnitude has 1-63 leading '0's
9008  vui64_t q_expm = (vui64_t) CONST_VINT64_DW ((0x3fff + 63), 0 );
9009  vui64_t i64_clz;
9010  // Convert 2s complement to signed magnitude form.
9011  q_sign = vec_and ((vui32_t) int64, signmask);
9012  d_neg = vec_subudm (d_zero, (vui64_t)int64);
9013  d_sign = (vui64_t) vec_cmpequd ((vui64_t) q_sign, (vui64_t) signmask);
9014  d_sig = (vui64_t) vec_sel ((vui32_t) int64, (vui32_t) d_neg, (vui32_t) d_sign);
9015  // Count leading zeros and normalize.
9016  i64_clz = vec_clzd (d_sig);
9017  d_sig = vec_vsld (d_sig, i64_clz);
9018  q_exp = vec_subudm (q_expm, i64_clz);
9019  q_sig = vec_srqi ((vui128_t) d_sig, 15);
9020  // Copy Sign-bit to QP significand before insert.
9021  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, q_sign);
9022  // Insert exponent into significand to complete conversion to QP
9023  result = vec_xsiexpqp (q_sig, q_exp);
9024  }
9025 #else
9026  result = int64[VEC_DW_H];
9027 #endif
9028  return result;
9029 }
9030 
9054 static inline __binary128 vec_xscvudqp (vui64_t int64)
9055 {
9056  __binary128 result;
9057 #if defined (_ARCH_PWR9) && (__GNUC__ > 7)
9058 #if defined (__FLOAT128__) && (__GNUC__ > 9)
9059  // earlier GCC versions generate extra data moves for this.
9060  result = int64[VEC_DW_H];
9061 #else
9062  // No extra data moves here.
9063  __asm__(
9064  "xscvudqp %0,%1"
9065  : "=v" (result)
9066  : "v" (int64)
9067  : );
9068 #endif
9069 #elif defined (_ARCH_PWR8)
9070  vui64_t d_sig, q_exp;
9071  vui128_t q_sig;
9072  const vui64_t d_zero = (vui64_t) CONST_VINT64_DW ( 0, 0 );
9073 
9074  int64[VEC_DW_L] = 0UL; // clear the right most element to zero.
9075  d_sig = int64;
9076  // Quick test for 0UL as this case requires a special exponent.
9077  if (vec_cmpud_all_eq (int64, d_zero))
9078  {
9079  result = vec_xfer_vui64t_2_bin128 (d_zero);
9080  }
9081  else
9082  { // We need to produce a normal QP, so we treat the integer like a
9083  // denormal, then normalize it.
9084  // Start with the quad exponent bias + 63 then subtract the count of
9085  // leading '0's. The 64-bit sig can have 0-63 leading '0's.
9086  const vui64_t q_expm = (vui64_t) CONST_VINT64_DW ((0x3fff + 63), 0 );
9087  vui64_t i64_clz = vec_clzd (int64);
9088  d_sig = vec_vsld (int64, i64_clz);
9089  q_exp = vec_subudm (q_expm, i64_clz);
9090  q_sig = vec_srqi ((vui128_t) d_sig, 15);
9091  // Insert exponent into significand to complete conversion to QP
9092  result = vec_xsiexpqp (q_sig, q_exp);
9093  }
9094 #else
9095  result = int64[VEC_DW_H];
9096 #endif
9097  return result;
9098 }
9099 
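A minimal sketch (hypothetical helper) converting a scalar 64-bit count; the conversion is exact because any 64-bit integer fits in the 113-bit quad-precision significand:

// Usage sketch (hypothetical): place the scalar in the high doubleword
// element expected by vec_xscvudqp(), then convert.
static inline __binary128
example_u64_to_qp (unsigned long long x)
{
  vui64_t v = { 0, 0 };
  v[VEC_DW_H] = x;
  return vec_xscvudqp (v);
}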
9130 static inline __binary128 vec_xscvsqqp (vi128_t int128)
9131 {
9132  __binary128 result;
9133 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
9134  __asm__(
9135  "xscvsqqp %0,%1"
9136  : "=v" (result)
9137  : "v" (int128)
9138  : );
9139 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
9140  __binary128 hi64, lo64, i_sign;
9141  __binary128 two64 = 0x1.0p64;
9142  vui128_t q_sig;
9143  vui32_t q_sign;
9144  vui128_t q_neg;
9145  vb128_t b_sign;
9146  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
9147  // Collect the sign bit of the input value.
9148  q_sign = vec_and ((vui32_t) int128, signmask);
9149  // Convert 2s complement to unsigned magnitude form.
9150  q_neg = (vui128_t) vec_negsq (int128);
9151  b_sign = vec_setb_sq (int128);
9152  q_sig = vec_seluq ((vui128_t) int128, q_neg, b_sign);
9153  // Generate a signed 0.0 to use with vec_copysignf128
9154  i_sign = vec_xfer_vui32t_2_bin128 (q_sign);
9155  // Convert the unsigned int128 magnitude to __binary128
9156  vui64_t int64 = (vui64_t) q_sig;
9157  hi64 = int64[VEC_DW_H];
9158  lo64 = int64[VEC_DW_L];
9159  result = (hi64 * two64) + lo64;
9160  // Copy the __int128's sign into the __binary128 result
9161  result = vec_copysignf128 (i_sign, result);
9162 #elif defined (_ARCH_PWR8)
9163  vui64_t q_exp;
9164  vui128_t q_sig;
9165  vui128_t q_neg;
9166  vui32_t q_sign;
9167  vb128_t b_sign;
9168  const vui128_t q_zero = (vui128_t) { 0 };
9169  const vui32_t lowmask = CONST_VINT128_W ( 0, 0, 0, 1);
9170  const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
9171  // Quick test for 0UL as this case requires a special exponent.
9172  if (vec_cmpuq_all_eq ((vui128_t) int128, q_zero))
9173  {
9174  result = vec_xfer_vui128t_2_bin128 (q_zero);
9175  }
9176  else
9177  { // We need to produce a normal QP, so we treat the integer like a
9178  // denormal, then normalize it.
9179  // Collect the sign bit of the input value.
9180  q_sign = vec_and ((vui32_t) int128, signmask);
9181  // Convert 2s complement to signed magnitude form.
9182  q_neg = (vui128_t) vec_negsq (int128);
9183  b_sign = vec_setb_sq (int128);
9184  q_sig = vec_seluq ((vui128_t) int128, q_neg, b_sign);
9185  // Start with the quad exponent bias + 127 then subtract the count of
9186  // leading '0's. The 128-bit sig can have 0-127 leading '0's.
9187  vui64_t q_expm = (vui64_t) CONST_VINT64_DW (0, (0x3fff + 127));
9188  vui64_t i64_clz = (vui64_t) vec_clzq (q_sig);
9189  q_sig = vec_slq (q_sig, (vui128_t) i64_clz);
9190  q_exp = vec_subudm (q_expm, i64_clz);
9191  // This is the part that might require rounding.
9192 
9193  // The Significand (including the L-bit) is right justified in
9194  // the high-order 113-bits of q_sig.
9195  // The guard, round, and sticky (GRX) bits are in the low-order
9196  // 15 bits.
9197  // The sticky-bits are the last 13 bits and are logically ORed
9198  // (or added to 0x1fff) to produce the X-bit.
9199  //
9200  // For "Round to Nearest Even".
9201  // GRX = 0b001 - 0b011; truncate
9202  // GRX = 0b100 and bit-112 is odd; round up, otherwise truncate
9203  // GRX = 0b100 - 0b111; round up
9204  // We can simplify by copying bit-112 and OR it with bit-X
9205  // Then add 0x3fff to q_sig will generate a carry into bit-112
9206  // if and only if GRX > 0b100 or (GRX == 0b100) && (bit-112 == 1)
9207  const vui32_t RXmask = CONST_VINT128_W ( 0, 0, 0, 0x3fff);
9208  vui128_t q_carry, q_sigc;
9209  vb128_t qcmask;
9210  vui32_t q_odd;
9211  // Isolate bit-112 and OR into GRX bits if q_sig is odd
9212  q_odd = (vui32_t) vec_srhi ((vui16_t)q_sig, 15);
9213  q_odd = vec_and (q_odd, lowmask);
9214  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, q_odd);
9215  // We add 0x3fff to GRX-bits which may carry into low order sig-bit
9216  // This may result in a carry out of bit L into bit-C.
9217  q_carry = vec_addcuq (q_sig, (vui128_t) RXmask);
9218  q_sig = vec_adduqm (q_sig, (vui128_t) RXmask);
9219  // Generate a bool mask from the carry to use in the vsel
9220  qcmask = vec_setb_cyq (q_carry);
9221  // Two cases; 1) We did carry so shift (double) left 112 bits
9222  q_sigc = vec_sldqi (q_carry, q_sig, 112);
9223  // 2) no carry so shift left 15 bits
9224  q_sig = vec_srqi ((vui128_t) q_sig, 15);
9225  // Select which based on carry
9226  q_sig = (vui128_t) vec_sel ((vui32_t) q_sig, (vui32_t) q_sigc, (vui32_t) qcmask);
9227  // Increment the exponent based on the carry
9228  q_exp = vec_addudm (q_exp, (vui64_t) q_carry);
9229 
9230  q_exp = vec_swapd (q_exp);
9231  // Copy Sign-bit to QP significand before insert.
9232  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, q_sign);
9233  result = vec_xsiexpqp (q_sig, q_exp);
9234  }
9235 #else
9236  result = int128[0];
9237 #endif
9238  return result;
9239 }
9240 
9271 static inline __binary128 vec_xscvuqqp (vui128_t int128)
9272 {
9273  __binary128 result;
9274 #if defined (_ARCH_PWR10) && (__GNUC__ >= 10)
9275  __asm__(
9276  "xscvuqqp %0,%1"
9277  : "=v" (result)
9278  : "v" (int128)
9279  : );
9280 #elif defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
9281  vui64_t int64 = (vui64_t) int128;
9282  __binary128 hi64, lo64;
9283  __binary128 two64 = 0x1.0p64;
9284  hi64 = int64[VEC_DW_H];
9285  lo64 = int64[VEC_DW_L];
9286  result = (hi64 * two64) + lo64;
9287 #elif defined (_ARCH_PWR8)
9288  vui64_t q_exp;
9289  vui128_t q_sig;
9290  const vui128_t q_zero = (vui128_t) { 0 };
9291  const vui32_t lowmask = CONST_VINT128_W ( 0, 0, 0, 1);
9292 
9293  q_sig = int128;
9294  // Quick test for 0UL as this case requires a special exponent.
9295  if (vec_cmpuq_all_eq (q_sig, q_zero))
9296  {
9297  result = vec_xfer_vui128t_2_bin128 (q_zero);
9298  }
9299  else
9300  { // We need to produce a normal QP, so we treat the integer like a
9301  // denormal, then normalize it.
9302  // Start with the quad exponent bias + 127 then subtract the count of
9303  // leading '0's. The 128-bit sig can have 0-127 leading '0's.
9304  vui64_t q_expm = (vui64_t) CONST_VINT64_DW (0, (0x3fff + 127));
9305  vui64_t i64_clz = (vui64_t) vec_clzq (q_sig);
9306  q_sig = vec_slq (q_sig, (vui128_t) i64_clz);
9307  q_exp = vec_subudm (q_expm, i64_clz);
9308  // This is the part that might require rounding.
9309  // The Significand (including the L-bit) is right justified in
9310  // the high-order 113-bits of q_sig.
9311  // The guard, round, and sticky (GRX) bits are in the low-order
9312  // 15 bits.
9313  // The sticky-bits are the last 13 bits and are logically ORed
9314  // (or added to 0x1fff) to produce the X-bit.
9315  //
9316  // For "Round to Nearest Even".
9317  // GRX = 0b001 - 0b011; truncate
9318  // GRX = 0b100 and bit-112 is odd; round up, otherwise truncate
9319  // GRX = 0b100 - 0b111; round up
9320  // We can simplify by copying bit-112 and OR it with bit-X
9321  // Then add 0x3fff to q_sig will generate a carry into bit-112
9322  // if and only if GRX > 0b100 or (GRX == 0b100) && (bit-112 == 1)
9323  const vui32_t RXmask = CONST_VINT128_W ( 0, 0, 0, 0x3fff);
9324  vui128_t q_carry, q_sigc;
9325  vb128_t qcmask;
9326  vui32_t q_odd;
9327  // Isolate bit-112 and OR into GRX bits if q_sig is odd
9328  q_odd = (vui32_t) vec_srhi ((vui16_t)q_sig, 15);
9329  q_odd = vec_and (q_odd, lowmask);
9330  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, q_odd);
9331  // We add 0x3fff to GRX-bits which may carry into low order sig-bit
9332  // This may result in a carry out of bit L into bit-C.
9333  q_carry = vec_addcuq (q_sig, (vui128_t) RXmask);
9334  q_sig = vec_adduqm (q_sig, (vui128_t) RXmask);
9335  // Generate a bool mask from the carry to use in the vsel
9336  qcmask = vec_setb_cyq (q_carry);
9337  // Two cases; 1) We did carry so shift (double) left 112 bits
9338  q_sigc = vec_sldqi (q_carry, q_sig, 112);
9339  // 2) no carry so shift left 15 bits
9340  q_sig = vec_srqi ((vui128_t) q_sig, 15);
9341  // Select which based on carry
9342  q_sig = (vui128_t) vec_sel ((vui32_t) q_sig, (vui32_t) q_sigc, (vui32_t) qcmask);
9343  // Increment the exponent based on the carry
9344  q_exp = vec_addudm (q_exp, (vui64_t) q_carry);
9345  q_exp = vec_swapd (q_exp);
9346  result = vec_xsiexpqp (q_sig, q_exp);
9347  }
9348 #else
9349  result = int128[0];
9350 #endif
9351  return result;
9352 }
9353 
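A hedged sketch (hypothetical helper) of a typical producer of full 128-bit magnitudes, assuming vec_muleud() from vec_int128_ppc.h (already included by this header); values wider than 113 significant bits are rounded to nearest even as described above:

// Usage sketch (hypothetical): convert the 128-bit product of two
// unsigned doublewords (even elements) to quad precision.
static inline __binary128
example_prod_to_qp (vui64_t a, vui64_t b)
{
  vui128_t prod = vec_muleud (a, b);
  return vec_xscvuqqp (prod);
}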
9379 static inline __binary128
9380 vec_xsmulqpo (__binary128 vfa, __binary128 vfb)
9381 {
9382  __binary128 result;
9383 #if defined (_ARCH_PWR9) && (__GNUC__ > 7)
9384 #if defined (__FLOAT128__) && (__GNUC__ > 8)
9385  // earlier GCC versions generate extra data moves for this.
9386  result = __builtin_mulf128_round_to_odd (vfa, vfb);
9387 #else
9388  // No extra data moves here.
9389  __asm__(
9390  "xsmulqpo %0,%1,%2"
9391  : "=v" (result)
9392  : "v" (vfa), "v" (vfb)
9393  : );
9394 #endif
9395  return result;
9396 #else //_ARCH_PWR8 or _ARCH_PWR7
9397  vui64_t q_exp, a_exp, b_exp, x_exp;
9398  vui128_t q_sig, a_sig, b_sig, p_sig_h, p_sig_l, p_odd;
9399  vui32_t q_sign, a_sign, b_sign;
9400  vui128_t a_mag, b_mag;
9401  const vui32_t q_zero = CONST_VINT128_W(0, 0, 0, 0);
9402  const vui32_t q_ones = CONST_VINT128_W(-1, -1, -1, -1);
9403  //const vui64_t exp_naninf = (vui64_t) { 0x7fff, 0x7fff };
9404  const vui64_t exp_naninf = vec_mask64_f128exp ();
9405  const vui32_t magmask = vec_mask128_f128mag ();
9406 
9407  // Vector extract the exponents from vfa, vfb
9408  x_exp = vec_xxxexpqpp (vfa, vfb);
9409  // Mask off sign bits so can use integers for magnitude compare.
9410  a_mag = (vui128_t) vec_and_bin128_2_vui32t (vfa, magmask);
9411  b_mag = (vui128_t) vec_and_bin128_2_vui32t (vfb, magmask);
9412  a_sign = vec_andc_bin128_2_vui32t (vfa, magmask);
9413  b_sign = vec_andc_bin128_2_vui32t (vfb, magmask);
9414  q_sign = vec_xor (a_sign, b_sign);
9415 
9416 // if (vec_all_isfinitef128 (vfa) && vec_all_isfinitef128 (vfb))
9417  if (__builtin_expect (vec_cmpud_all_lt (x_exp, exp_naninf), 1))
9418  {
9419  const vui64_t exp_dnrm = (vui64_t) q_zero;
9420  vui64_t exp_min, exp_one, exp_bias;
9421  vui128_t p_tmp;
9422  // const vui64_t exp_min, exp_one = { 1, 1 };
9423  // exp_min = exp_one = vec_splat_u64 (1);
9424  { // Extract the significands and insert the Hidden bit
9425  //const vui32_t q_zero = CONST_VINT128_W(0, 0, 0, 0);
9426  const vui32_t sigmask = vec_mask128_f128sig ();
9427  vui32_t a_s32, b_s32;
9428  vui16_t a_e16, b_e16, x_hidden;
9429  vb16_t a_norm, b_norm;
9430 
9431  //const vui32_t hidden = vec_mask128_f128Lbit();
9432  x_hidden = vec_splat_u16(1);
9433  // Assume that the operands are finite magnitudes
9434  // Mask off the significands
9435  // Applying sigmask to original inputs can save 2 cycles here
9436  a_s32 = vec_and_bin128_2_vui32t (vfa, sigmask);
9437  b_s32 = vec_and_bin128_2_vui32t (vfb, sigmask);
9438  // But still need a/b_mag for exp extract to clear sign-bit
9439  // Mask off the exponents in high halfword
9440  a_e16 = (vui16_t) vec_andc ((vui32_t) a_mag, sigmask);
9441  b_e16 = (vui16_t) vec_andc ((vui32_t) b_mag, sigmask);
9442  // Compare exponents for finite i.e. > denormal (q_zero)
9443  a_norm = vec_cmpgt (a_e16, (vui16_t) q_zero);
9444  b_norm = vec_cmpgt (b_e16, (vui16_t) q_zero);
9445  // For Normal QP insert (hidden) L-bit into significand
9446  a_sig = (vui128_t) vec_sel ((vui16_t) a_s32, x_hidden, a_norm);
9447  b_sig = (vui128_t) vec_sel ((vui16_t) b_s32, x_hidden, b_norm);
9448  }
9449 
9450  // Precondition the significands before multiply so that the
9451  // high-order 114-bits (C,L,FRACTION) of the product are right
9452  // adjusted in p_sig_h, and the low-order 112-bits are left
9453  // justified in p_sig_l.
9454  // Logically this (multiply) step could be moved after the zero
9455  // test. But this uses a lot of registers and the compiler may
9456  // see this as register pressure and decide to spill and reload
9457  // unrelated data around this block.
9458  // The zero multiply is rare so on average performance is better
9459  // if we get this started now.
9460  a_sig = vec_slqi (a_sig, 8);
9461  b_sig = vec_slqi (b_sig, 8);
9462  p_sig_l = vec_muludq (&p_sig_h, a_sig, b_sig);
9463 
9464  // check for zero significands in multiply
9465  if (__builtin_expect (
9466  (vec_all_eq((vui32_t ) a_sig, (vui32_t ) q_zero)
9467  || vec_all_eq((vui32_t ) b_sig, (vui32_t ) q_zero)),
9468  0))
9469  { // Multiply by zero, return QP signed zero
9470  result = vec_xfer_vui32t_2_bin128 (q_sign);
9471  return result;
9472  }
9473 
9474  // const vui64_t exp_min, exp_one = { 1, 1 };
9475  exp_min = exp_one = vec_splat_u64 (1);
9476  //const vui64_t exp_bias = (vui64_t) { 0x3fff, 0x3fff };
9477  exp_bias = (vui64_t) vec_srhi ((vui16_t) exp_naninf, 1);
9478  { // Compute product exponent q_exp
9479  // Operand exponents should be >= Emin for computation
9480  vb64_t exp_mask;
9481  exp_mask = vec_cmpequd (x_exp, exp_dnrm);
9482  x_exp = vec_selud (x_exp, exp_min, (vb64_t) exp_mask);
9483  // sum exponents across x_exp
9484  q_exp = vec_addudm (x_exp, vec_swapd (x_exp));
9485  // Sum includes 2 x exp_bias, so subtract 1 x exp_bias
9486  q_exp = vec_subudm (q_exp, exp_bias);
9487  }
9488 
9489  // Check for carry; shift right 1 and adjust exp +1
9490  {
9491  vb128_t carry_mask;
9492  vui128_t sig_h, sig_l;
9493  // Test Carry-bit (greater than L-bit)
9494  vui16_t sig_l_mask = vec_splat_u16(1);
9495  vui16_t t_sig = vec_splat ((vui16_t) p_sig_h, VEC_HW_H);
9496  carry_mask = (vb128_t) vec_cmpgt (t_sig, sig_l_mask);
9497  // Shift double quadword right 1 bit
9498  p_tmp = vec_sldqi (p_sig_h, p_sig_l, 120);
9499  sig_h = vec_srqi (p_sig_h, 1);
9500  sig_l = vec_slqi (p_tmp, 7);
9501  // Increment the exponent
9502  x_exp = vec_addudm (q_exp, exp_one);
9503  // Select original or normalized exp/sig
9504  p_sig_h = vec_seluq (p_sig_h, sig_h, carry_mask);
9505  p_sig_l = vec_seluq (p_sig_l, sig_l, carry_mask);
9506  q_exp = vec_selud (q_exp, x_exp, (vb64_t) carry_mask);
9507  }
9508 
9509  // There are two cases for denormal
9510  // 1) The sum of unbiased exponents is less than E_min (tiny).
9511  // 2) The significand is less than 1.0 (C and L-bits are zero).
9512  // 2a) The exponent is > E_min
9513  // 2b) The exponent is == E_min
9514  //
9515  q_sig = p_sig_h;
9516  // Check for Tiny exponent
9517  if (__builtin_expect (
9518  (vec_cmpsd_all_lt ((vi64_t) q_exp, (vi64_t) exp_min)), 0))
9519  {
9520  //const vui64_t exp_128 = (vui64_t) { 128, 128 };
9521  const vui64_t exp_128 = vec_const64_f128_128 ();
9522  const vui64_t too_tiny = (vui64_t) { 116, 116 };
9525  // const vui32_t xmask = CONST_VINT128_W(0x1fffffff, -1, -1, -1);
9526  vui32_t xmask = (vui32_t) vec_srqi ((vui128_t) q_ones, 3);
9527  vui32_t tmp;
9528 
9529  // Intermediate result is tiny, unbiased exponent < -16382
9530  //x_exp = vec_subudm ((vui64_t) exp_tiny, q_exp);
9531  x_exp = vec_subudm (exp_min, q_exp);
9532 
9533  if (vec_cmpud_all_gt ((vui64_t) x_exp, too_tiny))
9534  {
9535  // Intermediate result is too tiny, the shift will
9536  // zero the fraction and the GR-bit leaving only the
9537  // Sticky bit. The X-bit needs to include all bits
9538  // from p_sig_h and p_sig_l
9539  p_sig_l = vec_srqi (p_sig_l, 8);
9540  p_sig_l = (vui128_t) vec_or ((vui32_t) p_sig_l,
9541  (vui32_t) p_sig_h);
9542  // generate a carry into bit-2 for any nonzero bits 3-127
9543  p_sig_l = vec_adduqm (p_sig_l, (vui128_t) xmask);
9544  q_sig = (vui128_t) q_zero;
9545  p_sig_l = (vui128_t) vec_andc ((vui32_t) p_sig_l, xmask);
9546  }
9547  else
9548  { // Normal tiny, right shift may lose low order bits
9549  // from p_sig_l. So collect any 1-bits below GRX and
9550  // OR them into the X-bit, before the right shift.
9551  vui64_t l_exp;
9552 
9553  // Propagate low order bits into the sticky bit
9554  // GRX left adjusted in p_sig_l
9555  // Isolate bits below GRX (bits 3-127).
9556  tmp = vec_and ((vui32_t) p_sig_l, xmask);
9557  // generate a carry into bit-2 for any nonzero bits 3-127
9558  tmp = (vui32_t) vec_adduqm ((vui128_t) tmp, (vui128_t) xmask);
9559  // Or this with the X-bit to propagate any sticky bits into X
9560  p_sig_l = (vui128_t) vec_or ((vui32_t) p_sig_l, tmp);
9561  p_sig_l = (vui128_t) vec_andc ((vui32_t) p_sig_l, xmask);
9562 
9563  l_exp = vec_subudm (exp_128, x_exp);
9564  p_sig_l = vec_sldq (p_sig_h, p_sig_l, (vui128_t) l_exp);
9565  p_sig_h = vec_srq (p_sig_h, (vui128_t) x_exp);
9566  q_sig = p_sig_h;
9567  }
9568  // Set the exponent for denormal
9569  q_exp = exp_dnrm;
9570  }
9571  // Exponent is not tiny but significand may be denormal
9572  // Isolate sig CL bits and compare
9573  vui16_t t_sig = vec_splat ((vui16_t) p_sig_h, VEC_HW_H);
9574  if (__builtin_expect ((vec_all_eq(t_sig, (vui16_t ) q_zero)), 0))
9575  {
9576  // Is below normal range. This can happen when
9577  // multiplying a denormal by a normal.
9578  // So try to normalize the significand.
9579  //const vui64_t exp_15 = { 15, 15 };
9580  const vui64_t exp_15 = vec_splat_u64 (15);
9581  vui64_t c_exp, d_exp;
9582  vui128_t c_sig;
9583  vb64_t exp_mask;
9584  c_sig = vec_clzq (p_sig_h);
9585  c_exp = vec_splatd ((vui64_t) c_sig, VEC_DW_L);
9586  c_exp = vec_subudm (c_exp, exp_15);
9587  d_exp = vec_subudm (q_exp, exp_min);
9588  d_exp = vec_minud (c_exp, d_exp);
9589  exp_mask = vec_cmpgtud (q_exp, c_exp);
9590 
9591  // If the exponent is still above E_min there is room to normalize
9592  if (vec_cmpsd_all_gt ((vi64_t) q_exp, (vi64_t) exp_min))
9593  {
9594  // Try to normalize the significand.
9595  p_sig_h = vec_sldq (p_sig_h, p_sig_l, (vui128_t) d_exp);
9596  p_sig_l = vec_slq (p_sig_l, (vui128_t) d_exp);
9597  q_sig = p_sig_h;
9598  // Compare computed exp to shift count to normalize.
9599  //exp_mask = vec_cmpgtud (q_exp, c_exp);
9600  q_exp = vec_subudm (q_exp, d_exp);
9601  q_exp = vec_selud (exp_dnrm, q_exp, exp_mask);
9602  }
9603  else
9604  { // sig is denormal range (L-bit is 0). Set exp to zero.
9605  q_exp = exp_dnrm;
9606  }
9607  }
9608  // Merging the sign early will not affect rounding for this mode
9609  // q_ssig = vec_or ((vui32_t) q_sig, q_sign);
9610  // Round to odd from lower product bits
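  // (Adding q_ones (all ones) to p_sig_l carries out a 1 if and only if
  //  p_sig_l is nonzero, so p_odd captures the sticky state of all the
  //  discarded product bits; OR-ing it into the least-significant bit
  //  of q_sig implements the round-to-odd "jam".)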
9611  p_odd = vec_addcuq (p_sig_l, (vui128_t) q_ones);
9612  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, (vui32_t) p_odd);
9613 
9614  // Check for exponent overflow -> __FLT128_MAX__ (round to odd)
9615  if (__builtin_expect ((vec_cmpud_all_ge (q_exp, exp_naninf)), 0))
9616  {
9617  // Intermediate result is huge, unbiased exponent > 16383
9618  // so return __FLT128_MAX__ with the appropriate sign.
9619  const vui32_t f128_max = CONST_VINT128_W(0x7ffeffff, -1, -1, -1);
9620  vui32_t f128_smax = vec_or ((vui32_t) f128_max, q_sign);
9621  return vec_xfer_vui32t_2_bin128 (f128_smax);
9622  }
9623  else // combine sign, exp, and significand for return
9624  {
9625  // Merge sign, significand, and exponent into final result
9626  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, q_sign);
9627  vui32_t tmp, t128;
9628  // convert DW exp_naninf to QW expmask
9629  vui32_t expmask = vec_sld ((vui32_t) exp_naninf, q_zero, 14);
9630  // convert q_exp from DW to QW for QP format
9631  tmp = vec_sld ((vui32_t) q_exp, q_zero, 14);
9632  t128 = vec_sel ((vui32_t) q_sig, tmp, expmask);
9633  result = vec_xfer_vui32t_2_bin128 (t128);
9634  return result;
9635  }
9636  }
9637  else
9638  { // One or both operands are NaN or Infinity
9639  //const vui32_t q_nan = CONST_VINT128_W(0x00008000, 0, 0, 0);
9640  vui32_t q_nan = vec_mask128_f128Qbit ();
9641  vui32_t q_inf = vec_mask128_f128exp ();
9642  // One or both operands are NaN
9643  if (vec_all_isnanf128 (vfa))
9644  {
9645  // vfa is NaN, Convert vfa to QNaN and return
9646  vui32_t vf128 = vec_or_bin128_2_vui32t (vfa, q_nan);
9647  return vec_xfer_vui32t_2_bin128 (vf128);
9648  }
9649  else if (vec_all_isnanf128 (vfb))
9650  {
9651  // vfb is NaN, Convert vfb to QNaN and return
9652  vui32_t vf128 = vec_or_bin128_2_vui32t (vfb, q_nan);
9653  return vec_xfer_vui32t_2_bin128 (vf128);
9654  }
9655  else // Or one or both operands are Infinity
9656  {
9657  if (vec_cmpud_all_eq (x_exp, (vui64_t) exp_naninf))
9658  {
9659  // Infinity x Infinity == signed Infinity
9660  q_sig = (vui128_t) q_inf;
9661  }
9662  else
9663  {
9664  // One Infinity and one finite value; check for 0.0
9665  if (vec_cmpuq_all_eq (a_mag, (vui128_t) q_zero)
9666  || vec_cmpuq_all_eq (b_mag, (vui128_t) q_zero))
9667  {
9668  // Infinity x Zero is the default Quiet NaN
9669  return vec_const_nanf128 ();
9670  }
9671  else // an Infinity and a Nonzero finite number
9672  {
9673  // Return Infinity with product sign.
9674  q_sig = (vui128_t) q_inf;
9675  }
9676  }
9677  // Merge sign, exp/sig into final result
9678  q_sig = (vui128_t) vec_or ((vui32_t) q_sig, q_sign);
9679  return vec_xfer_vui128t_2_bin128 (q_sig);
9680  }
9681  }
9682 #endif
9683  return result;
9684 }
9685 
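// Illustrative usage sketch (not part of vec_f128_ppc.h): a thin wrapper
// around vec_xsmulqpo() above. Round-to-odd is typically used for
// intermediate results that will be rounded again later, since it avoids
// double rounding. The example_* name is hypothetical.
static inline __binary128
example_mul_qpo (__binary128 vfa, __binary128 vfb)
{
  // POWER9 compiles this to xsmulqpo; POWER7/8 use the soft-float path above.
  return vec_xsmulqpo (vfa, vfb);
}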
9715 static inline __binary128
9716 vec_xsiexpqp (vui128_t sig, vui64_t exp)
9717 {
9718  __binary128 result;
9719 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
9720  __asm__(
9721  "xsiexpqp %0,%1,%2"
9722  : "=v" (result)
9723  : "v" (sig), "v" (exp)
9724  : );
9725 
9726 #else
9727  vui32_t tmp, t128;
9728  vui32_t expmask = vec_mask128_f128exp();
9729 
9730  tmp = vec_sld ((vui32_t) exp, (vui32_t) exp, 6);
9731  t128 = vec_sel ((vui32_t) sig, tmp, expmask);
9732  result = vec_xfer_vui32t_2_bin128 (t128);
9733 #endif
9734  return result;
9735 }
9736 
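// Illustrative usage sketch (not part of vec_f128_ppc.h): build the
// quad-precision value 2.0 from an explicit significand and biased
// exponent with vec_xsiexpqp(). The example_* name is hypothetical.
static inline __binary128
example_qp_two (void)
{
  // Significand 1.0: L-bit (0x0001 in the top halfword), zero fraction.
  const vui128_t sig = (vui128_t) CONST_VINT128_DW (0x0001000000000000UL, 0);
  // Biased exponent for 2.0 is 0x4000 (bias 0x3fff plus 1).
  const vui64_t exp = CONST_VINT64_DW (0x4000, 0x4000);
  return vec_xsiexpqp (sig, exp);
}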
9764 static inline vui64_t
9765 vec_xsxexpqp (__binary128 f128)
9766 {
9767  vui64_t result;
9768 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
9769  __asm__(
9770  "xsxexpqp %0,%1"
9771  : "=v" (result)
9772  : "v" (f128)
9773  : );
9774 
9775 #else
9776  vui32_t tmp;
9777  vui32_t expmask = vec_mask128_f128exp();
9778 
9779  tmp = vec_and_bin128_2_vui32t (f128, expmask);
9780  result = (vui64_t) vec_sld (tmp, tmp, 10);
9781 #endif
9782  return result;
9783 }
9784 
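// Illustrative usage sketch (not part of vec_f128_ppc.h): remove the
// binary128 exponent bias (0x3fff) from the extracted exponent, deriving
// the bias the same way vec_xsmulqpo() derives exp_bias above. Only the
// high doubleword of the result is meaningful. The example_* name is
// hypothetical.
static inline vi64_t
example_qp_unbiased_exp (__binary128 f128)
{
  const vui64_t exp_naninf = vec_mask64_f128exp (); // {0x7fff, 0x7fff}
  const vui64_t exp_bias = (vui64_t) vec_srhi ((vui16_t) exp_naninf, 1); // {0x3fff, 0x3fff}
  vui64_t x_exp = vec_xsxexpqp (f128);
  return (vi64_t) vec_subudm (x_exp, exp_bias);
}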
9813 static inline vui128_t
9814 vec_xsxsigqp (__binary128 f128)
9815 {
9816  vui128_t result;
9817 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
9818  __asm__(
9819  "xsxsigqp %0,%1"
9820  : "=v" (result)
9821  : "v" (f128)
9822  : );
9823 #else
9824  vui32_t t128, tmp, normal;
9825  const vui32_t zero = CONST_VINT128_W (0, 0, 0, 0);
9826 #if 1
9827  const vui32_t sigmask = vec_mask128_f128sig();
9828  const vui32_t expmask = vec_mask128_f128exp();
9829  const vui32_t hidden = vec_mask128_f128Lbit();
9830 #else
9831  const vui32_t sigmask = CONST_VINT128_W (0x0000ffff, -1, -1, -1);
9832  const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);
9833  const vui32_t hidden = CONST_VINT128_W (0x00010000, 0, 0, 0);
9834 #endif
9835 
9836  // Check if f128 is normal. Normal values need the hidden bit
9837  // restored to the significand. We use a simpler sequence here as
9838  // vec_isnormalf128 does more than we need.
9839  tmp = vec_and_bin128_2_vui32t (f128, expmask);
9840  normal = (vui32_t) vec_nor (vec_cmpeq (tmp, expmask),
9841  vec_cmpeq (tmp, zero));
9842  t128 = vec_and_bin128_2_vui32t (f128, sigmask);
9843  result = (vui128_t) vec_sel (t128, normal, hidden);
9844 #endif
9845  return result;
9846 }
9847 
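// Illustrative usage sketch (not part of vec_f128_ppc.h): decompose a
// quad-precision value with vec_xsxexpqp()/vec_xsxsigqp() and reassemble
// it with vec_xsiexpqp(). For finite values this reproduces the magnitude;
// the sign is cleared because vec_xsxsigqp() masks it off. The example_*
// name is hypothetical.
static inline __binary128
example_qp_round_trip (__binary128 f128)
{
  vui64_t exp = vec_xsxexpqp (f128);   // biased exponent, high doubleword
  vui128_t sig = vec_xsxsigqp (f128);  // significand with L-bit restored
  return vec_xsiexpqp (sig, exp);
}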
9870 static inline vui64_t
9871 vec_xxxexpqpp (__binary128 vfa, __binary128 vfb)
9872 {
9873  vui64_t result;
9874 #if defined (_ARCH_PWR9) && defined (__FLOAT128__) && (__GNUC__ > 7)
9875  vui64_t exp_a, exp_b;
9876  __asm__(
9877  "xsxexpqp %0,%2;"
9878  "xsxexpqp %1,%3"
9879  : "=v" (exp_a), "=v" (exp_b)
9880  : "v" (vfa), "v" (vfb)
9881  : );
9882  result = vec_mrgahd ((vui128_t) exp_a, (vui128_t) exp_b);
9883 #else
9884  vui32_t tmp, rtmp, exp_mask;
9885  //const vui32_t expmask = CONST_VINT128_W (0, 0x7fff, 0, 0x7fff);
9886  exp_mask = (vui32_t) vec_mask64_f128exp();
9887  tmp = (vui32_t) vec_mrgh_bin128_2_vui64t (vfa, vfb);
9888  rtmp = vec_sld (tmp, tmp, 10);
9889  result = (vui64_t) vec_and (rtmp, exp_mask);
9890 #endif
9891  return result;
9892 }
9893 
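// Illustrative usage sketch (not part of vec_f128_ppc.h): use the exponent
// pair to test that both operands are finite, mirroring the check at the
// start of vec_xsmulqpo() above. The example_* name is hypothetical.
static inline int
example_both_finite (__binary128 vfa, __binary128 vfb)
{
  const vui64_t exp_naninf = vec_mask64_f128exp (); // {0x7fff, 0x7fff}
  vui64_t x_exp = vec_xxxexpqpp (vfa, vfb);
  // True only if both biased exponents are below the NaN/Infinity encoding.
  return vec_cmpud_all_lt (x_exp, exp_naninf);
}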
9894 #endif /* VEC_F128_PPC_H_ */
vec_cmpltuq
static vb128_t vec_cmpltuq(vui128_t vra, vui128_t vrb)
Vector Compare Less Than Unsigned Quadword.
Definition: vec_int128_ppc.h:3406
vec_absf128
static __binary128 vec_absf128(__binary128 f128)
Absolute Quad-Precision.
Definition: vec_f128_ppc.h:4650
vec_cmpqp_all_uzgt
static int vec_cmpqp_all_uzgt(__binary128 vfa, __binary128 vfb)
Vector Compare all Greater Than (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6699
vec_isinff128
static vb128_t vec_isinff128(__binary128 f128)
Return a 128-bit vector boolean true if the __float128 value is infinity.
Definition: vec_f128_ppc.h:7626
vec_cmpqp_exp_lt
static int vec_cmpqp_exp_lt(__binary128 vfa, __binary128 vfb)
Vector Compare Exponents Quad-Precision for Less Than.
Definition: vec_f128_ppc.h:7447
VEC_W_H
#define VEC_W_H
Element index for highest order word.
Definition: vec_common_ppc.h:326
vec_cmplesq
static vb128_t vec_cmplesq(vi128_t vra, vi128_t vrb)
Vector Compare Less Than or Equal Signed Quadword.
Definition: vec_int128_ppc.h:3267
vec_cmpud_all_le
static int vec_cmpud_all_le(vui64_t a, vui64_t b)
Vector Compare all Less than equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:2287
vec_cmpqp_all_uzne
static int vec_cmpqp_all_uzne(__binary128 vfa, __binary128 vfb)
Vector Compare all Not-Equal (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:7249
vec_cmpgtuq
static vb128_t vec_cmpgtuq(vui128_t vra, vui128_t vrb)
Vector Compare Greater Than Unsigned Quadword.
Definition: vec_int128_ppc.h:3227
vec_xfer_vui32t_2_bin128
static __binary128 vec_xfer_vui32t_2_bin128(vui32_t f128)
Transfer a vector unsigned int to __binary128 scalar.
Definition: vec_f128_ppc.h:4580
vec_subuqm
static vui128_t vec_subuqm(vui128_t vra, vui128_t vrb)
Vector Subtract Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:7439
vec_negsq
static vi128_t vec_negsq(vi128_t int128)
Vector Negate Signed Quadword.
Definition: vec_int128_ppc.h:6234
vec_cmpuq_all_lt
static int vec_cmpuq_all_lt(vui128_t vra, vui128_t vrb)
Vector Compare any Less Than Unsigned Quadword.
Definition: vec_int128_ppc.h:3980
vec_cmpqp_all_gt
static int vec_cmpqp_all_gt(__binary128 vfa, __binary128 vfb)
Vector Compare all Greater Than (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6763
vec_issubnormalf128
static vb128_t vec_issubnormalf128(__binary128 f128)
Return 128-bit vector boolean true value, if the __float128 value is subnormal (denormal).
Definition: vec_f128_ppc.h:7747
vb32_t
__vector __bool int vb32_t
vector of 32-bit bool int elements.
Definition: vec_common_ppc.h:228
vec_mask128_f128Qbit
static vui32_t vec_mask128_f128Qbit(void)
Generate Quadword QNaN-bit mask Immediate.
Definition: vec_f128_ppc.h:3964
vec_xvxexpdp
static vui64_t vec_xvxexpdp(vf64_t vrb)
Vector Extract Exponent Double-Precision.
Definition: vec_f64_ppc.h:1713
vec_cmpgtsq
static vb128_t vec_cmpgtsq(vi128_t vra, vi128_t vrb)
Vector Compare Greater Than Signed Quadword.
Definition: vec_int128_ppc.h:3178
VEC_BYTE_H
#define VEC_BYTE_H
Element index for highest order byte.
Definition: vec_common_ppc.h:350
vec_xfer_vui64t_2_bin128
static __binary128 vec_xfer_vui64t_2_bin128(vui64_t f128)
Transfer a vector unsigned long long to __binary128 scalar.
Definition: vec_f128_ppc.h:4603
vb128_t
__vector __bool __int128 vb128_t
vector of one 128-bit bool __int128 element.
Definition: vec_common_ppc.h:240
vec_sldq
static vui128_t vec_sldq(vui128_t vrw, vui128_t vrx, vui128_t vrb)
Vector Shift Left Double Quadword.
Definition: vec_int128_ppc.h:6613
vec_xfer_bin128_2_vui64t
static vui64_t vec_xfer_bin128_2_vui64t(__binary128 f128)
Transfer function from a __binary128 scalar to a vector long long int.
Definition: vec_f128_ppc.h:4448
vec_cmpeqtoqp
static vb128_t vec_cmpeqtoqp(__binary128 vfa, __binary128 vfb)
Vector Compare Equal (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:5051
vec_iszerof128
static vb128_t vec_iszerof128(__binary128 f128)
Return 128-bit vector boolean true value, if the value that is +-0.0.
Definition: vec_f128_ppc.h:7819
vec_cmpneuq
static vb128_t vec_cmpneuq(vui128_t vra, vui128_t vrb)
Vector Compare Not Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3475
vec_xscvuqqp
static __binary128 vec_xscvuqqp(vui128_t int128)
VSX Scalar Convert Unsigned-Quadword to Quad-Precision format.
Definition: vec_f128_ppc.h:9271
vec_cmpqp_all_uzeq
static int vec_cmpqp_all_uzeq(__binary128 vfa, __binary128 vfb)
Vector Compare all Equal (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6342
vec_mask128_f128sign
static vui32_t vec_mask128_f128sign(void)
Generate Quadword Quad-Precision Sign-bit mask.
Definition: vec_f128_ppc.h:3899
vec_cmpgeuqp
static vb128_t vec_cmpgeuqp(__binary128 vfa, __binary128 vfb)
Vector Compare Greater Than or Equal (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:5378
vec_cmpuq_all_ge
static int vec_cmpuq_all_ge(vui128_t vra, vui128_t vrb)
Vector Compare any Greater Than or Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3848
vec_cmpud_all_lt
static int vec_cmpud_all_lt(vui64_t a, vui64_t b)
Vector Compare all Less than Unsigned Doubleword.
Definition: vec_int64_ppc.h:2311
vec_cmpqp_all_toge
static int vec_cmpqp_all_toge(__binary128 vfa, __binary128 vfb)
Vector Compare all Greater Than Or Equal (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:6447
vec_minud
static vui64_t vec_minud(vui64_t vra, vui64_t vrb)
Vector Minimum Unsigned Doubleword.
Definition: vec_int64_ppc.h:2663
vec_xsiexpqp
static __binary128 vec_xsiexpqp(vui128_t sig, vui64_t exp)
Scalar Insert Exponent Quad-Precision.
Definition: vec_f128_ppc.h:9716
vec_cmpltuzqp
static vb128_t vec_cmpltuzqp(__binary128 vfa, __binary128 vfb)
Vector Compare Less Than (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:5964
CONST_VINT128_W
#define CONST_VINT128_W(__w0, __w1, __w2, __w3)
Arrange word elements of an unsigned int initializer in high->low order. May require an explicit cast.
Definition: vec_common_ppc.h:304
vec_cmpqp_exp_unordered
static int vec_cmpqp_exp_unordered(__binary128 vfa, __binary128 vfb)
Vector Compare Exponents Quad-Precision for Unordered.
Definition: vec_f128_ppc.h:7494
vec_cmpqp_all_uzlt
static int vec_cmpqp_all_uzlt(__binary128 vfa, __binary128 vfb)
Vector Compare all Less Than (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:7078
vec_xviexpdp
static vf64_t vec_xviexpdp(vui64_t sig, vui64_t exp)
Vector Insert Exponent Double-Precision.
Definition: vec_f64_ppc.h:1665
vec_isnormalf128
static vb128_t vec_isnormalf128(__binary128 f128)
Return 128-bit vector boolean true if the __float128 value is normal (Not NaN, Inf,...
Definition: vec_f128_ppc.h:7705
vec_cmpgetoqp
static vb128_t vec_cmpgetoqp(__binary128 vfa, __binary128 vfb)
Vector Compare Greater Than or Equal (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:5235
vec_cmpqp_all_tone
static int vec_cmpqp_all_tone(__binary128 vfa, __binary128 vfb)
Vector Compare all Not-Equal (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:7204
__VF_128
Union used to transfer 128-bit data between vector and __float128 types.
Definition: vec_f128_ppc.h:3694
vec_cmpequzqp
static vb128_t vec_cmpequzqp(__binary128 vfa, __binary128 vfb)
Vector Compare Equal (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:5106
vec_cmpuq_all_ne
static int vec_cmpuq_all_ne(vui128_t vra, vui128_t vrb)
Vector Compare all Not Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:4025
CONST_VINT64_DW
#define CONST_VINT64_DW(__dw0, __dw1)
Arrange elements of dword initializer in high->low order.
Definition: vec_common_ppc.h:295
__VF_128::vx8
vui16_t vx8
union field of vector unsigned short elements.
Definition: vec_f128_ppc.h:3699
vui16_t
__vector unsigned short vui16_t
vector of 16-bit unsigned short elements.
Definition: vec_common_ppc.h:204
vec_xfer_bin128_2_vui128t
static vui128_t vec_xfer_bin128_2_vui128t(__binary128 f128)
Transfer function from a __binary128 scalar to a vector __int128.
Definition: vec_f128_ppc.h:4491
vec_xscvudqp
static __binary128 vec_xscvudqp(vui64_t int64)
VSX Scalar Convert Unsigned-Doubleword to Quad-Precision format.
Definition: vec_f128_ppc.h:9054
__VF_128::ix1
unsigned __int128 ix1
union field of __int128 elements.
Definition: vec_f128_ppc.h:3711
vec_isfinitef128
static vb128_t vec_isfinitef128(__binary128 f128)
Return 128-bit vector boolean true if the __float128 value is Finite (Not NaN nor Inf).
Definition: vec_f128_ppc.h:7531
vec_cmpleuqp
static vb128_t vec_cmpleuqp(__binary128 vfa, __binary128 vfb)
Vector Compare Less Than or Equal (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:5818
vec_cmpuq_all_eq
static int vec_cmpuq_all_eq(vui128_t vra, vui128_t vrb)
Vector Compare all Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3804
vec_cmpletoqp
static vb128_t vec_cmpletoqp(__binary128 vfa, __binary128 vfb)
Vector Compare Less Than or Equal (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:5675
vi128_t
__vector __int128 vi128_t
vector of one 128-bit signed __int128 element.
Definition: vec_common_ppc.h:235
vec_cmpud_all_eq
static int vec_cmpud_all_eq(vui64_t a, vui64_t b)
Vector Compare all Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:2193
vec_isnanf128
static vb128_t vec_isnanf128(__binary128 f128)
Return 128-bit vector boolean true if the __float128 value is Not a Number (NaN).
Definition: vec_f128_ppc.h:7666
vec_cmpltuqp
static vb128_t vec_cmpltuqp(__binary128 vfa, __binary128 vfb)
Vector Compare Less Than (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6038
vec_all_isfinitef128
static int vec_all_isfinitef128(__binary128 f128)
Return true if the __float128 value is Finite (Not NaN nor Inf).
Definition: vec_f128_ppc.h:4691
vec_xxxexpqpp
static vui64_t vec_xxxexpqpp(__binary128 vfa, __binary128 vfb)
Vector Extract Exponent Quad-Precision Pair.
Definition: vec_f128_ppc.h:9871
vec_cmpgtuzqp
static vb128_t vec_cmpgtuzqp(__binary128 vfa, __binary128 vfb)
Vector Compare Greater Than (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:5524
__binary128
vf128_t __binary128
Define __binary128 if not defined by the compiler. Same as __float128 for PPC.
Definition: vec_f128_ppc.h:3680
vec_cmpqp_all_tolt
static int vec_cmpqp_all_tolt(__binary128 vfa, __binary128 vfb)
Vector Compare All Less Than (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:7016
__VF_128::vf1
__binary128 vf1
union field of __float128 elements.
Definition: vec_f128_ppc.h:3709
vec_mask128_f128Cbit
static vui32_t vec_mask128_f128Cbit(void)
Generate Quadword C-bit mask Immediate.
Definition: vec_f128_ppc.h:3922
vui64_t
__vector unsigned long long vui64_t
vector of 64-bit unsigned long long elements.
Definition: vec_common_ppc.h:208
__VF_128::vx1
vui128_t vx1
union field of vector unsigned __int128 elements.
Definition: vec_f128_ppc.h:3705
vec_common_ppc.h
Common definitions and typedef used by the collection of Power Vector Library (pveclib) headers.
vec_splatd
static vui64_t vec_splatd(vui64_t vra, const int ctl)
Vector splat doubleword. Duplicate the selected doubleword element across the doubleword elements of ...
Definition: vec_int64_ppc.h:3382
vec_mask64_f128exp
static vui64_t vec_mask64_f128exp(void)
Generate Doubleword Quad-Precision exponent mask.
Definition: vec_f128_ppc.h:3806
vui8_t
__vector unsigned char vui8_t
vector of 8-bit unsigned char elements.
Definition: vec_common_ppc.h:202
vec_nabsf128
static __binary128 vec_nabsf128(__binary128 f128)
Negative Absolute value Quad-Precision.
Definition: vec_f128_ppc.h:7854
vec_cmpud_all_gt
static int vec_cmpud_all_gt(vui64_t a, vui64_t b)
Vector Compare all Greater Than Unsigned Doubleword.
Definition: vec_int64_ppc.h:2255
vec_cmpqp_all_ne
static int vec_cmpqp_all_ne(__binary128 vfa, __binary128 vfb)
Vector Compare all Not-Equal (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:7301
vec_cmpqp_all_ge
static int vec_cmpqp_all_ge(__binary128 vfa, __binary128 vfb)
Vector Compare all Greater Than Or Equal (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6573
vec_xscvqpuqz
static vui128_t vec_xscvqpuqz(__binary128 f128)
VSX Scalar Convert with round to zero Quad-Precision to Unsigned Quadword.
Definition: vec_f128_ppc.h:8891
vec_subudm
static vui64_t vec_subudm(vui64_t a, vui64_t b)
Vector Subtract Unsigned Doubleword Modulo.
Definition: vec_int64_ppc.h:3746
vec_cmpqp_exp_eq
static int vec_cmpqp_exp_eq(__binary128 vfa, __binary128 vfb)
Vector Compare Quad-Precision Exponents for Equal.
Definition: vec_f128_ppc.h:7353
vec_vsld
static vui64_t vec_vsld(vui64_t vra, vui64_t vrb)
Vector Shift Left Doubleword.
Definition: vec_int64_ppc.h:4238
vec_xsxsigqp
static vui128_t vec_xsxsigqp(__binary128 f128)
Scalar Extract Significand Quad-Precision.
Definition: vec_f128_ppc.h:9814
vec_int128_ppc.h
Header package containing a collection of 128-bit computation functions implemented with PowerISA VMX...
vec_srqi
static vui128_t vec_srqi(vui128_t vra, const unsigned int shb)
Vector Shift Right Quadword Immediate.
Definition: vec_int128_ppc.h:7154
vec_const_huge_valf128
static __binary128 vec_const_huge_valf128()
return a positive infinity.
Definition: vec_f128_ppc.h:4976
__VF_128::vx16
vui8_t vx16
union field of vector unsigned char elements.
Definition: vec_f128_ppc.h:3697
vec_cmpneuzqp
static vb128_t vec_cmpneuzqp(__binary128 vfa, __binary128 vfb)
Vector Compare Not Equal (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6169
vec_splat_u128
static vui128_t vec_splat_u128(const int sim)
Vector Splat Immediate Unsigned Quadword. Extend an unsigned integer constant across the quadword elem...
Definition: vec_int128_ppc.h:6914
vec_srhi
static vui16_t vec_srhi(vui16_t vra, const unsigned int shb)
Vector Shift Right Halfword Immediate.
Definition: vec_int16_ppc.h:1093
vec_clzq
static vui128_t vec_clzq(vui128_t vra)
Vector Count Leading Zeros Quadword for unsigned __int128 elements.
Definition: vec_int128_ppc.h:2918
vec_xor_bin128_2_vui32t
static vui32_t vec_xor_bin128_2_vui32t(__binary128 f128, vui32_t mask)
Transfer a quadword from a __binary128 scalar to a vector int and logical Exclusive OR with mask.
Definition: vec_f128_ppc.h:4175
vec_mask128_f128Lbit
static vui32_t vec_mask128_f128Lbit(void)
Generate Quadword L-bit mask Immediate.
Definition: vec_f128_ppc.h:3943
vec_mask128_f128exp
static vui32_t vec_mask128_f128exp(void)
Generate Quadword Quad-Precision exponent mask.
Definition: vec_f128_ppc.h:3829
vec_mrgh_bin128_2_vui64t
static vui64_t vec_mrgh_bin128_2_vui64t(__binary128 vfa, __binary128 vfb)
Merge High and Transfer function from a pair of __binary128 scalars to a vector long long int.
Definition: vec_f128_ppc.h:4374
vec_muludq
static vui128_t vec_muludq(vui128_t *mulu, vui128_t a, vui128_t b)
Vector Multiply Unsigned Double Quadword.
Definition: vec_int128_ppc.h:5734
vec_cmpud_any_eq
static int vec_cmpud_any_eq(vui64_t a, vui64_t b)
Vector Compare any Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:2365
VEC_WE_3
#define VEC_WE_3
Element index for vector splat word 3.
Definition: vec_common_ppc.h:336
vec_mask128_f128sig
static vui32_t vec_mask128_f128sig(void)
Generate Quadword Quad-Precision significand mask.
Definition: vec_f128_ppc.h:3878
vec_xssubqpo
static __binary128 vec_xssubqpo(__binary128 vfa, __binary128 vfb)
VSX Scalar Subtract Quad-Precision using round to Odd.
Definition: vec_f128_ppc.h:8303
vec_cmpqp_all_toeq
static int vec_cmpqp_all_toeq(__binary128 vfa, __binary128 vfb)
Vector Compare all Equal (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:6297
vec_addcuq
static vui128_t vec_addcuq(vui128_t a, vui128_t b)
Vector Add & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:2568
vec_cmpuq_all_le
static int vec_cmpuq_all_le(vui128_t vra, vui128_t vrb)
Vector Compare any Less Than or Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3936
vec_xscvsdqp
static __binary128 vec_xscvsdqp(vi64_t int64)
VSX Scalar Convert Signed-Doubleword to Quad-Precision format.
Definition: vec_f128_ppc.h:8974
vui128_t
__vector unsigned __int128 vui128_t
vector of one 128-bit unsigned __int128 element.
Definition: vec_common_ppc.h:237
vb64_t
__vector __bool long long vb64_t
vector of 64-bit bool long long elements.
Definition: vec_common_ppc.h:230
__Float128
vf128_t __Float128
Define __Float128 if not defined by the compiler. Same as __float128 for PPC.
Definition: vec_f128_ppc.h:3677
vec_cmpnetoqp
static vb128_t vec_cmpnetoqp(__binary128 vfa, __binary128 vfb)
Vector Compare Not Equal (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:6113
__VF_128::vx4
vui32_t vx4
union field of vector unsigned int elements.
Definition: vec_f128_ppc.h:3701
vec_clzd
static vui64_t vec_clzd(vui64_t vra)
Vector Count Leading Zeros Doubleword for unsigned long long elements.
Definition: vec_int64_ppc.h:1313
vec_andc_bin128_2_vui128t
static vui128_t vec_andc_bin128_2_vui128t(__binary128 f128, vui128_t mask)
Transfer a quadword from a __binary128 scalar to a vector __int128 and logical AND Complement with ma...
Definition: vec_f128_ppc.h:4220
vec_cmpltsq
static vb128_t vec_cmpltsq(vi128_t vra, vi128_t vrb)
Vector Compare Less Than Signed Quadword.
Definition: vec_int128_ppc.h:3357
vec_all_isinff128
static int vec_all_isinff128(__binary128 f128)
Return true if the __float128 value is infinity.
Definition: vec_f128_ppc.h:4723
vec_setb_cyq
static vb128_t vec_setb_cyq(vui128_t vcy)
Vector Set Bool from Quadword Carry.
Definition: vec_int128_ppc.h:6509
vi64_t
__vector long long vi64_t
vector of 64-bit signed long long elements.
Definition: vec_common_ppc.h:217
vec_or_bin128_2_vui32t
static vui32_t vec_or_bin128_2_vui32t(__binary128 f128, vui32_t mask)
Transfer a quadword from a __binary128 scalar to a vector int and logical OR with mask.
Definition: vec_f128_ppc.h:4130
vec_xfer_bin128_2_vui16t
static vui16_t vec_xfer_bin128_2_vui16t(__binary128 f128)
Transfer function from a __binary128 scalar to a vector short int.
Definition: vec_f128_ppc.h:4306
CONST_VINT128_DW
#define CONST_VINT128_DW(__dw0, __dw1)
Initializer for 128-bits vector, as two unsigned long long elements in high->low order....
Definition: vec_common_ppc.h:298
vec_const64_f128_128
static vui64_t vec_const64_f128_128(void)
Generate doubleword splat constant 128.
Definition: vec_f128_ppc.h:3739
vec_cmpgttoqp
static vb128_t vec_cmpgttoqp(__binary128 vfa, __binary128 vfb)
Vector Compare Greater Than (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:5455
vb16_t
__vector __bool short vb16_t
vector of 16-bit bool short elements.
Definition: vec_common_ppc.h:226
vec_vsrd
static vui64_t vec_vsrd(vui64_t vra, vui64_t vrb)
Vector Shift Right Doubleword.
Definition: vec_int64_ppc.h:4377
vec_andc_bin128_2_vui32t
static vui32_t vec_andc_bin128_2_vui32t(__binary128 f128, vui32_t mask)
Transfer a quadword from a __binary128 scalar to a vector int and logical AND Complement with mask.
Definition: vec_f128_ppc.h:4085
vec_setb_qp
static vb128_t vec_setb_qp(__binary128 f128)
Vector Set Bool from Quadword Floating-point.
Definition: vec_f128_ppc.h:7953
vec_cmpuq_all_gt
static int vec_cmpuq_all_gt(vui128_t vra, vui128_t vrb)
Vector Compare any Greater Than Unsigned Quadword.
Definition: vec_int128_ppc.h:3892
vec_const128_f128_128
static vui32_t vec_const128_f128_128(void)
Generate Quadword constant 128.
Definition: vec_f128_ppc.h:3770
vec_cmpgtsd
static vb64_t vec_cmpgtsd(vi64_t a, vi64_t b)
Vector Compare Greater Than Signed Doubleword.
Definition: vec_int64_ppc.h:1571
vec_xfer_vui16t_2_bin128
static __binary128 vec_xfer_vui16t_2_bin128(vui16_t f128)
Transfer a vector unsigned short to __binary128 scalar.
Definition: vec_f128_ppc.h:4557
vec_xvxsigdp
static vui64_t vec_xvxsigdp(vf64_t vrb)
Vector Extract Significand Double-Precision.
Definition: vec_f64_ppc.h:1762
vec_setb_sq
static vb128_t vec_setb_sq(vi128_t vra)
Vector Set Bool from Signed Quadword.
Definition: vec_int128_ppc.h:6576
vec_cmpqp_all_uzge
static int vec_cmpqp_all_uzge(__binary128 vfa, __binary128 vfb)
Vector Compare all Greater Than Or Equal (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6509
vec_cmplttoqp
static vb128_t vec_cmplttoqp(__binary128 vfa, __binary128 vfb)
Vector Compare Less Than (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:5895
VEC_DW_L
#define VEC_DW_L
Element index for low order dword.
Definition: vec_common_ppc.h:324
vec_cmpgeuzqp
static vb128_t vec_cmpgeuzqp(__binary128 vfa, __binary128 vfb)
Vector Compare Greater Than Or Equal (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:5304
vf128_t
vui128_t vf128_t
vector of 128-bit binary128 element. Same as __float128 for PPC.
Definition: vec_f128_ppc.h:3674
vec_sel_bin128_2_bin128
static __binary128 vec_sel_bin128_2_bin128(__binary128 vfa, __binary128 vfb, vb128_t mask)
Select and Transfer from one of two __binary128 scalars under a 128-bit mask. The result is a __binar...
Definition: vec_f128_ppc.h:3992
vec_selud
static vui64_t vec_selud(vui64_t vra, vui64_t vrb, vb64_t vrc)
Vector Select Unsigned Doubleword.
Definition: vec_int64_ppc.h:3354
vec_xscvsqqp
static __binary128 vec_xscvsqqp(vi128_t int128)
VSX Scalar Convert Signed-Quadword to Quad-Precision format.
Definition: vec_f128_ppc.h:9130
vec_const_inff128
static __binary128 vec_const_inff128()
return a positive infinity.
Definition: vec_f128_ppc.h:4988
vec_cmpqp_all_tole
static int vec_cmpqp_all_tole(__binary128 vfa, __binary128 vfb)
Vector Compare All Less Than Or Equal (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:6826
VEC_WE_1
#define VEC_WE_1
Element index for vector splat word 1.
Definition: vec_common_ppc.h:332
vec_f64_ppc.h
Header package containing a collection of 128-bit SIMD operations over 64-bit double-precision floati...
vec_cmpequq
static vb128_t vec_cmpequq(vui128_t vra, vui128_t vrb)
Vector Compare Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3043
vec_cmpleuq
static vb128_t vec_cmpleuq(vui128_t vra, vui128_t vrb)
Vector Compare Less Than or Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3316
__VF_128::vbool1
vb128_t vbool1
union field of vector __bool __int128 elements.
Definition: vec_f128_ppc.h:3707
vec_cmpqp_all_eq
static int vec_cmpqp_all_eq(__binary128 vfa, __binary128 vfb)
Vector Compare all Equal (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6394
vec_mask128_f128mag
static vui32_t vec_mask128_f128mag(void)
Generate Quadword Quad-Precision magnitude mask.
Definition: vec_f128_ppc.h:3858
vec_cmpud_any_ne
static int vec_cmpud_any_ne(vui64_t a, vui64_t b)
Vector Compare any Not Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:2508
vec_mrgald
static vui64_t vec_mrgald(vui128_t vra, vui128_t vrb)
Vector Merge Algebraic Low Doublewords.
Definition: vec_int64_ppc.h:2736
vec_cmpgtuqp
static vb128_t vec_cmpgtuqp(__binary128 vfa, __binary128 vfb)
Vector Compare Greater Than (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:5598
vec_cmpqp_exp_gt
static int vec_cmpqp_exp_gt(__binary128 vfa, __binary128 vfb)
Vector Compare Exponents Quad-Precision for Greater Than.
Definition: vec_f128_ppc.h:7400
vec_negf128
static __binary128 vec_negf128(__binary128 f128)
Negate the sign bit of a __float128 input and return the resulting __float128 value.
Definition: vec_f128_ppc.h:7886
vec_xfer_bin128_2_vui8t
static vui8_t vec_xfer_bin128_2_vui8t(__binary128 f128)
Transfer function from a __binary128 scalar to a vector char.
Definition: vec_f128_ppc.h:4263
vec_all_issubnormalf128
static int vec_all_issubnormalf128(__binary128 f128)
Return true if the __float128 value is subnormal (denormal).
Definition: vec_f128_ppc.h:4841
vec_cmpequqp
static vb128_t vec_cmpequqp(__binary128 vfa, __binary128 vfb)
Vector Compare Equal (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:5169
vec_mrgahd
static vui64_t vec_mrgahd(vui128_t vra, vui128_t vrb)
Vector Merge Algebraic High Doublewords.
Definition: vec_int64_ppc.h:2710
vec_isinf_signf128
static int vec_isinf_signf128(__binary128 f128)
Return true (nonzero) value if the __float128 value is infinity. If infinity, indicate the sign as +1...
Definition: vec_f128_ppc.h:7576
vui32_t
__vector unsigned int vui32_t
vector of 32-bit unsigned int elements.
Definition: vec_common_ppc.h:206
vec_splat_u64
static vui64_t vec_splat_u64(const int sim)
Vector Splat Immediate Unsigned Doubleword. Duplicate the unsigned integer constant across doubleword...
Definition: vec_int64_ppc.h:3495
vec_all_isunorderedf128
static int vec_all_isunorderedf128(__binary128 vfa, __binary128 vfb)
Return true if either __float128 value (vra, vrb) is NaN.
Definition: vec_f128_ppc.h:4881
vec_cmpltud
static vb64_t vec_cmpltud(vui64_t a, vui64_t b)
Vector Compare less Than Unsigned Doubleword.
Definition: vec_int64_ppc.h:1771
vec_copysignf128
static __binary128 vec_copysignf128(__binary128 f128x, __binary128 f128y)
Copy the sign bit from f128x and merge with the magnitude from f128y. The merged result is returned a...
Definition: vec_f128_ppc.h:4950
vec_sldqi
static vui128_t vec_sldqi(vui128_t vrw, vui128_t vrx, const unsigned int shb)
Vector Shift Left Double Quadword Immediate.
Definition: vec_int128_ppc.h:6649
vec_and_bin128_2_vui32t
static vui32_t vec_and_bin128_2_vui32t(__binary128 f128, vui32_t mask)
Transfer a quadword from a __binary128 scalar to a vector int and logical AND with a mask.
Definition: vec_f128_ppc.h:4040
vec_mrgl_bin128_2_vui64t
static vui64_t vec_mrgl_bin128_2_vui64t(__binary128 vfa, __binary128 vfb)
Merge Low and Transfer function from a pair of __binary128 scalars to a vector long long int.
Definition: vec_f128_ppc.h:4412
__IBM128
long double __IBM128
Define __IBM128 if not defined by the compiler. Same as old long double for PPC.
Definition: vec_f128_ppc.h:3689
vec_adduqm
static vui128_t vec_adduqm(vui128_t a, vui128_t b)
Vector Add Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:2739
vec_xsaddqpo
static __binary128 vec_xsaddqpo(__binary128 vfa, __binary128 vfb)
VSX Scalar Add Quad-Precision using round to Odd.
Definition: vec_f128_ppc.h:8035
vec_cmpud_all_ge
static int vec_cmpud_all_ge(vui64_t a, vui64_t b)
Vector Compare all Greater Than or Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:2223
vec_xfer_vui8t_2_bin128
static __binary128 vec_xfer_vui8t_2_bin128(vui8_t f128)
Transfer a vector unsigned char to __binary128 scalar.
Definition: vec_f128_ppc.h:4534
vec_xsxexpqp
static vui64_t vec_xsxexpqp(__binary128 f128)
Scalar Extract Exponent Quad-Precision.
Definition: vec_f128_ppc.h:9765
vec_cmpgtud
static vb64_t vec_cmpgtud(vui64_t a, vui64_t b)
Vector Compare Greater Than Unsigned Doubleword.
Definition: vec_int64_ppc.h:1622
__VF_128::vx2
vui64_t vx2
union field of vector unsigned long long elements.
Definition: vec_f128_ppc.h:3703
vec_addudm
static vui64_t vec_addudm(vui64_t a, vui64_t b)
Vector Add Unsigned Doubleword Modulo.
Definition: vec_int64_ppc.h:1261
vf64_t
__vector double vf64_t
vector of 64-bit double elements.
Definition: vec_common_ppc.h:221
vec_xsmulqpo
static __binary128 vec_xsmulqpo(__binary128 vfa, __binary128 vfb)
VSX Scalar Multiply Quad-Precision using round to Odd.
Definition: vec_f128_ppc.h:9380
vec_isunorderedf128
static vb128_t vec_isunorderedf128(__binary128 vfa, __binary128 vfb)
Return 128-bit vector boolean true value, if either __float128 value (vra, vrb) is NaN.
Definition: vec_f128_ppc.h:7793
vec_all_isnormalf128
static int vec_all_isnormalf128(__binary128 f128)
Return true if the __float128 value is normal (Not NaN, Inf, denormal, or zero).
Definition: vec_f128_ppc.h:4807
vec_cmpsd_all_gt
static int vec_cmpsd_all_gt(vi64_t a, vi64_t b)
Vector Compare all Greater Than Signed Doubleword.
Definition: vec_int64_ppc.h:1909
vec_srq
static vui128_t vec_srq(vui128_t vra, vui128_t vrb)
Vector Shift Right Quadword.
Definition: vec_int128_ppc.h:7114
vec_cmpqp_all_togt
static int vec_cmpqp_all_togt(__binary128 vfa, __binary128 vfb)
Vector Compare all Greater Than (Total-order) Quad-Precision.
Definition: vec_f128_ppc.h:6637
vec_xfer_vui128t_2_bin128
static __binary128 vec_xfer_vui128t_2_bin128(vui128_t f128)
Transfer a vector unsigned __int128 to __binary128 scalar.
Definition: vec_f128_ppc.h:4626
vec_cmpqp_all_lt
static int vec_cmpqp_all_lt(__binary128 vfa, __binary128 vfb)
Vector Compare all Less Than (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:7142
vec_xscvqpudz
static vui64_t vec_xscvqpudz(__binary128 f128)
VSX Scalar Convert with round to zero Quad-Precision to Unsigned doubleword.
Definition: vec_f128_ppc.h:8807
vec_cmpneuqp
static vb128_t vec_cmpneuqp(__binary128 vfa, __binary128 vfb)
Vector Compare Not Equal (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6233
vec_cmpsd_all_lt
static int vec_cmpsd_all_lt(vi64_t a, vi64_t b)
Vector Compare all Less than Signed Doubleword.
Definition: vec_int64_ppc.h:1965
vec_self128
static __binary128 vec_self128(__binary128 vfa, __binary128 vfb, vb128_t mask)
Select and Transfer from one of two __binary128 scalars under a 128-bit mask. The result is a __binar...
Definition: vec_f128_ppc.h:7926
vec_swapd
static vui64_t vec_swapd(vui64_t vra)
Vector doubleword swap. Exchange the high and low doubleword elements of a vector.
Definition: vec_int64_ppc.h:3789
vec_const_nanf128
static __binary128 vec_const_nanf128()
return a quiet NaN.
Definition: vec_f128_ppc.h:5000
vec_xscvqpdpo
static vf64_t vec_xscvqpdpo(__binary128 f128)
VSX Scalar Convert with round Quad-Precision to Double-Precision (using round to odd).
Definition: vec_f128_ppc.h:8675
vec_all_isnanf128
static int vec_all_isnanf128(__binary128 f128)
Return true if the __float128 value is Not a Number (NaN).
Definition: vec_f128_ppc.h:4763
vec_slqi
static vui128_t vec_slqi(vui128_t vra, const unsigned int shb)
Vector Shift Left Quadword Immediate.
Definition: vec_int128_ppc.h:6748
vec_xscvdpqp
static __binary128 vec_xscvdpqp(vf64_t f64)
VSX Scalar Convert Double-Precision to Quad-Precision format.
Definition: vec_f128_ppc.h:8572
vec_cmpqp_all_le
static int vec_cmpqp_all_le(__binary128 vfa, __binary128 vfb)
Vector Compare all Less Than Or Equal (Unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6952
vec_cmpgesq
static vb128_t vec_cmpgesq(vi128_t vra, vi128_t vrb)
Vector Compare Greater Than or Equal Signed Quadword.
Definition: vec_int128_ppc.h:3089
vec_cmpleuzqp
static vb128_t vec_cmpleuzqp(__binary128 vfa, __binary128 vfb)
Vector Compare Less Than or Equal (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:5744
vec_slq
static vui128_t vec_slq(vui128_t vra, vui128_t vrb)
Vector Shift Left Quadword.
Definition: vec_int128_ppc.h:6707
vec_xfer_bin128_2_vui32t
static vui32_t vec_xfer_bin128_2_vui32t(__binary128 f128)
Transfer function from a __binary128 scalar to a vector int.
Definition: vec_f128_ppc.h:4329
vec_cmpequd
static vb64_t vec_cmpequd(vui64_t a, vui64_t b)
Vector Compare Equal Unsigned Doubleword.
Definition: vec_int64_ppc.h:1451
VEC_DW_H
#define VEC_DW_H
Element index for high order dword.
Definition: vec_common_ppc.h:322
vec_all_iszerof128
static int vec_all_iszerof128(__binary128 f128)
Return true if the __float128 value is +-0.0.
Definition: vec_f128_ppc.h:4906
__float128
vf128_t __float128
Define __float128 if not defined by the compiler. Same as __float128 for PPC.
Definition: vec_f128_ppc.h:3685
vec_cmpgeuq
static vb128_t vec_cmpgeuq(vui128_t vra, vui128_t vrb)
Vector Compare Greater Than or Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3138
vec_signbitf128
static int vec_signbitf128(__binary128 f128)
Return int boolean true if the __float128 value is negative (sign bit is '1').
Definition: vec_f128_ppc.h:7996
vec_cmpqp_all_uzle
static int vec_cmpqp_all_uzle(__binary128 vfa, __binary128 vfb)
Vector Compare all Less Than Or Equal (Zero-unordered) Quad-Precision.
Definition: vec_f128_ppc.h:6888
VEC_HW_H
#define VEC_HW_H
Element index for highest order hword.
Definition: vec_common_ppc.h:338
vec_const_nansf128
static __binary128 vec_const_nansf128()
return a signaling NaN.
Definition: vec_f128_ppc.h:5012
vec_seluq
static vui128_t vec_seluq(vui128_t vra, vui128_t vrb, vb128_t vrc)
Vector Select Unsigned Quadword.
Definition: vec_int128_ppc.h:6482