POWER Vector Library Manual
1.0.4
|
Header package containing a collection of 128-bit SIMD operations over Quad-Precision floating point elements. More...
#include <pveclib/vec_common_ppc.h>
#include <pveclib/vec_int128_ppc.h>
#include <pveclib/vec_f64_ppc.h>
Go to the source code of this file.
Classes | |
union | __VF_128 |
Union used to transfer 128-bit data between vector and __float128 types. More... | |
Typedefs | |
typedef vui128_t | vf128_t |
vector of 128-bit binary128 element. Same as __float128 for PPC. | |
typedef vf128_t | __Float128 |
Define __Float128 if not defined by the compiler. Same as __float128 for PPC. | |
typedef vf128_t | __binary128 |
Define __binary128 if not defined by the compiler. Same as __float128 for PPC. | |
typedef vf128_t | __float128 |
Define __float128 if not defined by the compiler. Same as __float128 for PPC. | |
typedef long double | __IBM128 |
Define __IBM128 if not defined by the compiler. Same as old long double for PPC. | |
Functions | |
static vui64_t | vec_const64_f128_128 (void) |
Generate doubleword splat constant 128. More... | |
static vui32_t | vec_const128_f128_128 (void) |
Generate Quadword constant 128. More... | |
static vui64_t | vec_mask64_f128exp (void) |
Generate Doubleword Quad-Precision exponent mask. More... | |
static vui32_t | vec_mask128_f128exp (void) |
Generate Quadword Quad-Precision exponent mask. More... | |
static vui32_t | vec_mask128_f128mag (void) |
Generate Quadword Quad-Precision magnitude mask. More... | |
static vui32_t | vec_mask128_f128sig (void) |
Generate Quadword Quad-Precision significand mask. More... | |
static vui32_t | vec_mask128_f128sign (void) |
Generate Quadword Quad-Precision Sign-bit mask. More... | |
static vui32_t | vec_mask128_f128Cbit (void) |
Generate Quadword C-bit mask Immediate. More... | |
static vui32_t | vec_mask128_f128Lbit (void) |
Generate Quadword L-bit mask Immediate. More... | |
static vui32_t | vec_mask128_f128Qbit (void) |
Generate Quadword QNaN-bit mask Immediate. More... | |
static __binary128 | vec_sel_bin128_2_bin128 (__binary128 vfa, __binary128 vfb, vb128_t mask) |
Select and Transfer from one of two __binary128 scalars under a 128-bit mask. The result is a __binary128 of the selected value. More... | |
static vui32_t | vec_and_bin128_2_vui32t (__binary128 f128, vui32_t mask) |
Transfer a quadword from a __binary128 scalar to a vector int and logical AND with a mask. More... | |
static vui32_t | vec_andc_bin128_2_vui32t (__binary128 f128, vui32_t mask) |
Transfer a quadword from a __binary128 scalar to a vector int and logical AND Complement with mask. More... | |
static vui32_t | vec_or_bin128_2_vui32t (__binary128 f128, vui32_t mask) |
Transfer a quadword from a __binary128 scalar to a vector int and logical OR with mask. More... | |
static vui32_t | vec_xor_bin128_2_vui32t (__binary128 f128, vui32_t mask) |
Transfer a quadword from a __binary128 scalar to a vector int and logical Exclusive OR with mask. More... | |
static vui128_t | vec_andc_bin128_2_vui128t (__binary128 f128, vui128_t mask) |
Transfer a quadword from a __binary128 scalar to a vector __int128 and logical AND Complement with mask. More... | |
static vui8_t | vec_xfer_bin128_2_vui8t (__binary128 f128) |
Transfer function from a __binary128 scalar to a vector char. More... | |
static vui16_t | vec_xfer_bin128_2_vui16t (__binary128 f128) |
Transfer function from a __binary128 scalar to a vector short int. More... | |
static vui32_t | vec_xfer_bin128_2_vui32t (__binary128 f128) |
Transfer function from a __binary128 scalar to a vector int. More... | |
static vui64_t | vec_mrgh_bin128_2_vui64t (__binary128 vfa, __binary128 vfb) |
Merge High and Transfer function from a pair of __binary128 scalars to a vector long long int. More... | |
static vui64_t | vec_mrgl_bin128_2_vui64t (__binary128 vfa, __binary128 vfb) |
Merge Low and Transfer function from a pair of __binary128 scalars to a vector long long int. More... | |
static vui64_t | vec_xfer_bin128_2_vui64t (__binary128 f128) |
Transfer function from a __binary128 scalar to a vector long long int. More... | |
static vui128_t | vec_xfer_bin128_2_vui128t (__binary128 f128) |
Transfer function from a __binary128 scalar to a vector __int128. More... | |
static __binary128 | vec_xfer_vui8t_2_bin128 (vui8_t f128) |
Transfer a vector unsigned char to __binary128 scalar. More... | |
static __binary128 | vec_xfer_vui16t_2_bin128 (vui16_t f128) |
Transfer a vector unsigned short to __binary128 scalar. More... | |
static __binary128 | vec_xfer_vui32t_2_bin128 (vui32_t f128) |
Transfer a vector unsigned int to __binary128 scalar. More... | |
static __binary128 | vec_xfer_vui64t_2_bin128 (vui64_t f128) |
Transfer a vector unsigned long long to __binary128 scalar. More... | |
static __binary128 | vec_xfer_vui128t_2_bin128 (vui128_t f128) |
Transfer a vector unsigned __int128 to __binary128 scalar. More... | |
static __binary128 | vec_absf128 (__binary128 f128) |
Absolute Quad-Precision. More... | |
static int | vec_all_isfinitef128 (__binary128 f128) |
Return true if the __float128 value is Finite (Not NaN nor Inf). More... | |
static int | vec_all_isinff128 (__binary128 f128) |
Return true if the __float128 value is infinity. More... | |
static int | vec_all_isnanf128 (__binary128 f128) |
Return true if the __float128 value is Not a Number (NaN). More... | |
static int | vec_all_isnormalf128 (__binary128 f128) |
Return true if the __float128 value is normal (Not NaN, Inf, denormal, or zero). More... | |
static int | vec_all_issubnormalf128 (__binary128 f128) |
Return true if the __float128 value is subnormal (denormal). More... | |
static int | vec_all_isunorderedf128 (__binary128 vfa, __binary128 vfb) |
Return true if either __float128 value (vfa, vfb) is NaN. More... | |
static int | vec_all_iszerof128 (__binary128 f128) |
Return true if the __float128 value is +-0.0. More... | |
static __binary128 | vec_copysignf128 (__binary128 f128x, __binary128 f128y) |
Copy the sign bit from f128x and merge with the magnitude from f128y. The merged result is returned as a __float128 value. More... | |
static __binary128 | vec_const_huge_valf128 () |
return a positive infinity. More... | |
static __binary128 | vec_const_inff128 () |
return a positive infinity. More... | |
static __binary128 | vec_const_nanf128 () |
return a quiet NaN. More... | |
static __binary128 | vec_const_nansf128 () |
return a signaling NaN. More... | |
static vb128_t | vec_cmpeqtoqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Equal (Total-order) Quad-Precision. More... | |
static vb128_t | vec_cmpequzqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Equal (Zero-unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpequqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Equal (Unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpgetoqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Greater Than or Equal (Total-order) Quad-Precision. More... | |
static vb128_t | vec_cmpgeuzqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Greater Than Or Equal (Zero-unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpgeuqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Greater Than or Equal (Unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpgttoqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Greater Than (Total-order) Quad-Precision. More... | |
static vb128_t | vec_cmpgtuzqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Greater Than (Zero-unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpgtuqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Greater Than (Unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpletoqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Less Than or Equal (Total-order) Quad-Precision. More... | |
static vb128_t | vec_cmpleuzqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Less Than or Equal (Zero-unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpleuqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Less Than or Equal (Unordered) Quad-Precision. More... | |
static vb128_t | vec_cmplttoqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Less Than (Total-order) Quad-Precision. More... | |
static vb128_t | vec_cmpltuzqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Less Than (Zero-unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpltuqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Less Than (Unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpnetoqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Not Equal (Total-order) Quad-Precision. More... | |
static vb128_t | vec_cmpneuzqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Not Equal (Zero-unordered) Quad-Precision. More... | |
static vb128_t | vec_cmpneuqp (__binary128 vfa, __binary128 vfb) |
Vector Compare Not Equal (Unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_toeq (__binary128 vfa, __binary128 vfb) |
Vector Compare all Equal (Total-order) Quad-Precision. More... | |
static int | vec_cmpqp_all_uzeq (__binary128 vfa, __binary128 vfb) |
Vector Compare all Equal (Zero-unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_eq (__binary128 vfa, __binary128 vfb) |
Vector Compare all Equal (Unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_toge (__binary128 vfa, __binary128 vfb) |
Vector Compare all Greater Than Or Equal (Total-order) Quad-Precision. More... | |
static int | vec_cmpqp_all_uzge (__binary128 vfa, __binary128 vfb) |
Vector Compare all Greater Than Or Equal (Zero-unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_ge (__binary128 vfa, __binary128 vfb) |
Vector Compare all Greater Than Or Equal (Unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_togt (__binary128 vfa, __binary128 vfb) |
Vector Compare all Greater Than (Total-order) Quad-Precision. More... | |
static int | vec_cmpqp_all_uzgt (__binary128 vfa, __binary128 vfb) |
Vector Compare all Greater Than (Zero-unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_gt (__binary128 vfa, __binary128 vfb) |
Vector Compare all Greater Than (Unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_tole (__binary128 vfa, __binary128 vfb) |
Vector Compare All Less Than Or Equal (Total-order) Quad-Precision. More... | |
static int | vec_cmpqp_all_uzle (__binary128 vfa, __binary128 vfb) |
Vector Compare all Less Than Or Equal (Zero-unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_le (__binary128 vfa, __binary128 vfb) |
Vector Compare all Less Than Or Equal (Unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_tolt (__binary128 vfa, __binary128 vfb) |
Vector Compare All Less Than (Total-order) Quad-Precision. More... | |
static int | vec_cmpqp_all_uzlt (__binary128 vfa, __binary128 vfb) |
Vector Compare all Less Than (Zero-unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_lt (__binary128 vfa, __binary128 vfb) |
Vector Compare all Less Than (Unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_tone (__binary128 vfa, __binary128 vfb) |
Vector Compare all Not-Equal (Total-order) Quad-Precision. More... | |
static int | vec_cmpqp_all_uzne (__binary128 vfa, __binary128 vfb) |
Vector Compare all Not-Equal (Zero-unordered) Quad-Precision. More... | |
static int | vec_cmpqp_all_ne (__binary128 vfa, __binary128 vfb) |
Vector Compare all Not-Equal (Unordered) Quad-Precision. More... | |
static int | vec_cmpqp_exp_eq (__binary128 vfa, __binary128 vfb) |
Vector Compare Quad-Precision Exponents for Equal. More... | |
static int | vec_cmpqp_exp_gt (__binary128 vfa, __binary128 vfb) |
Vector Compare Exponents Quad-Precision for Greater Than. More... | |
static int | vec_cmpqp_exp_lt (__binary128 vfa, __binary128 vfb) |
Vector Compare Exponents Quad-Precision for Less Than. More... | |
static int | vec_cmpqp_exp_unordered (__binary128 vfa, __binary128 vfb) |
Vector Compare Exponents Quad-Precision for Unordered. More... | |
static vb128_t | vec_isfinitef128 (__binary128 f128) |
Return 128-bit vector boolean true if the __float128 value is Finite (Not NaN nor Inf). More... | |
static int | vec_isinf_signf128 (__binary128 f128) |
Return true (nonzero) value if the __float128 value is infinity. If infinity, indicate the sign as +1 for positive infinity and -1 for negative infinity. More... | |
static vb128_t | vec_isinff128 (__binary128 f128) |
Return a 128-bit vector boolean true if the __float128 value is infinity. More... | |
static vb128_t | vec_isnanf128 (__binary128 f128) |
Return 128-bit vector boolean true if the __float128 value is Not a Number (NaN). More... | |
static vb128_t | vec_isnormalf128 (__binary128 f128) |
Return 128-bit vector boolean true if the __float128 value is normal (Not NaN, Inf, denormal, or zero). More... | |
static vb128_t | vec_issubnormalf128 (__binary128 f128) |
Return 128-bit vector boolean true value, if the __float128 value is subnormal (denormal). More... | |
static vb128_t | vec_isunorderedf128 (__binary128 vfa, __binary128 vfb) |
Return 128-bit vector boolean true value, if either __float128 value (vfa, vfb) is NaN. More... | |
static vb128_t | vec_iszerof128 (__binary128 f128) |
Return 128-bit vector boolean true value, if the __float128 value is +-0.0. More... | |
static __binary128 | vec_nabsf128 (__binary128 f128) |
Negative Absolute value Quad-Precision. More... | |
static __binary128 | vec_negf128 (__binary128 f128) |
Negate the sign bit of a __float128 input and return the resulting __float128 value. More... | |
static __binary128 | vec_self128 (__binary128 vfa, __binary128 vfb, vb128_t mask) |
Select and Transfer from one of two __binary128 scalars under a 128-bit mask. The result is a __binary128 of the selected value. More... | |
static vb128_t | vec_setb_qp (__binary128 f128) |
Vector Set Bool from Quadword Floating-point. More... | |
static int | vec_signbitf128 (__binary128 f128) |
Return int boolean true if the __float128 value is negative (sign bit is '1'). More... | |
static __binary128 | vec_xsaddqpo (__binary128 vfa, __binary128 vfb) |
VSX Scalar Add Quad-Precision using round to Odd. More... | |
static __binary128 | vec_xssubqpo (__binary128 vfa, __binary128 vfb) |
VSX Scalar Subtract Quad-Precision using round to Odd. More... | |
static __binary128 | vec_xscvdpqp (vf64_t f64) |
VSX Scalar Convert Double-Precision to Quad-Precision format. More... | |
static vf64_t | vec_xscvqpdpo (__binary128 f128) |
VSX Scalar Convert with round Quad-Precision to Double-Precision (using round to odd). More... | |
static vui64_t | vec_xscvqpudz (__binary128 f128) |
VSX Scalar Convert with round to zero Quad-Precision to Unsigned doubleword. More... | |
static vui128_t | vec_xscvqpuqz (__binary128 f128) |
VSX Scalar Convert with round to zero Quad-Precision to Unsigned Quadword. More... | |
static __binary128 | vec_xscvsdqp (vi64_t int64) |
VSX Scalar Convert Signed-Doubleword to Quad-Precision format. More... | |
static __binary128 | vec_xscvudqp (vui64_t int64) |
VSX Scalar Convert Unsigned-Doubleword to Quad-Precision format. More... | |
static __binary128 | vec_xscvsqqp (vi128_t int128) |
VSX Scalar Convert Signed-Quadword to Quad-Precision format. More... | |
static __binary128 | vec_xscvuqqp (vui128_t int128) |
VSX Scalar Convert Unsigned-Quadword to Quad-Precision format. More... | |
static __binary128 | vec_xsmulqpo (__binary128 vfa, __binary128 vfb) |
VSX Scalar Multiply Quad-Precision using round to Odd. More... | |
static __binary128 | vec_xsiexpqp (vui128_t sig, vui64_t exp) |
Scalar Insert Exponent Quad-Precision. More... | |
static vui64_t | vec_xsxexpqp (__binary128 f128) |
Scalar Extract Exponent Quad-Precision. More... | |
static vui128_t | vec_xsxsigqp (__binary128 f128) |
Scalar Extract Significand Quad-Precision. More... | |
static vui64_t | vec_xxxexpqpp (__binary128 vfa, __binary128 vfb) |
Vector Extract Exponent Quad-Precision Pair. More... | |
Header package containing a collection of 128-bit SIMD operations over Quad-Precision floating point elements.
PowerISA 3.0B added Quad-Precision floating point type and operations to the Vector-Scalar Extension (VSX) facility. The first hardware implementation is available in POWER9.
PowerISA 3.1 added new min/max/compare Quad-Precision operations. Also added new quadword (128-bit) integer operations including converts between quadword integer and Quad-Precision floating point. The first hardware implementation is available in POWER10.
While all Quad-Precision operations are on 128-bit vector registers, they are defined as scalars in the PowerISA. The OpenPOWER ABI also treats the __float128 type as scalar that just happens to use vector registers for parameter passing and operations. As such no operations using __float128 (_Float128, or __ieee128) as parameter or return value are defined as vector built-ins in the ABI or <altivec.h>.
Quad-Precision is not supported in hardware until POWER9. However the compiler and runtime support the __float128 type and arithmetic operations via soft-float emulation for earlier processors. The soft-float implementation follows the ABI and passes __float128 parameters and return values in vector registers.
The PowerISA 3.0 also defines a number of useful quad-precision operations using the "round-to-odd" override. This is useful when the results of quad-precision arithmetic must be rounded to a shorter precision while avoiding double rounding. Recent GCC compilers support these operations as built-ins for the POWER9 target, but they are not supported by the C language or GCC runtime library. This means that round-to-odd is not easily available to libraries that need to support IEEE-128 on POWER8. Again it may be reasonable to add these to pveclib.
Another issue is the performance of GCC soft-float runtime for IEEE-128 (KF mode). There seem to be a number of issues with code generation for transfers from __float128 to 64-bit integer GPRs. This is required to match the ABI (vector) parameters to the soft-float runtime using integer scalars. For POWER8 targets the GCC compiler generates store vector followed by two load doubleword instructions. This generates high frequencies of load-hit-store rejects at runtime. It also looks like there is significant instruction latency associated with the XER carry bit required for extended (128-bit) integer arithmetic.
For the QP operations that have libgcc implementations and we have corresponding PVECLIB implementations we can do direct performance comparisons. So far micro-benchmarks show a significant performance gain for the PVECLIB vector implementations vs the GCC KF mode runtime.
Power8 QP | cmpqp | cvtdpqp | cvtqpdp | cvtuqqp | cvtqpuq | mulqp | addqp |
---|---|---|---|---|---|---|---|
%improvement | 22.4 | 60.7 | 46.2* | 28.9 | 72.4 | 1.8* | 10.1* |
There are a number of __float128 operations that should generate a single instruction for POWER9 and a few (less than 10) instructions for POWER8. This includes all of the __float128 classification functions (isnormal/subnormal/finite/inf/nan/zero). Unfortunately for POWER8 the compilers will generate calls to the GCC runtime (__unordkf2, __gekf2, ...) for these functions. In many cases the code size generated for the runtime calls far exceeds any in-line VSX code PVECLIB will generate.
So it is not unreasonable for this header to provide vector forms of the __float128 classification functions (isnormal/subnormal/finite/inf/nan/zero). It is little additional effort to include the sign bit manipulation operations (copysign, abs, nabs, and neg).
These functions can be implemented directly using (one or more) POWER9 instructions, or a few vector logical and integer compare instructions for POWER7/8. Each is comfortably small enough to be in-lined and inherently faster than the equivalent POSIX or compiler built-in runtime functions. Performing these operations in-line and directly in vector registers (VRs) avoids call/return and VR <-> GPR transfer overhead. It also seems reasonable to provide Quad-Precision extract/insert exponent/significand and compare exponent operations for POWER7/8.
The PVECLIB implementations for quad-precision arithmetic and conversion operations are large enough that most applications will want to call a library. PVECLIB will build and release the appropriate CPU tuned libraries. This will follow the general design used for multiple quadword integer multiply functions (vec_int512_ppc.h).
These PVECLIB operations should be useful for applications using Quad-Precision while needing to still support POWER8 but also build for POWER9/10. An important goal is to allow applications and libraries to safely substitute PVECLIB operations for C language and math.h __float128 operators and functions as point optimizations. The largest gains will be seen for builds targeting POWER8 without degrading performance when targeting POWER9/10. They should also be useful and improve performance of soft-float implementations of Quad-Precision math library functions.
This header covers operations that are any of the following:
The discussion above raises an interesting question. If we can provide useful implementations of Quad-Precision classification, extract/insert, and compare exponent operations, why not continue with Quad-Precision compare, convert to/from integer, and arithmetic operations?
This raises the stakes in complexity and size of implementation. Providing a vector soft-float implementation equivalent to the GCC run-time libgcc __addkf3/__divkf3/__mulkf3/__subkf3 would be a substantial effort. The IEEE standard is exacting about rounding and exception handling. Comparisons require special handling of signed zero, infinities, and NaNs. Even float/integer conversions require correct rounding and return special values for overflow. Also it is not clear how such an effort would be accepted.
The good news is PVECLIB already provides a strong quadword integer operations set. Integer Add, subtract, and multiply are covered with the usual compare/shift/rotate operations (See vec_int128_ppc.h and vec_int64_ppc.h). The weak spot is general quadword integer divide. Until recently, integer divide has not been part of the vector ISA. But the introduction of Vector Divide Signed/Unsigned Quadword in POWER10 raises the priority of vector integer divide for PVECLIB.
For now we propose a phased approach, starting with enablers and infrastructure, building up layers, starting simple and adding complexity.
The intent is that such PVECLIB operations can be mixed in with or substituted for C Language _Float128 expressions or functions. The in-lined operations should have performance advantages over equivalent library functions on both POWER8/9.
This is a big list. It is TBD how far I will get given my current limited resources.
Most math library functions need to test the data class (normal, infinity, NaN, etc) and/or range of input values. This usually involves separating the sign, exponent, and significand out from __float128 values, and comparing one or more of these parts to special integer values.
PowerISA 3.0B (POWER9) provides instructions for these in addition to a comprehensive set of arithmetic and compare instructions. These operations are also useful for the soft-float implementation of __float128 for POWER8 and earlier. The OpenPOWER ABI specifies __float128 parameters are in VRs and are immediately accessible to VMX/VSR instructions. This is important as the cost of transferring values between VRs and GPRs is quite high on POWER8 and even higher for POWER7 and earlier (which requires store to temporaries and reload).
Fortunately these operations only require logical (and/or/xor), shift and integer compare operations to implement. These are available as vector intrinsics or provided by PVECLIB (see vec_int128_ppc.h).
The operations in this group include:
For example the data class test isnan:
Which has implementations for POWER9 (and later) and POWER8 (and earlier).
For POWER9 it generates:
Which uses the intrinsic scalar_test_data_class() to generate the VSX Scalar Test Data Class Quad-Precision instruction with "data class mask" of class.NaN to set the condition code. If the condition is a match, load the 128-bit bool value of all 1's (true). Otherwise load all 0's (false).
For POWER8 it generates
The first 7 instructions above, load the constant vectors needed by the logic. These constants only need to be generated once per function and can be shared across operations.
In the C code we use a special transfer function combined with logical AND complement (vec_andc_bin128_2_vui32t()). This is required because while __float128 values are held in VRs, the compiler considers them to be scalars and will not allow simple casts to (any) vector type. So the PVECLIB implementation provides xfer functions using a union to transfer the __float128 value to a vector type. In most cases this logical transfer simply serves to make the compiler happy and does not need to generate any code. In this case the xfer function combines the transfer with a vector and complement to mask off the sign bit.
Then compare the masked result as a 128-bit integer value greater than infinity (expmask). Here we use the vec_cmpgtuq() operation from vec_int128_ppc.h. For POWER8, vec_cmpgtuq() generates the Vector Subtract and Write Carry Unsigned Quadword instruction for 128-bit unsigned compares. A '0' carry indicates greater than. The next two instructions (from vec_setb_ncq()) convert the carry bit to the required 128-bit bool value.
While the POWER8 sequence requires more instructions (including the const vector set up) than POWER9, it is not significantly larger. And as mentioned above, the set-up code can be optimized across operations sharing the same constants. The code (less the setup) is only 10 cycles for POWER8 vs 6 for POWER9. Also the code is not any larger than the function call overhead for the libgcc runtime equivalent __unordkf2. And it is much faster than the generic soft-float implementation.
Another example, Scalar Extract Exponent Quad-Precision:
Which has implementations for POWER9 (and later) and POWER8 (and earlier).
For POWER9 it generates the VSX Scalar Extract Exponent Quad-Precision instruction.
For POWER8 we generate
The first 3 instructions above load the constant vector needed by the logic. This constant only needs to be generated once per function and can be shared across operations.
Again we use a special transfer function combined with logical AND (vec_and_bin128_2_vui32t()) to transfer the __float128 to a vector type and mask off all bits except for the 15-bit exponent. Then we rotate the exponent logically right 48 bits to right justify the exponent in vector doubleword 0. This matches the results of the xsxexpqp instruction.
The IEEE-128 floating-point storage (external) format fits neatly in 128-bits. But this compact format needs to be expanded internally during QP operations. The sign and exponent are normally manipulated separately from the significand. And for finite values the Leading-bit (implied but not included in the storage format) must be restored to take part in arithmetic/rounding/normalization operations.
For a soft-float implementation of IEEE-128 on POWER8 we want to extract these components into 128-bit vector registers and operate on them using vector instructions. This allows direct use of 128-bit arithmetic/shift/rotate operations (see vec_int128_ppc.h), while avoiding expensive transfers between VRs and GPRs.
To extract the sign-bit we can either AND with a 128-bit mask or use a set-bool operation (vec_setb_qp() or vec_setb_sq()). The masked sign-bit can be ORed with the final IEEE-128 vector result to set the appropriate sign. The 128-bit vector bool can be used with vec_sel() (vec_self128(), vec_selsq(), vec_seluq()) to select results based on the sign-bit while avoiding branch logic.
We use vec_xsxexpqp() to extract the 15-bit exponent into a vector doubleword integer element. The biased exponent is returned in the high doubleword (matching the POWER9 instruction). Depending on the operation, the exponent (or derived values) may need to be transferred/replicated to the low doubleword element. This is easily accomplished using vec_splatd(). Operations requiring two Quad-precision operands can combine the extracted exponents into a single vector doubleword using vec_mrgahd().
We use vec_xsxsigqp() to extract the 113-bit significand into a vector quadword integer. This operation restores the leading-bit for normal (not NaN, Infinity, denormal or zero) values. The significand is returned right-justified in the quadword.
At the end of the operation we can use vec_or() and vec_xsiexpqp() to combine these (sign, exponent, and significand) components into an IEEE-128 result.
Internal IEEE floating-point operations will need/generate additional bits to support normalization and rounding. The PowerISA describes a VSX Execution Model for IEEE Operations
IEEE quad-precision execution model
| | | 0 | 1 – 112 | | | |
|---|---|---|---|---|---|---|
| S | C | L | FRACTION | G | R | X |

- S: Sign bit
- C: Carry bit
- L: Leading bit, also called the implicit or hidden bit
- FRACTION: Fraction (112-bits)
- G: Guard bit
- R: Round bit
- X: (X) AKA Sticky bit, logical OR of remaining bits
This model is a guide for processor design and soft-float implementors. This is also described as the Intermediate result Representation (IR). As such the implementation may arrange these bits into different registers as dictated by design and performance.
The GRX bits extend the low order bits of the fraction and are required for rounding. Basically these bits encode how near the intermediate result is to a representable result. The GR bits are required for post-normalization of the result and participate in shifts during normalization. For right shifts, bits shifted out of the R-bit are logically ORed into the X-bit. For left shifts, 0 bits are shifted into the R-bit (the X-bit is ignored).
As mentioned before, it is convenient to keep the sign-bit in a separate vector quadword. This is not an extension of the significand but is needed to select results for arithmetic and some rounding modes. The remaining (C through X) bits can be represented in a vector quadword register or a vector register pair.
For example integer to QP conversions can be represented in a vector quadword by left justifying the magnitude before normalization and rounding. For example from vec_xscvuqqp():
See Examples for Round to Nearest Even and Examples for Round toward Zero.
The Round to Nearest Even case may increment the significand and that may generate a carry from the L-bit. One option is to use vec_addcuq() to capture the carry. For example:
In this case having the carry as a separate vector simplifies adjusting the exponent.
Quad-precision addition and subtraction is a case where right justifying the IR is helpful. For addition/subtraction the IR only needs 117-bits which can be accommodated in a single 128-bit vector. Significands (which includes the leading/implicit bit) can be converted to IR form by shifting left 3-bits. This still leaves room on the left for the carry-bit. For example:
In this case we need to ensure that any right shifts of the IR collect any bits shifted-away into the X-bit. For example:
In this case, condition-1 means that, only b_sig needs right shifting before significand addition. The a_sig can be used directly as it had previously been left shifted 3-bits where the GRX-bits were set to 0b000.
A simpler case occurs when addition generates a carry. Here we need to shift right 1-bit while preserving any nonzero X-bit. For example:
These two sequences preserve the X-bit going into the rounding stage. See Examples for Round to Nearest Even and Examples for Round to Odd.
Quad-precision Multiply and Multiply-Add require quadword register pairs to hold the IR product. The product of two 113-bit significands requires 226-bits. This includes the product C-/L-bits but we will need at least 3 additional bits for GRX.
We can use operations from vec_int128_ppc.h to produce the double quadword product. By pre-adjusting the inputs before the multiply we can align the split between the high 114-bits (right justified) and low 112-bits (left justified) of the product to align with the high and low quadword registers. For example:
The high_sig includes the C-/L-bits and high-order 112-bits of product fraction. The low_sig includes the low order 112-bits of the fraction followed by 16-bits of zero.
Most of the low-order fraction bits contribute to the X-bit. So we can normally delay collecting x-bits until the rounding stage. Only the most extreme denormals will result in a right shift large enough to require intervention to preserve bits that would otherwise be shifted away.
After normalization the high-order bits of low_sig become the GRX-bits. Actually we can treat the low order 126-bits as uncollected x-bits. We use this to simplify the rounding process. For this case we can manipulate low_sig to generate a carry for rounding up/down/odd and then add/subtract/or this carry to high_sig to produce the rounded product significand. See Examples for Round to Nearest Even and Examples for Round to Odd.
The PowerISA supports 6 rounding modes for Quad-Precision:
The first four modes are encoded in the FPSCR.RN rounding mode bits. The last two are encoded in instructions as instruction local overrides. The VSX Scalar Round to Quad-Precision Integer instruction can override the RN and encode any of the six rounding modes.
The rounding mode results are defined in terms of the intermediate result (IR), and how close it is to the representable result, based on the GRX-bits. The IR is either: exact, closer to the next lower (NL) representable result, midway between, or closer to the next higher (NH) representable result.
G | R | X | interpretation |
---|---|---|---|
0 | 0 | 0 | IR is exact |
0 | 0 | 1 | IR is closer to NL |
0 | 1 | 0 | IR is closer to NL |
0 | 1 | 1 | IR is closer to NL |
1 | 0 | 0 | IR is midway between NL and NH |
1 | 0 | 1 | IR is closer to NH |
1 | 1 | 0 | IR is closer to NH |
1 | 1 | 1 | IR is closer to NH |
Next lower is effectively truncating the IR (setting GRX = 0b000), while next higher will increment the significand by one.
The exact coding for rounding modes depends on how the IR is represented in vector registers and this may differ by operation. Conversions, addition, and subtraction tend to use a single vector register where the GRX resides in the low-order bits. While Multiplies generate double quadword results and so use vector register pairs. Here the CL and Fraction bits are right justified in a high_sig vector. While the low-order fraction/GRX bits are left justified in a low_sig vector.
Example for Convert to Quad-precision:
This code runs about 16 instructions.
Example for Add/Subtract Quad-precision:
Adding 3 plus the fraction odd bit to the GRX-bits generates a carry into the low-order fraction bit for rounding. In this case we use the extend form of add quadword to effect a 3 way add. After rounding convert the IR into a significand by shifting right 3 bits.
Example for Multiply Quad-precision:
Here we take advantage of uncollected x-bits in low_sig. Until we add the rmask and generate the carry, we can OR/ADD p_odd to any bit in low_sig except the Guard or Round bits. In this case we use the extend/carry form of add quadword to effect a 3 way add and generate the carry/round bit.
Example for Convert Quadword to Quad-precision:
This code runs about 3 instructions.
Example for Convert to Quad-precision.
This code runs about 6 instructions to load the mask and round=odd.
Example for Add/Subtract Quad-precision:
Examples for Multiply Quad-precision. For this case we can manipulate low_sig to generate a carry for rounding up/down/odd and then add/subtract/or this carry to high_sig to produce the rounded product significand. For example round-to-odd:
IEEE floating-point compare is a bit more complicated than binary integer compare operations. The two main complications are; Not-a-Number (NaN) which IEEE insists are unordered, and signed 0.0 where IEEE insists that -0.0 is equal to +0.0. If you ignore the NaN and signed 0.0 cases you can treat floating-point values as signed magnitude binary integers, and use integer compares and boolean logic. Which looks like this:
Where; =f, <f, and <=f are the desired floating-point compares, =s, <s, <=s, >s and >=s, are signed integer compares, and =u, >u, and >=u are unsigned integer compares.
One key implication of this is that we will need signed and unsigned 128-bit compare operations. Instructions for 128-bit integer compares were added for PowerISA 3.1 (POWER10) but we also need to support POWER8/9. The good news is that PowerISA 2.07B (POWER8) includes Vector Add/Subtract Modulo/Carry/Extend Quadword instructions. Specifically Vector Subtract & write Carry Unsigned Quadword can implement all the unsigned ordered (<, <=, >, >=) compares by manipulating the comparand order and evaluating the carry for 0 or 1.
POWER8 also includes vector doubleword integer compare instructions. And the Vector Compare Equal To Unsigned Doubleword instruction can be used with a few additional logical operations to implement 128-bit equal and not equal operations. These operations are already provided by vec_int128_ppc.h.
Some additional observations:
Now we can tackle the pesky signed 0.0 case. The simplest method is to add another term that test for either a or b is -0.0. This simplifies to just logical a OR b and unsigned compare to -0.0. Which looks like this:
Again we can replace signed compares (a >= 0) and (a < 0) with a single vec_setb_qp() and simplify the boolean logic by using vec_sel(). For the ((a | b) != 0x80000000...0) term we can save an instruction by replacing vec_cmpneuq() with vec_cmpequq() and replacing the AND operation with AND complement.
This sequence runs 27 instructions when you include the constant loads.
An alternative compare method converts both floating-point values in a way that a single (unsigned) integer compare can be used.
An interesting feature of this method is that +0.0 becomes (0x00000000 + 0x80000000 = 0x80000000) and -0.0 becomes (0x80000000 - 0x80000000 = 0x80000000) which effectively converts any -0.0 into +0.0 for comparison. Signed 0.0 solved.
Another optimization converts (n = n + 0x80000000) to (n = n XOR 0x80000000). Gives the same result and for POWER8 a vec_xor() is 2 cycles latency vs 4 cycles for _vec_adduqm().
This sequence runs (approximately) 20 instructions when you include loading the required constants. It also manages to use only splat-immediate forms to load constants and so does not need to establish the TOC pointer nor any address calculations to access constants via load instructions.
The next IEEE issue is detecting NaNs and returning unordered status. Adding the following code to a compare operation ensures that if either comparand is NaN, false (unordered) is returned for compares (eq, lt, gt).
The pair of vec_all_isnanf128() operations add significant overhead both in code size (39 instructions) and cycles. This form should only be used if it is required for correct results and has not been tested by prior logic in this code path.
IEEE floating-point conversions are also a bit complicated. Dealing with Not-a-Number (NaN), Infinities and subnormal is part of it. But the conversion may also require normalization and rounding depending on element size and types involved. Some examples:
PowerISA 3.0 (POWER9) includes full hardware instruction support for Quad-Precision, including:
PowerISA 3.1 (POWER10) includes:
For POWER8 (and earlier) we need to do a little more work. The general plan for conversion starts by disassembling the input value into its parts and analyze. For signed integer values disassemble usually means sign and unsigned magnitude. Analysis might be a range check or counting leading zeros. For floating point values this is usually sign, exponent, and significand. Analysis usually means determining the data class (NaN, infinity, normal, subnormal, zero) as each requires special handling in the conversion.
Conversion involves adjusting the parts as needed to match the type of the result. This is normally only adds and shifts. Finally we need to reassemble the parts based on the result type. For integers this is normally just converting the unsigned magnitude to a signed 2's complement value based on the sign of the input. For floating-point this requires merging the sign bit with the (adjusted) significand and merging that with the (adjusted) exponent.
The good news is that all of the required operations are already available in altivec.h or PVECLIB.
This is one of the simpler conversions as the conversion is always exact (no rounding/truncation is required, and no overflow is possible). The process starts with disassembling the double-precision value.
We ensure that the low-order doubleword of the vector f64 is zeroed. This is necessary for when we normalize the 128-bit significand for the quad-precision result. The operations vec_xvxexpdp() and vec_xvxsigdp() are provided by vec_f64_ppc.h supporting both the POWER9 instruction and equivalent implementation for POWER8. And finally we extract the sign-bit. We can't use the copysign() here due to the difference in type.
Now we analyze the data class of the double-precision input.
This code is arranged with an eye to the most common cases and specifics of the conversion required by each data class. The operations vec_all_isfinitef64(), vec_all_isnormalf64() and vec_all_iszerof64() are provided by vec_f64_ppc.h supporting both the POWER9 instruction and equivalent implementation for POWER8.
The normal case requires shifting the significand and adjusting the exponent.
The double significand has the fraction bits starting at bit-12 and the implied '1' in bit-11. For quad-precision we need to shift this right 4-bits to align the fraction to start in bit-16. We need a quadword shift as the significand will now extend into the high order bits of the second (low order) doubleword. To adjust the exponent we need to convert the double biased exponent (1 to 2046) into unbiased (-1022 to +1023) by subtracting the exponent bias (+1023 or 0x3ff) value. Then we can add the quad-precision exponent bias (+16383 or 0x3fff) to compute the final exponent. We can combine the bias difference into a single constant (0x3fff - 0x3ff) and only need a single add at runtime.
The operation vec_srqi() is provided by vec_int128_ppc.h and vec_addudm() is provided by vec_int64_ppc.h. We use PVECLIB operations here to ensure that this code is safe to use with older compilers and pre-POWER8 processors.
The zero case requires setting the quad-precision significand and exponent to zero.
We know that the double significand and exponent are zero, so just assign them to the quad-precision parts. The sign bit will be applied later with the common insert exponent code.
The subnormal case is a bit more complicated. The tricky part is while the double-precision value is subnormal the equivalent quad-precision value is not. So we need to normalize the significand and compute a new exponent.
We use a doubleword count leading zeros (clz) and shift left to normalize the significand so that the first '1'-bit moves to bit-0. Then we compute the quad-precision exponent by subtracting the clz value from a constant (16383 - (1023 - 12)). This represents the quad-precision exponent bias, minus the double-precision exponent bias, minus the minimum leading zero count for the double-precision subnormal significand.
The NaN/Infinity case requires shifting the significand and setting the exponent to quad-precision max.
We need this shift as NaN has a non-zero significand and it might be nonzero in one of the low order bits. Separating out the infinity case (where the significand is zero) is not worth the extra (isnan) test to avoid the shift.
Now that all the parts are converted to quad-precision, we are ready to reassemble the QP result.
Putting this all together we get something like:
At this stage we have a functionally correct implementation and now we can look for opportunities for optimization. One issue is the generated code is fairly large (~436 bytes and ~100 instructions). For POWER8 the data class predicates (vec_all_isfinitef64, etc) each require one or more vector constant loads and bit mask operations before the associated vector compares. Also the extract significand operation requires the equivalent of isnormal (with two vector compares) as preparation for conditionally restoring the implied (hidden) bit.
By testing the extracted (exponent and significand) parts directly we can simplify the compare logic and eliminate some (redundant) vector constant loads. For example:
The implementation based on this logic is smaller (~300 bytes and ~75 instructions). Performance results TBD.
Converting binary integers to floating point is simpler as there are fewer data classes to deal with. Basically zero and non-zero numbers (no signed 0s, infinities or NaNs). Also the conversion from 64-bit integers to 128-bit floating-point is always exact (there is no rounding).
Unsigned doubleword is the simplest case. We only need to test for binary zero. If zero just return a QP +0.0 constant. Otherwise we can treat the binary magnitude as a denormalized number and normalize it. The binary zero test and processing looks like this:
For the non-zero case we assume the binary point follows the unit bit (bit-63) of the 64-bit magnitude. Then we use count leading zeros to find the first significant bit. This count is used to normalize/shift (left justify) the magnitude and adjust the QP exponent to reflect the binary point following the unit (original doubleword bit 63) bit. So far we are using only doubleword data and instructions.
The high order bit (after normalization) will become the implicit (hidden) bit in QP format. So we shift the quadword right 15-bits to become the QP significand. This shift includes the low order 64-bits we zeroed out early on and zeros out the sign-bit as a bonus. Finally we use vec_xsiexpqp() to merge the adjusted exponent and significand.
The signed doubleword conversion is a bit more complicated. We deal with the zero case in the same way. Otherwise we need to separate the signed doubleword into a sign-bit and unsigned 64-bit magnitude. Which looks something like this:
The normalization process is basically the same as unsigned but we merge the sign-bit into the significand before inserting the exponent.
Conversions between quad-precision and quadword integers are complicated by the fact that the QP significand is only 113-bits while the quadword integer magnitude can be 127/128 bits. It may not be possible to represent the quadword magnitude exactly. Conversions from quad-precision float to integer may have nonzero fractions which require rounding/truncation.
For POWER9 we have the VSX Scalar Convert with round to zero Quad-Precision to Signed/Unsigned Doubleword (xscvqpsdz/xscvqpudz) instructions. For POWER10 we have the VSX Scalar Convert with round to zero Quad-Precision to Signed/Unsigned Quadword (xscvqpsqz/xscvqpuqz) instructions. Conversion using other rounding modes require using VSX Scalar Round to Quad-Precision Integer (xsrqpi) instruction.
For this example we will look at Convert with Round to Zero Quad-Precision to Unsigned Quadword. The POWER10 operation can be implemented as a single xscvqpuqz instruction. For example:
We use in-line assembler here as there are no current or planned compiler intrinsics for this and the C language only supports conversions between __float128 and __int128 scalars. The scalar conversions return the __int128 result in a GPR pair, while we need the result in a vector register.
The POWER8 implementation looks like this:
As is the usual for floating-point conversions, we extract the sign, significand, and exponent then test for class and range. We compare the extracted exponent directly using vector doubleword compares. These are faster (on POWER8) than quadword compares but require doubleword splatting the QP exponent and compare constants for correct results. This only requires one additional instruction (xxpermdi) as the vector constants will be loaded as quadwords either way.
The outer test is for NaN/Infinity. These should be rare so we use __builtin_expect(). The implementation returns special values to match the instruction definition.
Once we know the value is finite, we check for greater than or equal to +1.0. Negative or fractional values return quadword zero. Then we check for less than 2**128. If not we return all ones (2**128 - 1).
If the input is in the valid range for unsigned quadword we left-justify the significand then shift the quadword right by (127 - unbiasedexp). The right shift truncates (round toward zero) any fractional bits. See vec_xscvqpuqz().
The signed operation follows similar logic with appropriate adjustments for negative values and reduced magnitude range. The doubleword versions of the convert operation follows the same outline with different range constants. See vec_xscvqpsqz(), vec_xscvqpudz() and vec_xscvqpsdz().
Conversions from doubleword integer to quad-precision float can be represented exactly and do not require any rounding. But conversions from quadword integer to quad-precision float may overflow the 113-bit significand which does require rounding.
For POWER9 we have the VSX Scalar Convert Signed/Unsigned Doubleword to Quad-Precision format (xscvsdqp/xscvudqp) instructions. For POWER10 we have the VSX Scalar Convert with Round Signed/Unsigned Quadword to Quad-Precision format (xscvsqqp/xscvuqqp) instructions. One of four rounding modes is selected from the 2-bit FPSCR.RN field. The default rounding mode is Round to Nearest Even which we will use in this example. Convert using other rounding modes by changing the FPSCR.RN field.
For example:
The POWER10 implementation uses the xscvuqqp instruction. While POWER9 implementation uses xscvudqp instructions to convert the high/low 64-bit halves of the quadword integer. To complete the conversion we need to multiply the converted high 64-bits by 2**64 then add the lower converted 64-bits. The compiler should generate something like this:
The POWER8 implementation looks like this:
In this example the significand (including the L-bit) is right justified in the high-order 113-bits of q_sig. The guard, round, and sticky (GRX) bits are in the low-order 15 bits. The sticky-bits are the last 13 bits and are logically ORed (or added to 0x1fff) to produce the X-bit.
The signed quadword conversion is a bit more complicated for both POWER9/8. For example:
For POWER9 we can not just use the signed doubleword conversions for this case. First we split the signed quadword into a 128-bit boolean (representing the sign) and an unsigned quadword magnitude. Then perform the unsigned conversion to QP format as for vec_xscvuqqp(), and finally use vec_copysignf128() to insert the original sign into the QP result.
Similarly for POWER8:
TBD
TBD
The POWER9 (PowerISA 3.0B) processor provides a full set of Quad-Precision arithmetic operations; add, divide, multiply, multiply-add/sub, and subtract. The compilers (that support IEEE128) provide normal C-language arithmetic operators for the __float128 (ISO _Float128) data type. The compiler will generate in-line quad-precision instructions for the (-mcpu=power9) target and calls to the (soft-float) runtime for earlier processor targets.
POWER9 also provides a round-to-odd override for these operations. This helps software avoid double rounding errors when rounding to smaller precision. These are supported with compiler built-ins (or in-line assembler).
The compilers (that support IEEE128) provide a soft-float implementation for POWER8. However the runtime does not support round-to-odd as a rounding mode. Also the compiler built-ins for round-to-odd arithmetic are disabled for POWER8 and earlier.
As a minimum the PVECLIB implementation should implement POWER8 equivalents to the compiler built-ins supported for ISA 3.0. This would include the explicit round-to-odd operations.
The PVECLIB implementation of Multiply Quad-Precision with Round-to-Odd will use the POWER9 xsmulqpo instruction if the compile target supports it. Otherwise provide a POWER8 VSX implementation using operations from vec_int128_ppc.h and vec_int64_ppc.h. For example:
We prefer the compiler built-in (if available) but can substitute in-line assembler if needed. The built-in is subject to additional compiler optimizations (like instruction scheduling) while in-line assembler is not.
The PVECLIB soft-float implementation can leverage the 128-bit vector registers and operations supported by POWER8. The implementation starts with the usual exponent and significand extraction and ends with merging the computed sign-bit with the significand and inserting the computed exponent. For example:
Finite operands should be the most common case. So it may help the compiler to use __builtin_expect(). For example:
For finite operands we need to multiply the significands, sum the (unbiased) exponents, normalize the product, round, and check for exponent under/overflow. We can use vec_muludq() to multiply two 128-bit unsigned values returning 256-bit product in two (high/low) 128-bit vectors.
The product of two 113-bit significands is up to 226-bits (depending on inputs). From this we only need 117-bits (C-bit, L-bit, 112 Fraction bits, G-bit, R-bit, and X (sticky) bit). The X-bit is the logical OR of all 110-bits to the right of the R-bit. But we should not discard (or collapse) any sticky bits until after normalization.
We have some latitude on how we represent this product in vector registers. It is convenient for rounding if the high order 114-bits (C, L, Fraction -bits) are right justified in the high vector. While the low order 112-bits are left justified in the low vector. One way to accomplish this is:
There are some special considerations for denormal and zero (+- 0.0) operands. Both have a biased exponent of 0x0000 but we can't use that to compute the product exponent. If either significand is zero then the product is zero. We can short circuit this by returning the product sign-bit (Asign XOR Bsign) followed by 127 0b0s.
Otherwise a denormal is encoded as a biased exponent of 0x0000 and a nonzero (112-bit) fraction. But the architecture defines a denormal as 2**Emin x (0.fraction). For Quad-Precision, Emin is defined as -16382 which is a biased exponent of 0x0001. So which value (0x0000 or 0x0001) is used to compute the product exponent for the intermediate result?
For denormal values we use Emin to compute the product exponent before normalization. This requires a small fix-up before computing the exponent. We can optimize things by nesting the denormal fix-up under the zero check.
The exponent vectors are splatted to both doublewords. We need the exponent in the high order doubleword for input to vec_xsiexpqp(). We need the exponent in the low order doubleword for computing shift amounts as input to vec_sldq(), vec_slq(), vec_srq(). Here we use biased exponents in the computation and result. The addition will double the bias so we need to correct it by subtracting the constant exp_bias.
Again the zero multiply case is rare, so we can help the compiler by using __builtin_expect(). Also we can replace the else block with boolean select logic to set exponents to Emin for denormal operands. This also eliminates a redundant compare for exp_dnrm.
The multiply may generate a carry in the intermediate result. This can occur when both operands are normal and sufficiently large. For example 1.5 x 1.5 = 2.25 (0x1.8 x 0x1.8 = 0x2.4). Any product with the C-bit set requires normalization by shifting one bit right and incrementing the exponent. For example:
The shift sequence above is optimized for the case of a 1 bit shift right double quadword for POWER8.
This case is not rare at all so __builtin_expect() is not helpful for this case. Also quadword compare is an expensive test and requires a load of the sigov constant value. We only need to compare the high order 16-bits of the significand to detect the Carry/Leading bits.
An alternative uses splat halfword to replicate the CL-bits across the vector and use splat immediate halfword for the compare value. For example:
Then use boolean select logic to propagate the right shifted significand and incremented exponent if carry detected. For example:
Next we may need to deal with denormal results. An intermediate result is considered tiny if the exponent is less than Emin. If the result is still tiny after normalization and rounding the result is denormal and we can set the product exponent to 0x0000 before inserting it into the sign/significand for the final result.
The normalization of a tiny intermediate result is a little complicated. The PowerISA states:
If the intermediate result is Tiny (i.e., the unbiased exponent is less than -16382) and UE=0, the significand is shifted right N bits, where N is the difference between -16382 and the unbiased exponent of the intermediate result. The exponent of the intermediate result is set to the value -16382.
This requires two tests. For example:
Again both cases are rare, so we can help the compiler by using __builtin_expect(). Also the second test is a quadword compare requiring another quadword constant. Both are expensive compared to the alternative.
We only need to compare the high order 16-bits of the significand to detect a zero Leading bit. Again use splat halfword to replicate the CL-bits across the vector and compare to zero. For example:
There are some issues that need to be addressed as we (attempt to) normalize tiny results. We need to assure as we shift bits right, any bits shifted out of the low order (112-bit) of the product, are (effectively) accumulated in the sticky-bit (X-bit). This is a consequence of deferred X-bit (accumulated) as we pass the whole 226-bit product (p_sig_h and p_sig_l) into the rounding step. This breaks down in to two cases:
The first case may leave significant bits in the fraction and the GR-bits. But we must preserve any bits below GR in the sticky-bit. In this case we accumulate and clear the low-order 109-bits of the product and OR them into the X-bit.
Now we can perform a double quadword shift right of up to 116 bits without losing any bits from the intermediate representation. For example:
The second case implies that all bits of the intermediate representation (bits CL through GR) will be shifted away and can only be accumulated in the sticky (X) bit.
There is another special case where the product exponent is greater than or equal to Emin but the significand is not in normal range. Otherwise we can continue to the rounding process.
This case can happen if a normal value is multiplied by a denormal. The normalization process for this case is to:
If the exponent is Emin then the result is denormal. Set the biased exponent to zero and continue to rounding.
Otherwise shift the significand left and decrement the exponent, until the exponent equals Emin or the significand is in normal range.
For the second case we use count leading zeros (vec_clzq()) to compute the number of bits (c_exp) we need to shift left to get a normalized significand. We also compute the number of bits (d_exp) we need to decrement the exponent to Emin. Then we take the minimum (of c_exp and d_exp) as the shift count. For example:
We use the minimum value (d_exp) to shift the significand left. Then check if the result is still denormal. If so set the result biased exponent to zero. Otherwise compute the normalized exponent.
Of course we can replace the exponent compare if-then-else with select logic. For example:
The round-to-odd case has some special considerations.
Round-to-odd is a simple operation. We already have the significand in a separate vector register from the GRX-bits. So we use vec_addcuq() to generate a carry-bit if there are any non-zero bits in the extended fraction. This carry bit is simply ORed into the low order bit of the significand.
Returning FLT128_MAX for exponent overflow is a special case for round-to-odd. Other rounding modes would return infinity. Again this is a rare case and we can help the compiler by using __builtin_expect(). For example:
When operands are not finite (infinite or Not-a-Number) we have to deal with a matrix of operand pairs and return specific result values.
Implementing this matrix of results does not require much computation, but does require conditional logic to separate all the cases. Another complication is that the matrix above specifies a Default Quiet NaN to be returned for any multiply of Infinity by zero (ignoring the operand's sign). In this case vec_const_nanf128() is returned while bypassing normal vec_xsiexpqp() exit sequence. All other cases must consider the operand's sign in the result. For example:
The PVECLIB implementation of Add Quad-Precision with Round-to-Odd will use the POWER9 xsaddqpo instruction if the compile target supports it. Otherwise provide a POWER8 VSX implementation using operations from vec_int128_ppc.h and vec_int64_ppc.h. For example:
We prefer the compiler built-in (if available) but can substitute in-line assembler if needed. The built-in is subject to additional compiler optimizations (like instruction scheduling) while in-line assembler is not.
The PVECLIB soft-float implementation can leverage the 128-bit vector registers and operations supported by POWER8. The implementation starts with the usual sign-bit, exponent and significand extraction and ends with merging the computed sign-bit with the significand and inserting the computed exponent. For example:
Floating-point addition requires that operands are represented with the same exponent before the add operation. This means shifting the significand of the smaller magnitude right by the absolute difference between the exponents. To reduce the effect of round-off error this shift must preserve any low order bits (shifted away) as the Guard, Round and Sticky bits for internal Intermediate Results (IR). For addition the IR only needs 117-bits which can be accommodated in a single 128-bit vector. Significands (which includes the leading/implicit bit) can be converted to IR form by shifting left 3-bits. This still leaves room on the left for the carry-bit. For example:
(See: Representing Intermediate results for Quad-Precision)
Floating-point addition will add or subtract magnitudes depending on the signs of the operands. If the signs are the same, simply add (unsigned quadword) the two (IR format) operands. Otherwise if the signs differ, simply subtract (unsigned quadword) the smaller magnitude from the larger. In this case the sign of the result is the sign of the larger magnitude.
This discussion implies that knowing the relative absolute magnitude up front can simplify the implementation. As we saw in Quad-Precision compares for POWER8 we can use quadword integer compares if we know that QP values are finite. By masking off sign-bit from the operands we can use unsigned quadword compare to determine relative magnitude. For example:
We can also generate a sign difference mask we can use later. For example:
Again a denormal is encoded as a biased exponent of 0x0000 and a nonzero (112-bit) fraction. But the architecture defines a denormal as 2**Emin x (0.fraction). For Quad-Precision, Emin is defined as -16382 which is a biased exponent of 0x0001. We need to use Emin in the IR to compute shift values. This requires a small fix-up before computing the IR. It is simpler at this point to apply Emin to both denormal and QP 0.0 values.
We vectorize this fixup by merging a_exp/b_exp into a vector doubleword x_exp and applying compare/select.
Before diving into the add operation we simplify the code that follows by swapping the internal representation of vfa/vfb if the absolute magnitude of vfa is less than vfb.
At this point we can assume that:
The inner if/then/else can be replaced with select logic. For example:
Now we are ready to compute the significand. For example:
Now handle the special case of a zero (0.0) result. Either added two zero operands or a subtraction (equal operands with different signs) produced an exact zero result. Exact zero results always have a positive sign, otherwise return the result with the same sign as vfa. For example:
Next check for overflow/carry. In this case we need to shift the significand right one bit and increment the exponent by one. Also ensure that an X-bit is not lost (shifted away) before we get to the rounding stage. For example:
Otherwise check for underflow (C- and L-bits are 0b0) which requires normalization and may result in a denormal value. Use count leading zeros to estimate the required normalizing left shift. The IR internal representation will normally have 12 leading zeros and so we need to adjust for that. Also we need to prevent shifting beyond denormal range (q_exp <= E_min), so use the minimum of leading zero count and the delta between the current (q_exp) exponent and E_min. For example:
The overflow/underflow tests above are expensive quadword integer compares and quadword constants. However all that is required is testing the C/L-bits. We can convert these tests into byte compares. For example:
If the current exponent is greater than E_min then we can safely use the minimum shift count to:
For example:
This can be simplified using boolean select logic. Here we replace vec_cmpud_all_le() with vec_cmpgtud() and vec_sel() which generates smaller faster code. For example:
After normalization we are ready to round the IR, where round-to-odd is the simplest case. For example:
After rounding we need one more check for overflow. For round-to-odd we will not see a significand overflow (non-zero C-bit) but we may have overflowed the exponent range.
For normal rounding modes, exponent overflow would generate an infinity. However round-to-odd is a special case that returns the maximum finite value __FLT128_MAX__. For example:
This ends the overall path for finite operands. The last step will merge the sign bit with the significand then insert the exponent. For example:
If either operand is a NaN or infinity special handling is required. See PowerISA 3.0B Table 53 Actions for xsaddqp[o].
The PowerISA specifies that the xsaddqpo operation returns a quiet NaN if either operand is a NaN:
For example:
The sign, significand, and exponent are combined into a __float128 quiet NaN and returned.
Otherwise one or both operands are infinity. Addition of an infinity and a finite or two infinities (of the same sign) returns infinity. But subtraction of infinities (addition of different signs) returns a default quiet NaN. For example:
The PVECLIB implementation of Subtract Quad-Precision with Round-to-Odd will use the POWER9 xssubqpo instruction if the compile target supports it. Otherwise provide a POWER8 VSX implementation based on the PVECLIB Add Quad-Precision implementation. This is based on the observation that;
For example:
Unfortunately it is not that simple. The PowerISA specifies that the xssubqpo operation must return the original src2 operand as a quiet NaN if:
See PowerISA 3.0B Table 95 Actions for xssubqp[o].
To ensure compatible results for POWER8 and POWER9 implementations we need to avoid negation if src2 is a NaN. For example:
This requires a relatively expensive (~14 cycle) vec_isnanf128() test and adds ~12 instructions to the vec_xssubqpo() operation. But this does produce correct results for all operand combinations.
To get a POWER8 implementation of Subtract Quad-Precision with Round-to-Odd with better performance we need to inject the src2 sign negation strategically into a copy of the Add Quad-Precision implementation. This should be after:
Once we know that neither operand is NaN we can simply flip the sign bit for source operand 2. For example once we know that both operands are finite:
The other case is the else leg of the finite case where at least one of the operands is NaN or infinity. For example once we know that none of the operands are NaN:
The changes for vec_xssubqpo() from vec_xsaddqpo() are:
The implementation examples above require a number of __binary128, vector __int128, and vector long long constants. These are used as (special) return values, binary masks, and integer constants for comparison and arithmetic.
In this case finite __float128 (AKA __binary128) constants are provided by the compiler (via -mfloat128) if supported. Literal constants can be defined using the 'q'/'Q' suffix. The compiler also provides macros for the IEEE range values; __FLT128_MAX__, __FLT128_MIN__, __FLT128_EPSILON__, and __FLT128_DENORM_MIN__. These macros define the appropriate literal constant using the 'Q' suffix.
The compiler also provides built-in functions for some special constant values; __builtin_infq (), __builtin_huge_valq (), __builtin_nanq (), and __builtin_nansq (). This assumes that you are using a compiler that supports __float128 types and operations and is enabled (via -mfloat128).
PVECLIB has to operate in an environment where the compiler's support for IEEE Float128 is disabled, missing, or incomplete. So this implementation may construct any __binary128 values it may need as vector constants then use the appropriate xfer function to create the required scalar value. For example:
PVECLIB provides splat operations vec_splat_s64() and vec_splat_s128() for small integer constants. For integers in the range -16 <-> 15 the implementation can use vec_splat_s32() then sign extend word elements to doubleword or quadword integer elements. This has the advantage of shorter instruction latency and avoids a vector load from storage. Any vector load has a chance of cache miss and associated cache reload latencies.
For constants outside of this range the implementation uses vec_splats() for which the compiler is expected to load a vector constant from storage. See: Loading small Doubleword constants and Loading small Quadword constants
Some special values and masks don't qualify as small integers and need to be constructed as vector constants in hexadecimal. For example:
For most of these values the compiler will generate a quadword constant in the read-only data segment and generate a vector load for any function that references that constant.
One exception is the quadword sign mask which the GCC compiler recognizes as a special case (See: Some special quadword constants). This is a 4 instruction sequence of vector splat immediate words and shift lefts. The nominal execution latency is 6 cycles. The quadword sign mask can be used for both signed quadword integer and quad-precision float values.
It would be useful if we could identify similar (non-load) sequences for other mask values. The Quad-Precision Quiet NaN bit mask (QNaNbit) is similar to the signmask. Quiet NaNs (QNaNs) represent the result of specific invalid operations or when Signaling NaNs are (Operation Exception) disabled. Quad-Precision Soft-float needs this mask to generate default QNaNs or convert an operand of an invalid operation to return a QNaN result. See: NaN and Infinity handling for Multiply, NaN and Infinity handling for Add, and NaN and Infinity handling for Subtract.
Like the signmask it requires the generation of a 0x80000000 constant that can be shifted into position. For example:
Unfortunately the compiler does not recognize this vector constant as a special case. So PVECLIB provides its own implementation. See: vec_mask128_f128Qbit().
For example the sign and magnitude masks are one's complements of each other. This allows us to get the effect of the magmask by reusing the signmask and using Vector AND with Complement. For example:
The alternative is to use the magmask and use Vector AND with Complement to extract the sign bits. For example:
This also applies to the vector select (vec_sel()) operation. Vector select is defined as bit-wise (vra & ~vrc) | (vrb & vrc) where vrc is the select mask. So the complement of the select mask can be used by swapping select inputs vra/vrb.
Using the complement has no advantage unless we can generate the magmask with a shorter/faster (than signmask) sequence. Or if the complement is used elsewhere (in the function) and using the complement (for both/multiple operations) allows the implementation to eliminate an expensive constant load.
So let's look at what we can do to generate the magnitude mask (magmask). For example we can shift the all ones vector right one bit:
which generates:
This sequence only requires 3 instructions and a nominal execution latency of 4 cycles. The vector splat immediate instructions are independent and can execute in parallel. So this technique for generating magmask and using it instead of signmask is advantageous. Also a clear advantage versus loading magmask as a vector constant (at 9+ cycles latency).
Now let's look at how we might generate the significand mask (sigmask). For example we can shift the all ones vector right 16-bits:
which generates:
Again this sequence only requires 3 instructions and a nominal execution latency of 4 cycles. The vector splat immediate instructions are independent and can execute in parallel. So generating the sigmask this way has an advantage over loading a vector constant.
More examples of special constants are single-bit masks used to compare/test/set the carry/leading bits of the significand. These constants are easy to generate as halfword/word immediate values but need to be shifted left into the high order halfword of the quadword. For example:
which generates:
Again this sequence only requires 3 instructions and a nominal execution latency of 4 cycles. The vector splat immediate instructions are independent and can execute in parallel. We could use the splat halfword immediate for '1' here but word constants are more commonly used and there is no advantage (in instruction count or latency). See: vec_splat_s64() and vec_splat_s128().
Similarly for the carry bit:
Some constants are just more difficult to construct. For example:
These constants are used to extract/insert exponents and to test operands for NaN/Infinity. They are used early in many Quad-precision operations, so any delay in getting these constants ready for use, will impact the performance of the whole operation.
The tricky part is generating any value with a sequence of '1's with '0's on both sides (not left or right justified). Especially within the constraints imposed by POWER8 (PowerISA 2.07B) and earlier machines. Any (non-load) sequence requires both a left and right shift and any bit (versus octet) shifts will require additional vector constants as shift counts. For example:
Which generates:
This is 5 instructions with a nominal latency of 6 cycles. The halfword shift right requires its own halfword shift count which is unlikely to find a common subexpression within Quad-precision codes.
Another possibility is to use Vector Pack Pixel (vpkpx) to convert a sequence of words (4x8-bit pixel channels) into a sequence of halfwords (a leading 1-bit channel followed by 3x5-bit channels). The high order channel copies word bit 7 to halfword bit 0. The next three channels are copied 5-bits at a time from the high-order 5-bits of word bytes 1-3. The pack pixel operation requires 8 word elements (256-bits from 2 VRs) to generate 8 halfword elements (one 128-bit vector result).
For example the word element constant 0x00ffffff generates the halfword element 0x7fff. The trick becomes to generate the vectors {0x00ffffff, 0, 0, 0} and {0, 0, 0, 0} using only vector immediate instructions. Where possible we should use built-ins and values common in other operations within PVECLIB and vec_f128_ppc.h. This effectively limits us to the vector splat immediate and Vector Splat Immediate Signed Word (vspltisw) and Vector Shift Left Double by Octet Immediate (vsldoi). For example:
Which generates:
This is actually worse than the previous example. Still 5 instructions with a nominal latency of 6-8 cycles. The last 3 instructions are serially dependent on both of the q_ones/q_zero generation operations. The second vsldoi in addition to vpkpx did not improve the performance.
Time to look more closely at the vpkpx operation. Perhaps we can take advantage of different handling of the auxiliary channel versus the color channels. We need a byte value where the low-order bit (bit-7) is '0' while the high-order five bits are 0b11111. For example the byte value 0xf8 (signed char -8) meets these requirements. The word element 0xf8f8f8f8 will pack to the halfword pixel 0x7fff. A vector of these word elements can be generated with vec_splat_s8(-8).
We need the q_zero constant to clear the right three word elements of the left vector and provide zeros to the right 128-bits of the 256-bits required for vpkpx. The q_ones constant is not required in this version. For example:
Which generates:
This is better at 4 instructions with a nominal latency of 4-6 cycles. The first two (splat immediate) instructions are independent and can execute in parallel, but the vsldoi and vpkpx are serially dependent on the results of both instructions. The q_zero constant is likely to be shared with the generation of other quadword constants. The vec_splat_s8(-8) may be shared with the doubleword exp_naninf constant. See vec_mask64_f128exp().
The doubleword form of the exponent mask (exp_naninf) is used as a comparand when we need to test two Quad-precision operands for main path (both finite) or for special case handling (either is NaN/Infinity). We can use a similar technique to that above.
The differences are that the mask is right (versus left) justified and we are splatting the mask to both doublewords. For example:
Here we can skip the __BYTE_ORDER__ conditional as the vec_packpx() operands are symmetrical. The example above generates:
Again 4 instructions with a nominal latency of 4-6 cycles.
Another set of difficult constants to generate are the small powers of 2 (greater than 8). Values 64 and 128 are used for range checks before normalization. These values do not fit into the signed 5-bit (vector splat) immediate fields available to POWER8 and earlier.
The sign-bit constant can be converted to the value 128 with a Vector Shift Left Double by Octet Immediate (vsldoi). For example:
This runs 4 instructions with the q_zero/q_ones immediate constants subject to common subexpression evaluation and sharing. This yields a latency of 4 to 6 cycles.
The doubleword form can be based on the above with an additional instruction to splat the low doubleword. For example:
This works for VSX enabled platforms (POWER7 and later) but bumps the latency to 6-8 cycles.
Another possibility is to use the Vector Count Leading Zeros instructions to generate constants for the number of bits in the element. For example:
This requires only 2 instructions and the q_zero immediate constant is subject to common subexpression evaluation and sharing. This yields a latency of 2 to 4 cycles.
To generate the doubleword constant 128 we can generate the constant 64 and double it. For example:
This runs 3 instructions with the q_zero immediate constant subject to common subexpression evaluation and sharing. This yields a latency of 4 to 6 cycles.
For example: using the classification functions for implementing the math library functions sine and cosine. The POSIX specification requires that special input values are processed without raising extraneous floating point exceptions and return specific floating point values in response. For example the sin() function.
The following code example uses functions from this header to address the POSIX requirements for special values input to sinf128():
For another example the cos() function.
The following code example uses functions from this header to address the POSIX requirements for special values input to cosf128():
Neither example raises floating point exceptions or sets errno, as appropriate for a vector math library.
High level performance estimates are provided as an aid to function selection when evaluating algorithms. For background on how Latency and Throughput are derived see: Performance data.
|
inlinestatic |
Absolute Quad-Precision.
Clear the sign bit of the __float128 input and return the resulting positive __float128 value.
processor | Latency | Throughput |
---|---|---|
power8 | 2-11 | 2/cycle |
power9 | 2 | 4/cycle |
f128 | a __float128 value containing a signed value. |
|
inlinestatic |
Return true if the __float128 value is Finite (Not NaN nor Inf).
An IEEE Binary128 finite value has an exponent between 0x0000 and 0x7ffe (a 0x7fff indicates NaN or Inf). The significand can be any value. Using the !vec_all_eq compare conditional verifies this condition and avoids a vector -> GPR transfer for platforms before PowerISA-2.07. The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 4-20 | 2/cycle |
power9 | 3 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return true if the __float128 value is infinity.
An IEEE Binary128 infinity has an exponent of 0x7fff and significand of all zeros. Using the vec_all_eq compare conditional verifies both conditions and avoids a vector -> GPR transfer for platforms before PowerISA-2.07.
processor | Latency | Throughput |
---|---|---|
power8 | 4-20 | 2/cycle |
power9 | 3 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return true if the __float128 value is Not a Number (NaN).
An IEEE Binary128 NaN has an exponent of 0x7fff and nonzero significand. Using the combined vec_all_eq / vec_any_gt compare conditional verifies both conditions and avoids a vector -> GPR transfer for platforms before PowerISA-2.07. The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 6-14 | 1/cycle |
power9 | 3 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return true if the __float128 value is normal (Not NaN, Inf, denormal, or zero).
An IEEE Binary128 normal value has an exponent between 0x0001 and 0x7ffe (a 0x7fff indicates NaN or Inf). The significand can be any value (except 0 if the exponent is zero). Using the combined vec_all_ne compare conditional verifies both conditions and avoids a vector -> GPR transfer for platforms before PowerISA-2.07. The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 4-29 | 1/cycle |
power9 | 3 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return true if the __float128 value is subnormal (denormal).
An IEEE Binary128 subnormal has an exponent of 0x0000 and a nonzero significand. The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 8-29 | 1/cycle |
power9 | 3 | 2/cycle |
f128 | a vector of __binary128 values. |
|
inlinestatic |
Return true if either __float128 value (vra, vrb) is NaN.
An IEEE Binary128 NaN has an exponent of 0x7fff and nonzero significand. The sign bit is ignored. For POWER9 and later we use scalar_test_data_class(). Otherwise mask off the sign bit and compare greater than unsigned quadword to the integer equivalent of Quad-Precision infinity.
processor | Latency | Throughput |
---|---|---|
power8 | 16-25 | 1/cycle |
power9 | 6 | 1/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Return true if the __float128 value is +-0.0.
An IEEE Binary128 zero has an exponent of 0x0000 and a zero significand. The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 4-20 | 1/cycle |
power9 | 3 | 2/cycle |
f128 | a vector of __binary128 values. |
|
inlinestatic |
Transfer a quadword from a __binary128 scalar to a vector int and logical AND with a mask.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
mask | a vector unsigned int |
|
inlinestatic |
Transfer a quadword from a __binary128 scalar to a vector __int128 and logical AND Complement with mask.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
mask | a vector unsigned __int128 |
|
inlinestatic |
Transfer a quadword from a __binary128 scalar to a vector int and logical AND Complement with mask.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
mask | a vector unsigned int |
|
inlinestatic |
Vector Compare Equal (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa == vfb, otherwise all '0's. Zeros, Infinities and NaN of the same sign compare equal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or a VSX Scalar Compare Equal Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 6 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Equal (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa == vfb, otherwise all '0's. Zeros of either sign compare equal. Infinities of the same sign compare equal. A NaN in either or both operands compare unequal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Equal Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 18-30 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Equal (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa == vfb, otherwise all '0's. Zeros of either sign compare equal. Infinities and NaNs of the same sign compare equal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or a VSX Scalar Compare Equal Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 10 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Greater Than or Equal (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa >= vfb, otherwise all '0's. Zeros, Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 26-35 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Greater Than or Equal (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa >= vfb, otherwise all '0's. Zeros of either sign are converted to +0. Infinities of different signs compare ordered. A NaN in either or both operands compare unordered.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Greater Than Or Equal (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa >= vfb, otherwise all '0's. Zeros of either sign are converted to +0. Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Greater Than (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa > vfb, otherwise all '0's. Zeros, Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 26-35 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Greater Than (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa > vfb, otherwise all '0's. Zeros of either sign are converted to +0. Infinities of different signs compare ordered. A NaN in either or both operands compare unordered.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Greater Than (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa > vfb, otherwise all '0's. Zeros of either sign are converted to +0. Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Less Than or Equal (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa <= vfb, otherwise all '0's. Zeros, Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 26-35 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Less Than or Equal (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa <= vfb, otherwise all '0's. Zeros of either sign are converted to +0. Infinities of different signs compare ordered. A NaN in either or both operands compare unordered.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Less Than or Equal (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa <= vfb, otherwise all '0's. Zeros of either sign are converted to +0. Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Less Than (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa < vfb, otherwise all '0's. Zeros, Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 26-35 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Less Than (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa < vfb, otherwise all '0's. Zeros of either sign are converted to +0. Infinities of different signs compare ordered. A NaN in either or both operands compare unordered.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Less Than (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa < vfb, otherwise all '0's. Zeros of either sign are converted to +0. Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Greater Than Quad-Precision instruction. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Not Equal (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa != vfb, otherwise all '0's. Zeros, Infinities and NaN of the same sign compare equal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or a VSX Scalar Compare Equal Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 6 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Not Equal (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa != vfb, otherwise all '0's. Zeros of either sign compare equal. Infinities of the same sign compare equal. A NaN in either or both operands compare unequal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or (POWER10) VSX Scalar Compare Equal Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 18-30 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Not Equal (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return all '1's, if vfa != vfb, otherwise all '0's. Zeros of either sign compare equal. Infinities and NaNs of the same sign compare equal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision or a VSX Scalar Compare Equal Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 10 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Equal (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1, if vfa == vfb, otherwise 0. Zeros of either sign compare equal. Infinities of the same sign compare equal. A NaN in either or both operands compare unequal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 18-30 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Greater Than Or Equal (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa >= vfb, otherwise 0 (false). Zeros of either sign are converted to +0. Infinities of different signs compare ordered. A NaN in either or both operands compare unordered.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Greater Than (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa > vfb, otherwise 0 (false). Zeros of either sign are converted to +0. Infinities of different signs compare ordered. A NaN in either or both operands compare unordered.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Less Than Or Equal (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa <= vfb, otherwise 0 (false). Zeros of either sign are converted to +0. Infinities of different signs compare ordered. A NaN in either or both operands compare unordered.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Less Than (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa < vfb, otherwise 0 (false). Zeros of either sign are converted to +0. Infinities of different signs compare ordered. A NaN in either or both operands compare unordered.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Not-Equal (Unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1, if vfa != vfb, otherwise 0. Zeros of either sign compare equal. Infinities of the same sign compare equal. A NaN in either or both operands compare unequal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 18-30 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Equal (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return 1, if vfa == vfb, otherwise 0. Zeros, Infinities and NaN of the same sign compare equal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 6 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Greater Than Or Equal (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa >= vfb, otherwise 0 (false). Zeros, Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 26-35 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Greater Than (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa > vfb, otherwise 0 (false). Zeros, Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 26-35 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare All Less Than Or Equal (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa <= vfb, otherwise 0 (false). Zeros, Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 26-35 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare All Less Than (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa < vfb, otherwise 0 (false). Zeros, Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 26-35 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Not-Equal (Total-order) Quad-Precision.
Compare Binary-float 128-bit values and return 1, if vfa != vfb, otherwise 0. Zeros, Infinities and NaN of the same sign compare equal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 6 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Equal (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1, if vfa == vfb, otherwise 0. Zeros of either sign compare equal. Infinities and NaNs of the same sign compare equal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 10 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Greater Than Or Equal (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa >= vfb, otherwise 0 (false). Zeros of either sign are converted to +0. Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Greater Than (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa > vfb, otherwise 0 (false). Zeros of either sign are converted to +0. Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Less Than Or Equal (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa <= vfb, otherwise 0 (false). Zeros of either sign are converted to +0. Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Less Than (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1 (true), if vfa < vfb, otherwise 0 (false). Zeros of either sign are converted to +0. Infinities and NaNs are compared as signed values. Infinities and NaNs have the highest/lowest magnitudes.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise comparands are converted to unsigned integer magnitudes before using vector __int128 comparison to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 28-37 | 2/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare all Not-Equal (Zero-unordered) Quad-Precision.
Compare Binary-float 128-bit values and return 1, if vfa != vfb, otherwise 0. Zeros of either sign compare equal. Infinities and NaNs of the same sign compare equal.
For POWER9 (PowerISA 3.0B) or later, use a VSX Scalar Compare Unordered Quad-Precision. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 10 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Quad-Precision Exponents for Equal.
Compare the exponents of two Binary-float 128-bit values and return 1, if vfaexp == vfbexp, otherwise 0. A NaN in either or both operands compare unequal.
For POWER9 (PowerISA 3.0B) or later, use the VSX Scalar Compare Exponents Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 8-17 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Exponents Quad-Precision for Greater Than.
Compare the exponents of two Binary-float 128-bit values and return 1, if vfaexp > vfbexp, otherwise 0. A NaN in either or both operands returns 0.
For POWER9 (PowerISA 3.0B) or later, use the VSX Scalar Compare Exponents Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 8-17 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Exponents Quad-Precision for Less Than.
Compare the exponents of two Binary-float 128-bit values and return 1, if vfaexp < vfbexp, otherwise 0. A NaN in either or both operands returns 0.
For POWER9 (PowerISA 3.0B) or later, use the VSX Scalar Compare Exponents Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 8-17 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Vector Compare Exponents Quad-Precision for Unordered.
Compare two Binary-float 128-bit values and return 1, if either or both operands are NaN, otherwise 0.
For POWER9 (PowerISA 3.0B) or later, use the VSX Scalar Compare Exponents Quad-Precision instruction. Otherwise use vector __int128 arithmetic and logical operations to implement the equivalent Quad-precision floating-point operation. This leverages operations from vec_int128_ppc.h.
processor | Latency | Throughput |
---|---|---|
power8 | 8-17 | 1/cycle |
power9 | 3 | 2/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Generate Quadword constant 128.
Load immediate the quadword constant vui32_t {0, 0, 0, 128}.
processor | Latency | Throughput |
---|---|---|
power8 | 4-6 | 1/cycle |
|
inlinestatic |
Generate doubleword splat constant 128.
Load immediate the doubleword splat constant vui64_t {128, 128}.
processor | Latency | Throughput |
---|---|---|
power8 | 4-6 | 1/cycle |
|
inlinestatic |
return a positive infinity.
|
inlinestatic |
return a positive infinity.
|
inlinestatic |
return a quiet NaN.
|
inlinestatic |
return a signaling NaN.
|
inlinestatic |
Copy the sign bit from f128x and merge with the magnitude from f128y. The merged result is returned as a __float128 value.
processor | Latency | Throughput |
---|---|---|
power8 | 2-11 | 2/cycle |
power9 | 2 | 4/cycle |
f128x | a __float128 value containing the sign bit. |
f128y | a __float128 value containing the magnitude. |
|
inlinestatic |
Return 128-bit vector boolean true if the __float128 value is Finite (Not NaN nor Inf).
An IEEE Binary128 finite value has an exponent between 0x0000 and 0x7ffe (a 0x7fff indicates NaN or Inf). The significand can be any value. The vec_cmpeq conditional is used to generate the predicate mask for NaN / Inf, which is then inverted for the finite condition. The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 8-17 | 2/cycle |
power9 | 6 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return true (nonzero) value if the __float128 value is infinity. If infinity, indicate the sign as +1 for positive infinity and -1 for negative infinity.
An IEEE Binary128 infinity has an exponent of 0x7fff and a significand of all zeros. Using the vec_all_eq compare conditional verifies both conditions. A subsequent vec_any_gt checks the sign bit and sets the result appropriately. The sign bit is ignored when testing for infinity.
This sequence avoids a vector -> GPR transfer for platforms before PowerISA-2.07.
processor | Latency | Throughput |
---|---|---|
power8 | 12-32 | 1/cycle |
power9 | 3-12 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return a 128-bit vector boolean true if the __float128 value is infinity.
An IEEE Binary128 infinity has an exponent of 0x7fff and a significand of all zeros. The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 8-17 | 2/cycle |
power9 | 6 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return 128-bit vector boolean true if the __float128 value is Not a Number (NaN).
An IEEE Binary128 NaN has an exponent of 0x7fff and a nonzero significand. The sign bit is ignored. For POWER9 and later we use scalar_test_data_class(). Otherwise mask off the sign bit and compare greater than unsigned quadword to the integer equivalent of Quad-Precision infinity.
processor | Latency | Throughput |
---|---|---|
power8 | 14-23 | 1/cycle |
power9 | 6 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return 128-bit vector boolean true if the __float128 value is normal (Not NaN, Inf, denormal, or zero).
An IEEE Binary128 normal value has an exponent between 0x0001 and 0x7ffe (a 0x7fff indicates NaN or Inf). The significand can be any value (except 0 if the exponent is zero). The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 10-19 | 2/cycle |
power9 | 6 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return 128-bit vector boolean true value, if the __float128 value is subnormal (denormal).
An IEEE Binary128 subnormal has an exponent of 0x0000 and a nonzero significand. The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 16-25 | 1/cycle |
power9 | 6 | 1/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Return 128-bit vector boolean true value, if either __float128 value (vfa, vfb) is NaN.
An IEEE Binary128 NaN has an exponent of 0x7fff and a nonzero significand. The sign bit is ignored. For POWER9 and later we use scalar_test_data_class(). Otherwise mask off the sign bit and compare greater than unsigned quadword to the integer equivalent of Quad-Precision infinity.
processor | Latency | Throughput |
---|---|---|
power8 | 16-25 | 1/cycle |
power9 | 6 | 1/cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Return 128-bit vector boolean true value, if the __float128 value is +-0.0.
An IEEE Binary128 zero has an exponent of 0x0000 and a zero significand. The sign bit is ignored.
processor | Latency | Throughput |
---|---|---|
power8 | 8-17 | 2/cycle |
power9 | 6 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Generate Quadword C-bit mask Immediate.
Load immediate the quadword constant vui32_t {0x00020000, 0, 0, 0}.
processor | Latency | Throughput |
---|---|---|
power8 | 2-4 | 1/cycle |
|
inlinestatic |
Generate Quadword Quad-Precision exponent mask.
Load immediate the quadword constant vui32_t {0x7fff0000, 0, 0, 0}.
processor | Latency | Throughput |
---|---|---|
power8 | 6 | 1/cycle |
|
inlinestatic |
Generate Quadword L-bit mask Immediate.
Load immediate the quadword constant vui32_t {0x00010000, 0, 0, 0}.
processor | Latency | Throughput |
---|---|---|
power8 | 2-4 | 1/cycle |
|
inlinestatic |
Generate Quadword Quad-Precision magnitude mask.
Load immediate the quadword constant vui32_t {0x7fffffff, -1, -1, -1}.
processor | Latency | Throughput |
---|---|---|
power8 | 2-4 | 1/cycle |
|
inlinestatic |
Generate Quadword QNaN-bit mask Immediate.
Load immediate the quadword constant vui32_t {0x00008000, 0, 0, 0}.
processor | Latency | Throughput |
---|---|---|
power8 | 4-6 | 1/cycle |
|
inlinestatic |
Generate Quadword Quad-Precision significand mask.
Load immediate the quadword constant vui32_t {0x0000ffff, -1, -1, -1}.
processor | Latency | Throughput |
---|---|---|
power8 | 2-4 | 1/cycle |
|
inlinestatic |
Generate Quadword Quad-Precision Sign-bit mask.
Load immediate the quadword constant vui32_t {0x80000000, 0, 0, 0}.
processor | Latency | Throughput |
---|---|---|
power8 | 4-6 | 1/cycle |
|
inlinestatic |
Generate Doubleword Quad-Precision exponent mask.
Load the quadword constant vui32_t {0, 0x7fff, 0, 0x7fff}.
processor | Latency | Throughput |
---|---|---|
power8 | 4-6 | 1/cycle |
|
inlinestatic |
Merge High and Transfer function from a pair of __binary128 scalars to a vector long long int.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
vfa | a __binary128 floating point scalar value. |
vfb | a __binary128 floating point scalar value. |
|
inlinestatic |
Merge Low and Transfer function from a pair of __binary128 scalars to a vector long long int.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
vfa | a __binary128 floating point scalar value. |
vfb | a __binary128 floating point scalar value. |
|
inlinestatic |
Negative Absolute value Quad-Precision.
Unconditionally set the sign bit of the __float128 input and return the resulting negative __float128 value.
processor | Latency | Throughput |
---|---|---|
power8 | 2-11 | 2/cycle |
power9 | 2 | 4/cycle |
f128 | a __float128 value containing a signed value. |
|
inlinestatic |
Negate the sign bit of a __float128 input and return the resulting __float128 value.
processor | Latency | Throughput |
---|---|---|
power8 | 2-11 | 2/cycle |
power9 | 2 | 4/cycle |
f128 | a __float128 value containing a signed value. |
|
inlinestatic |
Transfer a quadword from a __binary128 scalar to a vector int and logical OR with mask.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
mask | a vector unsigned int |
|
inlinestatic |
Select and Transfer from one of two __binary128 scalars under a 128-bit mask. The result is a __binary128 of the selected value.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
vfa | a __binary128 floating point scalar value. |
vfb | a __binary128 floating point scalar value. |
mask | a vector bool __int128 |
|
inlinestatic |
Select and Transfer from one of two __binary128 scalars under a 128-bit mask. The result is a __binary128 of the selected value.
processor | Latency | Throughput |
---|---|---|
power8 | 2 | 2/cycle |
power9 | 2 | 4/cycle |
vfa | a __binary128 floating point scalar value. |
vfb | a __binary128 floating point scalar value. |
mask | a vector bool __int128 |
|
inlinestatic |
Vector Set Bool from Quadword Floating-point.
If the quadword's sign bit is '1' then return a vector bool __int128 that is all '1's. Otherwise return all '0's.
The resulting mask can be used in vector masking and select operations.
processor | Latency | Throughput |
---|---|---|
power8 | 4 - 6 | 2/cycle |
power9 | 6 | 2/cycle |
f128 | a 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Return int boolean true if the __float128 value is negative (sign bit is '1').
For POWER9 use scalar_test_neg (a special case of scalar_test_data_class). For POWER8 and earlier, vec_and with a signmask and then vec_all_eq compare with that mask generates the boolean of the sign bit.
processor | Latency | Throughput |
---|---|---|
power8 | 4-10 | 2/cycle |
power9 | 3 | 2/cycle |
f128 | a __float128 value in vector. |
|
inlinestatic |
Transfer function from a __binary128 scalar to a vector __int128.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
|
inlinestatic |
Transfer function from a __binary128 scalar to a vector short int.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
|
inlinestatic |
Transfer function from a __binary128 scalar to a vector int.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This is despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
|
inlinestatic |
Transfer function from a __binary128 scalar to a vector long long int.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This is despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
|
inlinestatic |
Transfer function from a __binary128 scalar to a vector char.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This is despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
|
inlinestatic |
Transfer a vector unsigned __int128 to __binary128 scalar.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This is despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a vector unsigned __int128 value. |
|
inlinestatic |
Transfer a vector unsigned short to __binary128 scalar.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This is despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a vector unsigned short value. |
|
inlinestatic |
Transfer a vector unsigned int to __binary128 scalar.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This is despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a vector unsigned int value. |
|
inlinestatic |
Transfer a vector unsigned long long to __binary128 scalar.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This is despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a vector unsigned long long value. |
|
inlinestatic |
Transfer a vector unsigned char to __binary128 scalar.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This is despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a vector unsigned char value. |
|
inlinestatic |
Transfer a quadword from a __binary128 scalar to a vector int and logical Exclusive OR with mask.
The compiler does not allow direct transfer (assignment or type cast) between __binary128 (__float128) scalars and vector types. This is despite the fact that the ABI and ISA require __binary128 in vector registers (VRs).
f128 | a __binary128 floating point scalar value. |
mask | a vector unsigned int |
|
inlinestatic |
VSX Scalar Add Quad-Precision using round to Odd.
The quad-precision element of vectors vfa and vfb are added to produce the quad-precision result. The rounding mode is round to odd.
For POWER9 use the xsaddqpo instruction. For POWER8 use this soft-float implementation using vector instructions generated by PVECLIB operations. For POWER7 and earlier use the compiler's soft-float implementation.
processor | Latency | Throughput |
---|---|---|
power8 | 54-71 | 1/cycle |
power9 | 12 | 1/12 cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
VSX Scalar Convert Double-Precision to Quad-Precision format.
The left most double-precision element of vector f64 is converted to quad-precision format.
For POWER9 use the xscvdpqp instruction. For POWER8 and earlier use vector instruction generated by PVECLIB operations.
processor | Latency | Throughput |
---|---|---|
power8 | ? | 2/cycle |
power9 | 3 | 2/cycle |
f64 | a vector double. The left most element is converted. |
|
inlinestatic |
VSX Scalar Convert with round Quad-Precision to Double-Precision (using round to odd).
The quad-precision element of vector f128 is converted to double-precision. The Floating point value is rounded to odd before conversion. The result is placed in doubleword element 0 while element 1 is set to zero.
For POWER9 use the xscvqpdpo instruction. For POWER8 and earlier use vector instructions generated by PVECLIB operations.
processor | Latency | Throughput |
---|---|---|
power8 | ? | 1/cycle |
power9 | 12 | 1/cycle |
f128 | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
VSX Scalar Convert with round to zero Quad-Precision to Unsigned doubleword.
The quad-precision element of vector f128 is converted to an unsigned doubleword integer. The Floating point value is rounded toward zero before conversion. The result is placed in element 0 while element 1 is set to zero.
For POWER9 use the xscvqpudz instruction. For POWER8 and earlier use vector instructions generated by PVECLIB operations.
processor | Latency | Throughput |
---|---|---|
power8 | ? | 2/cycle |
power9 | ? | 2/cycle |
f128 | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
VSX Scalar Convert with round to zero Quad-Precision to Unsigned Quadword.
The quad-precision element of vector f128 is converted to an unsigned quadword integer. The Floating point value is rounded toward zero before conversion.
For POWER10 use the xscvqpuqz instruction. For POWER9 and earlier use vector instruction generated by PVECLIB operations.
processor | Latency | Throughput |
---|---|---|
power8 | ? | 2/cycle |
power9 | ? | 2/cycle |
f128 | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
VSX Scalar Convert Signed-Doubleword to Quad-Precision format.
The left most signed doubleword element of vector int64 is converted to quad-precision format.
For POWER9 use the xscvsdqp instruction. For POWER8 and earlier use vector instruction generated by PVECLIB operations.
processor | Latency | Throughput |
---|---|---|
power8 | ? | 2/cycle |
power9 | 3 | 2/cycle |
int64 | a vector signed long long. The left most element is converted. |
|
inlinestatic |
VSX Scalar Convert Signed-Quadword to Quad-Precision format.
The signed quadword element of vector int128 is converted to quad-precision format. If the conversion is not exact the default rounding mode is "Round to Nearest Even".
For POWER10 use the xscvsqqp instruction. POWER9 only supports doubleword converts so use a combination of two xscvudqp and xsmaddqp instructions. For POWER8 and earlier use vector instructions generated by PVECLIB operations.
processor | Latency | Throughput |
---|---|---|
power8 | ? | 2/cycle |
power9 | 44-53 | 1/13cycles |
int128 | a vector signed __int128 which is converted to QP format. |
|
inlinestatic |
VSX Scalar Convert Unsigned-Doubleword to Quad-Precision format.
The left most unsigned doubleword element of vector int64 is converted to quad-precision format.
For POWER9 use the xscvudqp instruction. For POWER8 and earlier use vector instruction generated by PVECLIB operations.
processor | Latency | Throughput |
---|---|---|
power8 | ? | 2/cycle |
power9 | 3 | 2/cycle |
int64 | a vector unsigned long long. The left most element is converted. |
|
inlinestatic |
VSX Scalar Convert Unsigned-Quadword to Quad-Precision format.
The unsigned quadword element of vector int128 is converted to quad-precision format. If the conversion is not exact the default rounding mode is "Round to Nearest Even".
For POWER10 use the xscvuqqp instruction. POWER9 only supports doubleword converts so use a combination of two xscvudqp and xsmaddqp instructions. For POWER8 and earlier use vector instruction generated by PVECLIB operations.
processor | Latency | Throughput |
---|---|---|
power8 | ? | 2/cycle |
power9 | 38-47 | 1/13cycles |
int128 | a vector unsigned __int128 which is converted to QP format. |
|
inlinestatic |
Scalar Insert Exponent Quad-Precision.
Merge the sign (bit 0) and significand (bits 16:127) from sig with the 15-bit exponent from exp (bits 49:63). The exponent is moved to bits 1:15 of the final result. The result is returned as a Quad-Precision floating point value.
processor | Latency | Throughput |
---|---|---|
power8 | 6-8 | 2/cycle |
power9 | 2 | 4/cycle |
sig | vector __int128 containing the Sign Bit and 112-bit significand. |
exp | vector unsigned long long element 0 containing the 15-bit exponent. |
|
inlinestatic |
VSX Scalar Multiply Quad-Precision using round to Odd.
The quad-precision element of vectors vfa and vfb are multiplied to produce the quad-precision result. The rounding mode is round to odd.
For POWER9 use the xsmulqpo instruction. For POWER8 use this soft-float implementation using vector instructions generated by PVECLIB operations. For POWER7 and earlier use the compiler's soft-float implementation.
processor | Latency | Throughput |
---|---|---|
power8 | 78-84 | 1/cycle |
power9 | 24 | 1/12 cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
VSX Scalar Subtract Quad-Precision using round to Odd.
The quad-precision element of vector vfb is subtracted from vfa to produce the quad-precision result. The rounding mode is round to odd.
For POWER9 use the xssubqpo instruction. For POWER8 use this soft-float implementation using vector instructions generated by PVECLIB operations. For POWER7 and earlier use the compiler's soft-float implementation.
processor | Latency | Throughput |
---|---|---|
power8 | 51-70 | 1/cycle |
power9 | 12 | 1/12 cycle |
vfa | 128-bit vector treated as a scalar __binary128. |
vfb | 128-bit vector treated as a scalar __binary128. |
|
inlinestatic |
Scalar Extract Exponent Quad-Precision.
Extract the quad-precision exponent (bits 1:15) and right justify it to (bits 49:63 of) doubleword 0 of the result vector. The result is returned as vector long long integer value.
processor | Latency | Throughput |
---|---|---|
power8 | 8-10 | 2/cycle |
power9 | 2 | 4/cycle |
f128 | __binary128 scalar value in a vector register. |
|
inlinestatic |
Scalar Extract Significand Quad-Precision.
Extract the quad-precision significand (bits 16:127) and restore the implied (hidden) bit (bit 15) if the quad-precision value is normal (not zero, subnormal, Infinity or NaN). The result is returned as vector __int128 integer value with up to 113 bits of significance.
processor | Latency | Throughput |
---|---|---|
power8 | 12-14 | 1/6cycles |
power9 | 3 | 2/cycle |
f128 | __binary128 scalar value in a vector register. |
|
inlinestatic |
Vector Extract Exponent Quad-Precision Pair.
Extract the quad-precision exponent (bits 1:15), from each member of the QP Pair, and right justify the exponents to (bits 49:63 of) doublewords 0/1 of the result vector. The result is returned as vector long long integer value.
processor | Latency | Throughput |
---|---|---|
power8 | 6-8 | 1/cycle |
power9 | 5 | 2/cycle |
vfa | first __binary128 scalar value in a vector register. |
vfb | second __binary128 scalar value in a vector register. |