POWER Vector Library Manual  1.0.4
Functions
vec_f64_ppc.h File Reference

Header package containing a collection of 128-bit SIMD operations over 64-bit double-precision floating point elements. More...

#include <pveclib/vec_common_ppc.h>
#include <pveclib/vec_int128_ppc.h>

Go to the source code of this file.

Functions

static vf64_t vec_absf64 (vf64_t vf64x)
 Vector double absolute value. More...
 
static int vec_all_isfinitef64 (vf64_t vf64)
 Return true if all 2x64-bit vector double values are Finite (Not NaN nor Inf). More...
 
static int vec_all_isinff64 (vf64_t vf64)
 Return true if all 2x64-bit vector double values are infinity. More...
 
static int vec_all_isnanf64 (vf64_t vf64)
 Return true if all 2x64-bit vector double values are NaN. More...
 
static int vec_all_isnormalf64 (vf64_t vf64)
 Return true if all 2x64-bit vector double values are normal (Not NaN, Inf, denormal, or zero). More...
 
static int vec_all_issubnormalf64 (vf64_t vf64)
 Return true if all 2x64-bit vector double values are subnormal (denormal). More...
 
static int vec_all_iszerof64 (vf64_t vf64)
 Return true if all 2x64-bit vector double values are +-0.0. More...
 
static int vec_any_isfinitef64 (vf64_t vf64)
 Return true if any of 2x64-bit vector double values are Finite (Not NaN nor Inf). More...
 
static int vec_any_isinff64 (vf64_t vf64)
 Return true if any of 2x64-bit vector double values are infinity. More...
 
static int vec_any_isnanf64 (vf64_t vf64)
 Return true if any of 2x64-bit vector double values are NaN. More...
 
static int vec_any_isnormalf64 (vf64_t vf64)
 Return true if any of 2x64-bit vector double values are normal (Not NaN, Inf, denormal, or zero). More...
 
static int vec_any_issubnormalf64 (vf64_t vf64)
 Return true if any of 2x64-bit vector double values is subnormal (denormal). More...
 
static int vec_any_iszerof64 (vf64_t vf64)
 Return true if any of 2x64-bit vector double values are +-0.0. More...
 
static vf64_t vec_copysignf64 (vf64_t vf64x, vf64_t vf64y)
 Copy the sign bit from vf64x merged with magnitude from vf64y and return the resulting vector double values. More...
 
static vb64_t vec_isfinitef64 (vf64_t vf64)
 Return 2x64-bit vector boolean true values for each double element that is Finite (Not NaN nor Inf). More...
 
static vb64_t vec_isinff64 (vf64_t vf64)
 Return 2x64-bit vector boolean true values for each double, if infinity. More...
 
static vb64_t vec_isnanf64 (vf64_t vf64)
 Return 2x64-bit vector boolean true values, for each double NaN value. More...
 
static vb64_t vec_isnormalf64 (vf64_t vf64)
 Return 2x64-bit vector boolean true values, for each double value, if normal (Not NaN, Inf, denormal, or zero). More...
 
static vb64_t vec_issubnormalf64 (vf64_t vf64)
 Return 2x64-bit vector boolean true values, for each double value that is subnormal (denormal). More...
 
static vb64_t vec_iszerof64 (vf64_t vf64)
 Return 2x64-bit vector boolean true values, for each double value that is +-0.0. More...
 
static long double vec_pack_longdouble (vf64_t lval)
 Copy the pair of doubles from a vector to IBM long double. More...
 
static vb64_t vec_setb_dp (vf64_t vra)
 Vector Set Bool from Sign, Double Precision. More...
 
static vf64_t vec_unpack_longdouble (long double lval)
 Copy the pair of doubles from an IBM long double to a vector double. More...
 
static vf64_t vec_vglfdso (double *array, const long long offset0, const long long offset1)
 Vector Gather-Load Float Double from scalar Offsets. More...
 
static vf64_t vec_vglfddo (double *array, vi64_t vra)
 Vector Gather-Load Float Double from Doubleword Offsets. More...
 
static vf64_t vec_vglfddsx (double *array, vi64_t vra, const unsigned char scale)
 Vector Gather-Load Float Double from Doubleword Scaled Indexes. More...
 
static vf64_t vec_vglfddx (double *array, vi64_t vra)
 Vector Gather-Load Float Double from Doubleword indexes. More...
 
static void vec_vsstfdso (vf64_t xs, double *array, const long long offset0, const long long offset1)
 Vector Scatter-Store Float Double to Scalar Offsets. More...
 
static void vec_vsstfddo (vf64_t xs, double *array, vi64_t vra)
 Vector Scatter-Store Float Double to Doubleword Offsets. More...
 
static void vec_vsstfddsx (vf64_t xs, double *array, vi64_t vra, const unsigned char scale)
 Vector Scatter-Store Float Double to Doubleword Scaled Index. More...
 
static void vec_vsstfddx (vf64_t xs, double *array, vi64_t vra)
 Vector Scatter-Store Float Double to Doubleword Indexes. More...
 
static vf64_t vec_vlxsfdx (const signed long long ra, const double *rb)
 Vector Load Scalar Float Double Indexed. More...
 
static void vec_vstxsfdx (vf64_t xs, const signed long long ra, double *rb)
 Vector Store Scalar Float Double Indexed. More...
 
static vf64_t vec_xviexpdp (vui64_t sig, vui64_t exp)
 Vector Insert Exponent Double-Precision. More...
 
static vui64_t vec_xvxexpdp (vf64_t vrb)
 Vector Extract Exponent Double-Precision. More...
 
static vui64_t vec_xvxsigdp (vf64_t vrb)
 Vector Extract Significand Double-Precision. More...
 

Detailed Description

Header package containing a collection of 128-bit SIMD operations over 64-bit double-precision floating point elements.

Many vector double-precision (64-bit float) operations are implemented with PowerISA-2.06 Vector Scalar Extended (VSX) (POWER7 and later) instructions. Most VSX instructions provide access to 64 combined scalar/vector registers. PowerISA-3.0 (POWER9) provides additional vector double operations: convert with round, convert to/from integer, insert/extract exponent and significand, and test data class. Most of these operations (compiler built-ins, or intrinsics) are defined in <altivec.h> and described in the compiler documentation.

Note
The compiler disables associated <altivec.h> built-ins if the mcpu target does not enable the specific instruction. For example, if you compile with -mcpu=power8, the double-precision vector converts, insert/extract and test data class built-ins are not defined. This header provides the appropriate substitutions, generates the minimum code appropriate for the target, and produces correct results.
Most ppc64le compilers will default to -mcpu=power8 if not specified.
GCC 7.3 defines vector forms of the test data class, extract significand, and extract/insert_exp for float and double. These built-ins are not defined in GCC 6.4. See compiler documentation. These are useful operations and can be implemented in a few vector logical instructions for earlier machines.

So it is reasonable for this header to provide vector forms of the double-precision floating point classification functions (isnormal/subnormal/finite/inf/nan/zero, etc.). These functions can be implemented directly using (one or more) POWER9 instructions, or a few vector logical and integer compare instructions for POWER7/8. Each is comfortably small enough to be in-lined and inherently faster than the equivalent POSIX or compiler built-in runtime scalar functions.

Most of these operations are implemented in a few instructions on newer (POWER7/POWER8/POWER9) processors. This header serves to fill in functional gaps for older (POWER7, POWER8) processors and provides an inline assembler implementation for older compilers that do not provide the built-ins.

This header covers operations that are any of the following:

Examples

For example: using the classification functions to implement the math library functions sine and cosine. The POSIX specification requires that special input values are processed without raising extraneous floating point exceptions and return specific floating point values in response. For example, the sin() function.

The following code example uses functions from this header to address the POSIX requirements for special values input to a vectorized sinf():

test_vec_sinf64 (vf64_t value)
{
const vf64_t vec_f0 = { 0.0, 0.0 };
const vui64_t vec_f64_qnan =
{ 0x7ff8000000000000, 0x7ff8000000000000 };
vf64_t result;
vb64_t normmask, infmask;
normmask = vec_isnormalf64 (value);
if (vec_any_isnormalf64 (value))
{
// replace non-normal input values with safe values.
vf64_t safeval = vec_sel (vec_f0, value, normmask);
// body of vec_sin(safeval) computation elided for this example.
}
else
result = value;
// merge non-normal input values back into result
result = vec_sel (value, result, normmask);
// Inf input value elements return quiet-nan.
infmask = vec_isinff64 (value);
result = vec_sel (result, (vf64_t) vec_f64_qnan, infmask);
return result;
}

The code generated for this fragment runs between 24 (-mcpu=power9) and 40 (-mcpu=power8) instructions. The normal execution path is 14 to 25 instructions respectively.

Another example: the cos() function.

The following code example uses functions from this header to address the POSIX requirements for special values input to vectorized cosf():

test_vec_cosf64 (vf64_t value)
{
vf64_t result;
const vf64_t vec_f0 = { 0.0, 0.0 };
const vf64_t vec_f1 = { 1.0, 1.0 };
const vui64_t vec_f64_qnan =
{ 0x7ff8000000000000, 0x7ff8000000000000 };
vb64_t finitemask, infmask, zeromask;
finitemask = vec_isfinitef64 (value);
if (vec_any_isfinitef64 (value))
{
// replace non-finite input values with safe values.
vf64_t safeval = vec_sel (vec_f0, value, finitemask);
// body of vec_cos(safeval) computation elided for this example.
}
else
result = value;
// merge non-finite input values back into result
result = vec_sel (value, result, finitemask);
// Set +-0.0 input elements to exactly 1.0 in result.
zeromask = vec_iszerof64 (value);
result = vec_sel (result, vec_f1, zeromask);
// Set Inf input elements to quiet-nan in result.
infmask = vec_isinff64 (value);
result = vec_sel (result, (vf64_t) vec_f64_qnan, infmask);
return result;
}

Neither example raises floating point exceptions or sets errno, as appropriate for a vector math library.

Performance data.

High level performance estimates are provided as an aid to function selection when evaluating algorithms. For background on how Latency and Throughput are derived see: Performance data.

Function Documentation

◆ vec_absf64()

static vf64_t vec_absf64 ( vf64_t  vf64x)
inlinestatic

Vector double absolute value.

processor Latency Throughput
power8 6-7 2/cycle
power9 2 2/cycle
Parameters
vf64x vector double values containing the magnitudes.
Returns
vector double absolute values of vf64x.

◆ vec_all_isfinitef64()

static int vec_all_isfinitef64 ( vf64_t  vf64)
inlinestatic

Return true if all 2x64-bit vector double values are Finite (Not NaN nor Inf).

An IEEE Binary64 finite value has an exponent between 0x000 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value. The sign bit is ignored.

processor Latency Throughput
power8 4-20 2/cycle
power9 6 1/cycle
Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal __binary64 compare can.
Parameters
vf64 a vector of __binary64 values.
Returns
an int containing 0 or 1.

◆ vec_all_isinff64()

static int vec_all_isinff64 ( vf64_t  vf64)
inlinestatic

Return true if all 2x64-bit vector double values are infinity.

An IEEE Binary64 infinity has an exponent of 0x7ff and a significand of all zeros. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-20 2/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
boolean int, true if all 2 double values are infinity

◆ vec_all_isnanf64()

static int vec_all_isnanf64 ( vf64_t  vf64)
inlinestatic

Return true if all 2x64-bit vector double values are NaN.

An IEEE Binary64 NaN value has an exponent of 0x7ff and a nonzero significand. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-20 2/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a boolean int, true if all 2 vector double values are NaN.

◆ vec_all_isnormalf64()

static int vec_all_isnormalf64 ( vf64_t  vf64)
inlinestatic

Return true if all 2x64-bit vector double values are normal (Not NaN, Inf, denormal, or zero).

An IEEE Binary64 normal value has an exponent between 0x001 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value (except 0 if the exponent is zero). The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 10-28 1/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a boolean int, true if all 2 vector double values are normal.

◆ vec_all_issubnormalf64()

static int vec_all_issubnormalf64 ( vf64_t  vf64)
inlinestatic

Return true if all 2x64-bit vector double values are subnormal (denormal).

An IEEE Binary64 subnormal has an exponent of 0x000 and a nonzero significand. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 10-30 1/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a boolean int, true if all of 2 vector double values are subnormal.

◆ vec_all_iszerof64()

static int vec_all_iszerof64 ( vf64_t  vf64)
inlinestatic

Return true if all 2x64-bit vector double values are +-0.0.

An IEEE Binary64 zero has an exponent of 0x000 and a zero significand. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-20 2/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a boolean int, true if all 2 vector double values are +/- zero.

◆ vec_any_isfinitef64()

static int vec_any_isfinitef64 ( vf64_t  vf64)
inlinestatic

Return true if any of 2x64-bit vector double values are Finite (Not NaN nor Inf).

An IEEE Binary64 finite value has an exponent between 0x000 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 4-20 2/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
an int containing 0 or 1.

◆ vec_any_isinff64()

static int vec_any_isinff64 ( vf64_t  vf64)
inlinestatic

Return true if any of 2x64-bit vector double values are infinity.

An IEEE Binary64 infinity has an exponent of 0x7ff and a significand of all zeros.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-20 2/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
boolean int, true if any of 2 double values are infinity

◆ vec_any_isnanf64()

static int vec_any_isnanf64 ( vf64_t  vf64)
inlinestatic

Return true if any of 2x64-bit vector double values are NaN.

An IEEE Binary64 NaN value has an exponent of 0x7ff and a nonzero significand. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-20 2/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a boolean int, true if any of 2 vector double values are NaN.

◆ vec_any_isnormalf64()

static int vec_any_isnormalf64 ( vf64_t  vf64)
inlinestatic

Return true if any of 2x64-bit vector double values are normal (Not NaN, Inf, denormal, or zero).

An IEEE Binary64 normal value has an exponent between 0x001 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value (except 0 if the exponent is zero). The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-20 1/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a boolean int, true if any of 2 vector double values are normal.

◆ vec_any_issubnormalf64()

static int vec_any_issubnormalf64 ( vf64_t  vf64)
inlinestatic

Return true if any of 2x64-bit vector double values is subnormal (denormal).

An IEEE Binary64 subnormal has an exponent of 0x000 and a nonzero significand. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 10-18 1/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
true if any of 2 vector double values are subnormal.

◆ vec_any_iszerof64()

static int vec_any_iszerof64 ( vf64_t  vf64)
inlinestatic

Return true if any of 2x64-bit vector double values are +-0.0.

An IEEE Binary64 zero has an exponent of 0x000 and a zero significand. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-20 2/cycle
power9 6 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a boolean int, true if any of 2 vector double values are +/- zero.

◆ vec_copysignf64()

static vf64_t vec_copysignf64 ( vf64_t  vf64x,
vf64_t  vf64y 
)
inlinestatic

Copy the sign bit from vf64x merged with magnitude from vf64y and return the resulting vector double values.

Note
This operation was patterned after the intrinsic vec_cpsgn (altivec.h) introduced for POWER7 and VSX. It turns out the original (GCC 4.9) compiler implementation reversed the operands and does not match the PowerISA or the Vector Intrinsic Programming Reference manuals. Subsequent compilers and PVECLIB implementations replicated this (operand order) error. This has now been reported as a bug against the compilers, which are in the process of applying fixes and distributing updates. This version of PVECLIB is updated to match the Vector Intrinsic Programming Reference. This implementation is independent of the compilers' update status.
processor Latency Throughput
power8 6-7 2/cycle
power9 2 2/cycle
Parameters
vf64x vector double values containing the sign bits.
vf64y vector double values containing the magnitudes.
Returns
vector double values with magnitude from vf64y and the sign of vf64x.

◆ vec_isfinitef64()

static vb64_t vec_isfinitef64 ( vf64_t  vf64)
inlinestatic

Return 2x64-bit vector boolean true values for each double element that is Finite (Not NaN nor Inf).

An IEEE Binary64 finite value has an exponent between 0x000 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value.

Using the vec_cmpeq conditional to generate the predicate mask for NaN / Inf and then invert this for the finite condition. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-15 2/cycle
power9 5 2/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_isinff64()

static vb64_t vec_isinff64 ( vf64_t  vf64)
inlinestatic

Return 2x64-bit vector boolean true values for each double, if infinity.

An IEEE Binary64 infinity has an exponent of 0x7ff and a significand of all zeros.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 4-13 2/cycle
power9 3 2/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_isnanf64()

static vb64_t vec_isnanf64 ( vf64_t  vf64)
inlinestatic

Return 2x64-bit vector boolean true values, for each double NaN value.

An IEEE Binary64 NaN value has an exponent of 0x7ff and a nonzero significand. The sign bit is ignored.

processor Latency Throughput
power8 4-13 2/cycle
power9 3 2/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_isnormalf64()

static vb64_t vec_isnormalf64 ( vf64_t  vf64)
inlinestatic

Return 2x64-bit vector boolean true values, for each double value, if normal (Not NaN, Inf, denormal, or zero).

An IEEE Binary64 normal value has an exponent between 0x001 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value (except 0 if the exponent is zero).

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-15 1/cycle
power9 5 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_issubnormalf64()

static vb64_t vec_issubnormalf64 ( vf64_t  vf64)
inlinestatic

Return 2x64-bit vector boolean true values, for each double value that is subnormal (denormal).

An IEEE Binary64 subnormal has an exponent of 0x000 and a nonzero significand. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 6-16 1/cycle
power9 3 1/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_iszerof64()

static vb64_t vec_iszerof64 ( vf64_t  vf64)
inlinestatic

Return 2x64-bit vector boolean true values, for each double value that is +-0.0.

An IEEE Binary64 zero has an exponent of 0x000 and a zero significand. The sign bit is ignored.

Note
This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.
processor Latency Throughput
power8 4-13 2/cycle
power9 3 2/cycle
Parameters
vf64 a vector of __binary64 values.
Returns
a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_pack_longdouble()

static long double vec_pack_longdouble ( vf64_t  lval)
inlinestatic

Copy the pair of doubles from a vector to IBM long double.

Parameters
lval vector double values containing the IBM long double.
Returns
IBM long double as FPR pair.

◆ vec_setb_dp()

static vb64_t vec_setb_dp ( vf64_t  vra)
inlinestatic

Vector Set Bool from Sign, Double Precision.

For each double, propagate the sign bit to all 64-bits of that doubleword. The result is vector bool long long reflecting the sign bit of each 64-bit double.

The resulting mask can be used in vector masking and select operations.

Note
This operation will set the sign mask regardless of data class, while the Vector Test Data Class instructions will not distinguish between +/- NaN.
processor Latency Throughput
power8 2-4 2/cycle
power9 2-5 2/cycle
Parameters
vra Vector double.
Returns
vector bool long long reflecting the sign bits of each double value.

◆ vec_unpack_longdouble()

static vf64_t vec_unpack_longdouble ( long double  lval)
inlinestatic

Copy the pair of doubles from an IBM long double to a vector double.

Parameters
lval IBM long double as FPR pair.
Returns
vector double values containing the IBM long double.

◆ vec_vglfddo()

static vf64_t vec_vglfddo ( double *  array,
vi64_t  vra 
)
inlinestatic

Vector Gather-Load Float Double from Doubleword Offsets.

For each doubleword element [i] of vra, load the float double element at *(char*)array+vra[i]. Merge those float double elements and return the resulting vector.

Note
As effective address calculation is modulo 64-bits, signed or unsigned doubleword offsets are equivalent.
processor Latency Throughput
power8 12 1/cycle
power9 11 1/cycle
Parameters
array Pointer to array of doubles.
vra Vector of doubleword (64-bit) byte offsets from &array.
Returns
vector double containing elements loaded from *(char*)array+vra[0] and *(char*)array+vra[1].

◆ vec_vglfddsx()

static vf64_t vec_vglfddsx ( double *  array,
vi64_t  vra,
const unsigned char  scale 
)
inlinestatic

Vector Gather-Load Float Double from Doubleword Scaled Indexes.

For each doubleword element [i] of vra, load the float double element *array[vra[i] * (1 << scale)]. Merge those float double elements and return the resulting vector. Indexes are converted to offsets from *array by shifting each doubleword left (3+scale) bits.

Note
As effective address calculation is modulo 64-bits, signed or unsigned doubleword indexes are equivalent.
processor Latency Throughput
power8 14-23 1/cycle
power9 13-22 1/cycle
Parameters
array Pointer to array of doubles.
vra Vector of doubleword indexes.
scale 8-bit integer. Indexes are multiplied by 2^scale.
Returns
Vector double containing array[vra[0]*(1<<scale)] and array[vra[1]*(1<<scale)].

◆ vec_vglfddx()

static vf64_t vec_vglfddx ( double *  array,
vi64_t  vra 
)
inlinestatic

Vector Gather-Load Float Double from Doubleword indexes.

For each doubleword element [i] of vra, load the double element array[vra[i]]. Merge those float double elements and return the resulting vector. The indexes are converted to offsets from *array by shifting each doubleword index left 3-bits (*8).

Note
As effective address calculation is modulo 64-bits, signed or unsigned doubleword indexes are equivalent.
processor Latency Throughput
power8 14-23 1/cycle
power9 13-22 1/cycle
Parameters
array Pointer to array of doubles.
vra Vector of doubleword indexes.
Returns
vector double containing {array[vra[0]], array[vra[1]]}.

◆ vec_vglfdso()

static vf64_t vec_vglfdso ( double *  array,
const long long  offset0,
const long long  offset1 
)
inlinestatic

Vector Gather-Load Float Double from scalar Offsets.

For each scalar offset[0|1], load the float double element at *(char*)array+offset[0|1]. Merge those float double elements and return the resulting vector.

processor Latency Throughput
power8 12 1/cycle
power9 11 1/cycle
Parameters
array Pointer to array of doubles.
offset0 Scalar (64-bit) byte offset from &array.
offset1 Scalar (64-bit) byte offset from &array.
Returns
vector double containing elements loaded from *(char*)array+offset0 and *(char*)array+offset1.

◆ vec_vlxsfdx()

static vf64_t vec_vlxsfdx ( const signed long long  ra,
const double *  rb 
)
inlinestatic

Vector Load Scalar Float Double Indexed.

Load the left most doubleword of vector xt as a scalar double from the effective address formed by rb+ra. The operand rb is a pointer to an array of doubles. The operand ra is a doubleword integer byte offset from rb. The result xt is returned as a vf64_t vector. For best performance rb and ra should be doubleword aligned (integer multiple of 8).

Note
the right most doubleword of vector xt is left undefined by this operation.

This operation is an alternate form of Vector Load Element (vec_lde), with the added simplification that data is always left justified in the vector. This simplifies merging elements for gather operations.

Note
This instruction was introduced in PowerISA 2.06 (POWER7). For POWER8/9 there are additional optimizations by effectively converting small constant index values into displacements. For POWER8 a specific pattern of addi/lxsdx instructions is fused into a single load displacement internal operation. For POWER9 we can use the lxsd (DS-form) instruction directly.
processor Latency Throughput
power8 5 2/cycle
power9 5 2/cycle
Parameters
ra const doubleword index (offset/displacement).
rb const doubleword pointer to an array of doubles.
Returns
The data stored at (ra + rb) is loaded into vector doubleword element 0. Element 1 is undefined.

◆ vec_vsstfddo()

static void vec_vsstfddo ( vf64_t  xs,
double *  array,
vi64_t  vra 
)
inlinestatic

Vector Scatter-Store Float Double to Doubleword Offsets.

For each doubleword element [i] of vra, Store the double element xs[i] at *(char*)array+vra[i].

Note
As effective address calculation is modulo 64-bits, signed or unsigned doubleword offsets are equivalent.
processor Latency Throughput
power8 12 1/cycle
power9 8 1/cycle
Parameters
xs Vector double elements to scatter store.
array Pointer to array of doubles.
vra Vector of doubleword (64-bit) byte offsets from &array.

◆ vec_vsstfddsx()

static void vec_vsstfddsx ( vf64_t  xs,
double *  array,
vi64_t  vra,
const unsigned char  scale 
)
inlinestatic

Vector Scatter-Store Float Double to Doubleword Scaled Index.

For each doubleword element [i] of vra, store the double element xs[i] at array[vra[i] * (1 << scale)]. Indexes are converted to offsets from *array by shifting each doubleword of vra left (3+scale) bits.

Note
As effective address calculation is modulo 64-bits, signed or unsigned doubleword indexes are equivalent.
processor Latency Throughput
power8 14-23 1/cycle
power9 10-19 1/cycle
Parameters
xs Vector double elements to store.
array Pointer to array of doubles.
vra Vector of doubleword indexes.
scale Factor effectively multiplying the indexes by 2^scale.

◆ vec_vsstfddx()

static void vec_vsstfddx ( vf64_t  xs,
double *  array,
vi64_t  vra 
)
inlinestatic

Vector Scatter-Store Float Double to Doubleword Indexes.

For each doubleword element [i] of vra, store the double element xs[i] at array[vra[i]]. Indexes are converted to offsets from *array by shifting each doubleword of vra left 3 bits.

Note
As effective address calculation is modulo 64-bits, signed or unsigned doubleword indexes are equivalent.
processor Latency Throughput
power8 14-23 1/cycle
power9 10-19 1/cycle
Parameters
xs Vector double elements to store.
array Pointer to array of doubles.
vra Vector of doubleword indexes.

◆ vec_vsstfdso()

static void vec_vsstfdso ( vf64_t  xs,
double *  array,
const long long  offset0,
const long long  offset1 
)
inlinestatic

Vector Scatter-Store Float Double to Scalar Offsets.

For each scalar offset[0|1], store the double element xs[0|1] at *(char*)array+offset[0|1].

Note
As effective address calculation is modulo 64-bits, signed or unsigned doubleword offsets are equivalent.
processor Latency Throughput
power8 12 1/cycle
power9 8 1/cycle
Parameters
xs Vector double elements to scatter store.
array Pointer to array of doubles.
offset0 Scalar (64-bit) byte offset from &array.
offset1 Scalar (64-bit) byte offset from &array.

◆ vec_vstxsfdx()

static void vec_vstxsfdx ( vf64_t  xs,
const signed long long  ra,
double *  rb 
)
inlinestatic

Vector Store Scalar Float Double Indexed.

Stores the left most doubleword of vector xs as a scalar double float at the effective address formed by rb+ra. The operand rb is a pointer to an array of doubles. The operand ra is a doubleword integer byte offset from rb. For best performance rb and ra should be doubleword aligned (integer multiple of 8).

This operation is an alternate form of vector store element, with the added simplification that data is always left justified in the vector. This simplifies scatter operations.

Note
This instruction was introduced in PowerISA 2.06 (POWER7). For POWER9 there are additional optimizations by effectively converting small constant index values into displacements. For POWER9 we can use the stxsd (DS-form) instruction directly.
processor Latency Throughput
power8 0 - 2 2/cycle
power9 0 - 2 4/cycle
Parameters
xs vector doubleword element 0 to be stored.
ra const doubleword index (offset/displacement).
rb const doubleword pointer to an array of doubles.

◆ vec_xviexpdp()

static vf64_t vec_xviexpdp ( vui64_t  sig,
vui64_t  exp 
)
inlinestatic

Vector Insert Exponent Double-Precision.

For each doubleword of sig and exp, merge the sign (bit 0) and significand (bits 12:63) from sig with the 11-bit exponent from exp (bits 53:63). The exponent is merged into bits 1:11 of the final result. The result is returned as a Vector Double-Precision floating point value.

Note
This operation is equivalent to the POWER9 xviexpdp instruction and the built-in vec_insert_exp. These require a POWER9-enabled compiler targeting -mcpu=power9 and are not available for older compilers nor POWER8 and earlier. This function provides this operation for all VSX-enabled platforms.
processor Latency Throughput
power8 6-15 2/cycle
power9 2 4/cycle
Parameters
sig Vector unsigned long long containing the Sign Bit and 52-bit significand.
exp Vector unsigned long long containing the 11-bit exponent.
Returns
a vf64_t value where the exponent bits (1:11) of sig are replaced from bits 53:63 of exp.

◆ vec_xvxexpdp()

static vui64_t vec_xvxexpdp ( vf64_t  vrb)
inlinestatic

Vector Extract Exponent Double-Precision.

For each doubleword of vrb, extract the double-precision exponent (bits 1:11) and right justify it to bits 53:63 of the result vector doubleword. The result is returned as a vector long long integer value.

Note
This operation is equivalent to the POWER9 xvxexpdp instruction and the built-in vec_extract_exp. These require a POWER9-enabled compiler targeting -mcpu=power9 and are not available for older compilers nor POWER8 and earlier. This function provides this operation for all VSX-enabled platforms.
processor Latency Throughput
power8 6-15 2/cycle
power9 2 4/cycle
Parameters
vrb vector double value.
Returns
vector unsigned long long containing 11-bit exponent right justified in each doubleword

◆ vec_xvxsigdp()

static vui64_t vec_xvxsigdp ( vf64_t  vrb)
inlinestatic

Vector Extract Significand Double-Precision.

For each doubleword of vrb, extract the double-precision significand (bits 12:63) and restore the implied (hidden) bit (bit 11) if the double-precision value is normal (not zero, subnormal, Infinity or NaN). The result is returned as a vector long long integer value with up to 53 bits of significance.

Note
This operation is equivalent to the POWER9 xvxsigdp instruction and the built-in vec_extract_sig. These require a POWER9-enabled compiler targeting -mcpu=power9 and are not available for older compilers nor POWER8 and earlier. This function provides this operation for all VSX-enabled platforms.
processor Latency Throughput
power8 8-17 1/cycle
power9 3 2/cycle
Parameters
vrb vector double value.
Returns
vector unsigned long long containing the significand.
vec_any_isnormalf64
static int vec_any_isnormalf64(vf64_t vf64)
Return true if any of 2x64-bit vector double values are normal (Not NaN, Inf, denormal,...
Definition: vec_f64_ppc.h:660
vec_iszerof64
static vb64_t vec_iszerof64(vf64_t vf64)
Return 2x64-bit vector boolean true values, for each double value that is +-0.0.
Definition: vec_f64_ppc.h:1084
vui64_t
__vector unsigned long long vui64_t
vector of 64-bit unsigned long long elements.
Definition: vec_common_ppc.h:208
vec_isinff64
static vb64_t vec_isinff64(vf64_t vf64)
Return 2x64-bit vector boolean true values for each double, if infinity.
Definition: vec_f64_ppc.h:901
vec_any_isfinitef64
static int vec_any_isfinitef64(vf64_t vf64)
Return true if any of 2x64-bit vector double values are Finite (Not NaN nor Inf).
Definition: vec_f64_ppc.h:526
vb64_t
__vector __bool long long vb64_t
vector of 64-bit bool long long elements.
Definition: vec_common_ppc.h:230
vf64_t
__vector double vf64_t
vector of 64-bit double elements.
Definition: vec_common_ppc.h:221
vec_isfinitef64
static vb64_t vec_isfinitef64(vf64_t vf64)
Return 2x64-bit vector boolean true values for each double element that is Finite (Not NaN nor Inf).
Definition: vec_f64_ppc.h:857
vec_isnormalf64
static vb64_t vec_isnormalf64(vf64_t vf64)
Return 2x64-bit vector boolean true values, for each double value, if normal (Not NaN,...
Definition: vec_f64_ppc.h:990