Header package containing a collection of 128-bit SIMD operations over 64-bit double-precision floating point elements. More...

#include <pveclib/vec_common_ppc.h>
#include <pveclib/vec_int128_ppc.h>

Functions
static vf64_t	vec_absf64 (vf64_t vf64x)
	Vector double absolute value. More...

static int	vec_all_isfinitef64 (vf64_t vf64)
	Return true if all 2x64-bit vector double values are Finite (Not NaN nor Inf). More...

static int	vec_all_isinff64 (vf64_t vf64)
	Return true if all 2x64-bit vector double values are infinity. More...

static int	vec_all_isnanf64 (vf64_t vf64)
	Return true if all 2x64-bit vector double values are NaN. More...

static int	vec_all_isnormalf64 (vf64_t vf64)
	Return true if all 2x64-bit vector double values are normal (Not NaN, Inf, denormal, or zero). More...

static int	vec_all_issubnormalf64 (vf64_t vf64)
	Return true if all 2x64-bit vector double values are subnormal (denormal). More...

static int	vec_all_iszerof64 (vf64_t vf64)
	Return true if all 2x64-bit vector double values are +-0.0. More...

static int	vec_any_isfinitef64 (vf64_t vf64)
	Return true if any of 2x64-bit vector double values are Finite (Not NaN nor Inf). More...

static int	vec_any_isinff64 (vf64_t vf64)
	Return true if any of 2x64-bit vector double values are infinity. More...

static int	vec_any_isnanf64 (vf64_t vf64)
	Return true if any of 2x64-bit vector double values are NaN. More...

static int	vec_any_isnormalf64 (vf64_t vf64)
	Return true if any of 2x64-bit vector double values are normal (Not NaN, Inf, denormal, or zero). More...

static int	vec_any_issubnormalf64 (vf64_t vf64)
	Return true if any of 2x64-bit vector double values is subnormal (denormal). More...

static int	vec_any_iszerof64 (vf64_t vf64)
	Return true if any of 2x64-bit vector double values are +-0.0. More...

static vf64_t	vec_copysignf64 (vf64_t vf64x, vf64_t vf64y)
	Copy the sign bit from vf64x merged with magnitude from vf64y and return the resulting vector double values. More...

static vb64_t	vec_isfinitef64 (vf64_t vf64)
	Return 2x64-bit vector boolean true values for each double element that is Finite (Not NaN nor Inf). More...

static vb64_t	vec_isinff64 (vf64_t vf64)
	Return 2x64-bit vector boolean true values for each double, if infinity. More...

static vb64_t	vec_isnanf64 (vf64_t vf64)
	Return 2x64-bit vector boolean true values, for each double NaN value. More...

static vb64_t	vec_isnormalf64 (vf64_t vf64)
	Return 2x64-bit vector boolean true values, for each double value, if normal (Not NaN, Inf, denormal, or zero). More...

static vb64_t	vec_issubnormalf64 (vf64_t vf64)
	Return 2x64-bit vector boolean true values, for each double value that is subnormal (denormal). More...

static vb64_t	vec_iszerof64 (vf64_t vf64)
	Return 2x64-bit vector boolean true values, for each double value that is +-0.0. More...

static long double	vec_pack_longdouble (vf64_t lval)
	Copy the pair of doubles from a vector to IBM long double. More...

static vb64_t	vec_setb_dp (vf64_t vra)
	Vector Set Bool from Sign, Double Precision. More...

static vf64_t	vec_unpack_longdouble (long double lval)
	Copy the pair of doubles from a IBM long double to a vector double. More...

static vf64_t	vec_vglfdso (double *array, const long long offset0, const long long offset1)
	Vector Gather-Load Float Double from scalar Offsets. More...

static vf64_t	vec_vglfddo (double *array, vi64_t vra)
	Vector Gather-Load Float Double from Doubleword Offsets. More...

static vf64_t	vec_vglfddsx (double *array, vi64_t vra, const unsigned char scale)
	Vector Gather-Load Float Double from Doubleword Scaled Indexes. More...

static vf64_t	vec_vglfddx (double *array, vi64_t vra)
	Vector Gather-Load Float Double from Doubleword indexes. More...

static void	vec_vsstfdso (vf64_t xs, double *array, const long long offset0, const long long offset1)
	Vector Scatter-Store Float Double to Scalar Offsets. More...

static void	vec_vsstfddo (vf64_t xs, double *array, vi64_t vra)
	Vector Scatter-Store Float Double to Doubleword Offsets. More...

static void	vec_vsstfddsx (vf64_t xs, double *array, vi64_t vra, const unsigned char scale)
	Vector Scatter-Store Float Double to Doubleword Scaled Index. More...

static void	vec_vsstfddx (vf64_t xs, double *array, vi64_t vra)
	Vector Scatter-Store Float Double to Doubleword Indexes. More...

static vf64_t	vec_vlxsfdx (const signed long long ra, const double *rb)
	Vector Load Scalar Float Double Indexed. More...

static void	vec_vstxsfdx (vf64_t xs, const signed long long ra, double *rb)
	Vector Store Scalar Float Double Indexed. More...

static vf64_t	vec_xviexpdp (vui64_t sig, vui64_t exp)
	Vector Insert Exponent Double-Precision. More...

static vui64_t	vec_xvxexpdp (vf64_t vrb)
	Vector Extract Exponent Double-Precision. More...

static vui64_t	vec_xvxsigdp (vf64_t vrb)
	Vector Extract Significand Double-Precision. More...

Detailed Description

Header package containing a collection of 128-bit SIMD operations over 64-bit double-precision floating point elements.

Many vector double-precision (64-bit float) operations are implemented with PowerISA-2.06 Vector Scalar Extended (VSX) (POWER7 and later) instructions. Most VSX instructions provide access to 64 combined scalar/vector registers. PowerISA-3.0 (POWER9) provides additional vector double operations: convert with round, convert to/from integer, insert/extract exponent and significand, and test data class. Most of these operations (compiler built-ins, or intrinsics) are defined in <altivec.h> and described in the compiler documentation.

Note: The compiler disables associated <altivec.h> built-ins if the mcpu target does not enable the specific instruction. For example if you compile with -mcpu=power8, the double-precision vector converts, insert/extract and test data class built-ins are not defined. This header provides the appropriate substitutions, will generate the minimum code, appropriate for the target, and produce correct results. Most ppc64le compilers will default to -mcpu=power8 if not specified. GCC 7.3 defines vector forms of the test data class, extract significand, and extract/insert_exp for float and double. These built-ins are not defined in GCC 6.4. See compiler documentation. These are useful operations and can be implemented in a few vector logical instructions for earlier machines.

So it is reasonable for this header to provide vector forms of the double-precision floating point classification functions (isnormal/subnormal/finite/inf/nan/zero, etc.). These functions can be implemented directly using (one or more) POWER9 instructions, or a few vector logical and integer compare instructions for POWER7/8. Each is comfortably small enough to be in-lined and inherently faster than the equivalent POSIX or compiler built-in runtime scalar functions.

Most of these operations are implemented in a few instructions on newer (POWER7/POWER8/POWER9) processors. This header serves to fill in functional gaps for older (POWER7, POWER8) processors and provides an inline assembler implementation for older compilers that do not provide the built-ins.

This header covers operations that are any of the following:

Implemented in hardware instructions in newer processors, but useful to programmers on slightly older processors (even if the equivalent function requires more instructions).
Defined in the OpenPOWER ABI but not yet defined in <altivec.h> provided by available compilers in common use. Examples include vector double even/odd conversions.
Providing special vector double tests for special conditions without generating extraneous floating-point exceptions. This is important for implementing vectorized forms of ISO C99 Math functions. Examples include vector double isnan, isinf, etc.
Commonly used operations, not covered by the ABI or <altivec.h>, and require multiple instructions or are not obvious. For example, converts that change element size and imply converting two vectors into one vector of smaller elements, or one vector into two vectors of larger elements. Another example is the special case of packing/unpacking an IBM long double between a pair of floating-point registers (FPRs) and a single vector register (VR).

Examples

For example: using the classification functions for implementing the math library functions sine and cosine. The POSIX specification requires that special input values are processed without raising extraneous floating point exceptions and return specific floating point values in response. For example, the sin() function.

If the input value is NaN then return a NaN.
If the input value is +-0.0 then return value.
If the input value is subnormal then return value.
If the input value is +-Inf then return a quiet-NaN.
Otherwise compute and return sin(value).

The following code example uses functions from this header to address the POSIX requirements for special values input to a vectorized sinf():

// Example: special-value handling wrapped around a vectorized sine.
// Elements that are not normal (NaN, +-0.0, subnormal) are passed through
// unchanged and Inf elements are replaced with quiet-NaN. The sine
// computation itself is elided.
vf64_t
test_vec_sinf64 (vf64_t value)
{
  const vf64_t vec_f0 = { 0.0, 0.0 };
  // Bit pattern of a binary64 quiet-NaN in each doubleword.
  const vui64_t vec_f64_qnan =
    { 0x7ff8000000000000, 0x7ff8000000000000 };
  vf64_t result;
  vb64_t normmask, infmask;
 
  // Per-element mask: true for each input element that is normal.
  normmask = vec_isnormalf64 (value);
  if (vec_any_isnormalf64 (value))
    {
      // replace non-normal input values with safe values.
      vf64_t safeval = vec_sel (vec_f0, value, normmask);
      // body of vec_sin(safeval) computation elided for this example.
      // NOTE: the elided code is expected to assign result; nothing
      // visible on this path does.
    }
  else
    result = value;
 
  // merge non-normal input values back into result
  result = vec_sel (value, result, normmask);
  // Inf input value elements return quiet-nan.
  infmask = vec_isinff64 (value);
  result = vec_sel (result, (vf64_t) vec_f64_qnan, infmask);
 
  return result;
}

The code generated for this fragment runs between 24 (-mcpu=power9) and 40 (-mcpu=power8) instructions. The normal execution path is 14 to 25 instructions respectively.

Another example is the cos() function.

If the input value is NaN then return a NaN.
If the input value is +-0.0 then return 1.0.
If the input value is +-Inf then return a quiet-NaN.
Otherwise compute and return cos(value).

The following code example uses functions from this header to address the POSIX requirements for special values input to vectorized cosf():

// Example: special-value handling wrapped around a vectorized cosine.
// NaN elements are passed through unchanged, +-0.0 elements return
// exactly 1.0 and Inf elements return quiet-NaN. The cosine computation
// itself is elided.
vf64_t
test_vec_cosf64 (vf64_t value)
{
  vf64_t result;
  const vf64_t vec_f0 = { 0.0, 0.0 };
  const vf64_t vec_f1 = { 1.0, 1.0 };
  // Bit pattern of a binary64 quiet-NaN in each doubleword.
  const vui64_t vec_f64_qnan =
    { 0x7ff8000000000000, 0x7ff8000000000000 };
  vb64_t finitemask, infmask, zeromask;
 
  // Per-element mask: true for each input element that is finite.
  finitemask = vec_isfinitef64 (value);
  if (vec_any_isfinitef64 (value))
    {
      // replace non-finite input values with safe values.
      vf64_t safeval = vec_sel (vec_f0, value, finitemask);
      // body of vec_cos(safeval) computation elided for this example.
      // NOTE: the elided code is expected to assign result; nothing
      // visible on this path does.
    }
  else
    result = value;
 
  // merge non-finite input values back into result
  result = vec_sel (value, result, finitemask);
  // Set +-0.0 input elements to exactly 1.0 in result.
  zeromask = vec_iszerof64 (value);
  result = vec_sel (result, vec_f1, zeromask);
  // Set Inf input elements to quiet-nan in result.
  infmask = vec_isinff64 (value);
  result = vec_sel (result, (vf64_t) vec_f64_qnan, infmask);
 
  return result;
}

Neither example raises floating point exceptions or sets errno, as appropriate for a vector math library.

Performance data.

High level performance estimates are provided as an aid to function selection when evaluating algorithms. For background on how Latency and Throughput are derived see: Performance data.

Function Documentation

◆ vec_absf64()

static vf64_t vec_absf64 ( vf64_t vf64x )

inlinestatic

Vector double absolute value.

processor	Latency	Throughput
power8	6-7	2/cycle
power9	2	2/cycle

Parameters

vf64x vector double values containing the magnitudes.

Returns: vector double absolute values of vf64x.

◆ vec_all_isfinitef64()

static int vec_all_isfinitef64 ( vf64_t vf64 )

inlinestatic

Return true if all 2x64-bit vector double values are Finite (Not NaN nor Inf).

A IEEE Binary64 finite value has an exponent between 0x000 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value. The sign bit is ignored.

processor	Latency	Throughput
power8	4-20	2/cycle
power9	6	1/cycle

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal __binary64 compare can.

Parameters

vf64	a vector of __binary64 values.

Returns: an int containing 0 or 1.

◆ vec_all_isinff64()

static int vec_all_isinff64 ( vf64_t vf64 )

inlinestatic

Return true if all 2x64-bit vector double values are infinity.

An IEEE Binary64 infinity has an exponent of 0x7ff and significand of all zeros. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-20	2/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: boolean int, true if all 2 double values are infinity

◆ vec_all_isnanf64()

static int vec_all_isnanf64 ( vf64_t vf64 )

inlinestatic

Return true if all 2x64-bit vector double values are NaN.

An IEEE Binary64 NaN value has an exponent of 0x7ff and a nonzero significand. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-20	2/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a boolean int, true if all 2 vector double values are NaN.

◆ vec_all_isnormalf64()

static int vec_all_isnormalf64 ( vf64_t vf64 )

inlinestatic

Return true if all 2x64-bit vector double values are normal (Not NaN, Inf, denormal, or zero).

An IEEE Binary64 normal value has an exponent between 0x001 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value (except 0 if the exponent is zero). The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	10-28	1/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a boolean int, true if all 2 vector double values are normal.

◆ vec_all_issubnormalf64()

static int vec_all_issubnormalf64 ( vf64_t vf64 )

inlinestatic

Return true if all 2x64-bit vector double values are subnormal (denormal).

A IEEE Binary64 subnormal has an exponent of 0x000 and a nonzero significand. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	10-30	1/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a boolean int, true if all of 2 vector double values are subnormal.

◆ vec_all_iszerof64()

static int vec_all_iszerof64 ( vf64_t vf64 )

inlinestatic

Return true if all 2x64-bit vector double values are +-0.0.

A IEEE Binary64 zero has an exponent of 0x000 and a zero significand. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-20	2/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a boolean int, true if all 2 vector double values are +/- zero.

◆ vec_any_isfinitef64()

static int vec_any_isfinitef64 ( vf64_t vf64 )

inlinestatic

Return true if any of 2x64-bit vector double values are Finite (Not NaN nor Inf).

A IEEE Binary64 finite value has an exponent between 0x000 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	4-20	2/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: an int containing 0 or 1.

◆ vec_any_isinff64()

static int vec_any_isinff64 ( vf64_t vf64 )

inlinestatic

Return true if any of 2x64-bit vector double values are infinity.

An IEEE Binary64 infinity has an exponent of 0x7ff and significand of all zeros.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-20	2/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: boolean int, true if any of 2 double values are infinity

◆ vec_any_isnanf64()

static int vec_any_isnanf64 ( vf64_t vf64 )

inlinestatic

Return true if any of 2x64-bit vector double values are NaN.

An IEEE Binary64 NaN value has an exponent of 0x7ff and a nonzero significand. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-20	2/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a boolean int, true if any of 2 vector double values are NaN.

◆ vec_any_isnormalf64()

static int vec_any_isnormalf64 ( vf64_t vf64 )

inlinestatic

Return true if any of 2x64-bit vector double values are normal (Not NaN, Inf, denormal, or zero).

An IEEE Binary64 normal value has an exponent between 0x001 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value (except 0 if the exponent is zero). The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-20	1/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a boolean int, true if any of 2 vector double values are normal.

◆ vec_any_issubnormalf64()

static int vec_any_issubnormalf64 ( vf64_t vf64 )

inlinestatic

Return true if any of 2x64-bit vector double values is subnormal (denormal).

A IEEE Binary64 subnormal has an exponent of 0x000 and a nonzero significand. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	10-18	1/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: true if any of 2 vector double values are subnormal.

◆ vec_any_iszerof64()

static int vec_any_iszerof64 ( vf64_t vf64 )

inlinestatic

Return true if any of 2x64-bit vector double values are +-0.0.

A IEEE Binary64 zero has an exponent of 0x000 and a zero significand. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-20	2/cycle
power9	6	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a boolean int, true if any of 2 vector double values are +/- zero.

◆ vec_copysignf64()

static vf64_t vec_copysignf64	(	vf64_t	vf64x,
		vf64_t	vf64y
	)

inlinestatic

Copy the sign bit from vf64x merged with magnitude from vf64y and return the resulting vector double values.

Note: This operation was patterned after the intrinsic vec_cpsgn (altivec.h) introduced for POWER7 and VSX. It turns out the original (GCC 4.9) compiler implementation reversed the operands and does not match the PowerISA or the Vector Intrinsic Programming Reference manuals. Subsequent compilers and PVECLIB implementations replicated this (operand order) error. This has now been reported as bug against the compilers, which are in the process of applying fixes and distributing updates. This version of PVECLIB is updated to match the Vector Intrinsic Programming Reference. This implementation is independent of the compilers update status.

processor	Latency	Throughput
power8	6-7	2/cycle
power9	2	2/cycle

Parameters

vf64x	vector double values containing the sign bits.
vf64y	vector double values containing the magnitudes.

Returns: vector double values with magnitude from vf64y and the sign of vf64x.

◆ vec_isfinitef64()

static vb64_t vec_isfinitef64 ( vf64_t vf64 )

inlinestatic

Return 2x64-bit vector boolean true values for each double element that is Finite (Not NaN nor Inf).

A IEEE Binary64 finite value has an exponent between 0x000 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value.

Using the vec_cmpeq conditional to generate the predicate mask for NaN / Inf and then invert this for the finite condition. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-15	2/cycle
power9	5	2/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_isinff64()

static vb64_t vec_isinff64 ( vf64_t vf64 )

inlinestatic

Return 2x64-bit vector boolean true values for each double, if infinity.

An IEEE Binary64 infinity has an exponent of 0x7ff and significand of all zeros.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	4-13	2/cycle
power9	3	2/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_isnanf64()

static vb64_t vec_isnanf64 ( vf64_t vf64 )

inlinestatic

Return 2x64-bit vector boolean true values, for each double NaN value.

An IEEE Binary64 NaN value has an exponent of 0x7ff and a nonzero significand. The sign bit is ignored.

processor	Latency	Throughput
power8	4-13	2/cycle
power9	3	2/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_isnormalf64()

static vb64_t vec_isnormalf64 ( vf64_t vf64 )

inlinestatic

Return 2x64-bit vector boolean true values, for each double value, if normal (Not NaN, Inf, denormal, or zero).

An IEEE Binary64 normal value has an exponent between 0x001 and 0x7fe (a 0x7ff indicates NaN or Inf). The significand can be any value (except 0 if the exponent is zero).

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-15	1/cycle
power9	5	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_issubnormalf64()

static vb64_t vec_issubnormalf64 ( vf64_t vf64 )

inlinestatic

Return 2x64-bit vector boolean true values, for each double value that is subnormal (denormal).

A IEEE Binary64 subnormal has an exponent of 0x000 and a nonzero significand. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	6-16	1/cycle
power9	3	1/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_iszerof64()

static vb64_t vec_iszerof64 ( vf64_t vf64 )

inlinestatic

Return 2x64-bit vector boolean true values, for each double value that is +-0.0.

A IEEE Binary64 zero has an exponent of 0x000 and a zero significand. The sign bit is ignored.

Note: This function will not raise VXSNAN or VXVC (FE_INVALID) exceptions. A normal double compare can.

processor	Latency	Throughput
power8	4-13	2/cycle
power9	3	2/cycle

Parameters

vf64	a vector of __binary64 values.

Returns: a vector boolean long long, each containing all 0s(false) or 1s(true).

◆ vec_pack_longdouble()

static long double vec_pack_longdouble ( vf64_t lval )

inlinestatic

Copy the pair of doubles from a vector to IBM long double.

Parameters

lval	vector double values containing the IBM long double.

Returns: IBM long double as FPR pair.

◆ vec_setb_dp()

static vb64_t vec_setb_dp ( vf64_t vra )

inlinestatic

Vector Set Bool from Sign, Double Precision.

For each double, propagate the sign bit to all 64-bits of that doubleword. The result is vector bool long long reflecting the sign bit of each 64-bit double.

The resulting mask can be used in vector masking and select operations.

Note: This operation will set the sign mask regardless of data class, while the Vector Test Data Class instructions will not distinguish between +/- NaN.

processor	Latency	Throughput
power8	2-4	2/cycle
power9	2-5	2/cycle

Parameters

vra	Vector double.

Returns: vector bool long long reflecting the sign bits of each double value.

◆ vec_unpack_longdouble()

static vf64_t vec_unpack_longdouble ( long double lval )

inlinestatic

Copy the pair of doubles from a IBM long double to a vector double.

Parameters

lval	IBM long double as FPR pair.

Returns: vector double values containing the IBM long double.

◆ vec_vglfddo()

static vf64_t vec_vglfddo	(	double *	array,
		vi64_t	vra
	)

inlinestatic

Vector Gather-Load Float Double from Doubleword Offsets.

For each doubleword element [i] of vra, load the float double element at *(char*)array+vra[i]. Merge those float double elements and return the resulting vector.

Note: As effective address calculation is modulo 64-bits, signed or unsigned doubleword offsets are equivalent.

processor	Latency	Throughput
power8	12	1/cycle
power9	11	1/cycle

Parameters

array	Pointer to array of doubles.
vra	Vector of doubleword (64-bit) byte offsets from &array.

Returns: vector double containing elements loaded from *(char*)array+vra[0] and *(char*)array+vra[1].

◆ vec_vglfddsx()

static vf64_t vec_vglfddsx	(	double *	array,
		vi64_t	vra,
		const unsigned char	scale
	)

inlinestatic

Vector Gather-Load Float Double from Doubleword Scaled Indexes.

For each doubleword element [i] of vra, load the float double element array[vra[i] * (1 << scale)]. Merge those float double elements and return the resulting vector. Indexes are converted to offsets from *array by shifting each doubleword left (3+scale) bits.

Note: As effective address calculation is modulo 64-bits, signed or unsigned doubleword indexes are equivalent.

processor	Latency	Throughput
power8	14-23	1/cycle
power9	13-22	1/cycle

Parameters

array	Pointer to array of doubles.
vra	Vector of doubleword indexes.
scale	8-bit integer. Indexes are multiplied by 2^scale.

Returns: Vector double containing array[vra[0]*(1<<scale)] and array[vra[1]*(1<<scale)].

◆ vec_vglfddx()

static vf64_t vec_vglfddx	(	double *	array,
		vi64_t	vra
	)

inlinestatic

Vector Gather-Load Float Double from Doubleword indexes.

For each doubleword element [i] of vra, load the double element array[vra[i]]. Merge those float double elements and return the resulting vector. The indexes are converted to offsets from *array by shifting each doubleword index left 3-bits (*8).

Note: As effective address calculation is modulo 64-bits, signed or unsigned doubleword indexes are equivalent.

processor	Latency	Throughput
power8	14-23	1/cycle
power9	13-22	1/cycle

Parameters

array	Pointer to array of doubles.
vra	Vector of doubleword indexes.

Returns: vector double containing {array[vra[0]], array[vra[1]]}.

◆ vec_vglfdso()

static vf64_t vec_vglfdso	(	double *	array,
		const long long	offset0,
		const long long	offset1
	)

inlinestatic

Vector Gather-Load Float Double from scalar Offsets.

For each scalar offset[0|1], load the float double element at *(char*)array+offset[0|1]. Merge those float double elements and return the resulting vector.

processor	Latency	Throughput
power8	12	1/cycle
power9	11	1/cycle

Parameters

array	Pointer to array of doubles.
offset0	Scalar (64-bit) byte offsets from &array.
offset1	Scalar (64-bit) byte offsets from &array.

Returns: vector double containing elements loaded from *(char*)array+offset0 and *(char*)array+offset1.

◆ vec_vlxsfdx()

static vf64_t vec_vlxsfdx	(	const signed long long	ra,
		const double *	rb
	)

inlinestatic

Vector Load Scalar Float Double Indexed.

Load the left most doubleword of vector xt as a scalar double from the effective address formed by rb+ra. The operand rb is a pointer to an array of doubles. The operand ra is a doubleword integer byte offset from rb. The result xt is returned as a vf64_t vector. For best performance rb and ra should be doubleword aligned (integer multiple of 8).

Note: the right most doubleword of vector xt is left undefined by this operation.

This operation is an alternate form of Vector Load Element (vec_lde), with the added simplification that data is always left justified in the vector. This simplifies merging elements for gather operations.

Note: This instruction was introduced in PowerISA 2.06 (POWER7). For POWER8/9 there are additional optimizations by effectively converting small constant index values into displacements. For POWER8 a specific pattern of addi/lxsdx instruction is fused into a single load displacement internal operation. For POWER9 we can use the lxsd (DS-form) instruction directly.

processor	Latency	Throughput
power8	5	2/cycle
power9	5	2/cycle

Parameters

ra	const doubleword index (offset/displacement).
rb	const doubleword pointer to an array of doubles.

Returns: The data stored at (ra + rb) is loaded into vector doubleword element 0. Element 1 is undefined.

◆ vec_vsstfddo()

static void vec_vsstfddo	(	vf64_t	xs,
		double *	array,
		vi64_t	vra
	)

inlinestatic

Vector Scatter-Store Float Double to Doubleword Offsets.

For each doubleword element [i] of vra, Store the double element xs[i] at *(char*)array+vra[i].

Note: As effective address calculation is modulo 64-bits, signed or unsigned doubleword offsets are equivalent.

processor	Latency	Throughput
power8	12	1/cycle
power9	8	1/cycle

Parameters

xs	Vector double elements to scatter store.
array	Pointer to array of doubles.
vra	Vector of doubleword (64-bit) byte offsets from &array.

◆ vec_vsstfddsx()

static void vec_vsstfddsx	(	vf64_t	xs,
		double *	array,
		vi64_t	vra,
		const unsigned char	scale
	)

inlinestatic

Vector Scatter-Store Float Double to Doubleword Scaled Index.

For each doubleword element [i] of vra, store the double element xs[i] at array[vra[i] * (1 << scale)]. Indexes are converted to offsets from *array by shifting each doubleword of vra left (3+scale) bits.

Note: As effective address calculation is modulo 64-bits, signed or unsigned doubleword indexes are equivalent.

processor	Latency	Throughput
power8	14-23	1/cycle
power9	10-19	1/cycle

Parameters

xs	Vector double elements to store.
array	Pointer to array of doubles.
vra	Vector of doubleword indexes.
scale	Factor effectively multiplying the indexes by 2^scale.

◆ vec_vsstfddx()

static void vec_vsstfddx	(	vf64_t	xs,
		double *	array,
		vi64_t	vra
	)

inlinestatic

Vector Scatter-Store Float Double to Doubleword Indexes.

For each doubleword element [i] of vra, store the double element xs[i] at array[vra[i]]. Indexes are converted to offsets from *array by shifting each doubleword of vra left 3 bits.

Note: As effective address calculation is modulo 64-bits, signed or unsigned doubleword indexes are equivalent.

processor	Latency	Throughput
power8	14-23	1/cycle
power9	10-19	1/cycle

Parameters

xs	Vector double elements to store.
array	Pointer to array of doubles.
vra	Vector of doubleword indexes.

◆ vec_vsstfdso()

static void vec_vsstfdso	(	vf64_t	xs,
		double *	array,
		const long long	offset0,
		const long long	offset1
	)

inlinestatic

Vector Scatter-Store Float Double to Scalar Offsets.

For each scalar offset[0|1], store the double element xs[0|1] at *(char*)array+offset[0|1].

Note: As effective address calculation is modulo 64-bits, signed or unsigned doubleword offsets are equivalent.

processor	Latency	Throughput
power8	12	1/cycle
power9	8	1/cycle

Parameters

xs	Vector double elements to scatter store.
array	Pointer to array of doubles.
offset0	Scalar (64-bit) byte offset from &array.
offset1	Scalar (64-bit) byte offset from &array.

◆ vec_vstxsfdx()

static void vec_vstxsfdx	(	vf64_t	xs,
		const signed long long	ra,
		double *	rb
	)

inline static

Vector Store Scalar Float Double Indexed.

Stores the left most doubleword of vector xs as a scalar double float at the effective address formed by rb+ra. The operand rb is a pointer to an array of doubles. The operand ra is a doubleword integer byte offset from rb. For best performance rb and ra should be doubleword aligned (integer multiple of 8).

This operation is an alternate form of vector store element, with the added simplification that data is always left justified in the vector. This simplifies scatter operations.

Note: This instruction was introduced in PowerISA 2.06 (POWER7). For POWER9 there are additional optimizations by effectively converting small constant index values into displacements. For POWER9 we can use the stxsd (DS-form) instruction directly.

processor	Latency	Throughput
power8	0 - 2	2/cycle
power9	0 - 2	4/cycle

Parameters

xs	vector doubleword element 0 to be stored.
ra	const doubleword index (offset/displacement).
rb	const doubleword pointer to an array of doubles.

◆ vec_xviexpdp()

static vf64_t vec_xviexpdp	(	vui64_t	sig,
		vui64_t	exp
	)

inline static

Vector Insert Exponent Double-Precision.

For each doubleword of sig and exp, merge the sign (bit 0) and significand (bits 12:63) from sig with the 11-bit exponent from exp (bits 53:63). The exponent is merged into bits 1:11 of the final result. The result is returned as a Vector Double-Precision floating point value.

Note: This operation is equivalent to the POWER9 xviexpdp instruction and the built-in vec_insert_exp. These require a POWER9-enabled compiler targeting -mcpu=power9 and are not available for older compilers nor POWER8 and earlier. This function provides this operation for all VSX-enabled platforms.

processor	Latency	Throughput
power8	6-15	2/cycle
power9	2	4/cycle

Parameters

sig	Vector unsigned long long containing the Sign Bit and 52-bit significand.
exp	Vector unsigned long long containing the 11-bit exponent.

Returns: a vf64_t value where the exponent bits (1:11) of sig are replaced from bits 53:63 of exp.

◆ vec_xvxexpdp()

static vui64_t vec_xvxexpdp ( vf64_t vrb )

inline static

Vector Extract Exponent Double-Precision.

For each doubleword of vrb, extract the double-precision exponent (bits 1:11) and right justify it to bits 53:63 of the result vector doubleword. The result is returned as a vector long long integer value.

Note: This operation is equivalent to the POWER9 xvxexpdp instruction and the built-in vec_extract_exp. These require a POWER9-enabled compiler targeting -mcpu=power9 and are not available for older compilers nor POWER8 and earlier. This function provides this operation for all VSX-enabled platforms.

processor	Latency	Throughput
power8	6-15	2/cycle
power9	2	4/cycle

Parameters

vrb	vector double value.

Returns: vector unsigned long long containing 11-bit exponent right justified in each doubleword

◆ vec_xvxsigdp()

static vui64_t vec_xvxsigdp ( vf64_t vrb )

inline static

Vector Extract Significand Double-Precision.

For each doubleword of vrb, extract the double-precision significand (bits 12:63) and restore the implied (hidden) bit (bit 11) if the double-precision value is normal (not zero, subnormal, Infinity or NaN). The result is returned as a vector long long integer value with up to 53 bits of significance.

Note: This operation is equivalent to the POWER9 xvxsigdp instruction and the built-in vec_extract_sig. These require a POWER9-enabled compiler targeting -mcpu=power9 and are not available for older compilers nor POWER8 and earlier. This function provides this operation for all VSX-enabled platforms.

processor	Latency	Throughput
power8	8-17	1/cycle
power9	3	2/cycle

Parameters

vrb	vector double value.

Returns: vector unsigned long long containing the significand.

Functions

Detailed Description

Examples

Performance data.

Function Documentation

◆ vec_absf64()

◆ vec_all_isfinitef64()

◆ vec_all_isinff64()

◆ vec_all_isnanf64()

◆ vec_all_isnormalf64()

◆ vec_all_issubnormalf64()

◆ vec_all_iszerof64()

◆ vec_any_isfinitef64()

◆ vec_any_isinff64()

◆ vec_any_isnanf64()

◆ vec_any_isnormalf64()

◆ vec_any_issubnormalf64()

◆ vec_any_iszerof64()

◆ vec_copysignf64()

◆ vec_isfinitef64()

◆ vec_isinff64()

◆ vec_isnanf64()

◆ vec_isnormalf64()

◆ vec_issubnormalf64()

◆ vec_iszerof64()

◆ vec_pack_longdouble()

◆ vec_setb_dp()

◆ vec_unpack_longdouble()

◆ vec_vglfddo()

◆ vec_vglfddsx()

◆ vec_vglfddx()

◆ vec_vglfdso()

◆ vec_vlxsfdx()

◆ vec_vsstfddo()

◆ vec_vsstfddsx()

◆ vec_vsstfddx()

◆ vec_vsstfdso()

◆ vec_vstxsfdx()

◆ vec_xviexpdp()

◆ vec_xvxexpdp()

◆ vec_xvxsigdp()