POWER Vector Library Manual  1.0.4
Classes | Macros | Typedefs | Functions | Variables
vec_common_ppc.h File Reference

Common definitions and typedefs used by the collection of Power Vector Library (pveclib) headers. More...

#include <stdint.h>
#include <altivec.h>

Go to the source code of this file.

Classes

union  __VEC_U_128
 Union used to transfer 128-bit data between vector and non-vector types. More...
 

Macros

#define CONST_VINT64_DW(__dw0, __dw1)   {__dw1, __dw0}
 Arrange elements of dword initializer in high->low order.

 
#define CONST_VINT128_DW(__dw0, __dw1)   (vui64_t){__dw1, __dw0}
 Initializer for a 128-bit vector, as two unsigned long long elements in high->low order. May require an explicit cast.
 
#define CONST_VINT128_DW128(__dw0, __dw1)   (vui128_t)((vui64_t){__dw1, __dw0})
 A vector unsigned __int128 initializer, as two unsigned long long elements in high->low order.

 
#define CONST_VINT128_W(__w0, __w1, __w2, __w3)   (vui32_t){__w3, __w2, __w1, __w0}
 Arrange word elements of an unsigned int initializer in high->low order. May require an explicit cast.

 
#define CONST_VINT32_W(__w0, __w1, __w2, __w3)   {__w3, __w2, __w1, __w0}
 Arrange elements of word initializer in high->low order.

 
#define CONST_VINT128_H(__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7)   (vui16_t){__hw7, __hw6, __hw5, __hw4, __hw3, __hw2, __hw1, __hw0}
 Arrange halfword elements of an unsigned short initializer in high->low order. May require an explicit cast.

 
#define CONST_VINT16_H(__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7)   {__hw7, __hw6, __hw5, __hw4, __hw3, __hw2, __hw1, __hw0}
 Arrange elements of halfword initializer in high->low order.

 
#define CONST_VINT128_B(_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15)   (vui8_t){_b15, _b14, _b13, _b12, _b11, _b10, _b9, _b8, _b7, _b6, _b5, _b4, _b3, _b2, _b1, _b0}
 Arrange byte elements of an unsigned char initializer in high->low order. May require an explicit cast.

 
#define CONST_VINT8_B(_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15)   {_b15, _b14, _b13, _b12, _b11, _b10, _b9, _b8, _b7, _b6, _b5, _b4, _b3, _b2, _b1, _b0}
 Arrange elements of byte initializer in high->low order.

 
#define VEC_DW_H   1
 Element index for high order dword.

 
#define VEC_DW_L   0
 Element index for low order dword.

 
#define VEC_W_H   3
 Element index for highest order word.

 
#define VEC_W_L   0
 Element index for lowest order word.

 
#define VEC_WE_0   3
 Element index for vector splat word 0.

 
#define VEC_WE_1   2
 Element index for vector splat word 1.

 
#define VEC_WE_2   1
 Element index for vector splat word 2.

 
#define VEC_WE_3   0
 Element index for vector splat word 3.

 
#define VEC_HW_H   7
 Element index for highest order hword.

 
#define VEC_HW_L_DWH   4
 Element index for lowest order hword of the high dword.

 
#define VEC_HW_L   0
 Element index for lowest order hword.

 
#define VEC_BYTE_L   0
 Element index for lowest order byte.

 
#define VEC_BYTE_L_DWH   8
 Element index for lowest order byte of the high dword.

 
#define VEC_BYTE_L_DWL   0
 Element index for lowest order byte of the low dword.

 
#define VEC_BYTE_H   15
 Element index for highest order byte.

 
#define VEC_BYTE_HHW   14
 Element index for second highest order byte.

 

Typedefs

typedef __vector unsigned char vui8_t
 vector of 8-bit unsigned char elements.
 
typedef __vector unsigned short vui16_t
 vector of 16-bit unsigned short elements.
 
typedef __vector unsigned int vui32_t
 vector of 32-bit unsigned int elements.
 
typedef __vector unsigned long long vui64_t
 vector of 64-bit unsigned long long elements.
 
typedef __vector signed char vi8_t
 vector of 8-bit signed char elements.
 
typedef __vector short vi16_t
 vector of 16-bit signed short elements.
 
typedef __vector int vi32_t
 vector of 32-bit signed int elements.
 
typedef __vector long long vi64_t
 vector of 64-bit signed long long elements.
 
typedef __vector float vf32_t
 vector of 32-bit float elements.
 
typedef __vector double vf64_t
 vector of 64-bit double elements.
 
typedef __vector __bool char vb8_t
 vector of 8-bit bool char elements.
 
typedef __vector __bool short vb16_t
 vector of 16-bit bool short elements.
 
typedef __vector __bool int vb32_t
 vector of 32-bit bool int elements.
 
typedef __vector __bool long long vb64_t
 vector of 64-bit bool long long elements.
 
typedef __vector __int128 vi128_t
 vector of one 128-bit signed __int128 element.
 
typedef __vector unsigned __int128 vui128_t
 vector of one 128-bit unsigned __int128 element.
 
typedef __vector __bool __int128 vb128_t
 vector of one 128-bit bool __int128 element.
 

Functions

static unsigned __int128 vec_transfer_vui128t_to_uint128 (vui128_t vra)
 Transfer a vector unsigned __int128 to __int128 scalar. More...
 
static vui128_t vec_transfer_uint128_to_vui128t (unsigned __int128 gprp)
 Transfer a __int128 scalar to vector unsigned __int128. More...
 
static unsigned long long scalar_extract_uint64_from_low_uint128 (unsigned __int128 gprp)
 Extract the low doubleword from a __int128 scalar. More...
 
static unsigned long long scalar_extract_uint64_from_high_uint128 (unsigned __int128 gprp)
 Extract the high doubleword from a __int128 scalar. More...
 
static unsigned __int128 scalar_insert_uint64_to_uint128 (unsigned long long high, unsigned long long low)
 Insert High/low doublewords into a __int128 scalar. More...
 

Variables

const vui128_t vtipowof10 []
 Table of powers of 10 [0-38] in vector __int128 format.

 
const vui128_t vtifrexpof10 []
 table used to verify 128-bit frexp operations for powers of 10.

 
const _Decimal128 decpowof2 []
 Table of powers of 2 [0-1077] in _Decimal128 format.

 

Detailed Description

Common definitions and typedefs used by the collection of Power Vector Library (pveclib) headers.

This includes:

Consistent vector type naming

Type names should be short, concise, and consistent. The ABI defines the vector types as extensions of the existing C Language types. So while vector unsigned long long is consistent, it is neither short nor concise. Pveclib uses the following naming convention for typedefs used in its operations, function prototypes, and internal variables.

For example: vi32_t is a vector int, vui32_t is a vector unsigned int, vb32_t is a vector bool int, and vf32_t is vector float.

Transferring 128-bit types

The OpenPOWER ABI and the GCC compiler define a number of 128-bit scalar types that are not vector types, such as __int128, __float128, __ibm128, and _Decimal128.

These are neither cast nor assignment compatible with any vector type. However it may be useful to transfer to/from vector types for conversion or manipulation within an operation. For example:

Here we use the __VEC_U_128 union to effect the transfer between the various types. We assume (fervently hope) that the compiler will recognize and optimize these as register-to-register transfers using the hardware instructions provided.

The vector to/from __float128 transfer should be the simplest as __float128 operations are defined over the vector register set. However __float128 types are defined in the PowerISA and OpenPOWER ABI as scalars that just happen to use vector registers for parameter passing and operations. This distinction between scalars and vectors prevents a direct cast between types. The __VEC_U_128 union is the simplest workaround, but in most cases no code should be generated for this transfer. For example: vec_xfer_bin128_2_vui128t() and vec_xfer_vui128t_2_bin128().

Any vector to/from __int128 transfer requires a transfer between vector and general purpose registers. POWER8 (PowerISA 2.07B) added Move to/from Vector Scalar Register (mfvsr, mtvsr) instructions. Again the __VEC_U_128 union is used to effect the transfer and the compiler should leverage the move instructions in the generated code.

Any vector to/from __ibm128 or _Decimal128 requires a transfer between a pair of FPRs and a Vector Scalar Register (VSR). Technically this is a transfer between the upper doubleword of two VSRs in the lower bank (VSR0-31) and another VSR. POWER7 (PowerISA 2.06B) provides the Permute Doubleword Immediate (xxpermdi) instruction. Again the __VEC_U_128 union is used to effect the transfer and the compiler should leverage the Permute Doubleword Immediate instructions in the generated code. For example: vec_BCD2DFP() and vec_DFP2BCD().

Endian and vector constants

Vector constants are often needed for: masking operations, range checks, permute selection, and radix conversion. Also, compiler support for large integer and floating-point constants may be limited. For example the GCC compilers support the (vector) __int128 type but do not directly support __int128 (39 digit) decimal constants. Another example is __float128 where the type and Q suffix constants are recent additions. In both cases we need to construct: large numeric constants, special values (infinity and NaN), masks for manipulating the sign bit and exponent bits. Often these values will be constructed from vectors of word or doubleword constants.

Note
GCC does not support expressing an integer constant of type __int128 for targets where long long integer is less than 128 bits wide. This applies to the PowerPC target as the long long type is reserved for 64-bit integers. This was verified in GCC 8.2.
GCC __float128 support for the PowerPC target began with GCC 6. In GCC 6 __float128 support is off by default and has to be explicitly enabled via the '-mfloat128' option. Starting with GCC 7, __float128 is enabled by default with VSX support.

Defining large constants for vectors is complicated by little-endian (LE) support as specified in the OpenPOWER ABI and as implemented in the compilers. Little-endian changes the effective vector element numbering and the order of constant elements in initializers. But the __int128 numerical order of magnitude or floating-point format does not change in registers. The high order bits are on the left and the low order bits are on the right.

So for example:

const vui32_t signmask = { 0x80000000, 0, 0, 0 };
const vui32_t expmask = { 0x7fff0000, 0, 0, 0 };

are correct sign and exponent masks for __float128 in big endian (BE) but would be incorrect for little endian (LE). To get correct results for both endians, one could code something like this:

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
const vui32_t signmask = { 0, 0, 0, 0x80000000 };
const vui32_t expmask = { 0, 0, 0, 0x7fff0000 };
#else
const vui32_t signmask = { 0x80000000, 0, 0, 0 };
const vui32_t expmask = { 0x7fff0000, 0, 0, 0 };
#endif

But this gets tedious after the first dozen times. Also this can be confusing because it does not appear to match the floating-point format diagrams in the PowerISA. The sign-bit and the exponent are always on the left.

So this header provides endian sensitive macros that maintain consistent "magnitude" order. For example:

const vui32_t signmask = CONST_VINT128_W (0x80000000, 0, 0, 0);
const vui32_t expmask = CONST_VINT128_W (0x7fff0000, 0, 0, 0);

This is always correct in either endian.

Another example; the multiplicative inverse for __int128 10**32 is 211857340822306639531405861550393824741. The GCC compiler will not accept this constant in a vector __int128 initializer. The next best thing would be

// The multiplicative inverse for 1 / 10**32 is
// 211857340822306639531405861550393824741
// or 0x9f623d5a8a732974cfbc31db4b0295e5
const vui128_t mulinv_10to32 =
(vui128_t) CONST_VINT128_DW128 ( 0x9f623d5a8a732974UL,
0xcfbc31db4b0295e5UL );

Here we use the CONST_VINT128_DW128 macro to maintain magnitude order across endian. Again the high order bits are on the left and the low order bits are on the right.

See also
Endian problems with word operations
General Endian Issues

Function Documentation

◆ scalar_extract_uint64_from_high_uint128()

static unsigned long long scalar_extract_uint64_from_high_uint128 ( unsigned __int128  gprp)
inline static

Extract the high doubleword from a __int128 scalar.

Parameters
gprp: an unsigned __int128 value.
Returns
The high doubleword of __int128.

◆ scalar_extract_uint64_from_low_uint128()

static unsigned long long scalar_extract_uint64_from_low_uint128 ( unsigned __int128  gprp)
inline static

Extract the low doubleword from a __int128 scalar.

Parameters
gprp: an unsigned __int128 value.
Returns
The low doubleword of __int128.

◆ scalar_insert_uint64_to_uint128()

static unsigned __int128 scalar_insert_uint64_to_uint128 ( unsigned long long  high,
unsigned long long  low 
)
inline static

Insert High/low doublewords into a __int128 scalar.

Parameters
high: doubleword of a __int128.
low: doubleword of a __int128.
Returns
The combined quadword as a __int128 scalar.

◆ vec_transfer_uint128_to_vui128t()

static vui128_t vec_transfer_uint128_to_vui128t ( unsigned __int128  gprp)
inline static

Transfer a __int128 scalar to vector unsigned __int128.

The compiler does not allow direct transfer (assignment or type cast) between __int128 scalars and vector types. Vectors are held in 128-bit VRs (VSRs) and __int128 scalars are held in a pair of 64-bit GPRs. So this operation requires a transfer between registers of different types/sizes.

processor Latency Throughput
power8 7 1/cycle
power9 5 1/cycle
Parameters
gprp: an unsigned __int128 value.
Returns
The original value returned as a vector unsigned __int128.

◆ vec_transfer_vui128t_to_uint128()

static unsigned __int128 vec_transfer_vui128t_to_uint128 ( vui128_t  vra)
inline static

Transfer a vector unsigned __int128 to __int128 scalar.

The compiler does not allow direct transfer (assignment or type cast) between __int128 scalars and vector types. Vectors are held in 128-bit VRs (VSRs) and __int128 scalars are held in a pair of 64-bit GPRs. So this operation requires a transfer between registers of different types/sizes.

processor Latency Throughput
power8 6-7 1/cycle
power9 5-6 2/cycle
Parameters
vra: a vector unsigned __int128 value.
Returns
The original value returned as a __int128 scalar.
CONST_VINT128_W
#define CONST_VINT128_W(__w0, __w1, __w2, __w3)
Arrange word elements of an unsigned int initializer in high->low order. May require an explicit cast.
Definition: vec_common_ppc.h:304
vui128_t
__vector unsigned __int128 vui128_t
vector of one 128-bit unsigned __int128 element.
Definition: vec_common_ppc.h:237
vui32_t
__vector unsigned int vui32_t
vector of 32-bit unsigned int elements.
Definition: vec_common_ppc.h:206
CONST_VINT128_DW128
#define CONST_VINT128_DW128(__dw0, __dw1)
A vector unsigned __int128 initializer, as two unsigned long long elements in high->low order.
Definition: vec_common_ppc.h:301