POWER Vector Library Manual
1.0.4
|
Common definitions and typedef used by the collection of Power Vector Library (pveclib) headers. More...
#include <stdint.h>
#include <altivec.h>
Go to the source code of this file.
Classes | |
union | __VEC_U_128 |
Union used to transfer 128-bit data between vector and non-vector types. More... | |
Macros | |
#define | CONST_VINT64_DW(__dw0, __dw1) {__dw1, __dw0} |
Arrange elements of dword initializer in high->low order. | |
#define | CONST_VINT128_DW(__dw0, __dw1) (vui64_t){__dw1, __dw0} |
Initializer for 128-bits vector, as two unsigned long long elements in high->low order. May require an explicit cast. | |
#define | CONST_VINT128_DW128(__dw0, __dw1) (vui128_t)((vui64_t){__dw1, __dw0}) |
A vector unsigned __int128 initializer, as two unsigned long long elements in high->low order. | |
#define | CONST_VINT128_W(__w0, __w1, __w2, __w3) (vui32_t){__w3, __w2, __w1, __w0} |
Arrange word elements of an unsigned int initializer in high->low order. May require an explicit cast. | |
#define | CONST_VINT32_W(__w0, __w1, __w2, __w3) {__w3, __w2, __w1, __w0} |
Arrange elements of word initializer in high->low order. | |
#define | CONST_VINT128_H(__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7) (vui16_t){__hw7, __hw6, __hw5, __hw4, __hw3, __hw2, __hw1, __hw0} |
Arrange halfword elements of an unsigned short initializer in high->low order. May require an explicit cast. | |
#define | CONST_VINT16_H(__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7) {__hw7, __hw6, __hw5, __hw4, __hw3, __hw2, __hw1, __hw0} |
Arrange elements of halfword initializer in high->low order. | |
#define | CONST_VINT128_B(_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15) (vui8_t){_b15, _b14, _b13, _b12, _b11, _b10, _b9, _b8, _b7, _b6, _b5, _b4, _b3, _b2, _b1, _b0} |
Arrange byte elements of an unsigned char initializer in high->low order. May require an explicit cast. | |
#define | CONST_VINT8_B(_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15) {_b15, _b14, _b13, _b12, _b11, _b10, _b9, _b8, _b7, _b6, _b5, _b4, _b3, _b2, _b1, _b0} |
Arrange elements of byte initializer in high->low order. | |
#define | VEC_DW_H 1 |
Element index for high order dword. | |
#define | VEC_DW_L 0 |
Element index for low order dword. | |
#define | VEC_W_H 3 |
Element index for highest order word. | |
#define | VEC_W_L 0 |
Element index for lowest order word. | |
#define | VEC_WE_0 3 |
Element index for vector splat word 0. | |
#define | VEC_WE_1 2 |
Element index for vector splat word 1. | |
#define | VEC_WE_2 1 |
Element index for vector splat word 2. | |
#define | VEC_WE_3 0 |
Element index for vector splat word 3. | |
#define | VEC_HW_H 7 |
Element index for highest order hword. | |
#define | VEC_HW_L_DWH 4 |
Element index for lowest order hword of the high dword. | |
#define | VEC_HW_L 0 |
Element index for lowest order hword. | |
#define | VEC_BYTE_L 0 |
Element index for lowest order byte. | |
#define | VEC_BYTE_L_DWH 8 |
Element index for lowest order byte of the high dword. | |
#define | VEC_BYTE_L_DWL 0 |
Element index for lowest order byte of the low dword. | |
#define | VEC_BYTE_H 15 |
Element index for highest order byte. | |
#define | VEC_BYTE_HHW 14 |
Element index for second highest order byte. | |
Typedefs | |
typedef __vector unsigned char | vui8_t |
vector of 8-bit unsigned char elements. | |
typedef __vector unsigned short | vui16_t |
vector of 16-bit unsigned short elements. | |
typedef __vector unsigned int | vui32_t |
vector of 32-bit unsigned int elements. | |
typedef __vector unsigned long long | vui64_t |
vector of 64-bit unsigned long long elements. | |
typedef __vector signed char | vi8_t |
vector of 8-bit signed char elements. | |
typedef __vector short | vi16_t |
vector of 16-bit signed short elements. | |
typedef __vector int | vi32_t |
vector of 32-bit signed int elements. | |
typedef __vector long long | vi64_t |
vector of 64-bit signed long long elements. | |
typedef __vector float | vf32_t |
vector of 32-bit float elements. | |
typedef __vector double | vf64_t |
vector of 64-bit double elements. | |
typedef __vector __bool char | vb8_t |
vector of 8-bit bool char elements. | |
typedef __vector __bool short | vb16_t |
vector of 16-bit bool short elements. | |
typedef __vector __bool int | vb32_t |
vector of 32-bit bool int elements. | |
typedef __vector __bool long long | vb64_t |
vector of 64-bit bool long long elements. | |
typedef __vector __int128 | vi128_t |
vector of one 128-bit signed __int128 element. | |
typedef __vector unsigned __int128 | vui128_t |
vector of one 128-bit unsigned __int128 element. | |
typedef __vector __bool __int128 | vb128_t |
vector of one 128-bit bool __int128 element. | |
Functions | |
static unsigned __int128 | vec_transfer_vui128t_to_uint128 (vui128_t vra) |
Transfer a vector unsigned __int128 to __int128 scalar. More... | |
static vui128_t | vec_transfer_uint128_to_vui128t (unsigned __int128 gprp) |
Transfer a __int128 scalar to vector unsigned __int128. More... | |
static unsigned long long | scalar_extract_uint64_from_low_uint128 (unsigned __int128 gprp) |
Extract the low doubleword from a __int128 scalar. More... | |
static unsigned long long | scalar_extract_uint64_from_high_uint128 (unsigned __int128 gprp) |
Extract the high doubleword from a __int128 scalar. More... | |
static unsigned __int128 | scalar_insert_uint64_to_uint128 (unsigned long long high, unsigned long long low) |
Insert High/low doublewords into a __int128 scalar. More... | |
Variables | |
const vui128_t | vtipowof10 [] |
table powers of 10 [0-38] in vector __int128 format. | |
const vui128_t | vtifrexpof10 [] |
table used to verify 128-bit frexp operations for powers of 10. | |
const _Decimal128 | decpowof2 [] |
table powers of 2 [0-1077] in _Decimal128 format. | |
Common definitions and typedef used by the collection of Power Vector Library (pveclib) headers.
This includes:
Type names should be short, concise, and consistent. The ABI defines the vector types as extensions of the existing C Language types. So while vector unsigned long long is consistent, it is neither short nor concise. Pveclib uses the following naming convention for typedefs used in its operations, function prototypes, and internal variables.
For example: vi32_t is a vector int, vui32_t is a vector unsigned int, vb32_t is a vector bool int, and vf32_t is vector float.
The OpenPOWER ABI and the GCC compiler define a number of 128-bit scalar types that are not vector types:
These are not cast nor assignment compatible with any vector type. However it may be useful to transfer to/from vector types for conversion or manipulation within an operation. For example:
Here we use the __VEC_U_128 union to effect the transfer between the various types. We assume (fervently hope) that the compiler will recognize and optimize these as register-to-register transfers using the hardware instructions provided.
The vector to/from __float128 transfer should be the simplest as __float128 operations are defined over the vector register set. However __float128 types are defined in the PowerISA and OpenPOWER ABI, as scalars that just happen to use vector registers for parameter passing and operations. This distinction between scalars and vectors prevents a direct cast between types. The __VEC_U_128 union is the simplest work around but in most cases no code should be generated for this transfer. For example: vec_xfer_bin128_2_vui128t() and vec_xfer_vui128t_2_bin128().
Any vector to/from __int128 transfer requires a transfer between vector and general purpose registers. POWER8 (PowerISA 2.07B) added Move to/from Vector Scalar Register (mfvsr, mtvsr) instructions. Again the __VEC_U_128 union is used to effect the transfer and the compiler should leverage the move instructions in the generated code.
Any vector to/from __ibm128 or _Decimal128 requires a transfer between a pair of FPRs and a Vector Scalar Register (VSR). Technically this is a transfer between the upper doubleword of two VSRs in the lower bank (VSR0-31) and another VSR. POWER7 (PowerISA 2.06B) provides the Permute Doubleword Immediate (xxpermdi) instruction. Again the __VEC_U_128 union is used to effect the transfer and the compiler should leverage the Permute Doubleword Immediate instructions in the generated code. For example: vec_BCD2DFP() and vec_DFP2BCD().
Vector constants are often needed for: masking operations, range checks, permute selection, and radix conversion. Also, support for large integer and floating-point constants may be limited by the compiler. For example the GCC compilers support the (vector) __int128 type but do not directly support __int128 (39 digit) decimal constants. Another example is __float128 where the type and Q suffix constants are recent additions. In both cases we need to construct: large numeric constants, special values (infinity and NaN), masks for manipulating the sign bit and exponent bits. Often these values will be constructed from vectors of word or doubleword constants.
Defining large constants for vectors is complicated by little-endian (LE) support as specified in the OpenPOWER ABI and as implemented in the compilers. Little-endian changes the effective vector element numbering and the order of constant elements in initializers. But the __int128 numerical order of magnitude or floating-point format does not change in registers. The high order bits are on the left and the low order bits are on the right.
So for example:
are correct sign and exponent masks for __float128 in big endian (BE) but would be incorrect for little endian (LE). To get correct results for both endians, one could code something like this:
But this gets tedious after the first dozen times. Also this can be confusing because it does not appear to match the floating-point format diagrams in the PowerISA. The sign-bit and the exponent are always on the left.
So this header provides endian sensitive macros that maintain consistent "magnitude" order. For example:
This is always correct in either endian.
Another example; the multiplicative inverse for __int128 10**32 is 211857340822306639531405861550393824741. The GCC compiler will not accept this constant in a vector __int128 initializer. The next best thing would be
Here we use the CONST_VINT128_DW128 macro to maintain magnitude order across endian. Again the high order bits are on the left and the low order bits are on the right.
|
inlinestatic |
Extract the high doubleword from a __int128 scalar.
gprp | an unsigned __int128 value. |
|
inlinestatic |
Extract the low doubleword from a __int128 scalar.
gprp | an unsigned __int128 value. |
|
inlinestatic |
Insert High/low doublewords into a __int128 scalar.
high | doubleword of a __int128. |
low | doubleword of a __int128. |
|
inlinestatic |
Transfer a __int128 scalar to vector unsigned __int128.
The compiler does not allow direct transfer (assignment or type cast) between __int128 scalars and vector types. Vectors are held in 128-bit VRs (VSRs) and __int128 scalars are held in a pair of 64-bit GPRs. So this operation requires a transfer between registers of different types/sizes.
processor | Latency | Throughput |
---|---|---|
power8 | 7 | 1/cycle |
power9 | 5 | 1/cycle |
gprp | an unsigned __int128 value. |
|
inlinestatic |
Transfer a vector unsigned __int128 to __int128 scalar.
The compiler does not allow direct transfer (assignment or type cast) between __int128 scalars and vector types. Vectors are held in 128-bit VRs (VSRs) and __int128 scalars are held in a pair of 64-bit GPRs. So this operation requires a transfer between registers of different types/sizes.
processor | Latency | Throughput |
---|---|---|
power8 | 6-7 | 1/cycle |
power9 | 5-6 | 2/cycle |
vra | a vector unsigned __int128 value. |