POWER Vector Library Manual
1.0.4
|
Go to the documentation of this file.
24 #ifndef SRC_PVECLIB_VEC_INT512_PPC_H_
25 #define SRC_PVECLIB_VEC_INT512_PPC_H_
810 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
811 #define CONST_VINT512_Q(__q0, __q1, __q2, __q3) {__q3, __q2, __q1, __q0}
813 #define CONST_VINT512_Q(__q0, __q1, __q2, __q3) {__q0, __q1, __q2, __q3}
826 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
845 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
870 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
907 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
917 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
937 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
974 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1011 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1067 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1101 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1111 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1141 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1194 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1287 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1297 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1311 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1340 #ifdef __VEC_EXPLICIT_FENCE_NOPS__
1342 #define COMPILE_FENCE __asm ("nop":::)
1344 #define COMPILE_FENCE __asm (";":::)
1349 #define __VEC_PWR_IMP(FNAME) FNAME ## _PWR10
1352 #define __VEC_PWR_IMP(FNAME) FNAME ## _PWR9
1355 #define __VEC_PWR_IMP(FNAME) FNAME ## _PWR8
1357 #define __VEC_PWR_IMP(FNAME) FNAME ## _PWR7
1385 result.vx0 =
vec_addcq (&mc, a.vx0, b.vx0);
1386 result.vx1 =
vec_addeq (&mp, a.vx1, b.vx1, mc);
1387 result.vx2 =
vec_addeq (&mc, a.vx2, b.vx2, mp);
1388 result.vx3 =
vec_addeq (&result.vx4, a.vx3, b.vx3, mc);
1417 result.vx0 =
vec_addeq (&mq, a.vx0, b.vx0, c);
1418 result.vx1 =
vec_addeq (&mp, a.vx1, b.vx1, mq);
1419 result.vx2 =
vec_addeq (&mq, a.vx2, b.vx2, mp);
1420 result.vx3 =
vec_addeq (&result.vx4, a.vx3, b.vx3, mq);
1449 result.vx0 =
vec_addeq (&mq, a.vx0, b.vx0, c);
1450 result.vx1 =
vec_addeq (&mp, a.vx1, b.vx1, mq);
1451 result.vx2 =
vec_addeq (&mq, a.vx2, b.vx2, mp);
1479 result.vx0 =
vec_addcq (&mc, a.vx0, b.vx0);
1480 result.vx1 =
vec_addeq (&mp, a.vx1, b.vx1, mc);
1481 result.vx2 =
vec_addeq (&mc, a.vx2, b.vx2, mp);
1612 mp =
vec_madduq (&mphl, m1.vx1, m2.vx0, mplh);
1618 mp =
vec_madd2uq (&mphh, m1.vx1, m2.vx1, mphl, mq);
1757 mpx0 =
vec_madduq (&mq0, m1.vx0, m2, a2.vx0);
1758 mpx1 =
vec_madd2uq (&mq1, m1.vx1, m2, mq0, a2.vx1);
1760 mpx2 =
vec_madd2uq (&mq2, m1.vx2, m2, mq1, a2.vx2);
1761 mpx3 =
vec_madd2uq (&mq3, m1.vx3, m2, mq2, a2.vx3);
1807 mpx0 =
vec_madd2uq (&mq0, m1.vx0, m2, a1, a2.vx0);
1808 mpx1 =
vec_madd2uq (&mq1, m1.vx1, m2, mq0, a2.vx1);
1810 mpx2 =
vec_madd2uq (&mq2, m1.vx2, m2, mq1, a2.vx2);
1811 mpx3 =
vec_madd2uq (&mq3, m1.vx3, m2, mq2, a2.vx3);
1852 result.vx0 = mp0.x3.v1x128;
1855 result.vx1 = mp1.x3.v1x128;
1858 result.vx2 = mp2.x3.v1x128;
1862 result.vx3 = mp3.x3.v1x128;
1863 result.vx4 = mp3.x3.v0x512.vx0;
1864 result.vx5 = mp3.x3.v0x512.vx1;
1865 result.vx6 = mp3.x3.v0x512.vx2;
1866 result.vx7 = mp3.x3.v0x512.vx3;
1904 result.vx0 = mp0.x3.v1x128;
1907 result.vx1 = mp1.x3.v1x128;
1910 result.vx2 = mp2.x3.v1x128;
1914 result.vx3 = mp3.x3.v1x128;
1915 result.vx4 = mp3.x3.v0x512.vx0;
1916 result.vx5 = mp3.x3.v0x512.vx1;
1917 result.vx6 = mp3.x3.v0x512.vx2;
1918 result.vx7 = mp3.x3.v0x512.vx3;
2139 unsigned long M,
unsigned long N);
2172 unsigned long M,
unsigned long N);
2215 unsigned long M,
unsigned long N);
2220 unsigned long M,
unsigned long N);
#define __VEC_PWR_IMP(FNAME)
Macro to add platform suffix for static calls.
Definition: vec_int512_ppc.h:1357
A vector representation of a 1152-bit unsigned integer.
Definition: vec_int512_ppc.h:971
A vector representation of a 2048-bit unsigned integer as 4 x 512-bit integer fields.
Definition: vec_int512_ppc.h:1095
static __VEC_U_1024 vec_madd512x512a512_inline(__VEC_U_512 m1, __VEC_U_512 m2, __VEC_U_512 a1)
Vector 512-bit Unsigned Integer Multiply-Add.
Definition: vec_int512_ppc.h:1898
static vui128_t vec_addecuq(vui128_t a, vui128_t b, vui128_t ci)
Vector Add Extended & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:2622
__VEC_U_1024 vec_mul512x512(__VEC_U_512 m1, __VEC_U_512 m2)
Vector 512x512-bit Unsigned Integer Multiply.
__VEC_U_640 vec_madd512x128a512(__VEC_U_512 m1, vui128_t m2, __VEC_U_512 a2)
Vector 512x128-bit Multiply-Add Unsigned Integer.
A vector representation of a 4096-bit unsigned integer as 8 x 512-bit integer fields.
Definition: vec_int512_ppc.h:1281
A vector representation of a 512-bit unsigned integer.
Definition: vec_int512_ppc.h:842
__VEC_U_256 vec_mul128x128(vui128_t m1, vui128_t m2)
Vector 128x128-bit Unsigned Integer Multiply.
static __VEC_U_640 vec_add512cu(__VEC_U_512 a, __VEC_U_512 b)
Vector Add 512-bit Unsigned Integer & Write Carry.
Definition: vec_int512_ppc.h:1380
static vui128_t vec_addeuqm(vui128_t a, vui128_t b, vui128_t ci)
Vector Add Extended Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:2684
A vector representation of a 640-bit unsigned integer.
Definition: vec_int512_ppc.h:867
A vector representation of a 2176-bit unsigned integer.
Definition: vec_int512_ppc.h:1138
static vui128_t vec_madd2uq(vui128_t *mulu, vui128_t a, vui128_t b, vui128_t c1, vui128_t c2)
Vector Multiply-Add2 Unsigned Quadword.
Definition: vec_int128_ppc.h:6184
A vector representation of a 256-bit unsigned integer.
Definition: vec_int512_ppc.h:823
static __VEC_U_640 vec_madd512x128a128_inline(__VEC_U_512 m1, vui128_t m2, vui128_t a1)
Vector 512x128-bit Multiply-Add Unsigned Integer.
Definition: vec_int512_ppc.h:1702
A vector representation of a 1024-bit unsigned integer.
Definition: vec_int512_ppc.h:934
static __VEC_U_512 vec_add512um(__VEC_U_512 a, __VEC_U_512 b)
Vector Add 512-bit Unsigned Integer Modulo.
Definition: vec_int512_ppc.h:1474
static __VEC_U_256 vec_mul128x128_inline(vui128_t a, vui128_t b)
Vector 128x128-bit Unsigned Integer Multiply.
Definition: vec_int512_ppc.h:1574
void vec_mul1024x1024(__VEC_U_2048 *p2048, __VEC_U_1024 *m1, __VEC_U_1024 *m2)
Vector 1024x1024-bit Unsigned Integer Multiply.
Header package containing a collection of 128-bit computation functions implemented with PowerISA VMX...
static vui128_t vec_madduq(vui128_t *mulu, vui128_t a, vui128_t b, vui128_t c)
Vector Multiply-Add Unsigned Quadword.
Definition: vec_int128_ppc.h:5956
static vui128_t vec_muludq(vui128_t *mulu, vui128_t a, vui128_t b)
Vector Multiply Unsigned Double Quadword.
Definition: vec_int128_ppc.h:5734
static __VEC_U_640 vec_madd512x128a128a512_inline(__VEC_U_512 m1, vui128_t m2, vui128_t a1, __VEC_U_512 a2)
Vector 512x128-bit Multiply-Add Unsigned Integer.
Definition: vec_int512_ppc.h:1801
static __VEC_U_640 vec_add512ecu(__VEC_U_512 a, __VEC_U_512 b, vui128_t c)
Vector Add Extended 512-bit Unsigned Integer & Write Carry.
Definition: vec_int512_ppc.h:1412
static vui128_t vec_addcuq(vui128_t a, vui128_t b)
Vector Add & write Carry Unsigned Quadword.
Definition: vec_int128_ppc.h:2568
__vector unsigned __int128 vui128_t
vector of one 128-bit unsigned __int128 element.
Definition: vec_common_ppc.h:237
A vector representation of a 4096-bit unsigned integer.
Definition: vec_int512_ppc.h:1191
static __VEC_U_640 vec_madd512x128a512_inline(__VEC_U_512 m1, vui128_t m2, __VEC_U_512 a2)
Vector 512x128-bit Multiply-Add Unsigned Integer.
Definition: vec_int512_ppc.h:1751
A vector representation of a 512-bit unsigned integer and a 128-bit carry-out.
Definition: vec_int512_ppc.h:901
static vui128_t vec_addcq(vui128_t *cout, vui128_t a, vui128_t b)
Vector Add with carry Unsigned Quadword.
Definition: vec_int128_ppc.h:2788
static __VEC_U_512 vec_add512eum(__VEC_U_512 a, __VEC_U_512 b, vui128_t c)
Vector Add Extended 512-bit Unsigned Integer Modulo.
Definition: vec_int512_ppc.h:1444
static __VEC_U_512 vec_add512ze(__VEC_U_512 a, vui128_t c)
Vector Add 512-bit to Zero Extended Unsigned Integer Modulo.
Definition: vec_int512_ppc.h:1506
static __VEC_U_640 vec_mul512x128_inline(__VEC_U_512 m1, vui128_t m2)
Vector 512x128-bit Unsigned Integer Multiply.
Definition: vec_int512_ppc.h:1653
__VEC_U_512 vec_mul256x256(__VEC_U_256 m1, __VEC_U_256 m2)
Vector 256x256-bit Unsigned Integer Multiply.
static __VEC_U_1024 vec_mul512x512_inline(__VEC_U_512 m1, __VEC_U_512 m2)
Vector 512x512-bit Unsigned Integer Multiply.
Definition: vec_int512_ppc.h:1846
static __VEC_U_512 vec_mul256x256_inline(__VEC_U_256 m1, __VEC_U_256 m2)
Vector 256x256-bit Unsigned Integer Multiply.
Definition: vec_int512_ppc.h:1605
static vui128_t vec_adduqm(vui128_t a, vui128_t b)
Vector Add Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:2739
static vui128_t vec_addeq(vui128_t *cout, vui128_t a, vui128_t b, vui128_t ci)
Vector Add Extended with carry Unsigned Quadword.
Definition: vec_int128_ppc.h:2849
#define COMPILE_FENCE
A compiler fence to prevent excessive code motion.
Definition: vec_int512_ppc.h:1344
A vector representation of a 2048-bit unsigned integer.
Definition: vec_int512_ppc.h:1008
void vec_mul512_byMN(__VEC_U_512 *p, __VEC_U_512 *m1, __VEC_U_512 *m2, unsigned long M, unsigned long N)
Vector Unsigned Integer Quadword 4xMxN Multiply.
void vec_mul128_byMN(vui128_t *p, vui128_t *m1, vui128_t *m2, unsigned long M, unsigned long N)
Vector Unsigned Integer Quadword MxN Multiply.
static __VEC_U_512 vec_add512ze2(__VEC_U_512 a, vui128_t c1, vui128_t c2)
Vector Add 512-bit to Zero Extended2 Unsigned Integer Modulo.
Definition: vec_int512_ppc.h:1542
__VEC_U_640 vec_mul512x128(__VEC_U_512 m1, vui128_t m2)
Vector 512x128-bit Unsigned Integer Multiply.
A vector representation of a 1024-bit unsigned integer as two 512-bit fields.
Definition: vec_int512_ppc.h:1061
void vec_mul2048x2048(__VEC_U_4096 *p4096, __VEC_U_2048 *m1, __VEC_U_2048 *m2)
Vector 2048x2048-bit Unsigned Integer Multiply.