POWER Vector Library Manual  1.0.4
vec_common_ppc.h
Go to the documentation of this file.
1 /*
2  Copyright (c) [2017, 2018] IBM Corporation.
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  vec_common_ppc.h
17 
18  Contributors:
19  IBM Corporation, Steven Munroe
20  */
21 
22 #ifndef VEC_COMMON_PPC_H_
23 #define VEC_COMMON_PPC_H_
24 
25 #include <stdint.h>
26 #include <altivec.h>
27 
/* Short type names for the 128-bit __vector types used by the rest of
 * this header.  Naming: v + [u]i/f/b (integer / float / bool) + element
 * width in bits. */
/* Vector of 8-bit unsigned char elements. */
202 typedef __vector unsigned char vui8_t;
/* Vector of 16-bit unsigned short elements. */
204 typedef __vector unsigned short vui16_t;
/* Vector of 32-bit unsigned int elements. */
206 typedef __vector unsigned int vui32_t;
/* Vector of 64-bit unsigned long long elements. */
208 typedef __vector unsigned long long vui64_t;
209 
/* Vector of 8-bit signed char elements. */
211 typedef __vector signed char vi8_t;
/* Vector of 16-bit signed short elements. */
213 typedef __vector short vi16_t;
/* Vector of 32-bit signed int elements. */
215 typedef __vector int vi32_t;
/* Vector of 64-bit signed long long elements. */
217 typedef __vector long long vi64_t;
/* Vector of 32-bit float elements. */
219 typedef __vector float vf32_t;
/* Vector of 64-bit double elements. */
221 typedef __vector double vf64_t;
222 
/* Vector of 8-bit bool char elements. */
224 typedef __vector __bool char vb8_t;
/* Vector of 16-bit bool short elements. */
226 typedef __vector __bool short vb16_t;
/* Vector of 32-bit bool int elements. */
228 typedef __vector __bool int vb32_t;
/* Vector of 64-bit bool long long elements. */
230 typedef __vector __bool long long vb64_t;
231 
/* 128-bit element vector types.  Two build-time switches select the
 * underlying type:
 *  - PVECLIB_DISABLE_INT128 not defined: use the compiler's
 *    vector __int128 types.
 *  - PVECLIB_DISABLE_INT128 defined: fall back to vectors of 32-bit int
 *    so the vi128_t/vui128_t/vb128_t names still exist. */
232 /* did not get vector __int128 until GCC4.8. */
233 #ifndef PVECLIB_DISABLE_INT128
234 
/* Vector of one 128-bit signed __int128 element. */
235 typedef __vector __int128 vi128_t;
/* Vector of one 128-bit unsigned __int128 element. */
237 typedef __vector unsigned __int128 vui128_t;
238 #ifndef PVECLIB_DISABLE_BOOLINT128
239 
/* Vector of one 128-bit bool __int128 element. */
240 typedef __vector __bool __int128 vb128_t;
241 #else
242 
/* PVECLIB_DISABLE_BOOLINT128 is defined: vb128_t aliases a vector of
 * 32-bit bool int elements instead. */
243 typedef __vector __bool int vb128_t;
244 #endif
245 #else
246 
/* PVECLIB_DISABLE_INT128 is defined: all three 128-bit names alias the
 * corresponding vectors of 32-bit int elements. */
247 typedef __vector int vi128_t;
249 typedef __vector unsigned int vui128_t;
251 typedef __vector __bool int vb128_t;
252 #endif
253 
/* Union used to transfer 128-bit data between vector and non-vector
 * types.  Being a union, all members share the same storage; the
 * transfer helpers in this header write one member and read another.
 *
 * The vector members vx16 .. vf2 (listing lines 269-279) are restored
 * here: the helpers in this header access t.vx1, so the union is
 * unusable without them. */
256 typedef union
257 {
 /* Signed 128-bit integer from pair of 64-bit GPRs. */
259  signed __int128 i128;
 /* Unsigned 128-bit integer from pair of 64-bit GPRs. */
261  unsigned __int128 ui128;
262 #ifndef PVECLIB_DISABLE_DFP
263 
 /* 128 bit Decimal Float from pair of double float registers. */
264  _Decimal128 dpd128;
265 #endif
266 
 /* IBM long double float from pair of double float registers. */
267  long double ldbl128;
 /* 128 bit Vector of 16 unsigned char elements. */
269  vui8_t vx16;
 /* 128 bit Vector of 8 unsigned short int elements. */
271  vui16_t vx8;
 /* 128 bit Vector of 4 unsigned int elements. */
273  vui32_t vx4;
 /* 128 bit Vector of 2 unsigned long int (64-bit) elements. */
275  vui64_t vx2;
 /* 128 bit Vector of 1 unsigned __int128 element. */
277  vui128_t vx1;
 /* 128 bit Vector of 2 double float elements. */
279  vf64_t vf2;
 /* Struct of two unsigned long int (64-bit GPR) fields.  The field
  * order is swapped by byte order so that, overlaid on i128/ui128,
  * "upper" is always the high-order doubleword and "lower" the
  * low-order doubleword. */
281  struct
282  {
283 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
284  uint64_t lower;
285  uint64_t upper;
286 #else
287  uint64_t upper;
288  uint64_t lower;
289 #endif
290  } ulong;
291 } __VEC_U_128;
292 
/* Endian-dependent vector constant initializers and element indexes.
 *
 * The CONST_* macros take their arguments in the same order on both
 * byte orders.  In the big-endian (#else) branch the arguments are
 * emitted as given; in the little-endian branch the expansion reverses
 * them.  So one source-level constant produces a matching initializer
 * list on either target.
 *
 * Macros named CONST_VINT128_* expand to a typed compound literal
 * (vui64_t / vui32_t / vui16_t / vui8_t, or a vui128_t cast for _DW128).
 * Those named CONST_VINT64_DW / CONST_VINT32_W / CONST_VINT16_H /
 * CONST_VINT8_B expand to a bare brace initializer list.
 *
 * The VEC_* constants are element indexes whose values differ by byte
 * order.
 * NOTE(review): by their names and values, _H / _L presumably select the
 * high-order / low-order element (DW = doubleword, W = word,
 * HW = halfword, BYTE = byte) and VEC_WE_n presumably maps big-endian
 * word number n to the target's element index -- confirm against the
 * library documentation. */
293 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
294 
/* Little endian: initializer elements are emitted in reverse argument
 * order. */
295 #define CONST_VINT64_DW(__dw0, __dw1) {__dw1, __dw0}
296 
298 #define CONST_VINT128_DW(__dw0, __dw1) (vui64_t){__dw1, __dw0}
299 
301 #define CONST_VINT128_DW128(__dw0, __dw1) (vui128_t)((vui64_t){__dw1, __dw0})
302 
304 #define CONST_VINT128_W(__w0, __w1, __w2, __w3) (vui32_t){__w3, __w2, __w1, __w0}
305 
306 #define CONST_VINT32_W(__w0, __w1, __w2, __w3) {__w3, __w2, __w1, __w0}
307 
309 #define CONST_VINT128_H(__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7) \
310  (vui16_t){__hw7, __hw6, __hw5, __hw4, __hw3, __hw2, __hw1, __hw0}
311 
312 #define CONST_VINT16_H(__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7) \
313  {__hw7, __hw6, __hw5, __hw4, __hw3, __hw2, __hw1, __hw0}
314 
316 #define CONST_VINT128_B(_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15) \
317  (vui8_t){_b15, _b14, _b13, _b12, _b11, _b10, _b9, _b8, _b7, _b6, _b5, _b4, _b3, _b2, _b1, _b0}
318 
319 #define CONST_VINT8_B(_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15) \
320  {_b15, _b14, _b13, _b12, _b11, _b10, _b9, _b8, _b7, _b6, _b5, _b4, _b3, _b2, _b1, _b0}
321 
/* Little-endian element indexes. */
322 #define VEC_DW_H 1
323 
324 #define VEC_DW_L 0
325 
326 #define VEC_W_H 3
327 
328 #define VEC_W_L 0
329 
330 #define VEC_WE_0 3
331 
332 #define VEC_WE_1 2
333 
334 #define VEC_WE_2 1
335 
336 #define VEC_WE_3 0
337 
338 #define VEC_HW_H 7
339 
340 #define VEC_HW_L_DWH 4
341 
342 #define VEC_HW_L 0
343 
344 #define VEC_BYTE_L 0
345 
346 #define VEC_BYTE_L_DWH 8
347 
348 #define VEC_BYTE_L_DWL 0
349 
350 #define VEC_BYTE_H 15
351 
352 #define VEC_BYTE_HHW 14
353 #else
/* Big endian: initializer elements are emitted in argument order. */
354 #define CONST_VINT64_DW(__dw0, __dw1) {__dw0, __dw1}
355 #define CONST_VINT128_DW(__dw0, __dw1) (vui64_t){__dw0, __dw1}
356 #define CONST_VINT128_DW128(__dw0, __dw1) (vui128_t)((vui64_t){__dw0, __dw1})
357 #define CONST_VINT128_W(__w0, __w1, __w2, __w3) (vui32_t){__w0, __w1, __w2, __w3}
358 #define CONST_VINT32_W(__w0, __w1, __w2, __w3) {__w0, __w1, __w2, __w3}
359 
361 #define CONST_VINT128_H(__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7) \
362  (vui16_t){__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7}
363 
364 #define CONST_VINT16_H(__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7) \
365  {__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7}
366 
368 #define CONST_VINT128_B(_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15) \
369  (vui8_t){_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15}
370 
371 #define CONST_VINT8_B(_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15) \
372  {_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15}
/* Big-endian element indexes. */
373 #define VEC_DW_H 0
374 #define VEC_DW_L 1
375 #define VEC_W_H 0
376 #define VEC_W_L 3
377 #define VEC_WE_0 0
378 #define VEC_WE_1 1
379 #define VEC_WE_2 2
380 #define VEC_WE_3 3
381 #define VEC_HW_H 0
382 
383 #define VEC_HW_L_DWH 3
384 #define VEC_HW_L 7
385 #define VEC_BYTE_L 15
386 
387 #define VEC_BYTE_L_DWH 7
388 
389 #define VEC_BYTE_L_DWL 15
390 #define VEC_BYTE_H 0
391 #define VEC_BYTE_HHW 1
392 #endif
393 
/* Constant tables declared here and defined elsewhere (extern). */
/* Table of powers of 10 [0-38] in vector __int128 format. */
395 extern const vui128_t vtipowof10[];
/* Table used to verify 128-bit frexp operations for powers of 10. */
397 extern const vui128_t vtifrexpof10[];
398 
/* The _Decimal128 table is only declared when decimal floating point
 * support has not been disabled via PVECLIB_DISABLE_DFP. */
399 #ifndef PVECLIB_DISABLE_DFP
400 
/* Table of powers of 2 [0-1077] in _Decimal128 format. */
401 extern const _Decimal128 decpowof2[];
402 #endif
403 
/* Transfer a vector unsigned __int128 to __int128 scalar.
 *
 * vra: the 128-bit vector value to transfer.
 * Returns the same 128 bits as an unsigned __int128.
 *
 * The transfer goes through a __VEC_U_128 union.  Three code paths are
 * selected at compile time:
 *  - _ARCH_PWR8 or clang: write the vx1 member, read the ui128 member.
 *  - _ARCH_PWR7: store the two doublewords with explicit stxsdx
 *    instructions (see the comments in that branch), then read ui128.
 *  - otherwise: write vx1 and read ui128 through storage.
 *
 * The declarator on listing line 420 is restored here; without it the
 * definition has no name and the parameter vra is undeclared. */
419 static inline unsigned __int128
420 vec_transfer_vui128t_to_uint128 (vui128_t vra)
421 {
422  __VEC_U_128 t;
423  unsigned __int128 result;
424 #if defined(_ARCH_PWR8) || defined (__clang__)
425  // PWR8/9 should generate Move From VSR Doubleword instructions.
426  t.vx1 = vra;
427  result = t.ui128;
428 #else
429 #ifdef _ARCH_PWR7
430  /* PWR7 and earlier must transfer through storage. This requires
431  * care as we want to avoid load-hit-store flushes in the pipeline.
432  * First split the vector into a pair of dword FPRs (vra_u, vra_l). */
433  vui64_t vra_u = (vui64_t) vra;
434  vui64_t vra_l = vec_xxpermdi ((vui64_t) vra, (vui64_t) vra, 2);
435  /* Store this pair as adjacent dwords, followed by a group ending
436  * nop. This prevents the hardware from dispatching the stores in the
437  * same cycle as the following loads (a guaranteed pipeline flush).
438  * Also the load addresses and data size will match these stores and
439  * increase the possibility of store forwarding from the store queue.
440  */
 /* Operands: %0/%1 are the memory outputs t.ulong.lower / t.ulong.upper,
  * %2/%3 are the VSX register inputs vra_l / vra_u. */
441  __asm__(
442  "stxsdx %x2,%y0;"
443  "stxsdx %x3,%y1;"
444  "ori 2,2,0;"
445  : "=Z" (t.ulong.lower),
446  "=Z" (t.ulong.upper)
447  : "wa" (vra_l), "wa" (vra_u)
448  : );
449 #else //_ARCH_PWR6/970
450  /* Just have to go through storage and let the hardware deal with
451  * load/store ordering. */
452  t.vx1 = vra;
453 #endif
454  // Load the dwords into a pair of GPRs for the __int128 result.
455  result = t.ui128;
456 #endif
457  return (result);
458 }
459 
/* Transfer a __int128 scalar to vector unsigned __int128.
 *
 * gprp: the 128-bit scalar value to transfer.
 * Returns the same 128 bits as a vui128_t: the value is written to the
 * ui128 member of a __VEC_U_128 union and read back through its vx1
 * member. */
476 static inline vui128_t
477 vec_transfer_uint128_to_vui128t (unsigned __int128 gprp)
478 {
479  __VEC_U_128 t;
480  t.ui128 = gprp;
481  return t.vx1;
482 }
483 
/* Extract the low doubleword from a __int128 scalar.
 *
 * gprp: the 128-bit scalar value.
 * Returns the "lower" 64-bit field of the __VEC_U_128 union after the
 * value has been written to its ui128 member.
 *
 * The declarator on listing line 490 is restored here; without it the
 * definition has no name and the parameter gprp is undeclared. */
489 static inline unsigned long long
490 scalar_extract_uint64_from_low_uint128 (unsigned __int128 gprp)
491 {
492  __VEC_U_128 t;
493  t.ui128 = gprp;
494  return t.ulong.lower;
495 }
496 
/* Extract the high doubleword from a __int128 scalar.
 *
 * gprp: the 128-bit scalar value.
 * Returns the "upper" 64-bit field of the __VEC_U_128 union after the
 * value has been written to its ui128 member.
 *
 * The declarator on listing line 503 is restored here; without it the
 * definition has no name and the parameter gprp is undeclared. */
502 static inline unsigned long long
503 scalar_extract_uint64_from_high_uint128 (unsigned __int128 gprp)
504 {
505  __VEC_U_128 t;
506  t.ui128 = gprp;
507  return t.ulong.upper;
508 }
509 
/* Insert High/low doublewords into a __int128 scalar.
 *
 * high: value stored to the "upper" 64-bit field.
 * low:  value stored to the "lower" 64-bit field.
 * Returns the ui128 member of the __VEC_U_128 union after both fields
 * have been written. */
516 static inline unsigned __int128
517 scalar_insert_uint64_to_uint128 (unsigned long long high,
518  unsigned long long low)
519 {
520  __VEC_U_128 t;
521  t.ulong.lower = low;
522  t.ulong.upper = high;
523  return t.ui128;
524 }
525 
526 #endif /* VEC_COMMON_PPC_H_ */
scalar_extract_uint64_from_high_uint128
static unsigned long long scalar_extract_uint64_from_high_uint128(unsigned __int128 gprp)
Extract the high doubleword from a __int128 scalar.
Definition: vec_common_ppc.h:503
vb32_t
__vector __bool int vb32_t
vector of 32-bit bool int elements.
Definition: vec_common_ppc.h:228
vf32_t
__vector float vf32_t
vector of 32-bit float elements.
Definition: vec_common_ppc.h:219
vb128_t
__vector __bool __int128 vb128_t
vector of one 128-bit bool __int128 element.
Definition: vec_common_ppc.h:240
__VEC_U_128::dpd128
_Decimal128 dpd128
128 bit Decimal Float from pair of double float registers.
Definition: vec_common_ppc.h:264
vtifrexpof10
const vui128_t vtifrexpof10[]
table used to verify 128-bit frexp operations for powers of 10.
scalar_insert_uint64_to_uint128
static unsigned __int128 scalar_insert_uint64_to_uint128(unsigned long long high, unsigned long long low)
Insert High/low doublewords into a __int128 scalar.
Definition: vec_common_ppc.h:517
vec_transfer_vui128t_to_uint128
static unsigned __int128 vec_transfer_vui128t_to_uint128(vui128_t vra)
Transfer a vector unsigned __int128 to __int128 scalar.
Definition: vec_common_ppc.h:420
vui16_t
__vector unsigned short vui16_t
vector of 16-bit unsigned short elements.
Definition: vec_common_ppc.h:204
vi128_t
__vector __int128 vi128_t
vector of one 128-bit signed __int128 element.
Definition: vec_common_ppc.h:235
vui64_t
__vector unsigned long long vui64_t
vector of 64-bit unsigned long long elements.
Definition: vec_common_ppc.h:208
__VEC_U_128::ui128
unsigned __int128 ui128
Unsigned 128-bit integer from pair of 64-bit GPRs.
Definition: vec_common_ppc.h:261
__VEC_U_128::i128
signed __int128 i128
Signed 128-bit integer from pair of 64-bit GPRs.
Definition: vec_common_ppc.h:259
vui8_t
__vector unsigned char vui8_t
vector of 8-bit unsigned char elements.
Definition: vec_common_ppc.h:202
vi32_t
__vector int vi32_t
vector of 32-bit signed int elements.
Definition: vec_common_ppc.h:215
__VEC_U_128::vx16
vui8_t vx16
128 bit Vector of 16 unsigned char elements.
Definition: vec_common_ppc.h:269
__VEC_U_128::ulong
struct __VEC_U_128::@0 ulong
Struct of two unsigned long int (64-bit GPR) fields.
__VEC_U_128::vx8
vui16_t vx8
128 bit Vector of 8 unsigned short int elements.
Definition: vec_common_ppc.h:271
vb8_t
__vector __bool char vb8_t
vector of 8-bit bool char elements.
Definition: vec_common_ppc.h:224
__VEC_U_128::vx1
vui128_t vx1
128 bit Vector of 1 unsigned __int128 element.
Definition: vec_common_ppc.h:277
vui128_t
__vector unsigned __int128 vui128_t
vector of one 128-bit unsigned __int128 element.
Definition: vec_common_ppc.h:237
vb64_t
__vector __bool long long vb64_t
vector of 64-bit bool long long elements.
Definition: vec_common_ppc.h:230
__VEC_U_128
Union used to transfer 128-bit data between vector and non-vector types.
Definition: vec_common_ppc.h:256
decpowof2
const _Decimal128 decpowof2[]
Table of powers of 2 [0-1077] in _Decimal128 format.
vi64_t
__vector long long vi64_t
vector of 64-bit signed long long elements.
Definition: vec_common_ppc.h:217
vi8_t
__vector signed char vi8_t
vector of 8-bit signed char elements.
Definition: vec_common_ppc.h:211
vb16_t
__vector __bool short vb16_t
vector of 16-bit bool short elements.
Definition: vec_common_ppc.h:226
vec_transfer_uint128_to_vui128t
static vui128_t vec_transfer_uint128_to_vui128t(unsigned __int128 gprp)
Transfer a __int128 scalar to vector unsigned __int128.
Definition: vec_common_ppc.h:477
vtipowof10
const vui128_t vtipowof10[]
Table of powers of 10 [0-38] in vector __int128 format.
vui32_t
__vector unsigned int vui32_t
vector of 32-bit unsigned int elements.
Definition: vec_common_ppc.h:206
scalar_extract_uint64_from_low_uint128
static unsigned long long scalar_extract_uint64_from_low_uint128(unsigned __int128 gprp)
Extract the low doubleword from a __int128 scalar.
Definition: vec_common_ppc.h:490
__VEC_U_128::vx2
vui64_t vx2
128 bit Vector of 2 unsigned long int (64-bit) elements.
Definition: vec_common_ppc.h:275
vi16_t
__vector short vi16_t
vector of 16-bit signed short elements.
Definition: vec_common_ppc.h:213
vf64_t
__vector double vf64_t
vector of 64-bit double elements.
Definition: vec_common_ppc.h:221
__VEC_U_128::vx4
vui32_t vx4
128 bit Vector of 4 unsigned int elements.
Definition: vec_common_ppc.h:273
__VEC_U_128::vf2
vf64_t vf2
128 bit Vector of 2 double float elements.
Definition: vec_common_ppc.h:279
__VEC_U_128::ldbl128
long double ldbl128
IBM long double float from pair of double float registers.
Definition: vec_common_ppc.h:267