POWER Vector Library Manual  1.0.4
vec_bcd_ppc.h
Go to the documentation of this file.
1 /*
2  Copyright (c) [2017] IBM Corporation.
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  vec_bcd_ppc.h
17 
18  Contributors:
19  IBM Corporation, Steven Munroe
20  Created on: Aug 12, 2015
21  */
22 
23 #ifndef VEC_BCD_PPC_H_
24 #define VEC_BCD_PPC_H_
25 #ifdef PVECLIB_DISABLE_DFP
26 // clang does not support Decimal Floating Point at this time.
27 // Pveclib uses decimal FP quadword instructions to fill in functional
28 // gaps in the vector BCD operations before POWER9.
29 #ifndef NDEBUG
30 #warning Support for BCD is disabled until _Decimal128 support is added.
31 #endif
32 #else
33 #include <pveclib/vec_common_ppc.h>
34 #include <pveclib/vec_char_ppc.h>
35 #include <pveclib/vec_int128_ppc.h>
36 
1565 #define vBCD_t vui32_t
1566 
1567 #define vbBCD_t vb32_t
1568 
1570 #define _BCD_CONST_PLUS_NINES ((vBCD_t) CONST_VINT128_DW128(0x9999999999999999, 0x999999999999999c))
1571 
1572 #define _BCD_CONST_PLUS_ONE ((vBCD_t) CONST_VINT128_DW128(0, 0x1c))
1573 
1574 #define _BCD_CONST_MINUS_ONE ((vBCD_t) CONST_VINT128_DW128(0, 0x1d))
1575 
1576 #define _BCD_CONST_ZERO ((vBCD_t) CONST_VINT128_DW128(0, 0x0c))
1577 
1578 #define _BCD_CONST_SIGN_MASK ((vBCD_t) CONST_VINT128_DW128(0, 0xf))
1579 
1581 static inline vBCD_t vec_bcdcpsgn (vBCD_t vra, vBCD_t vrb);
1582 static inline vBCD_t vec_bcdcfuq (vui128_t vra);
1583 static inline vui128_t vec_bcdctuq (vBCD_t vra);
1584 static inline vBCD_t vec_bcdsrqi (vBCD_t vra, const unsigned int _N);
1585 static inline vBCD_t vec_bcdsub (vBCD_t a, vBCD_t b);
1586 static inline vBCD_t vec_bcdus (vBCD_t vra, vi8_t vrb);
1587 static inline vf64_t vec_pack_Decimal128 (_Decimal128 lval);
1588 static inline _Decimal128 vec_quantize0_Decimal128 (_Decimal128 val);
1589 static inline vui8_t vec_rdxcf100b (vui8_t vra);
1590 static inline vui8_t vec_rdxcf10kh (vui16_t vra);
1591 static inline vui16_t vec_rdxcf100mw (vui32_t vra);
1592 static inline vui32_t vec_rdxcf10E16d (vui64_t vra);
1593 static inline vui64_t vec_rdxcf10e32q (vui128_t vra);
1594 static inline vui8_t vec_rdxcfzt100b (vui8_t zone00, vui8_t zone16);
1595 static inline vui8_t vec_rdxct100b (vui8_t vra);
1596 static inline vui16_t vec_rdxct10kh (vui8_t vra);
1597 static inline vui32_t vec_rdxct100mw (vui16_t vra);
1598 static inline vui64_t vec_rdxct10E16d (vui32_t vra);
1599 static inline vui128_t vec_rdxct10e32q (vui64_t vra);
1600 static inline vb128_t vec_setbool_bcdsq (vBCD_t vra);
1601 static inline int vec_signbit_bcdsq (vBCD_t vra);
1602 static inline _Decimal128 vec_unpack_Decimal128 (vf64_t lval);
1604 
1621 static inline vui64_t
1623 {
1624 #ifdef _ARCH_PWR6
1625  vui64_t t;
1626  _Decimal128 x, y, z;
1627  // unpack the vector into a FPRp
1628  z = vec_unpack_Decimal128 ((vf64_t) val);
1629  // Convert 2 long int values into 2 _Decimal64 values
1630  // Then convert each _Decimal64 value into 16-digit BCD
1631  __asm__(
1632  "denbcd 0,%1,%2;\n"
1633  "denbcd 0,%L1,%L2;\n"
1634  "dctfix %0,%1;\n"
1635  "dctfix %L0,%L1;\n"
1636  : "=d" (x),
1637  "=&d" (y)
1638  : "d" (z)
1639  : );
1640  // Pack the FPRp back into a vector
1641  t = (vui64_t) vec_pack_Decimal128 (x);
1642  return (t);
1643 #else
1644  // todo no solution before P6
1645 #endif
1646 }
1647 
1662 static inline _Decimal128
1664 {
1665 #ifdef _ARCH_PWR7
1666  _Decimal128 t;
1667 #if (__GNUC__ < 5)
1668  __asm__(
1669  "xxpermdi %0,%x1,%x1,0b00;\n"
1670  "\txxpermdi %L0,%x1,%x1,0b10;\n"
1671  "\tdenbcdq 1,%0,%0;\n"
1672  : "=&d" (t)
1673  : "v" (val)
1674  : );
1675 #else
1676  t = vec_unpack_Decimal128 ((vf64_t) val);
1677  t = __builtin_denbcdq (1, t);
1678 #endif
1679  return (t);
1680 #else
1681  // needs work for P6 without xxpermdi
1682  __VEC_U_128 t, x;
1683  x.vx4 = val;
1684  t.dpd128 = __builtin_denbcdq (1, x.dpd128);
1685  return (t.dpd128);
1686 #endif
1687 }
1688 
1705 static inline vBCD_t
1707 {
1708 #ifdef _ARCH_PWR6
1709  vBCD_t t;
1710  _Decimal128 x, y, z;
1711  // unpack the vector into a FPRp
1712  z = vec_unpack_Decimal128 ((vf64_t) val);
1713  // Convert 2 long int values into 2 _Decimal64 values
1714  // Then convert each _Decimal64 value into 16-digit BCD
1715  __asm__(
1716  "dcffix %1,%2;\n"
1717  "dcffix %L1,%L2;\n"
1718  "ddedpd 0,%0,%1;\n"
1719  "ddedpd 0,%L0,%L1;\n"
1720  : "=d" (x),
1721  "=&d" (y)
1722  : "d" (z)
1723  : );
1724  // Pack the FPRp back into a vector
1725  t = (vBCD_t) vec_pack_Decimal128 (x);
1726  return (t);
1727 #else
1728  // todo no solution before P6
1729 #endif
1730 }
1731 
/** \brief Convert a _Decimal128 value to a signed 31-digit BCD quadword.
 *
 * Uses the DFP Decode-DPD-To-BCD (ddedpdq) operation with sign mode 2
 * to produce 31 BCD digits plus a trailing sign nibble, then moves the
 * FPR-pair result back into a single vector register.
 *
 * @param val a _Decimal128 value (assumed integral/quantized by caller).
 * @return 128-bit vector of 31 BCD digits with low-nibble sign code.
 */
static inline vBCD_t
vec_DFP2BCD (_Decimal128 val)
{
#ifdef _ARCH_PWR7
 vBCD_t t;
 _Decimal128 x;
#if (__GNUC__ < 5)
 // GCC < 5 lacks __builtin_ddedpdq; use inline asm:
 // ddedpdq mode 2 = decode to signed BCD, then xxpermdi packs the
 // even/odd FPR pair into one VSR.
 __asm__(
 "ddedpdq 2,%1,%2;\n"
 "\txxpermdi %x0,%1,%L1,0b00;\n"
 : "=v" (t),
 "=&d" (x)
 : "d" (val)
 : );
#else
 x = __builtin_ddedpdq (2, val);
 // Pack the FPR pair back into a single vector register.
 t = (vBCD_t) vec_pack_Decimal128 (x);
#endif
 return (t);
#else
 // needs work for P6 without xxpermdi
 // NOTE(review): this fallback uses ddedpdq sign mode 1 while the
 // PWR7 path uses mode 2 — confirm the intended sign encoding.
 // NOTE(review): 'x' is declared but never used here.
 __VEC_U_128 t, x;
 t.dpd128 = __builtin_ddedpdq (1, val);
 return (t.vx4);
#endif
}
1773 
1788 static inline vBCD_t
1790 {
1791  vBCD_t t;
1792 #ifdef _ARCH_PWR8
1793 #if (__GNUC__ < 7)
1794  __asm__(
1795  "bcdadd. %0,%1,%2,0;\n"
1796  : "=v" (t)
1797  : "v" (a),
1798  "v" (b)
1799  : "cr6" );
1800 #else
1801  t = (vBCD_t) __builtin_bcdadd ((vi128_t) a, (vi128_t) b, 0);
1802 #endif
1803 #else
1804  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
1805  _Decimal128 d_t;
1806  d_t = vec_BCD2DFP (a) + vec_BCD2DFP (b);
1807  t = vec_DFP2BCD(d_t);
1808  // fix up spurious negative zeros
1809  if (vec_all_eq((vui32_t) t, mz))
1810  t = _BCD_CONST_ZERO;
1811 #endif
1812  return (t);
1813 }
1814 
1835 static inline vBCD_t
1837 {
1838  vBCD_t t;
1839 #if defined ( _ARCH_PWR8) && (__GNUC__ > 6)
1840 #ifdef _ARCH_PWR9
1841  // Generate BCD zero from (a - a), which is 3 cycles on PWR9
1842  t = vec_bcdsub (a, a);
1843 #else // Else load a BCD const 0.
1844  t = _BCD_CONST_ZERO;
1845 #endif
1846  if (__builtin_expect (__builtin_bcdadd_ov ((vi128_t) a, (vi128_t) b, 0), 0))
1847  {
1848  vBCD_t a_b;
1849  a_b = vec_bcdadd (a, b);
1850  t = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, a_b);
1851  }
1852 #else
1853  _Decimal128 d_a, d_b, d_s, d_t;
1854  const vui32_t mz = CONST_VINT128_W(0, 0, 0, 0x0000000d);
1855  d_a = vec_BCD2DFP (a);
1856  d_b = vec_BCD2DFP (b);
1857  d_s = d_a + d_b;
1858  // Shift right 31 digits, leaving the carry.
1859  d_t = __builtin_dscriq (d_s, 31);
1860  t = vec_DFP2BCD (d_t);
1861  // fix up spurious negative zeros
1862  if (vec_all_eq ((vui32_t) t, mz))
1863  t = _BCD_CONST_ZERO;
1864 #endif
1865  return (t);
1866 }
1867 
1891 static inline vBCD_t
1893 {
1894  vBCD_t t;
1895 #ifdef _ARCH_PWR8
1896  vBCD_t a_b, a_b_c;
1897 
1898  a_b = vec_bcdadd (a, b);
1899  if (__builtin_expect (__builtin_bcdadd_ov ((vi128_t) a, (vi128_t) b, 0), 0))
1900  {
1901  t = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, a_b);
1902  }
1903  else // (a + b) did not overflow, what about (a + b + c)
1904  {
1905  a_b_c = (vBCD_t) vec_bcdadd (a_b, c);
1906  if (__builtin_bcdadd_ov ((vi128_t) a_b, (vi128_t) c, 0))
1907  {
1908  t = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, a_b_c);
1909  }
1910  else
1911  {
1912 #ifdef _ARCH_PWR9
1913  // Generate BCD zero from (a - a), which is 3 cycles on PWR9
1914  t = vec_bcdsub (a, a);
1915 #else // Else load a BCD const 0.
1916  t = _BCD_CONST_ZERO;
1917 #endif
1918  }
1919  }
1920 #else
1921  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
1922  _Decimal128 d_a, d_b, d_c, d_s, d_t;
1923  d_a = vec_BCD2DFP (a);
1924  d_b = vec_BCD2DFP (b);
1925  d_c = vec_BCD2DFP (c);
1926  d_s = d_a + d_b + d_c;
1927  // Shift right 31 digits, leaving the carry.
1928  d_t = __builtin_dscriq (d_s, 31);
1929  t = vec_DFP2BCD (d_t);
1930  // fix up spurious negative zeros
1931  if (vec_all_eq ((vui32_t) t, mz))
1932  t = _BCD_CONST_ZERO;
1933 #endif
1934  return (t);
1935 }
1936 
1954 static inline vBCD_t
1956 {
1957  vBCD_t t;
1958 #ifdef _ARCH_PWR8
1959  t = vec_bcdadd (vec_bcdadd (a, b), c);
1960 #else
1961  _Decimal128 d_t;
1962  d_t = vec_BCD2DFP (a) + vec_BCD2DFP (b) + vec_BCD2DFP (c);
1963  t = vec_DFP2BCD(d_t);
1964 #endif
1965  return (t);
1966 }
1967 
1991 static inline vBCD_t
1993 {
1994  vBCD_t result;
1995 #ifdef _ARCH_PWR9
1996  __asm__(
1997  "bcdcfsq. %0,%1,0;\n"
1998  : "=v" (result)
1999  : "v" (vrb)
2000  : "cr6" );
2001 #else
2002  const vui128_t zero = (vui128_t) vec_splats ((int) 0);
2003  vBCD_t ubcd, bcdsign;
2004  vui128_t uvrb;
2005  vb128_t negbool;
2006 
2007  bcdsign = _BCD_CONST_PLUS_ONE;
2008  negbool = vec_setb_sq (vrb);
2009 
2010  uvrb = (vui128_t) vec_sel ((vui32_t) vrb,
2011  (vui32_t) vec_subuqm (zero, (vui128_t) vrb),
2012  (vb32_t) negbool);
2013  bcdsign = (vBCD_t) vec_sel ((vui32_t) bcdsign, (vui32_t) _BCD_CONST_MINUS_ONE,
2014  (vb32_t) negbool);
2015 
2016  ubcd = vec_bcdcfuq (uvrb);
2017 
2018  result = (vBCD_t) vec_slqi ((vui128_t) ubcd, 4);
2019  result = vec_bcdcpsgn (result, bcdsign);
2020 #endif
2021  return (vBCD_t) result;
2022 }
2023 
2045 static inline vBCD_t
2047 {
2048 #ifdef _ARCH_PWR7
2049  return vec_BIN2BCD (vrb);
2050 #else
2051  vui8_t d100;
2052  vui16_t d10k;
2053  vui32_t d100m;
2054  d100m = vec_rdxcf10E16d (vrb);
2055  d10k = vec_rdxcf100mw (d100m);
2056  d100 = vec_rdxcf10kh (d10k);
2057  return (vBCD_t) vec_rdxcf100b (d100);
2058 #endif
2059 }
2060 
2082 static inline vBCD_t
2084 {
2085  vui64_t d10e;
2086  d10e =vec_rdxcf10e32q (vra);
2087 #ifdef _ARCH_PWR7
2088  return (vBCD_t) vec_BIN2BCD (d10e);
2089 #else
2090  vui8_t d100;
2091  vui16_t d10k;
2092  vui32_t d100m;
2093  d100m = vec_rdxcf10E16d (d10e);
2094  d10k = vec_rdxcf100mw (d100m);
2095  d100 = vec_rdxcf10kh (d10k);
2096  return (vBCD_t) vec_rdxcf100b (d10e);
2097 #endif
2098 }
2099 
2125 static inline vBCD_t
2127 {
2128  vBCD_t vrt;
2129 #ifdef _ARCH_PWR9
2130  __asm__(
2131  "bcdcfz. %0,%1,0;\n"
2132  : "=v" (vrt)
2133  : "v" (vrb)
2134  : "cr6" );
2135 #else
2136  const vui8_t dmask = vec_splat_u8(15);
2137  const vui8_t dx0 = vec_splat_u8(0);
2138  vui8_t znd_s;
2139  vui8_t znd_d, znd_t;
2140  vui8_t bcd, bcd_h, bcd_l;
2141  // Isolate the BCD digit from each zoned character.
2142  znd_d = vec_and (vrb, dmask);
2143  znd_t = (vui8_t) vec_srqi ((vui128_t) znd_d, 4);
2144  // Isolate the bit (1) that matters in the Zoned sign code.
2145  znd_s = vec_slbi (vrb, 1);
2146  znd_s = vec_srbi (znd_s, 7);
2147  // Convert to BCD preferred sign code 0xC or 0xD
2148  znd_s = vec_or (znd_s, (vui8_t) _BCD_CONST_ZERO);
2149  // Pack the even/odd zone digits into a single vector.
2150 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
2151  bcd = vec_pack ((vui16_t) znd_d, (vui16_t) znd_t);
2152 #else
2153  bcd = vec_pack ((vui16_t) znd_t, (vui16_t) znd_d);
2154 #endif
2155  // Swap even/odd DWs to low half and OR to get unsigned 16-digit BCD.
2156  bcd_l = (vui8_t) vec_mrgald ((vui128_t) dx0, (vui128_t) bcd);
2157  bcd_h = (vui8_t) vec_mrgahd ((vui128_t) dx0, (vui128_t) bcd);
2158  bcd = vec_or (bcd_h, bcd_l);
2159  // Shift left to make room for sign code
2160  vrt = (vBCD_t) vec_slqi ((vui128_t) bcd, 4);
2161  // Merge sign code from zone code.
2162  vrt = vec_bcdcpsgn (vrt, (vBCD_t) znd_s);
2163 #endif
2164  return (vrt);
2165 }
2166 
2184 static inline vbBCD_t
2186 {
2187  vbBCD_t result = (vbBCD_t) vec_splat_s32 (0);
2188 #ifdef _ARCH_PWR8
2189  if (__builtin_bcdsub_eq ((vi128_t) vra, (vi128_t) vrb, 0))
2190  result = (vbBCD_t) vec_splat_s32 (-1);
2191 #else
2192  _Decimal128 d_a, d_b;
2193  d_a = vec_BCD2DFP (vra);
2194  d_b = vec_BCD2DFP (vrb);
2195  if (d_a == d_b)
2196  result = (vbBCD_t) vec_splat_s32 (-1);
2197 #endif
2198  return result;
2199 }
2200 
2218 static inline vbBCD_t
2220 {
2221  vbBCD_t result = (vbBCD_t) vec_splat_s32 (-1);
2222 #ifdef _ARCH_PWR8
2223  if (__builtin_bcdsub_lt ((vi128_t) vra, (vi128_t) vrb, 0))
2224  result = (vbBCD_t) vec_splat_s32 (0);
2225 #else
2226  _Decimal128 d_a, d_b;
2227  d_a = vec_BCD2DFP (vra);
2228  d_b = vec_BCD2DFP (vrb);
2229  if (d_a < d_b)
2230  result = (vbBCD_t) vec_splat_s32 (0);
2231 #endif
2232  return result;
2233 }
2234 
2252 static inline vbBCD_t
2254 {
2255  vbBCD_t result = (vbBCD_t) vec_splat_s32 (0);
2256 #ifdef _ARCH_PWR8
2257  if (__builtin_bcdsub_gt ((vi128_t) vra, (vi128_t) vrb, 0))
2258  result = (vbBCD_t) vec_splat_s32 (-1);
2259 #else
2260  _Decimal128 d_a, d_b;
2261  d_a = vec_BCD2DFP (vra);
2262  d_b = vec_BCD2DFP (vrb);
2263  if (d_a > d_b)
2264  result = (vbBCD_t) vec_splat_s32 (-1);
2265 #endif
2266  return result;
2267 }
2268 
2286 static inline vbBCD_t
2288 {
2289  vbBCD_t result = (vbBCD_t) vec_splat_s32 (-1);
2290 #ifdef _ARCH_PWR8
2291  if (__builtin_bcdsub_gt ((vi128_t) vra, (vi128_t) vrb, 0))
2292  result = (vbBCD_t) vec_splat_s32 (0);
2293 #else
2294  _Decimal128 d_a, d_b;
2295  d_a = vec_BCD2DFP (vra);
2296  d_b = vec_BCD2DFP (vrb);
2297  if (d_a > d_b)
2298  result = (vbBCD_t) vec_splat_s32 (0);
2299 #endif
2300  return result;
2301 }
2302 
2320 static inline vbBCD_t
2322 {
2323  vbBCD_t result = (vbBCD_t) vec_splat_s32 (0);
2324 #ifdef _ARCH_PWR8
2325  if (__builtin_bcdsub_lt ((vi128_t) vra, (vi128_t) vrb, 0))
2326  result = (vbBCD_t) vec_splat_s32 (-1);
2327 #else
2328  _Decimal128 d_a, d_b;
2329  d_a = vec_BCD2DFP (vra);
2330  d_b = vec_BCD2DFP (vrb);
2331  if (d_a < d_b)
2332  result = (vbBCD_t) vec_splat_s32 (-1);
2333 #endif
2334  return result;
2335 }
2336 
2354 static inline vbBCD_t
2356 {
2357 #ifdef _ARCH_PWR8
2358  vbBCD_t result = (vbBCD_t) vec_splat_s32 (-1);
2359  if (__builtin_bcdsub_eq ((vi128_t) vra, (vi128_t) vrb, 0))
2360  result = (vbBCD_t) vec_splat_s32 (0);
2361 
2362  return result;
2363 #else
2364  /* vec_cmpneuq works for both signed and unsigned compares. */
2365  return (vbBCD_t) vec_cmpneuq ((vui128_t) vra, (vui128_t) vrb);
2366 #endif
2367 }
2368 
2386 static inline int
2388 {
2389 #ifdef _ARCH_PWR8
2390  return __builtin_bcdsub_eq ((vi128_t) vra, (vi128_t) vrb, 0);
2391 #else
2392  _Decimal128 d_a, d_b;
2393  d_a = vec_BCD2DFP (vra);
2394  d_b = vec_BCD2DFP (vrb);
2395  return (d_a == d_b);
2396 #endif
2397 }
2398 
2416 static inline int
2418 {
2419 #ifdef _ARCH_PWR8
2420  return !__builtin_bcdsub_lt ((vi128_t) vra, (vi128_t) vrb, 0);
2421 #else
2422  _Decimal128 d_a, d_b;
2423  d_a = vec_BCD2DFP (vra);
2424  d_b = vec_BCD2DFP (vrb);
2425  return (d_a >= d_b);
2426 #endif
2427 }
2428 
2446 static inline int
2448 {
2449 #ifdef _ARCH_PWR8
2450  return __builtin_bcdsub_gt ((vi128_t) vra, (vi128_t) vrb, 0);
2451 #else
2452  _Decimal128 d_a, d_b;
2453  d_a = vec_BCD2DFP (vra);
2454  d_b = vec_BCD2DFP (vrb);
2455  return (d_a > d_b);
2456 #endif
2457 }
2458 
2476 static inline int
2478 {
2479 #ifdef _ARCH_PWR8
2480  return !__builtin_bcdsub_gt ((vi128_t) vra, (vi128_t) vrb, 0);
2481 #else
2482  _Decimal128 d_a, d_b;
2483  d_a = vec_BCD2DFP (vra);
2484  d_b = vec_BCD2DFP (vrb);
2485  return (d_a <= d_b);
2486 #endif
2487 }
2488 
2506 static inline int
2508 {
2509 #ifdef _ARCH_PWR8
2510  return __builtin_bcdsub_lt ((vi128_t) vra, (vi128_t) vrb, 0);
2511 #else
2512  _Decimal128 d_a, d_b;
2513  d_a = vec_BCD2DFP (vra);
2514  d_b = vec_BCD2DFP (vrb);
2515  return (d_a < d_b);
2516 #endif
2517 }
2518 
2536 static inline int
2538 {
2539 #ifdef _ARCH_PWR8
2540  return !__builtin_bcdsub_eq ((vi128_t) vra, (vi128_t) vrb, 0);
2541 #else
2542  return vec_cmpuq_all_ne ((vui128_t) vra, (vui128_t) vrb);
2543 #endif
2544 }
2545 
2562 static inline vBCD_t
2564 {
2565  vBCD_t vrt;
2566 #ifdef _ARCH_PWR9
2567  __asm__(
2568  "bcdcpsgn. %0,%1,%2;\n"
2569  : "=v" (vrt)
2570  : "v" (vra),
2571  "v" (vrb)
2572  : "cr6" );
2573 #else
2574  const vui32_t sign_mask = (vui32_t) _BCD_CONST_SIGN_MASK;
2575  vrt = (vBCD_t) vec_sel ((vui32_t) vra, (vui32_t) vrb, sign_mask);
2576 #endif
2577  return (vrt);
2578 }
2579 
2596 static inline vi128_t
2598 {
2599  vui128_t result;
2600 #ifdef _ARCH_PWR9
2601  __asm__(
2602  "bcdctsq. %0,%1;\n"
2603  : "=v" (result)
2604  : "v" (vra)
2605  : "cr6" );
2606 #else
2607  const vui128_t zero = (vui128_t) vec_splats ((int) 0);
2608  vBCD_t ubcd;
2609 
2610  ubcd = (vBCD_t) vec_srqi ((vui128_t)vra, 4);
2611  result = vec_bcdctuq (ubcd);
2612  if (vec_signbit_bcdsq (vra))
2613  result = vec_subuqm (zero, result);
2614 #endif
2615  return (vi128_t) result;
2616 }
2617 
2635 static inline vui8_t
2637 {
2638  return vec_rdxct100b ((vui8_t) vra);
2639 }
2640 
2658 static inline vui16_t
2660 {
2661  vui8_t d100;
2662  d100 = vec_rdxct100b ((vui8_t) vra);
2663  return vec_rdxct10kh (d100);
2664 }
2665 
2683 static inline vui32_t
2685 {
2686  vui8_t d100;
2687  vui16_t d10k;
2688  d100 = vec_rdxct100b ((vui8_t) vra);
2689  d10k = vec_rdxct10kh (d100);
2690  return vec_rdxct100mw (d10k);
2691 }
2692 
2710 static inline vui64_t
2712 {
2713 #ifdef _ARCH_PWR7
2714  return vec_BCD2BIN (vra);
2715 #else
2716  vui8_t d100;
2717  vui16_t d10k;
2718  vui32_t d100m;
2719  d100 = vec_rdxct100b ((vui8_t) vra);
2720  d10k = vec_rdxct10kh (d100);
2721  d100m = vec_rdxct100mw (d10k);
2722  return vec_rdxct10E16d (d100m);
2723 #endif
2724 }
2725 
2744 static inline vui128_t
2746 {
2747  vui128_t vrt;
2748 #ifdef _ARCH_PWR9
2749  const vui32_t bcd_one = (vui32_t) _BCD_CONST_PLUS_ONE;
2750  const vui32_t sign_mask = (vui32_t) _BCD_CONST_SIGN_MASK;
2751  vui128_t vrd;
2752  vBCD_t sbcd;
2753  // Need to convert BCD unsigned to signed for bcdctsq
2754  // But can't use bcdcpsgn as the unit digit is not a sign code
2755  // So use vec_and/sel to extract unit digit and insert sign
2756  vrd = (vui128_t) vec_and ((vui32_t) vra, sign_mask);
2757  sbcd = (vBCD_t) vec_sel ((vui32_t) vra, bcd_one, sign_mask);
2758  // Convert top 31 digits to binary
2759  vrt = (vui128_t) vec_bcdctsq (sbcd);
2760  // Then X 10 plus the unit digit to complete 32-digit convert
2761  vrt = vec_mul10euq (vrt, vrd);
2762 #else
2763  vui64_t d10e;
2764 #ifdef _ARCH_PWR7
2765  d10e = vec_BCD2BIN (vra);
2766 #else
2767  vui8_t d100;
2768  vui16_t d10k;
2769  vui32_t d100m;
2770  d100 = vec_rdxct100b ((vui8_t) vra);
2771  d10k = vec_rdxct10kh (d100);
2772  d100m = vec_rdxct100mw (d10k);
2773  d10e = vec_rdxct10E16d (d100m);
2774 #endif
2775  vrt = vec_rdxct10e32q (d10e);
2776 #endif
2777  return vrt;
2778 }
2779 
2805 static inline vui8_t
2807 {
2808  vui8_t vrt;
2809 #ifdef _ARCH_PWR9
2810  __asm__(
2811  "bcdctz. %0,%1,0;\n"
2812  : "=v" (vrt)
2813  : "v" (vrb)
2814  : "cr6" );
2815 #else
2816  const vui8_t dmask = vec_splat_u8(15);
2817  const vui8_t zone_minus = CONST_VINT128_B ( '0', '0', '0', '0',
2818  '0', '0', '0', '0',
2819  '0', '0', '0', '0',
2820  '0', '0', '0', 0x70 );
2821 // const vui32_t minus_sign = (vui32_t) CONST_VINT128_W(0x0b, 0x0d, 0x0b, 0x0d);
2822  const vui32_t plus_sign = (vui32_t) CONST_VINT128_W(0x0a, 0x0c, 0x0e, 0x0f);
2823  vui32_t sign_splat;
2824  const vui32_t bcd_sign_mask = vec_splat_u32(15);
2825  vui8_t znd_s, znd_d, znd_t;
2826  vui8_t bcd_s, bcd_u;
2827  vui8_t zone_code;
2828  // Isolate the BCD Sign code
2829  bcd_s = vec_and ((vui8_t) vrb, (vui8_t) bcd_sign_mask);
2830  // Replicate the byte containing the sign to words
2831  sign_splat = vec_splat ((vui32_t) bcd_s, VEC_W_L);
2832  // Isolate the low 16 digits as unsigned BCD
2833  bcd_u = (vui8_t) vec_srqi ((vui128_t) vrb, 4);
2834  // Isolate the even/odd nibbles and merge low bytes for zoned
2835  znd_d = vec_and (bcd_u, dmask);
2836  znd_t = vec_srbi (bcd_u, 4);
2837 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
2838  znd_s = vec_mergeh (znd_d, znd_t);
2839 #else
2840  znd_s = vec_mergel (znd_t, znd_d);
2841 #endif
2842  // Initialize the zone_code with negative zone mask.
2843  zone_code = zone_minus;
2844  // SIMD compare for match to any positive sign code
2845  if (vec_any_eq(sign_splat, plus_sign))
2846  // Convert to positive zone mask.
2847  zone_code = (vui8_t) vec_xxspltd ((vui64_t) zone_code, 0);
2848 
2849  // Merge the zone nibbles with the digit nibble to
2850  vrt = vec_or (znd_s, zone_code);
2851 #endif
2852  return (vrt);
2853 }
2854 
2869 static inline vBCD_t
2871 {
2872  vBCD_t t;
2873  _Decimal128 d_t, d_a, d_b;
2874  d_a = vec_BCD2DFP (a);
2875  d_b = vec_BCD2DFP (b);
2876  d_t = vec_quantize0_Decimal128 (d_a / d_b);
2877  t = vec_DFP2BCD (d_t);
2878  return (t);
2879 }
2880 
2899 static inline vBCD_t
2901 {
2902  vBCD_t t;
2903  _Decimal128 d_t, d_a, d_b;
2904  d_a = vec_BCD2DFP (a);
2905  d_b = vec_BCD2DFP (b);
2906  // Look into using DFP Insert Biased Exponent here.
2907  d_a = d_a * 10E31DL;
2908  d_t = vec_quantize0_Decimal128 (d_a / d_b);
2909  t = vec_DFP2BCD (d_t);
2910  return (t);
2911 }
2912 
2948 static inline vBCD_t
2950 {
2951 #ifndef _ARCH_PWR9
2952  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
2953 #endif
2954  const vBCD_t dword_mask = (vBCD_t) CONST_VINT128_DW(15, -1);
2955  vBCD_t t, low_a, low_b, high_a, high_b;
2956  _Decimal128 d_p, d_t, d_a, d_b;
2957 
2958  low_a = vec_and (a, dword_mask);
2959  low_b = vec_and (b, dword_mask);
2960  d_a = vec_BCD2DFP (low_a);
2961  d_b = vec_BCD2DFP (low_b);
2962  d_p = d_a * d_b;
2963  if (__builtin_expect ((vec_cmpuq_all_eq ((vui128_t) low_a, (vui128_t) a)
2964  && vec_cmpuq_all_eq ((vui128_t) low_b, (vui128_t) b)), 1))
2965  {
2966  d_t = d_p;
2967  }
2968  else
2969  {
2970  _Decimal128 d_ah, d_bh, d_hl, d_lh, d_h;
2971 
2972  high_a = vec_bcdsrqi (a, 16);
2973  high_b = vec_bcdsrqi (b, 16);
2974 
2975  d_ah = vec_BCD2DFP (high_a);
2976  d_bh = vec_BCD2DFP (high_b);
2977 
2978  d_hl = d_ah * d_b;
2979  d_lh = d_a * d_bh;
2980 
2981  d_h = d_hl + d_lh;
2982  d_h = __builtin_dscliq (d_h, 17);
2983  d_h = __builtin_dscriq (d_h, 1);
2984 
2985  d_t = d_p + d_h;
2986  }
2987  t = vec_DFP2BCD (d_t);
2988  // fix up spurious negative zeros
2989 #ifdef _ARCH_PWR9
2990  t = vec_bcdadd (t, _BCD_CONST_ZERO);
2991 #else
2992  if (vec_all_eq((vui32_t) t, mz))
2993  t = _BCD_CONST_ZERO;
2994 #endif
2995  return (t);
2996 }
2997 
3032 static inline vBCD_t
3034 {
3035  const vBCD_t dword_mask = (vBCD_t) CONST_VINT128_DW(15, -1);
3036 #ifndef _ARCH_PWR9
3037  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
3038 #endif
3039  vBCD_t t, low_a, low_b, high_a, high_b;
3040  _Decimal128 d_p, d_t, d_al, d_bl;
3041 
3042  low_a = vec_and (a, dword_mask);
3043  low_b = vec_and (b, dword_mask);
3044  d_al = vec_BCD2DFP (low_a);
3045  d_bl = vec_BCD2DFP (low_b);
3046  d_p = d_al * d_bl;
3047  if (__builtin_expect ((vec_cmpuq_all_eq ((vui128_t) low_a, (vui128_t) a)
3048  && vec_cmpuq_all_eq ((vui128_t) low_b, (vui128_t) b)), 1))
3049  {
3050  d_t = __builtin_dscriq (d_p, 31);
3051  }
3052  else
3053  {
3054  _Decimal128 d_ah, d_bh, d_hl, d_lh, d_h, d_ll, d_m;
3055 
3056  high_a = vec_bcdsrqi (a, 16);
3057  high_b = vec_bcdsrqi (b, 16);
3058  d_ah = vec_BCD2DFP (high_a);
3059  d_bh = vec_BCD2DFP (high_b);
3060 
3061  d_hl = d_ah * d_bl;
3062  d_lh = d_al * d_bh;
3063  d_ll = __builtin_dscriq (d_p, 16);
3064 
3065  d_m = d_hl + d_lh + d_ll;
3066  d_m = __builtin_dscriq (d_m, 15);
3067 
3068  d_h = d_ah * d_bh;
3069  d_h = __builtin_dscliq (d_h, 1);
3070  d_t = d_m + d_h;
3071  }
3072  t = vec_DFP2BCD (d_t);
3073  // fix up spurious negative zeros
3074 #ifdef _ARCH_PWR9
3075  t = vec_bcdadd (t, _BCD_CONST_ZERO);
3076 #else
3077  if (vec_all_eq((vui32_t) t, mz))
3078  t = _BCD_CONST_ZERO;
3079 #endif
3080  return (t);
3081 }
3082 
3096 static inline vBCD_t
3098 {
3099  vBCD_t vrt;
3100 #ifdef _ARCH_PWR9
3101  __asm__(
3102  "bcds. %0,%1,%2,0;\n"
3103  : "=v" (vrt)
3104  : "v" (vrb),
3105  "v" (vra)
3106  : "cr6" );
3107 #else
3108  const vi8_t zero = vec_splat_s8(0);
3109  vi8_t shd = vec_splat (vrb, VEC_BYTE_L_DWH);
3110  vui128_t t;
3111  // Multiply digit shift by 4 to get bit shift count
3112  shd = vec_add (shd, shd);
3113  shd = vec_add (shd, shd);
3114  // Clear sign nibble before shift.
3115  t = (vui128_t) vec_andc ((vui32_t) vra, (vui32_t) _BCD_CONST_SIGN_MASK);
3116  // Compare shift positive or negative
3117  if (vec_all_ge(shd, zero))
3118  {
3119  // Positive, shift left
3120  t = vec_slq (t, (vui128_t) shd);
3121  }
3122  else
3123  {
3124  // Negative, shift right by absolute value
3125  shd = vec_sub (zero, shd);
3126  t = vec_srq (t, (vui128_t) shd);
3127  }
3128  // restore original sign nibble
3129  vrt = vec_bcdcpsgn ((vBCD_t) t, vra);
3130 #endif
3131  return (vrt);
3132 }
3133 
3157 static inline vBCD_t
3159 {
3160  vBCD_t vrt;
3161 #ifdef _ARCH_PWR9
3162  __asm__(
3163  "bcdsetsgn. %0,%1,0;\n"
3164  : "=v" (vrt)
3165  : "v" (vrb)
3166  : "cr6" );
3167 #else
3168  const vui32_t match_mask = vec_splat_u32(15);
3169  // The preferred sign is in the correct position for vec_bcdcpsgn
3170  const vui32_t minus_sign = (vui32_t) CONST_VINT128_W(0x0b, 0x0d, 0x0b, 0x0d);
3171  const vui32_t plus_sign = (vui32_t) CONST_VINT128_W(0x0a, 0x0f, 0x0e, 0x0c);
3172  vui32_t sign_splat;
3173  vui32_t sign_code;
3174 
3175  // Replicate the byte containing the sign to words
3176  sign_splat = vec_splat ((vui32_t) vrb, VEC_W_L);
3177  // Apply the code match mask
3178  sign_code = vec_and (sign_splat, match_mask);
3179  // SIMD compare for match to any positive sign code
3180  if (vec_any_eq (sign_code, plus_sign))
3181  vrt = vec_bcdcpsgn (vrb, (vBCD_t) plus_sign);
3182  else
3183  {
3184  // SIMD compare for match to any negative sign code
3185  if (vec_any_eq (sign_code, minus_sign))
3186  vrt = vec_bcdcpsgn (vrb, (vBCD_t) minus_sign);
3187  else
3188  vrt = vrb;
3189  }
3190 #endif
3191  return (vrt);
3192 }
3193 
3207 static inline vBCD_t
3208 vec_bcdslqi (vBCD_t vra, const unsigned int _N)
3209 {
3210  vBCD_t vrt;
3211 #ifdef _ARCH_PWR9
3212  vi8_t shd = vec_splats ((const signed char) (_N));
3213  vrt = vec_bcds (vra, shd);
3214 #else
3215  vui128_t t;
3216 
3217  t = (vui128_t) vec_andc ((vui32_t) vra, (vui32_t) _BCD_CONST_SIGN_MASK);
3218  t = vec_slqi (t, (_N*4));
3219  vrt = vec_bcdcpsgn ((vBCD_t) t, vra);
3220 #endif
3221  return (vrt);
3222 }
3223 
3237 static inline vBCD_t
3238 vec_bcdsluqi (vBCD_t vra, const unsigned int _N)
3239 {
3240 #ifdef _ARCH_PWR9
3241  vi8_t shd = vec_splats ((const signed char) (_N));
3242  return vec_bcdus (vra, shd);
3243 #else
3244  return (vBCD_t) vec_slqi ((vui128_t) vra, (_N*4));
3245 #endif
3246 }
3247 
3264 static inline vBCD_t
3266 {
3267  vBCD_t vrt;
3268 #ifdef _ARCH_PWR9
3269  __asm__(
3270  "bcdsr. %0,%1,%2,0;\n"
3271  : "=v" (vrt)
3272  : "v" (vrb),
3273  "v" (vra)
3274  : "cr6" );
3275 #else
3276  const vi8_t zero = vec_splat_s8(0);
3277  vi8_t shd = vec_splat (vrb, VEC_BYTE_L_DWH);
3278  vui128_t t;
3279  vui32_t r_d;
3280  // Multiply digit shift by 4 to get bit shift count
3281  shd = vec_add (shd, shd);
3282  shd = vec_add (shd, shd);
3283  // Clear sign nibble before shift.
3284  t = (vui128_t) vec_andc ((vui32_t) vra, (vui32_t) _BCD_CONST_SIGN_MASK);
3285  // Compare shift positive or negative
3286  if (vec_all_ge(shd, zero))
3287  {
3288  // Positive, shift left
3289  t = vec_slq (t, (vui128_t) shd);
3290  // restore original sign nibble
3291  vrt = vec_bcdcpsgn ((vBCD_t) t, vra);
3292  }
3293  else
3294  {
3295  const vui32_t rnd6 = CONST_VINT128_W (0, 0, 0, (5+6));
3296  vBCD_t rnd_d;
3297  // Negative, shift right by absolute value
3298  shd = vec_sub (zero, shd);
3299  t = vec_srq (t, (vui128_t) shd);
3300  // extract the last digit shifted out for rounding.
3301  r_d = (vui32_t) vec_and ((vui32_t) t, (vui32_t) _BCD_CONST_SIGN_MASK);
3302  // Add decimal 6's +5 to generate rounding digit
3303  r_d = vec_add (r_d, rnd6);
3304  // Set the sign from original value
3305  rnd_d = vec_bcdcpsgn (r_d, vra);
3306  // restore original sign nibble
3307  vrt = vec_bcdcpsgn ((vBCD_t) t, vra);
3308  // round the last digit
3309  vrt = vec_bcdadd (vrt, rnd_d);
3310 #ifdef _ARCH_PWR7
3311  // Special fixup for P7 via DFP. But in case of shift right
3312  // resulting in 0, the bcdadd above will return the preferred
3313  // +0, while bcdsr should not change the sign.
3314  vrt = vec_bcdcpsgn (vrt, vra);
3315 #endif
3316  }
3317 #endif
3318  return (vrt);
3319 }
3320 
3334 static inline vBCD_t
3335 vec_bcdsrqi (vBCD_t vra, const unsigned int _N)
3336 {
3337  vBCD_t vrt;
3338 #ifdef _ARCH_PWR9
3339  vi8_t shd = vec_splats ((const signed char) (-_N));
3340  vrt = vec_bcds (vra, shd);
3341 #else
3342  vui128_t t;
3343 
3344  t = vec_srqi ((vui128_t) vra, (_N*4));
3345  vrt = vec_bcdcpsgn ((vBCD_t) t, vra);
3346 #endif
3347  return (vrt);
3348 }
3349 
/** \brief Decimal shift right, with round-half-up, of a signed BCD
 * value by the constant _N digits.
 *
 * The _N low-order digits are shifted out; the result's last digit is
 * rounded up when the last digit shifted out is 5 or greater.
 *
 * @param vra 128-bit vector of signed BCD digits.
 * @param _N constant number of digits to shift right.
 * @return shifted and rounded BCD value carrying the original sign.
 */
static inline vBCD_t
vec_bcdsrrqi (vBCD_t vra, const unsigned int _N)
{
 vBCD_t vrt;
#ifdef _ARCH_PWR9
 // POWER9 bcdsr rounds; a negative digit count shifts right.
 vi8_t shd = vec_splats ((const signed char) (-_N));
 vrt = vec_bcdsr (vra, shd);
#else
 vui128_t t;
 vui32_t r_d;
 // Compare shift positive or negative
 if (_N < 32)
 {
 const vui32_t rnd6 = CONST_VINT128_W(0, 0, 0, (5 + 6));
 vBCD_t rnd_d;
 // Clear sign nibble before shift.
 t = (vui128_t) vec_andc ((vui32_t) vra, (vui32_t) _BCD_CONST_SIGN_MASK);
 t = vec_srqi (t, (_N * 4));
 // extract the last digit shifted out for rounding.
 // (It now sits in the unit nibble; bcdcpsgn overwrites it below.)
 r_d = (vui32_t) vec_and ((vui32_t) t, (vui32_t) _BCD_CONST_SIGN_MASK);
 // Add decimal 6's +5 to generate rounding digit
 // Digit+11 carries into the tens nibble exactly when digit >= 5,
 // so rnd_d becomes +/-1 (round up) or +/-0 after the sign insert.
 r_d = vec_add (r_d, rnd6);
 // Set the sign from original value
 rnd_d = vec_bcdcpsgn (r_d, vra);
 // restore original sign nibble
 vrt = vec_bcdcpsgn ((vBCD_t) t, vra);
 // round the last digit
 vrt = vec_bcdadd (vrt, rnd_d);
#ifdef _ARCH_PWR7
 // Special fixup for P7 via DFP. But in case of shift right
 // resulting in 0, the bcdadd above will return the preferred
 // +0, while bcdsr should not change the sign.
 vrt = vec_bcdcpsgn (vrt, vra);
#endif
 }
 else
 {
 // Shift count covers all 31 digits; value returned unchanged.
 vrt = vra;
 }
#endif
 return (vrt);
}
3405 
3419 static inline vBCD_t
3420 vec_bcdsruqi (vBCD_t vra, const unsigned int _N)
3421 {
3422 #ifdef _ARCH_PWR9
3423  vi8_t shd = vec_splats ((const signed char) (-_N));
3424  return vec_bcdus (vra, shd);
3425 #else
3426  return (vBCD_t) vec_srqi ((vui128_t) vra, (_N*4));
3427 #endif
3428 }
3429 
3444 static inline vBCD_t
3446 {
3447  vBCD_t t;
3448 #ifdef _ARCH_PWR8
3449 #if (__GNUC__ < 7)
3450  __asm__(
3451  "bcdsub. %0,%1,%2,0;\n"
3452  : "=v" (t)
3453  : "v" (a),
3454  "v" (b)
3455  : "cr6" );
3456 #else
3457  t = (vBCD_t) __builtin_bcdsub ((vi128_t) a, (vi128_t) b, 0);
3458 #endif
3459 #else
3460  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
3461  _Decimal128 d_t, d_a, d_b;
3462  d_a = vec_BCD2DFP (a);
3463  d_b = vec_BCD2DFP (b);
3464  d_t = d_a - d_b;
3465  t = vec_DFP2BCD(d_t);
3466  // fix up spurious negative zeros
3467  if (vec_all_eq((vui32_t) t, mz))
3468  t = _BCD_CONST_ZERO;
3469 #endif
3470  return (t);
3471 }
3472 
3493 static inline vBCD_t
3495 {
3496  vBCD_t t;
3497 #if defined (_ARCH_PWR8) && (__GNUC__ > 6)
3498  vBCD_t a_b;
3499 #ifdef _ARCH_PWR9
3500  // Generate BCD zero from (a - a), which is 3 cycles on PWR9
3501  t = vec_bcdsub (a, a);
3502 #else // Else load a BCD const 0.
3503  t = _BCD_CONST_ZERO;
3504 #endif
3505  if (__builtin_expect (__builtin_bcdsub_ov ((vi128_t) a, (vi128_t) b, 0), 0))
3506  {
3507  a_b = vec_bcdsub (a, b);
3508  t = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, a_b);
3509  }
3510 #else
3511  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
3512  _Decimal128 d_a, d_b, d_s, d_t;
3513  d_a = vec_BCD2DFP (a);
3514  d_b = vec_BCD2DFP (b);
3515  d_s = d_a - d_b;
3516  // Shift right 31 digits, leaving the carry.
3517  d_t = __builtin_dscriq (d_s, 31);
3518  t = vec_DFP2BCD(d_t);
3519  // fix up spurious negative zeros
3520  if (vec_all_eq ((vui32_t) t, mz))
3521  t = _BCD_CONST_ZERO;
3522 #endif
3523  return (t);
3524 }
3525 
3549 static inline vBCD_t
3551 {
3552  vBCD_t t;
3553 #ifdef _ARCH_PWR8
3554  vBCD_t a_b, a_b_c;
3555 
3556  a_b = vec_bcdsub (a, b);
3557  if (__builtin_expect (__builtin_bcdsub_ov ((vi128_t) a, (vi128_t) b, 0), 0))
3558  {
3559  t = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, a_b);
3560  }
3561  else // (a - b) did not overflow, what about (a - b + c)
3562  {
3563  a_b_c = vec_bcdadd (a_b, c);
3564  if (__builtin_bcdadd_ov ((vi128_t) a_b, (vi128_t) c, 0))
3565  {
3566  t = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, a_b_c);
3567  }
3568  else
3569  {
3570 #ifdef _ARCH_PWR9
3571  // Generate BCD zero from (a - a), which is 3 cycles on PWR9
3572  t = vec_bcdsub (a, a);
3573 #else // Else load a BCD const 0.
3574  t = _BCD_CONST_ZERO;
3575 #endif
3576  }
3577  }
3578 #else
3579  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
3580  _Decimal128 d_a, d_b, d_c, d_s, d_t;
3581  d_a = vec_BCD2DFP (a);
3582  d_b = vec_BCD2DFP (b);
3583  d_c = vec_BCD2DFP (c);
3584  d_s = d_a - d_b + d_c;
3585  // Shift right 31 digits, leaving the carry.
3586  d_t = __builtin_dscriq (d_s, 31);
3587  t = vec_DFP2BCD (d_t);
3588  // fix up spurious negative zeros
3589  if (vec_all_eq ((vui32_t) t, mz))
3590  t = _BCD_CONST_ZERO;
3591 #endif
3592  return (t);
3593 }
3594 
3612 static inline vBCD_t
3614 {
3615  vBCD_t t;
3616 #ifdef _ARCH_PWR8
3617  t = vec_bcdadd (vec_bcdsub (a, b), c);
3618 #else
3619  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
3620  _Decimal128 d_t;
3621  d_t = vec_BCD2DFP (a) - vec_BCD2DFP (b) + vec_BCD2DFP (c);
3622  t = vec_DFP2BCD(d_t);
3623  // fix up spurious negative zeros
3624  if (vec_all_eq ((vui32_t) t, mz))
3625  t = _BCD_CONST_ZERO;
3626 #endif
3627  return (t);
3628 }
3629 
3645 static inline vBCD_t
3647 {
3648  vBCD_t vrt;
3649 #ifdef _ARCH_PWR9
3650  __asm__(
3651  "bcdtrunc. %0,%1,%2,0;\n"
3652  : "=v" (vrt)
3653  : "v" (vrb),
3654  "v" (vra)
3655  : "cr6" );
3656 #else
3657  const vui16_t c124 = vec_splats ((unsigned short) 124);
3658  const vui16_t c4 = vec_splats ((unsigned short) 4);
3659  vui16_t shd = vec_splat (vrb, VEC_HW_L_DWH);
3660  vui128_t t;
3661  // Multiply digit shift by 4 to get bit shift count
3662  shd = vec_add (shd, shd);
3663  shd = vec_add (shd, shd);
3664  vui16_t one_s;
3665  // compensate for the sign nibble
3666  shd = vec_add (shd, c4);
3667  // generation all ones if in range, zeros if greater than
3668  one_s = (vui16_t) vec_cmple (shd, c124);
3669  // Generate a mask for the digits we will clear
3670  t = vec_slq ((vui128_t) one_s, (vui128_t) shd);
3671  // Clear the digits we are truncating
3672  vrt = (vBCD_t) vec_andc ((vui32_t)vra, (vui32_t) t);
3673 #endif
3674  return (vrt);
3675 }
3676 
3692 static inline vBCD_t
3693 vec_bcdtruncqi (vBCD_t vra, const unsigned short _N)
3694 {
3695  vBCD_t vrt;
3696 #ifdef _ARCH_PWR9
3697  vui16_t shd = vec_splats ((const unsigned short) (_N));
3698  vrt = vec_bcdtrunc (vra, shd);
3699 #else
3700  vui128_t t;
3701  const vui16_t ones = vec_splat_u16(-1);
3702  // Compare shift < 32 (128-bits)
3703  if (_N < 31)
3704  {
3705  // Generate a mask for the digits we will keep
3706  t = vec_srqi ((vui128_t) ones, ((31 -_N) * 4));
3707  // Clear the digits we are truncating
3708  vrt = (vBCD_t) vec_and ((vui32_t) t, (vui32_t)vra);
3709  }
3710  else
3711  vrt = vra;
3712 #endif
3713  return (vrt);
3714 }
3715 
3729 static inline vBCD_t
3731 {
3732  vBCD_t vrt;
3733 #ifdef _ARCH_PWR9
3734  __asm__(
3735  "bcdus. %0,%1,%2;\n"
3736  : "=v" (vrt)
3737  : "v" (vrb),
3738  "v" (vra)
3739  : "cr6" );
3740 #else
3741  const vi8_t zero = vec_splat_s8(0);
3742  vi8_t shd = vec_splat (vrb, VEC_BYTE_L_DWH);
3743  vui128_t t;
3744  // Multiply digit shift by 4 to get bit shift count
3745  shd = vec_add (shd, shd);
3746  shd = vec_add (shd, shd);
3747  t = (vui128_t) vra;
3748  // Compare shift positive or negative
3749  if (vec_all_ge(shd, zero))
3750  {
3751  // Positive, shift left
3752  t = vec_slq (t, (vui128_t) shd);
3753  }
3754  else
3755  {
3756  // Negative, shift right by absolute value
3757  shd = vec_sub (zero, shd);
3758  t = vec_srq (t, (vui128_t) shd);
3759  }
3760  vrt = (vBCD_t) t;
3761 #endif
3762  return (vrt);
3763 }
3764 
3780 static inline vBCD_t
3782 {
3783  vBCD_t vrt;
3784 #ifdef _ARCH_PWR9
3785  __asm__(
3786  "bcdutrunc. %0,%1,%2;\n"
3787  : "=v" (vrt)
3788  : "v" (vrb),
3789  "v" (vra)
3790  : "cr6" );
3791 #else
3792  const vui16_t c128 = vec_splats ((unsigned short) 128);
3793  vui16_t shd = vec_splat (vrb, VEC_HW_L_DWH);
3794  vui16_t one_s;
3795  vui128_t t;
3796  // Multiply digit shift by 4 to get bit shift count
3797  shd = vec_add (shd, shd);
3798  shd = vec_add (shd, shd);
3799  // generation all ones if in range, zeros if greater than
3800  one_s = (vui16_t) vec_cmplt (shd, c128);
3801  // Generate a mask for the digits we will clear
3802  t = vec_slq ((vui128_t) one_s, (vui128_t) shd);
3803  // Clear the digits we are truncating
3804  vrt = (vBCD_t) vec_andc ((vui32_t)vra, (vui32_t) t);
3805 #endif
3806  return (vrt);
3807 }
3808 
3824 static inline vBCD_t
3825 vec_bcdutruncqi (vBCD_t vra, const unsigned short _N)
3826 {
3827  vBCD_t vrt;
3828 #ifdef _ARCH_PWR9
3829  vui16_t shd = vec_splats ((const unsigned short) (_N));
3830  vrt = vec_bcdutrunc (vra, shd);
3831 #else
3832  vui128_t t;
3833  const vui16_t ones = vec_splat_u16(-1);
3834  // Compare shift < 32 (128-bits)
3835  if (_N < 32)
3836  {
3837  // Generate a mask for the digits we will keep
3838  t = vec_srqi ((vui128_t) ones, ((32 -_N) * 4));
3839  // Clear the digits we are truncating
3840  vrt = (vBCD_t) vec_and ((vui32_t) t, (vui32_t)vra);
3841  }
3842  else
3843  vrt = vra;
3844 #endif
3845  return (vrt);
3846 }
3847 
// Combined Decimal Add & write Carry Signed Quadword.
// Returns t = (a + b) modulo 31 digits and stores the carry digit
// through *cout (per this file's index: vec_cbcdaddcsq (vBCD_t *cout,
// vBCD_t a, vBCD_t b)).
// NOTE(review): extraction dropped the signature line AND the local
// declarations of `nines` and `tensc` used below -- both must be
// restored from upstream pveclib before this compiles.
static inline vBCD_t
{
  vBCD_t t, c;
#ifdef _ARCH_PWR8
  vBCD_t sum_ab, sign_ab;

  sum_ab = vec_bcdadd (a, b);
  if (__builtin_expect (__builtin_bcdadd_ov ((vi128_t) a, (vi128_t) b, 0), 0))
    {
      // Overflow: carry is +/-1 carrying the truncated sum's sign.
      c = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, sum_ab);
    }
  else // (a + b) did not overflow, but did it borrow?
    {
      c = _BCD_CONST_ZERO;
      // A sign flip (sum sign != a's sign) on a nonzero sum means borrow.
      sign_ab = vec_bcdcpsgn (sum_ab, a);
      if (!vec_all_eq(sign_ab, sum_ab) && !vec_all_eq(_BCD_CONST_ZERO, sum_ab))
        {
          // NOTE(review): declarations of `nines`/`tensc` were lost here.
          c = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, sum_ab);
          sum_ab = vec_bcdaddesqm (nines, sum_ab, tensc);
        }
    }
  t = sum_ab;
#else
  vBCD_t sign_ab;
  _Decimal128 d_a, d_b, d_s, d_t;
  const vui32_t mz = CONST_VINT128_W(0, 0, 0, 0x0000000d);
  d_a = vec_BCD2DFP (a);
  d_b = vec_BCD2DFP (b);
  d_s = d_a + d_b;
  t = vec_DFP2BCD (d_s);
  // Shift right 31 digits, leaving the carry.
  d_t = __builtin_dscriq (d_s, 31);
  c = vec_DFP2BCD (d_t);
  // fix up spurious negative zeros
  if (vec_all_eq((vui32_t ) c, mz))
    c = _BCD_CONST_ZERO;
  // (a + b) did not overflow, but did it borrow?
  sign_ab = vec_bcdcpsgn (t, a);
  if (!vec_all_eq(sign_ab, t) && !vec_all_eq(_BCD_CONST_ZERO, t))
    {
      // NOTE(review): declarations of `nines`/`tensc` were lost here.
      t = vec_bcdaddesqm (nines, t, tensc);
    }
#endif
  *cout = c;
  return (t);
}
3920 
// Combined Decimal Add Extended & write Carry Signed Quadword.
// Returns t = (a + b + cin) modulo 31 digits and stores the carry digit
// through *cout; cin is a carry-in from a lower-order quadword.
// NOTE(review): extraction dropped the signature line (presumably
// vec_cbcdaddecsq (vBCD_t *cout, vBCD_t a, vBCD_t b, vBCD_t cin) --
// confirm upstream) AND the local declarations of `nines`/`tensc`
// used below.
static inline vBCD_t
{
  vBCD_t t, c;
#ifdef _ARCH_PWR8
  vBCD_t sum_ab, sum_abc, sign_abc;

  sum_ab = vec_bcdadd (a, b);

  if (__builtin_expect (__builtin_bcdadd_ov ((vi128_t) a, (vi128_t) b, 0), 0))
    {
      sum_abc = vec_bcdadd (sum_ab, cin);
      c = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, sum_abc);
    }
  else // (a + b) did not overflow, but did (a + b + c) overflow?
    {
      sum_abc = vec_bcdadd (sum_ab, cin);
      if (__builtin_expect (__builtin_bcdadd_ov ((vi128_t) sum_ab, (vi128_t) cin, 0), 0))
        {
          c = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, sum_abc);
        }
      else // (a + b + c) did not overflow, but did it borrow?
        {
          c = _BCD_CONST_ZERO;
          // Sign flip on a nonzero sum indicates a borrow.
          sign_abc = vec_bcdcpsgn (sum_abc, a);
          if (!vec_all_eq(sign_abc, sum_abc) && !vec_all_eq(_BCD_CONST_ZERO, sum_abc))
            {
              // NOTE(review): declarations of `nines`/`tensc` lost here.
              c = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, sum_abc);
              sum_abc = vec_bcdaddesqm (nines, sum_abc, tensc);
            }
        }
    }
  t = sum_abc;
#else
  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
  vBCD_t sign_abc;
  _Decimal128 d_a, d_b, d_c, d_s, d_t;
  d_a = vec_BCD2DFP (a);
  d_b = vec_BCD2DFP (b);
  d_c = vec_BCD2DFP (cin);
  d_s = d_a + d_b + d_c;
  t = vec_DFP2BCD (d_s);
  // Shift right 31 digits, leaving the carry.
  d_t = __builtin_dscriq (d_s, 31);
  c = vec_DFP2BCD (d_t);
  // fix up spurious negative zeros
  if (vec_all_eq((vui32_t) c, mz))
    c = _BCD_CONST_ZERO;
  // (a + b + c) did not overflow, but did it borrow?
  sign_abc = vec_bcdcpsgn (t, a);
  if (!vec_all_eq(sign_abc, t) && !vec_all_eq(_BCD_CONST_ZERO, t))
    {
      // NOTE(review): declarations of `nines`/`tensc` lost here.
      t = vec_bcdaddesqm (nines, t, tensc);
    }
#endif
  *cout = c;
  return (t);
}
4006 
// Combined BCD 31x31-digit multiply: returns the low-order 31 digits of
// a * b and stores the high-order 31 digits through *p_high.
// NOTE(review): extraction lost the signature line; the parameter names
// suggest vec_cbcdmul (vBCD_t *p_high, vBCD_t a, vBCD_t b) -- confirm
// against upstream pveclib.
static inline vBCD_t
{
  const vBCD_t dword_mask = (vBCD_t) CONST_VINT128_DW(15, -1);
#ifndef _ARCH_PWR9
  const vui32_t mz = CONST_VINT128_W (0, 0, 0, 0x0000000d);
#endif
  vBCD_t t, ph, low_a, low_b, high_a, high_b;
  _Decimal128 d_p, d_t, d_al, d_bl;

  // Split each operand at 16 digits; the low halves convert exactly
  // to _Decimal128 for the partial products.
  low_a = vec_and (a, dword_mask);
  low_b = vec_and (b, dword_mask);
  d_al = vec_BCD2DFP (low_a);
  d_bl = vec_BCD2DFP (low_b);
  d_p = d_al * d_bl;
  // Fast path: both operands fit in 16 digits -> one DFP multiply.
  if (__builtin_expect ((vec_cmpuq_all_eq ((vui128_t) low_a, (vui128_t) a)
      && vec_cmpuq_all_eq ((vui128_t) low_b, (vui128_t) b)), 1))
    {
      d_t = __builtin_dscriq (d_p, 31);
      ph = vec_DFP2BCD (d_t);
      d_t = d_p;
    }
  else
    {
      // Full multiply via four 16-digit partial products, aligned with
      // DFP digit shifts (dscliq/dscriq) before summation.
      _Decimal128 d_ah, d_bh, d_hl, d_lh, d_h, d_hh, d_ll, d_m, d_mp;
      high_a = vec_bcdsrqi (a, 16);
      high_b = vec_bcdsrqi (b, 16);
      d_ah = vec_BCD2DFP (high_a);
      d_bh = vec_BCD2DFP (high_b);
      d_hl = d_ah * d_bl;
      d_ll = __builtin_dscriq (d_p, 16);

      d_lh = d_al * d_bh;

      d_mp = d_hl + d_lh;
      d_m = d_mp + d_ll;
      d_m = __builtin_dscriq (d_m, 15);

      d_hh = d_ah * d_bh;
      d_hh = __builtin_dscliq (d_hh, 1);

      d_t = d_m + d_hh;
      ph = vec_DFP2BCD (d_t);

      d_h = __builtin_dscliq (d_mp, 17);
      d_h = __builtin_dscriq (d_h, 1);

      d_t = d_p + d_h;
    }
  t = vec_DFP2BCD (d_t);
  // fix up spurious negative zeros
#ifdef _ARCH_PWR9
  ph = vec_bcdadd (ph, _BCD_CONST_ZERO);
#else
  if (vec_all_eq((vui32_t) ph, mz))
    ph = _BCD_CONST_ZERO;
#endif
  *p_high = ph;

  // fix up spurious negative zeros
#ifdef _ARCH_PWR9
  t = vec_bcdadd (t, _BCD_CONST_ZERO);
#else
  if (vec_all_eq((vui32_t) t, mz))
    t = _BCD_CONST_ZERO;
#endif
  return (t);
}
4112 
// Combined Decimal Subtract & write Carry Signed Quadword.
// Returns t = (a - b) modulo 31 digits and stores the carry digit
// through *cout.
// NOTE(review): extraction dropped the signature line (presumably
// vec_cbcdsubcsq (vBCD_t *cout, vBCD_t a, vBCD_t b) -- confirm
// upstream) AND the local declarations of `nines`/`tensc` used below.
static inline vBCD_t
{
  vBCD_t t, c;
#ifdef _ARCH_PWR8
  vBCD_t sum_ab, sign_ab;

  sum_ab = vec_bcdsub (a, b);
  if (__builtin_expect (__builtin_bcdsub_ov ((vi128_t) a, (vi128_t) b, 0), 0))
    {
      // Overflow: carry is +/-1 carrying the truncated difference's sign.
      c = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, sum_ab);
    }
  else // (a + b) did not overflow, but did it borrow?
    {
      c = _BCD_CONST_ZERO;
      // Sign flip on a nonzero difference indicates a borrow.
      sign_ab = vec_bcdcpsgn (sum_ab, a);
      if (!vec_all_eq(sign_ab, sum_ab) && !vec_all_eq(_BCD_CONST_ZERO, sum_ab))
        {
          // NOTE(review): declarations of `nines`/`tensc` were lost here.
          c = vec_bcdcpsgn (_BCD_CONST_PLUS_ONE, sum_ab);
          sum_ab = vec_bcdaddesqm (nines, sum_ab, tensc);
        }
    }
  t = sum_ab;
#else
  vBCD_t sign_ab;
  _Decimal128 d_a, d_b, d_s, d_t;
  const vui32_t mz = CONST_VINT128_W(0, 0, 0, 0x0000000d);
  d_a = vec_BCD2DFP (a);
  d_b = vec_BCD2DFP (b);
  d_s = d_a - d_b;
  t = vec_DFP2BCD (d_s);
  // Shift right 31 digits, leaving the carry.
  d_t = __builtin_dscriq (d_s, 31);
  c = vec_DFP2BCD (d_t);
  // fix up spurious negative zeros
  if (vec_all_eq((vui32_t ) c, mz))
    c = _BCD_CONST_ZERO;
  // (a + b) did not overflow, but did it borrow?
  sign_ab = vec_bcdcpsgn (t, a);
  if (!vec_all_eq(sign_ab, t) && !vec_all_eq(_BCD_CONST_ZERO, t))
    {
      // NOTE(review): declarations of `nines`/`tensc` were lost here.
      t = vec_bcdaddesqm (nines, t, tensc);
    }
#endif
  *cout = c;
  return (t);
}
4186 
// Pack a FPR pair holding a _Decimal128 into a doubleword vector
// (vector double). This is a register-file transfer only; the DPD bits
// are not converted.
static inline vf64_t
vec_pack_Decimal128 (_Decimal128 lval)
{
#ifdef _ARCH_PWR7
  vf64_t t;
  // xxpermdi merges the high (%1) and low (%L1) halves of the FPR pair
  // into a single VSR.
  __asm__(
      "\txxpermdi %x0,%1,%L1,0b00;\n"
      : "=v" (t)
      : "d" (lval)
      : );
  return (t);
#else
  //needs to work for P6 without xxpermdi
  __VEC_U_128 t;
  t.dpd128 = lval;
  return (t.vf2);
#endif
}
4216 
// Quantize (round) a _Decimal128 value to exponent 0 (an integral
// decimal value), truncating any fraction digits.
static inline _Decimal128
vec_quantize0_Decimal128 (_Decimal128 val)
{
#ifdef _ARCH_PWR7
  _Decimal128 t;
  // dquaiq: DFP Quantize Immediate, target exponent 0, RMC 0b01
  // (round toward zero).
  __asm__(
      "dquaiq 0,%0,%1,0b01;\n"
      : "=d" (t)
      : "d" (val)
      : );
  return (t);
#else
  // Fallback: library quantize against the literal 0DL (exponent 0).
  return (quantized128(val, 0DL));
#endif
}
4249 
4274 static inline vui8_t
4276 {
4277  vui8_t result;
4278  vui8_t x6, high_digit;
4279  /* Compute the high digit correction factor. For binary 100s to BCD
4280  * this is the radix 100 value divided by 10 times by the radix
4281  * difference in binary. For this stage we use 0x10 - 10 = 6. */
4282  high_digit = vra / 10;
4283 #if (__GNUC__ > 6)
4284  // Allow the compiler to do strength reduction for const 6 multiplier
4285  x6 = high_digit * 6;
4286  result = vra + x6;
4287 #else
4288  {
4289  vui8_t c6;
4290  c6 = vec_splats ((unsigned char) 0x06);
4291  x6 = vec_mulubm (high_digit, c6);
4292  /* Add the high digit correction bytes to the original
4293  * radix 100 bytes in binary. */
4294  result = vec_add (vra, x6);
4295  }
4296 #endif
4297  return result;
4298 }
4299 
4325 static inline vui8_t
4327 {
4328  vui8_t result;
4329  vui16_t x156, c156, high_digit;
4330  /* Compute the high digit correction factor. For binary 10**4 to 100s
4331  * this is the radix 10000 value divided by 100 times by the radix
4332  * difference in binary. For this stage we use 0x100 - 100 = 156. */
4333  high_digit = vra / 100;
4334  c156 = vec_splats ((unsigned short) 156);
4335 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4336  x156 = vec_vmuleub ((vui8_t) high_digit, (vui8_t) c156);
4337 #else
4338  x156 = vec_vmuloub ((vui8_t) high_digit, (vui8_t) c156);
4339 #endif
4340  /* Add the high digit correction bytes from the original
4341  * radix 10000 hword in binary. */
4342  result = (vui8_t) vec_add (vra, x156);
4343  return result;
4344 }
4345 
4371 static inline vui16_t
4373 {
4374  vui16_t result;
4375  vui32_t high_digit;
4376  /* Compute the high digit correction factor. For binary 10**8 to 10**4
4377  * this is the radix 100000000 value divided by 10000 times by the radix
4378  * difference in binary. For this stage we use 0x10000 - 10000 = 55536. */
4379  const vui32_t c = vec_splats ((unsigned int) 55536);
4380 
4381  high_digit = vra / 10000;
4382 #ifdef _ARCH_PWR8
4383  // 0 in the even hword of const c reduces vmsumuhm to vmulouh
4384  result = (vui16_t) vec_msum ((vui16_t) high_digit, (vui16_t) c, vra);
4385 #else
4386  {
4387  vui32_t x;
4388 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4389  x = vec_vmuleuh ((vui16_t) high_digit, (vui16_t) c);
4390 #else
4391  x = vec_vmulouh ((vui16_t) high_digit, (vui16_t) c);
4392 #endif
4393  /* Add the high digit correction word to the original
4394  * radix 10**8 word in binary. */
4395  result = (vui16_t) vec_add (vra, x);
4396  }
4397 #endif
4398  return result;
4399 }
4400 
4426 static inline vui32_t
4428 {
4429  /* Magic numbers for multiplicative inverse to divide by 10**8
4430  are 12379400392853802749, no corrective add,
4431  and shift right 26 bits. */
4432  const vui64_t mul_invs_ten8 = CONST_VINT128_DW(
4433  12379400392853802749UL, 12379400392853802749UL);
4434  const int shift_ten8 = 26;
4435  vui32_t result;
4436  vui64_t x, c, high_digit;
4437  /* Compute the high digit correction factor. For binary 10**16 to
4438  * 10**8 this is the radix 10000000000000000 value divided by
4439  * 100000000 times by the radix difference in binary. For this
4440  * stage we use 0x100000000 - 100000000 = 4194967296. */
4441 
4442  // high_digit = vra / 100000000;
4443  // Next divide the 16 digits by 10**8.
4444  // This separates the high 8 digits into words.
4445  high_digit = vec_mulhud (vra, mul_invs_ten8);
4446  high_digit = vec_srdi (high_digit, shift_ten8);
4447  c = vec_splats ((unsigned long)4194967296);
4448 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4449  x = vec_muleuw ((vui32_t) high_digit, (vui32_t) c);
4450 #else
4451  x = vec_mulouw ((vui32_t) high_digit, (vui32_t) c);
4452 #endif
4453  /* Add the high digit correction dword to the original
4454  * radix 10**16 dword in binary. */
4455  result = (vui32_t) vec_addudm (vra, x);
4456  return result;
4457 }
4458 
4459 /* Convert radix 10**16 binary dwords to radix 10**8 words */
4460 
4488 static inline vui64_t
4490 {
4491  // Compute the high digit correction factor. For binary 10**32 to
4492  // 10**16, this is (16**16 - 10**16) = 18436744073709551616.
4493  const vui64_t c = CONST_VINT128_DW (0, 18436744073709551616UL);
4494 
4495  /* Magic numbers for multiplicative inverse to divide by 10**16
4496  are 76624777043294442917917351357515459181, no corrective add,
4497  and shift right 51 bits. */
4498  const vui128_t mul_invs_ten16 = (vui128_t) CONST_VINT128_DW(
4499  0x39a5652fb1137856UL, 0xd30baf9a1e626a6dUL);
4500  const int shift_ten16 = 51;
4501 
4502  vui64_t result;
4503  vui128_t high_digit;
4504 
4505  // high_digit = vra / 10000000000000000;
4506  // Next divide the 32 digits by 10**16.
4507  // This separates the high 16 digits into doublewords.
4508  high_digit = vec_mulhuq (vra, mul_invs_ten16);
4509  high_digit = vec_srqi (high_digit, shift_ten16);
4510 
4511  // multiply high_digit by the radix difference c and add vra
4512 #ifdef _ARCH_PWR9
4513  // 0 in the high dword of const c reduces vmsumudm to vmuloud
4514  result = (vui64_t) vec_msumudm ((vui64_t) high_digit, c, vra);
4515 #else
4516  {
4517  vui128_t x;
4518  x = vec_vmuloud ((vui64_t) high_digit, c);
4519  /* Add the high digit correction qword to the original
4520  * radix 10**32 qword in binary. */
4521  result = (vui64_t) vec_adduqm (vra, x);
4522  }
4523 #endif
4524  return result;
4525 }
4526 
4558 static inline vui8_t
4560 {
4561  const vui8_t dmask = vec_splat_u8 (15);
4562  const vui8_t dx10 = vec_splat_u8 (10);
4563  vui8_t znd00, znd16;
4564  vui8_t ones, tens;
4565  vui16_t ten00, ten16;
4566 
4567  /* Isolate the BCD digit from each zoned character. */
4568  znd00 = vec_and (zone00, dmask);
4569  znd16 = vec_and (zone16, dmask);
4570  /* Pack the odd zone digits into a single vector.
4571  This is the unit digit of each zoned digit pair. */
4572 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4573  ones = vec_pack ((vui16_t) znd16, (vui16_t) znd00);
4574 #else
4575  ones = vec_pack ((vui16_t) znd00, (vui16_t) znd16);
4576 #endif
4577  /* Multiply the even zone digits by 10 before packing
4578  them into a single vector.
4579  This is the tens digit of each zoned digit pair. */
4580 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4581  ten00 = vec_mulo (znd00, dx10);
4582  ten16 = vec_mulo (znd16, dx10);
4583  tens = vec_pack (ten16, ten00);
4584 #else
4585  ten00 = vec_mule (znd00, dx10);
4586  ten16 = vec_mule (znd16, dx10);
4587  tens = vec_pack (ten00, ten16);
4588 #endif
4589  /* sum adjacent tens and unit digit pairs, into a single
4590  * binary value in the range 0-99. */
4591  return vec_add (tens, ones);
4592 }
4593 
4622 static inline vui8_t
4624 {
4625  vui8_t x6, c6, high_digit;
4626  /* Compute the high digit correction factor. For BCD to binary 100s
4627  * this is the isolated high digit multiplied by the radix difference
4628  * in binary. For this stage we use 0x10 - 10 = 6. */
4629  high_digit = vec_srbi (vra, 4);
4630  c6 = vec_splats ((unsigned char) 0x06);
4631 #if (__GNUC__ > 7)
4632  // Allow the compiler to do strength reduction for const 6 multiplier
4633  x6 = vec_mul (high_digit, c6);
4634 #else
4635  x6 = vec_mulubm (high_digit, c6);
4636 #endif
4637  /* Subtract the high digit correction bytes from the original
4638  * BCD bytes in binary. This reduces byte range to 0-99. */
4639  return vec_sub (vra, x6);
4640 }
4641 
4671 static inline vui16_t
4673 {
4674  vui8_t c156;
4675  vui16_t x156;
4676  /* Compute the high digit correction factor. For 100s to binary 10ks
4677  * this is the isolated high digit multiplied by the radix difference
4678  * in binary. For this stage we use 0x100 - 100 = 156. */
4679  c156 = vec_splats ((unsigned char) 156);
4680 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4681  x156 = vec_mulo ((vui8_t) vra, c156);
4682 #else
4683  x156 = vec_mule ((vui8_t) vra, c156);
4684 #endif
4685  /* Subtract the high digit correction halfword from the original
4686  * 100s byte pair in binary. This reduces the range to 0-9999. */
4687  return vec_sub ((vui16_t) vra, x156);
4688 }
4689 
4719 static inline vui32_t
4721 {
4722  vui16_t c55536;
4723  vui32_t x55536;
4724  /* Compute the high digit correction factor. For 10ks to binary 100ms
4725  * this is the isolated high digit multiplied by the radix difference
4726  * in binary. For this stage we use 0x10000 - 10000 = 55536. */
4727  c55536 = vec_splats ((unsigned short) 55536);
4728 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4729  x55536 = vec_mulo ((vui16_t) vra, c55536);
4730 #else
4731  x55536 = vec_mule ((vui16_t) vra, c55536);
4732 #endif
4733  /* Subtract the high digit correction word from the original
4734  * 10ks byte pair in binary. This reduces the range to
4735  * 0-99999999. */
4736  return vec_sub ((vui32_t) vra, x55536);
4737 }
4738 
4768 static inline vui64_t
4770 {
4771  vui32_t c4194967296;
4772  vui64_t x4194967296;
4773  /* Compute the high digit correction factor. For 100ms to binary 10ts
4774  * this is the isolated high digit multiplied by the radix difference
4775  * in binary. For this stage we use 0x100000000 - 100000000 =
4776  * 4194967296. */
4777  c4194967296 = vec_splats ((unsigned int) 4194967296);
4778 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
4779  x4194967296 = vec_mulouw ((vui32_t) vra, c4194967296);
4780 #else
4781  x4194967296 = vec_muleuw ((vui32_t) vra, c4194967296);
4782 #endif
4783  /* Subtract the high digit correction doubleword from the original
4784  * 100m word pair in binary. This reduces the range to
4785  * 0-9999999999999999. */
4786  return vec_subudm ((vui64_t) vra, x4194967296);
4787 }
4788 
4817 static inline vui128_t
4819 {
4820  const vui64_t c18436744073709551616 = CONST_VINT128_DW (18436744073709551616UL, 0);
4821  vui128_t x18436744073709551616;
4822  /* Compute the high digit correction factor for 10ts to binary 10e32s
4823  * This is the isolated high digit multiplied by the radix difference
4824  * in binary. For this stage we use
4825  * 0x10000000000000000 - 10000000000000000 = 18436744073709551616. */
4826 // c18436744073709551616 = vec_splats ((unsigned long) 18436744073709551616UL);
4827 #ifdef _ARCH_PWR9
4828  const vui128_t zero = CONST_VINT128_DW128 (0, 0);
4829  // 0 in the low dword of const c reduces vmsumudm to vmuleud
4830  x18436744073709551616 = (vui128_t) vec_msumudm ((vui64_t) vra, c18436744073709551616, zero);
4831 #else
4832  x18436744073709551616 = vec_vmuleud ((vui64_t) vra, c18436744073709551616);
4833 #endif
4834 
4835  /* Subtract the high digit correction quadword from the original
4836  * 10e doubleword pair in binary. This reduces the range to
4837  * 0-99999999999999999999999999999999. */
4838  return vec_subuqm ((vui128_t) vra, x18436744073709551616);
4839 }
4840 
4856 static inline vb128_t
4858 {
4859  vb128_t result;
4860 #if defined (_ARCH_PWR8) && (__GNUC__ > 6)
4861  // The OV and INV status results overlay CR.bit[59] for bcdadd/sub.
4862  // For valid inputs OV will never be set for both bcdadd/sub.
4863  // So if both bcdadd/bcdsub return OV then must be invalid data.
4864  if (__builtin_bcdadd_ov ((vi128_t) vra, (vi128_t) _BCD_CONST_ZERO, 0)
4865  && __builtin_bcdsub_ov ((vi128_t) vra, (vi128_t) _BCD_CONST_ZERO, 0))
4866  result = (vb128_t) vec_splat_s8 (-1);
4867  else
4868  result = (vb128_t) vec_splat_s8 (0);
4869 #else
4870  const vui16_t sign_mask = vec_splat_u16(15);
4871  // Load all 6 valid sign nibble values into a vector unsigned short
4872  // After splatting the sign, we can compare any of six in one op
4873  const vui16_t sign_vals = CONST_VINT128_H(0x0b, 0x0d, 0x0b, 0x0d, 0x0a, 0x0c,
4874  0x0e, 0x0f);
4875  const vui8_t max_digit = vec_splat_u8(9);
4876  const vui8_t msk_digit = vec_splat_u8(15);
4877  vui16_t sign_splat;
4878  vui16_t sign_code;
4879  vui8_t even, odd;
4880 
4881  // Replicate the halfword containing the sign nibble
4882  sign_splat = vec_splat ((vui16_t) vra, VEC_HW_L);
4883  // Apply the sign nibble mask
4884  sign_code = vec_and (sign_splat, sign_mask);
4885  // SIMD compare for match to any positive/negative sign code
4886  if (vec_any_eq(sign_code, sign_vals))
4887  {
4888  // Split even/odd digits out so there only one digit per byte.
4889  // This insures the binary compare can detect any digits > 9
4890  even = vec_andc ((vui8_t) vra, msk_digit);
4891  odd = vec_and ((vui8_t) vra, msk_digit);
4892  // Align the compare digits with max_digit
4893  even = (vui8_t) vec_srqi ((vui128_t) even, 4);
4894  // And eliminate the sign nibble
4895  odd = (vui8_t) vec_srqi ((vui128_t) odd, 8);
4896  if (vec_any_gt (even, max_digit) || vec_any_gt(odd, max_digit))
4897  result = (vb128_t) vec_splat_s8(-1);
4898  else
4899  result = (vb128_t) vec_splat_s8(0);
4900  }
4901  else
4902  result = (vb128_t) vec_splat_s8(-1);
4903 #endif
4904  return (result);
4905 }
4906 
4927 static inline vb128_t
4929 {
4930  vb128_t result;
4931 #if defined (_ARCH_PWR8) && (__GNUC__ > 6)
4932  if (__builtin_bcdsub_gt ((vi128_t) vra, (vi128_t) _BCD_CONST_MINUS_ONE, 0))
4933  result = (vb128_t) vec_splat_s8 (0);
4934  else
4935  result = (vb128_t) vec_splat_s8 (-1);
4936 #else
4937  const vui32_t sign_mask = vec_splat_u32(15);
4938 // const vui32_t neg_sign = (vui32_t) CONST_VINT128_W(0x0b, 0x0d, 0x0b, 0x0d);
4939  const vui32_t plus_sign = (vui32_t) CONST_VINT128_W(0x0a, 0x0c, 0x0e, 0x0f);
4940  vui32_t sign_splat;
4941  vui32_t sign_code;
4942 
4943  // Replicate the byte containing the sign to words
4944  sign_splat = vec_splat ((vui32_t) vra, VEC_W_L);
4945  // Apply the code mask
4946  sign_code = vec_and (sign_splat, sign_mask);
4947  // SIMD compare for match to any positive sign code
4948  if (vec_any_eq(sign_code, plus_sign))
4949  result = (vb128_t) vec_splat_s8(0);
4950  else
4951  result = (vb128_t) vec_splat_s8(-1);
4952 #endif
4953  return (result);
4954 }
4955 
4975 static inline int
4977 {
4978  int result;
4979 #if defined (_ARCH_PWR8) && (__GNUC__ > 6)
4980  result = __builtin_bcdsub_lt ((vi128_t) vra, (vi128_t) _BCD_CONST_ZERO, 0);
4981 #else
4982  const vui32_t sign_mask = vec_splat_u32(15);
4983  const vui32_t minus_sign = (vui32_t) CONST_VINT128_W(0x0b, 0x0d, 0x0b, 0x0d);
4984  vui32_t sign_splat;
4985  vui32_t sign_code;
4986 
4987  // Replicate the byte containing the sign to words
4988  sign_splat = vec_splat ((vui32_t) vra, VEC_W_L);
4989  // Apply the code mask
4990  sign_code = vec_and (sign_splat, sign_mask);
4991  // SIMD compare for match to any negative sign code
4992  result = vec_any_eq(sign_code, minus_sign);
4993 #endif
4994  return (result);
4995 }
4996 
5008 static inline _Decimal128
5010 {
5011 #ifdef _ARCH_PWR7
5012  _Decimal128 t;
5013  __asm__(
5014  "xxpermdi %0,%x1,%x1,0b00;\n"
5015  "\txxpermdi %L0,%x1,%x1,0b10;\n"
5016  : "=&d" (t)
5017  : "v" (lval)
5018  : );
5019  return (t);
5020 #else
5021  // needs to work for P6 without xxpermdi
5022  __VEC_U_128 t;
5023  t.vf2 = lval;
5024  return (t.dpd128);
5025 #endif
5026 }
5027 
5048 static inline vui128_t
5049 vec_zndctuq (vui8_t zone00, vui8_t zone16)
5050 {
5051  vui8_t d100;
5052  vui16_t d10k;
5053  vui32_t d100m;
5054  vui64_t d10e;
5055  d100 = vec_rdxcfzt100b (zone00, zone16);
5056  d10k = vec_rdxct10kh (d100);
5057  d100m = vec_rdxct100mw (d10k);
5058  d10e = vec_rdxct10E16d (d100m);
5059  return vec_rdxct10e32q (d10e);
5060 }
5061 #endif /* ndef PVECLIB_DISABLE_DFP */
5062 #endif /* VEC_BCD_PPC_H_ */
vbBCD_t
#define vbBCD_t
vector bool from 128-bit signed BCD integer.
Definition: vec_bcd_ppc.h:1567
vec_bcdcmplt
static int vec_bcdcmplt(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for less than.
Definition: vec_bcd_ppc.h:2507
vec_bcdcmpne
static int vec_bcdcmpne(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for not equal.
Definition: vec_bcd_ppc.h:2537
vec_bcdaddesqm
static vBCD_t vec_bcdaddesqm(vBCD_t a, vBCD_t b, vBCD_t c)
Decimal Add Extended Signed Modulo Quadword.
Definition: vec_bcd_ppc.h:1955
vec_rdxcf10E16d
static vui32_t vec_rdxcf10E16d(vui64_t vra)
Vector Decimal Convert radix 10**16 Binary doublewords to pairs of radix 10**8 binary words.
Definition: vec_bcd_ppc.h:4427
vec_bcdctub
static vui8_t vec_bcdctub(vBCD_t vra)
Vector Decimal Convert Binary Coded Decimal (BCD) digit pairs to binary unsigned bytes .
Definition: vec_bcd_ppc.h:2636
vec_bcdus
static vBCD_t vec_bcdus(vBCD_t vra, vi8_t vrb)
Decimal Unsigned Shift. Shift a vector unsigned BCD value, left or right a variable amount of digits ...
Definition: vec_bcd_ppc.h:3730
vec_muleuw
static vui64_t vec_muleuw(vui32_t a, vui32_t b)
Vector multiply even unsigned words.
Definition: vec_int32_ppc.h:1007
vec_BIN2BCD
static vBCD_t vec_BIN2BCD(vui64_t val)
Convert vector unsigned doubleword binary values to Vector unsigned 16-digit BCD values.
Definition: vec_bcd_ppc.h:1706
vec_subuqm
static vui128_t vec_subuqm(vui128_t vra, vui128_t vrb)
Vector Subtract Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:7439
vb32_t
__vector __bool int vb32_t
vector of 32-bit bool int elements.
Definition: vec_common_ppc.h:228
vec_bcdcpsgn
static vBCD_t vec_bcdcpsgn(vBCD_t vra, vBCD_t vrb)
Vector copy sign BCD.
Definition: vec_bcd_ppc.h:2563
vec_bcdsub
static vBCD_t vec_bcdsub(vBCD_t a, vBCD_t b)
Subtract two Vector Signed BCD 31 digit values.
Definition: vec_bcd_ppc.h:3445
vec_xxspltd
static vui64_t vec_xxspltd(vui64_t vra, const int ctl)
Vector splat doubleword. Duplicate the selected doubleword element across the doubleword elements of ...
Definition: vec_int64_ppc.h:4647
vec_bcdcmpeq
static int vec_bcdcmpeq(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for equal.
Definition: vec_bcd_ppc.h:2387
vb128_t
__vector __bool __int128 vb128_t
vector of one 128-bit bool __int128 element.
Definition: vec_common_ppc.h:240
vec_bcdctz
static vui8_t vec_bcdctz(vBCD_t vrb)
Vector Decimal Convert To Zoned.
Definition: vec_bcd_ppc.h:2806
vec_cmpneuq
static vb128_t vec_cmpneuq(vui128_t vra, vui128_t vrb)
Vector Compare Not Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3475
vec_unpack_Decimal128
static _Decimal128 vec_unpack_Decimal128(vf64_t lval)
Unpack a doubleword vector (vector double) into a FPR pair. (_Decimal128).
Definition: vec_bcd_ppc.h:5009
vec_bcdcfud
static vBCD_t vec_bcdcfud(vui64_t vrb)
Vector Decimal Convert From Unsigned doubleword returning up to 2x16 BCD digits.
Definition: vec_bcd_ppc.h:2046
vec_bcdslqi
static vBCD_t vec_bcdslqi(vBCD_t vra, const unsigned int _N)
Vector BCD Shift Left Signed Quadword.
Definition: vec_bcd_ppc.h:3208
vec_rdxct10e32q
static vui128_t vec_rdxct10e32q(vui64_t vra)
Vector Decimal Convert radix 10E16 digit pairs to radix 10E32 __int128 quadwords.
Definition: vec_bcd_ppc.h:4818
__VEC_U_128::dpd128
_Decimal128 dpd128
128 bit Decimal Float from pair of double float registers.
Definition: vec_common_ppc.h:264
vec_bcdcmp_eqsq
static vbBCD_t vec_bcdcmp_eqsq(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for equal.
Definition: vec_bcd_ppc.h:2185
CONST_VINT128_W
#define CONST_VINT128_W(__w0, __w1, __w2, __w3)
Arrange word elements of an unsigned int initializer in high->low order. May require an explicit cast.
Definition: vec_common_ppc.h:304
vec_bcdctuq
static vui128_t vec_bcdctuq(vBCD_t vra)
Vector Decimal Convert groups of 32 BCD digits to binary unsigned quadword.
Definition: vec_bcd_ppc.h:2745
vec_slbi
static vui8_t vec_slbi(vui8_t vra, const unsigned int shb)
Vector Shift left Byte Immediate.
Definition: vec_char_ppc.h:809
vec_bcdmulh
static vBCD_t vec_bcdmulh(vBCD_t a, vBCD_t b)
Vector Signed BCD Multiply High.
Definition: vec_bcd_ppc.h:3033
vec_rdxcf10kh
static vui8_t vec_rdxcf10kh(vui16_t vra)
Vector Decimal Convert radix 10,000 Binary halfwords to pairs of radix 100 binary bytes.
Definition: vec_bcd_ppc.h:4326
vec_bcdcmpge
static int vec_bcdcmpge(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for greater than or equal.
Definition: vec_bcd_ppc.h:2417
vec_bcdsubcsq
static vBCD_t vec_bcdsubcsq(vBCD_t a, vBCD_t b)
Decimal Subtract & write Carry Signed Quadword.
Definition: vec_bcd_ppc.h:3494
vec_cmpuq_all_ne
static int vec_cmpuq_all_ne(vui128_t vra, vui128_t vrb)
Vector Compare all Not Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:4025
vec_cbcdaddcsq
static vBCD_t vec_cbcdaddcsq(vBCD_t *cout, vBCD_t a, vBCD_t b)
Combined Decimal Add & Write Carry Signed Quadword.
Definition: vec_bcd_ppc.h:3869
vui16_t
__vector unsigned short vui16_t
vector of 16-bit unsigned short elements.
Definition: vec_common_ppc.h:204
vec_srdi
static vui64_t vec_srdi(vui64_t vra, const unsigned int shb)
Vector Shift Right Doubleword Immediate.
Definition: vec_int64_ppc.h:3604
vec_rdxcf100b
static vui8_t vec_rdxcf100b(vui8_t vra)
Vector Decimal Convert Binary Coded Decimal (BCD) digit pairs from radix 100 binary integer bytes.
Definition: vec_bcd_ppc.h:4275
vec_vmuleud
static vui128_t vec_vmuleud(vui64_t a, vui64_t b)
Vector Multiply Even Unsigned Doublewords.
Definition: vec_int128_ppc.h:7487
vec_bcdctuh
static vui16_t vec_bcdctuh(vBCD_t vra)
Vector Decimal Convert groups of 4 BCD digits to binary unsigned halfwords.
Definition: vec_bcd_ppc.h:2659
vec_cmpuq_all_eq
static int vec_cmpuq_all_eq(vui128_t vra, vui128_t vrb)
Vector Compare all Equal Unsigned Quadword.
Definition: vec_int128_ppc.h:3804
vec_bcdcfsq
static vBCD_t vec_bcdcfsq(vi128_t vrb)
Vector Decimal Convert From Signed Quadword returning up to 31 BCD digits.
Definition: vec_bcd_ppc.h:1992
vec_cbcdaddecsq
static vBCD_t vec_cbcdaddecsq(vBCD_t *cout, vBCD_t a, vBCD_t b, vBCD_t cin)
Combined Decimal Add Extended & write Carry Signed Quadword.
Definition: vec_bcd_ppc.h:3944
vi128_t
__vector __int128 vi128_t
vector of one 128-bit signed __int128 element.
Definition: vec_common_ppc.h:235
vec_bcdsruqi
static vBCD_t vec_bcdsruqi(vBCD_t vra, const unsigned int _N)
Vector BCD Shift Right Unsigned Quadword immediate.
Definition: vec_bcd_ppc.h:3420
vec_bcdtruncqi
static vBCD_t vec_bcdtruncqi(vBCD_t vra, const unsigned short _N)
Decimal Truncate Quadword Immediate. Truncate a vector signed BCD value vra to N-digits,...
Definition: vec_bcd_ppc.h:3693
vec_bcdsr
static vBCD_t vec_bcdsr(vBCD_t vra, vi8_t vrb)
Decimal Shift and Round. Shift a vector signed BCD value, left or right a variable amount of digits (...
Definition: vec_bcd_ppc.h:3265
vec_bcds
static vBCD_t vec_bcds(vBCD_t vra, vi8_t vrb)
Decimal Shift. Shift a vector signed BCD value, left or right a variable amount of digits (nibbles)....
Definition: vec_bcd_ppc.h:3097
vec_bcdtrunc
static vBCD_t vec_bcdtrunc(vBCD_t vra, vui16_t vrb)
Decimal Truncate. Truncate a vector signed BCD value vra to N-digits, where N is the unsigned integer...
Definition: vec_bcd_ppc.h:3646
vec_bcdcmp_nesq
static vbBCD_t vec_bcdcmp_nesq(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for not equal.
Definition: vec_bcd_ppc.h:2355
vec_bcdsubecsq
static vBCD_t vec_bcdsubecsq(vBCD_t a, vBCD_t b, vBCD_t c)
Decimal Subtract Extended & write Carry Signed Quadword.
Definition: vec_bcd_ppc.h:3550
_BCD_CONST_MINUS_ONE
#define _BCD_CONST_MINUS_ONE
vector signed BCD constant -1.
Definition: vec_bcd_ppc.h:1574
vui64_t
__vector unsigned long long vui64_t
vector of 64-bit unsigned long long elements.
Definition: vec_common_ppc.h:208
vec_bcdmul
static vBCD_t vec_bcdmul(vBCD_t a, vBCD_t b)
Multiply two Vector Signed BCD 31 digit values.
Definition: vec_bcd_ppc.h:2949
CONST_VINT128_H
#define CONST_VINT128_H(__hw0, __hw1, __hw2, __hw3, __hw4, __hw5, __hw6, __hw7)
Arrange halfword elements of an unsigned int initializer in high->low order. May require an explicit c...
Definition: vec_common_ppc.h:309
vec_bcdcmp_gtsq
static vbBCD_t vec_bcdcmp_gtsq(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for greater than.
Definition: vec_bcd_ppc.h:2253
vec_common_ppc.h
Common definitions and typedef used by the collection of Power Vector Library (pveclib) headers.
vec_bcdaddecsq
static vBCD_t vec_bcdaddecsq(vBCD_t a, vBCD_t b, vBCD_t c)
Decimal Add Extended & write Carry Signed Quadword.
Definition: vec_bcd_ppc.h:1892
vui8_t
__vector unsigned char vui8_t
vector of 8-bit unsigned char elements.
Definition: vec_common_ppc.h:202
vec_srbi
static vui8_t vec_srbi(vui8_t vra, const unsigned int shb)
Vector Shift Right Byte Immediate.
Definition: vec_char_ppc.h:905
vec_DFP2BCD
static vBCD_t vec_DFP2BCD(_Decimal128 val)
Convert a __Decimal128 value to Vector BCD.
Definition: vec_bcd_ppc.h:1748
_BCD_CONST_ZERO
#define _BCD_CONST_ZERO
vector signed BCD constant +0.
Definition: vec_bcd_ppc.h:1576
vec_bcddiv
static vBCD_t vec_bcddiv(vBCD_t a, vBCD_t b)
Divide a Vector Signed BCD 31 digit value by another BCD value.
Definition: vec_bcd_ppc.h:2870
vec_bcdsrrqi
static vBCD_t vec_bcdsrrqi(vBCD_t vra, const unsigned int _N)
Vector BCD Shift Right and Round Signed Quadword Immediate.
Definition: vec_bcd_ppc.h:3364
vec_rdxct10E16d
static vui64_t vec_rdxct10E16d(vui32_t vra)
Vector Decimal Convert radix 100,000,000 digit word pairs to radix 10E16 binary integer doublewords.
Definition: vec_bcd_ppc.h:4769
vec_subudm
static vui64_t vec_subudm(vui64_t a, vui64_t b)
Vector Subtract Unsigned Doubleword Modulo.
Definition: vec_int64_ppc.h:3746
_BCD_CONST_PLUS_NINES
#define _BCD_CONST_PLUS_NINES
vector signed BCD constant +9s.
Definition: vec_bcd_ppc.h:1570
vec_bcdcmple
static int vec_bcdcmple(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for less than or equal.
Definition: vec_bcd_ppc.h:2477
vec_int128_ppc.h
Header package containing a collection of 128-bit computation functions implemented with PowerISA VMX...
vec_srqi
static vui128_t vec_srqi(vui128_t vra, const unsigned int shb)
Vector Shift Right Quadword Immediate.
Definition: vec_int128_ppc.h:7154
vec_setbool_bcdsq
static vb128_t vec_setbool_bcdsq(vBCD_t vra)
Vector Set Bool from Signed BCD Quadword.
Definition: vec_bcd_ppc.h:4928
vec_bcdadd
static vBCD_t vec_bcdadd(vBCD_t a, vBCD_t b)
Decimal Add Signed Modulo Quadword.
Definition: vec_bcd_ppc.h:1789
vec_rdxct100b
static vui8_t vec_rdxct100b(vui8_t vra)
Vector Decimal Convert Binary Coded Decimal (BCD) digit pairs to radix 100 binary integer bytes.
Definition: vec_bcd_ppc.h:4623
vec_BCD2DFP
static _Decimal128 vec_BCD2DFP(vBCD_t val)
Convert a Vector Signed BCD value to __Decimal128.
Definition: vec_bcd_ppc.h:1663
vec_signbit_bcdsq
static int vec_signbit_bcdsq(vBCD_t vra)
Vector Sign bit from Signed BCD Quadword.
Definition: vec_bcd_ppc.h:4976
vui128_t
__vector unsigned __int128 vui128_t
vector of one 128-bit unsigned __int128 element.
Definition: vec_common_ppc.h:237
vec_bcdctud
static vui64_t vec_bcdctud(vBCD_t vra)
Vector Decimal Convert groups of 16 BCD digits to binary unsigned doublewords.
Definition: vec_bcd_ppc.h:2711
vec_msumudm
static vui128_t vec_msumudm(vui64_t a, vui64_t b, vui128_t c)
Vector Multiply-Sum Unsigned Doubleword Modulo.
Definition: vec_int128_ppc.h:5202
vec_vmuloud
static vui128_t vec_vmuloud(vui64_t a, vui64_t b)
Vector Multiply Odd Unsigned Doublewords.
Definition: vec_int128_ppc.h:7733
__VEC_U_128
Union used to transfer 128-bit data between vector and non-vector types.
Definition: vec_common_ppc.h:256
vec_rdxcf10e32q
static vui64_t vec_rdxcf10e32q(vui128_t vra)
Vector Decimal Convert radix 10**32 Binary quadword to pairs of radix 10**16 binary doublewords.
Definition: vec_bcd_ppc.h:4489
vec_zndctuq
static vui128_t vec_zndctuq(vui8_t zone00, vui8_t zone16)
Vector Zoned Decimal Convert 32 digits to binary unsigned quadword.
Definition: vec_bcd_ppc.h:5049
CONST_VINT128_DW
#define CONST_VINT128_DW(__dw0, __dw1)
Initializer for 128-bits vector, as two unsigned long long elements in high->low order....
Definition: vec_common_ppc.h:298
vec_mulouw
static vui64_t vec_mulouw(vui32_t a, vui32_t b)
Vector multiply odd unsigned words.
Definition: vec_int32_ppc.h:1043
vec_mulubm
static vui8_t vec_mulubm(vui8_t vra, vui8_t vrb)
Vector Multiply Unsigned Byte Modulo.
Definition: vec_char_ppc.h:664
vec_bcdcmp_gesq
static vbBCD_t vec_bcdcmp_gesq(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for greater than or equal.
Definition: vec_bcd_ppc.h:2219
vi8_t
__vector signed char vi8_t
vector of 8-bit signed char elements.
Definition: vec_common_ppc.h:211
_BCD_CONST_PLUS_ONE
#define _BCD_CONST_PLUS_ONE
vector signed BCD constant +1.
Definition: vec_bcd_ppc.h:1572
vec_bcdsubesqm
static vBCD_t vec_bcdsubesqm(vBCD_t a, vBCD_t b, vBCD_t c)
Decimal Subtract Extended Signed Modulo Quadword.
Definition: vec_bcd_ppc.h:3613
vec_setb_sq
static vb128_t vec_setb_sq(vi128_t vra)
Vector Set Bool from Signed Quadword.
Definition: vec_int128_ppc.h:6576
vec_bcdcfuq
static vBCD_t vec_bcdcfuq(vui128_t vra)
Vector Decimal Convert From Unsigned Quadword returning up to 32 BCD digits.
Definition: vec_bcd_ppc.h:2083
vec_bcdcfz
static vBCD_t vec_bcdcfz(vui8_t vrb)
Vector Decimal Convert From Zoned.
Definition: vec_bcd_ppc.h:2126
vec_bcdaddcsq
static vBCD_t vec_bcdaddcsq(vBCD_t a, vBCD_t b)
Decimal Add & write Carry Signed Quadword.
Definition: vec_bcd_ppc.h:1836
vec_BCD2BIN
static vui64_t vec_BCD2BIN(vBCD_t val)
Convert vector of 2 x unsigned 16-digit BCD values to vector 2 x doubleword binary values.
Definition: vec_bcd_ppc.h:1622
vec_bcdutruncqi
static vBCD_t vec_bcdutruncqi(vBCD_t vra, const unsigned short _N)
Decimal Unsigned Truncate Quadword Immediate. Truncate a vector unsigned BCD value vra to N-digits,...
Definition: vec_bcd_ppc.h:3825
vec_cbcdsubcsq
static vBCD_t vec_cbcdsubcsq(vBCD_t *cout, vBCD_t a, vBCD_t b)
Combined Decimal Subtract & Write Carry Signed Quadword.
Definition: vec_bcd_ppc.h:4135
vec_bcdcmpgt
static int vec_bcdcmpgt(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for greater than.
Definition: vec_bcd_ppc.h:2447
CONST_VINT128_B
#define CONST_VINT128_B(_b0, _b1, _b2, _b3, _b4, _b5, _b6, _b7, _b8, _b9, _b10, _b11, _b12, _b13, _b14, _b15)
Arrange byte elements of an unsigned int initializer in high->low order. May require an explicit cast.
Definition: vec_common_ppc.h:316
vec_mrgald
static vui64_t vec_mrgald(vui128_t vra, vui128_t vrb)
Vector Merge Algebraic Low Doublewords.
Definition: vec_int64_ppc.h:2736
vec_quantize0_Decimal128
static _Decimal128 vec_quantize0_Decimal128(_Decimal128 val)
Quantize (truncate) a _Decimal128 value before convert to BCD.
Definition: vec_bcd_ppc.h:4235
vec_bcdctsq
static vi128_t vec_bcdctsq(vBCD_t vra)
Vector Decimal Convert to Signed Quadword.
Definition: vec_bcd_ppc.h:2597
vec_mrgahd
static vui64_t vec_mrgahd(vui128_t vra, vui128_t vrb)
Vector Merge Algebraic High Doublewords.
Definition: vec_int64_ppc.h:2710
vui32_t
__vector unsigned int vui32_t
vector of 32-bit unsigned int elements.
Definition: vec_common_ppc.h:206
CONST_VINT128_DW128
#define CONST_VINT128_DW128(__dw0, __dw1)
A vector unsigned __int128 initializer, as two unsigned long long elements in high->low order.
Definition: vec_common_ppc.h:301
vec_mulhud
static vui64_t vec_mulhud(vui64_t vra, vui64_t vrb)
Vector Multiply High Unsigned Doubleword.
Definition: vec_int128_ppc.h:5277
vec_bcddive
static vBCD_t vec_bcddive(vBCD_t a, vBCD_t b)
Decimal Divide Extended.
Definition: vec_bcd_ppc.h:2900
vec_bcdctuw
static vui32_t vec_bcdctuw(vBCD_t vra)
Vector Decimal Convert groups of 8 BCD digits to binary unsigned words.
Definition: vec_bcd_ppc.h:2684
_BCD_CONST_SIGN_MASK
#define _BCD_CONST_SIGN_MASK
vector BCD sign mask in bits 124:127.
Definition: vec_bcd_ppc.h:1578
VEC_HW_L_DWH
#define VEC_HW_L_DWH
Element index for lowest order hword of the high dword.
Definition: vec_common_ppc.h:340
vec_adduqm
static vui128_t vec_adduqm(vui128_t a, vui128_t b)
Vector Add Unsigned Quadword Modulo.
Definition: vec_int128_ppc.h:2739
vec_rdxcfzt100b
static vui8_t vec_rdxcfzt100b(vui8_t zone00, vui8_t zone16)
Vector Decimal Convert Zoned Decimal digit pairs to radix 100 binary integer bytes....
Definition: vec_bcd_ppc.h:4559
vec_addudm
static vui64_t vec_addudm(vui64_t a, vui64_t b)
Vector Add Unsigned Doubleword Modulo.
Definition: vec_int64_ppc.h:1261
vf64_t
__vector double vf64_t
vector of 64-bit double elements.
Definition: vec_common_ppc.h:221
vec_srq
static vui128_t vec_srq(vui128_t vra, vui128_t vrb)
Vector Shift Right Quadword.
Definition: vec_int128_ppc.h:7114
vec_mulhuq
static vui128_t vec_mulhuq(vui128_t a, vui128_t b)
Vector Multiply High Unsigned Quadword.
Definition: vec_int128_ppc.h:5387
vec_bcdsrqi
static vBCD_t vec_bcdsrqi(vBCD_t vra, const unsigned int _N)
Vector BCD Shift Right Signed Quadword Immediate.
Definition: vec_bcd_ppc.h:3335
__VEC_U_128::vx4
vui32_t vx4
128 bit Vector of 4 unsigned int elements.
Definition: vec_common_ppc.h:273
vec_setbool_bcdinv
static vb128_t vec_setbool_bcdinv(vBCD_t vra)
Vector Set Bool from Signed BCD Quadword if invalid.
Definition: vec_bcd_ppc.h:4857
__VEC_U_128::vf2
vf64_t vf2
128 bit Vector of 2 double float elements.
Definition: vec_common_ppc.h:279
vBCD_t
#define vBCD_t
vector signed BCD integer of up to 31 decimal digits.
Definition: vec_bcd_ppc.h:1565
vec_rdxcf100mw
static vui16_t vec_rdxcf100mw(vui32_t vra)
Vector Decimal Convert radix 10**8 Binary words to pairs of radix 10,000 binary halfwords.
Definition: vec_bcd_ppc.h:4372
vec_rdxct100mw
static vui32_t vec_rdxct100mw(vui16_t vra)
Vector Decimal Convert radix 10,000 digit halfword pairs to radix 100,000,000 binary integer words.
Definition: vec_bcd_ppc.h:4720
vec_bcdcmp_lesq
static vbBCD_t vec_bcdcmp_lesq(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for less than or equal.
Definition: vec_bcd_ppc.h:2287
vec_bcdutrunc
static vBCD_t vec_bcdutrunc(vBCD_t vra, vui16_t vrb)
Decimal Unsigned Truncate. Truncate a vector unsigned BCD value vra to N-digits, where N is the unsig...
Definition: vec_bcd_ppc.h:3781
vec_bcdsetsgn
static vBCD_t vec_bcdsetsgn(vBCD_t vrb)
Vector Set preferred BCD Sign.
Definition: vec_bcd_ppc.h:3158
vec_slqi
static vui128_t vec_slqi(vui128_t vra, const unsigned int shb)
Vector Shift Left Quadword Immediate.
Definition: vec_int128_ppc.h:6748
vec_rdxct10kh
static vui16_t vec_rdxct10kh(vui8_t vra)
Vector Decimal Convert radix 100 digit pairs to radix 10,000 binary integer halfwords.
Definition: vec_bcd_ppc.h:4672
vec_cbcdmul
static vBCD_t vec_cbcdmul(vBCD_t *p_high, vBCD_t a, vBCD_t b)
Combined Vector Signed BCD Multiply High/Low.
Definition: vec_bcd_ppc.h:4045
vec_slq
static vui128_t vec_slq(vui128_t vra, vui128_t vrb)
Vector Shift Left Quadword.
Definition: vec_int128_ppc.h:6707
vec_bcdsluqi
static vBCD_t vec_bcdsluqi(vBCD_t vra, const unsigned int _N)
Vector BCD Shift Left Unsigned Quadword Immediate.
Definition: vec_bcd_ppc.h:3238
VEC_W_L
#define VEC_W_L
Element index for lowest order word.
Definition: vec_common_ppc.h:328
vec_pack_Decimal128
static vf64_t vec_pack_Decimal128(_Decimal128 lval)
Pack a FPR pair (_Decimal128) to a doubleword vector (vector double).
Definition: vec_bcd_ppc.h:4199
vec_mul10euq
static vui128_t vec_mul10euq(vui128_t a, vui128_t cin)
Vector Multiply by 10 Extended Unsigned Quadword.
Definition: vec_int128_ppc.h:4903
vec_bcdcmp_ltsq
static vbBCD_t vec_bcdcmp_ltsq(vBCD_t vra, vBCD_t vrb)
Vector Compare Signed BCD Quadword for less than.
Definition: vec_bcd_ppc.h:2321
vec_char_ppc.h
Header package containing a collection of 128-bit SIMD operations over 8-bit integer (char) elements.
VEC_BYTE_L_DWH
#define VEC_BYTE_L_DWH
Element index for lowest order byte of the high dword.
Definition: vec_common_ppc.h:346
VEC_HW_L
#define VEC_HW_L
Element index for lowest order hword.
Definition: vec_common_ppc.h:342