Diffstat (limited to 'runtime/arith.c')
-rw-r--r-- | runtime/arith.c | 440 |
1 file changed, 335 insertions, 105 deletions
diff --git a/runtime/arith.c b/runtime/arith.c
index 97121ae8..60576090 100644
--- a/runtime/arith.c
+++ b/runtime/arith.c
@@ -20,7 +20,7 @@
 /* 64-bit division for 64-bit cpus and i386 */
 /* Other 32-bit cpus will need to modify this file. */
 
-#ifdef __i386__
+#if defined (__i386__) || defined(__arm__)
 long long _div64 (long long u, long long v);
 long long _mod64 (long long u, long long v);
 #endif
@@ -46,7 +46,7 @@ int64_t _stp_div64 (const char **error, int64_t x, int64_t y)
     if (unlikely (y == -1))
         return -x;
-#ifdef __LP64__
+#if defined (__LP64__)
     return x/y;
 #else
     if (likely ((x >= LONG_MIN && x <= LONG_MAX) &&
@@ -72,7 +72,7 @@ int64_t _stp_mod64 (const char **error, int64_t x, int64_t y)
     if (unlikely (y == 1 || y == -1))
         return 0;
-#ifdef __LP64__
+#if defined (__LP64__)
     return x%y;
 #else
     if (likely ((x >= LONG_MIN && x <= LONG_MAX) &&
@@ -106,14 +106,15 @@ int _stp_random_pm (int n)
 #endif /* _STP_TEST_ */
 
 
+#if defined (__i386__) || defined (__arm__)
 
-#ifdef __i386__
 /* 64-bit division functions extracted from libgcc */
 typedef long long DWtype;
 typedef unsigned long long UDWtype;
 typedef unsigned long UWtype;
 typedef long Wtype;
 typedef unsigned int USItype;
+typedef unsigned int UQItype __attribute__ ((mode (QI)));
 
 #ifdef _BIG_ENDIAN
 struct DWstruct {Wtype high, low;};
@@ -121,8 +122,15 @@ struct DWstruct {Wtype high, low;};
 struct DWstruct {Wtype low, high;};
 #endif
 
+#define __CLOBBER_CC : "cc"
+
 #define W_TYPE_SIZE 32
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
 
 typedef union
 {
   struct DWstruct s;
@@ -130,6 +138,7 @@ typedef union
 } DWunion;
 
+#if defined (__i386__)
 
 /* these are the i386 versions of these macros from gcc/longlong.h */
 
 #define umul_ppmm(w1, w0, u, v) \
@@ -164,111 +173,330 @@ typedef union
     (count) = __cbtmp ^ 31; \
   } while (0)
 
-inline UDWtype _stp_udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
+#elif defined (__arm__)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
+           : "=r" ((USItype) (sh)), \
+             "=&r" ((USItype) (sl)) \
+           : "r" ((USItype) (ah)), \
+             "rI" ((USItype) (bh)), \
+             "r" ((USItype) (al)), \
+             "rI" ((USItype) (bl)) __CLOBBER_CC)
+#define umul_ppmm(xh, xl, a, b) \
+{register USItype __t0, __t1, __t2; \
+  __asm__ ("%@ Inlined umul_ppmm\n" \
+           " mov %2, %5, lsr #16\n" \
+           " mov %0, %6, lsr #16\n" \
+           " bic %3, %5, %2, lsl #16\n" \
+           " bic %4, %6, %0, lsl #16\n" \
+           " mul %1, %3, %4\n" \
+           " mul %4, %2, %4\n" \
+           " mul %3, %0, %3\n" \
+           " mul %0, %2, %0\n" \
+           " adds %3, %4, %3\n" \
+           " addcs %0, %0, #65536\n" \
+           " adds %1, %1, %3, lsl #16\n" \
+           " adc %0, %0, %3, lsr #16" \
+           : "=&r" ((USItype) (xh)), \
+             "=r" ((USItype) (xl)), \
+             "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
+           : "r" ((USItype) (a)), \
+             "r" ((USItype) (b)) __CLOBBER_CC );}
+
+#endif
+
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+  do { \
+    UWtype __d1, __d0, __q1, __q0; \
+    UWtype __r1, __r0, __m; \
+    __d1 = __ll_highpart (d); \
+    __d0 = __ll_lowpart (d); \
+    \
+    __r1 = (n1) % __d1; \
+    __q1 = (n1) / __d1; \
+    __m = (UWtype) __q1 * __d0; \
+    __r1 = __r1 * __ll_B | __ll_highpart (n0); \
+    if (__r1 < __m) \
+      { \
+        __q1--, __r1 += (d); \
+        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
+          if (__r1 < __m) \
+            __q1--, __r1 += (d); \
+      } \
+    __r1 -= __m; \
+    \
+    __r0 = __r1 % __d1; \
+    __q0 = __r1 / __d1; \
+    __m = (UWtype) __q0 * __d0; \
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
+    if (__r0 < __m) \
+      { \
+        __q0--, __r0 += (d); \
+        if (__r0 >= (d)) \
+          if (__r0 < __m) \
+            __q0--, __r0 += (d); \
+      } \
+    __r0 -= __m; \
+    \
+    (q) = (UWtype) __q1 * __ll_B | __q0; \
+    (r) = __r0; \
+  } while (0)
+
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#else
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
+
+#if !defined (count_leading_zeros)
+const UQItype _stp_clz_tab[256] =
 {
-  const DWunion nn = {.ll = n};
-  const DWunion dd = {.ll = d};
-  DWunion ww,rr;
-  UWtype d0, d1, n0, n1, n2;
-  UWtype q0, q1;
-  UWtype b, bm;
-
-  d0 = dd.s.low;
-  d1 = dd.s.high;
-  n0 = nn.s.low;
-  n1 = nn.s.high;
-
-  if (d1 == 0) {
-    if (d0 > n1) {
-      /* 0q = nn / 0D */
-      udiv_qrnnd (q0, n0, n1, n0, d0);
-      q1 = 0;
-      /* Remainder in n0. */
-    } else {
-      /* qq = NN / 0d */
-      if (d0 == 0)
-        d0 = 1 / d0; /* Divide intentionally by zero. */
-      udiv_qrnnd (q1, n1, 0, n1, d0);
-      udiv_qrnnd (q0, n0, n1, n0, d0);
-      /* Remainder in n0. */
+  0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+};
+
+#define count_leading_zeros(count, x) \
+  do { \
+    UWtype __xr = (x); \
+    UWtype __a; \
+    \
+    if (W_TYPE_SIZE <= 32) \
+      { \
+        __a = __xr < ((UWtype)1<<2*__BITS4) \
+          ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \
+          : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
+      } \
+    else \
+      { \
+        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+          if (((__xr >> __a) & 0xff) != 0) \
+            break; \
+      } \
+    \
+    (count) = W_TYPE_SIZE - (_stp_clz_tab[__xr >> __a] + __a); \
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+UDWtype
+_stp_udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
+{
+  const DWunion nn = {.ll = n};
+  const DWunion dd = {.ll = d};
+  DWunion ww, rr;
+  UWtype d0, d1, n0, n1, n2;
+  UWtype q0, q1;
+  UWtype b, bm;
+
+  d0 = dd.s.low;
+  d1 = dd.s.high;
+  n0 = nn.s.low;
+  n1 = nn.s.high;
+
+#if !UDIV_NEEDS_NORMALIZATION
+  if (d1 == 0)
+    {
+      if (d0 > n1)
+        {
+          /* 0q = nn / 0D */
+
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          q1 = 0;
+
+          /* Remainder in n0. */
+        }
+      else
+        {
+          /* qq = NN / 0d */
+
+          if (d0 == 0)
+            d0 = 1 / d0; /* Divide intentionally by zero. */
+
+          udiv_qrnnd (q1, n1, 0, n1, d0);
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+
+          /* Remainder in n0. */
+        }
+
+      if (rp != 0)
+        {
+          rr.s.low = n0;
+          rr.s.high = 0;
+          *rp = rr.ll;
+        }
+    }
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+  if (d1 == 0)
+    {
+      if (d0 > n1)
+        {
+          /* 0q = nn / 0D */
+
+          count_leading_zeros (bm, d0);
+
+          if (bm != 0)
+            {
+              /* Normalize, i.e. make the most significant bit of the
+                 denominator set. */
+
+              d0 = d0 << bm;
+              n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
+              n0 = n0 << bm;
+            }
+
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          q1 = 0;
+
+          /* Remainder in n0 >> bm. */
+        }
+      else
+        {
+          /* qq = NN / 0d */
+
+          if (d0 == 0)
+            d0 = 1 / d0; /* Divide intentionally by zero. */
+
+          count_leading_zeros (bm, d0);
+
+          if (bm == 0)
+            {
+              /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 leading quotient digit q1 = 1).
+
+                 This special case is necessary, not an optimization.
+                 (Shifts counts of W_TYPE_SIZE are undefined.) */
+
+              n1 -= d0;
+              q1 = 1;
+            }
+          else
+            {
+              /* Normalize. */
+
+              b = W_TYPE_SIZE - bm;
+
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+
+              udiv_qrnnd (q1, n1, n2, n1, d0);
+            }
+
+          /* n1 != d0... */
+
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+
+          /* Remainder in n0 >> bm. */
+        }
+
+      if (rp != 0)
+        {
+          rr.s.low = n0 >> bm;
+          rr.s.high = 0;
+          *rp = rr.ll;
+        }
+    }
+#endif /* UDIV_NEEDS_NORMALIZATION */
+
+  else
+    {
+      if (d1 > n1)
+        {
+          /* 00 = nn / DD */
+
+          q0 = 0;
+          q1 = 0;
+
+          /* Remainder in n1n0. */
+          if (rp != 0)
+            {
+              rr.s.low = n0;
+              rr.s.high = n1;
+              *rp = rr.ll;
+            }
+        }
+      else
+        {
+          /* 0q = NN / dd */
+
+          count_leading_zeros (bm, d1);
+          if (bm == 0)
+            {
+              /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 quotient digit q0 = 0 or 1).
+
+                 This special case is necessary, not an optimization. */
+
+              /* The condition on the next line takes advantage of that
+                 n1 >= d1 (true due to program flow). */
+              if (n1 > d1 || n0 >= d0)
+                {
+                  q0 = 1;
+                  sub_ddmmss (n1, n0, n1, n0, d1, d0);
                 }
-
-    if (rp != 0) {
-      rr.s.low = n0;
-      rr.s.high = 0;
-      *rp = rr.ll;
+              else
+                q0 = 0;
+
+              q1 = 0;
+
+              if (rp != 0)
+                {
+                  rr.s.low = n0;
+                  rr.s.high = n1;
+                  *rp = rr.ll;
                 }
-  } else {
-    if (d1 > n1) {
-      /* 00 = nn / DD */
-      q0 = 0;
-      q1 = 0;
-
-      /* Remainder in n1n0. */
-      if (rp != 0) {
-        rr.s.low = n0;
-        rr.s.high = n1;
-        *rp = rr.ll;
-      }
-    } else {
-      /* 0q = NN / dd */
-      count_leading_zeros (bm, d1);
-      if (bm == 0) {
-        /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
-           conclude (the most significant bit of n1 is set) /\ (the
-           quotient digit q0 = 0 or 1).
-           This special case is necessary, not an optimization. */
-
-        /* The condition on the next line takes advantage of that
-           n1 >= d1 (true due to program flow). */
-        if (n1 > d1 || n0 >= d0) {
-          q0 = 1;
-          sub_ddmmss (n1, n0, n1, n0, d1, d0);
-        } else
-          q0 = 0;
-
-        q1 = 0;
-
-        if (rp != 0) {
-          rr.s.low = n0;
-          rr.s.high = n1;
-          *rp = rr.ll;
-        }
-      } else {
-        UWtype m1, m0;
-        /* Normalize. */
-
-        b = W_TYPE_SIZE - bm;
-
-        d1 = (d1 << bm) | (d0 >> b);
-        d0 = d0 << bm;
-        n2 = n1 >> b;
-        n1 = (n1 << bm) | (n0 >> b);
-        n0 = n0 << bm;
-
-        udiv_qrnnd (q0, n1, n2, n1, d1);
-        umul_ppmm (m1, m0, q0, d0);
-
-        if (m1 > n1 || (m1 == n1 && m0 > n0)) {
-          q0--;
-          sub_ddmmss (m1, m0, m1, m0, d1, d0);
-        }
-
-        q1 = 0;
-
-        /* Remainder in (n1n0 - m1m0) >> bm. */
-        if (rp != 0) {
-          sub_ddmmss (n1, n0, n1, n0, m1, m0);
-          rr.s.low = (n1 << b) | (n0 >> bm);
-          rr.s.high = n1 >> bm;
-          *rp = rr.ll;
-        }
-      }
+            }
+          else
+            {
+              UWtype m1, m0;
+              /* Normalize. */
+
+              b = W_TYPE_SIZE - bm;
+
+              d1 = (d1 << bm) | (d0 >> b);
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+
+              udiv_qrnnd (q0, n1, n2, n1, d1);
+              umul_ppmm (m1, m0, q0, d0);
+
+              if (m1 > n1 || (m1 == n1 && m0 > n0))
+                {
+                  q0--;
+                  sub_ddmmss (m1, m0, m1, m0, d1, d0);
+                }
+
+              q1 = 0;
+
+              /* Remainder in (n1n0 - m1m0) >> bm. */
+              if (rp != 0)
+                {
+                  sub_ddmmss (n1, n0, n1, n0, m1, m0);
+                  rr.s.low = (n1 << b) | (n0 >> bm);
+                  rr.s.high = n1 >> bm;
+                  *rp = rr.ll;
                 }
+            }
         }
-
-  ww.s.low = q0; ww.s.high = q1;
-  return ww.ll;
+    }
+
+  ww.s.low = q0; ww.s.high = q1;
+  return ww.ll;
 }
 
 long long _div64 (long long u, long long v)
@@ -311,5 +539,7 @@ long long _mod64 (long long u, long long v)
     return w;
 }
 
-#endif /* __i386__ */
+
+#endif /* __i386__ || __arm__ */
+
 #endif /* _ARITH_C_ */
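Side note (not part of the commit): the signed wrappers _div64/_mod64, whose full bodies lie outside the hunks shown above, follow the usual libgcc pattern around the unsigned helper — reduce the operands to magnitudes, call _stp_udivmoddi4 (quotient returned, remainder stored through rp when it is non-NULL), then restore the sign. A minimal host-side sketch of that reduction; udivmod64_stub is a hypothetical stand-in for _stp_udivmoddi4 so the example compiles and runs on any host:

/* Sketch of the libgcc-style signed-divide reduction assumed above. */
#include <stdint.h>
#include <stdio.h>

/* Stand-in for _stp_udivmoddi4: quotient is the return value, the
   remainder is handed back through rp, as in runtime/arith.c. */
static uint64_t udivmod64_stub (uint64_t n, uint64_t d, uint64_t *rp)
{
  if (rp)
    *rp = n % d;
  return n / d;
}

static int64_t div64_sketch (int64_t u, int64_t v)
{
  int negate = 0;
  uint64_t un = (uint64_t) u, vn = (uint64_t) v;

  if (u < 0) { un = 0 - un; negate = !negate; }   /* work on magnitudes */
  if (v < 0) { vn = 0 - vn; negate = !negate; }

  uint64_t q = udivmod64_stub (un, vn, 0);        /* unsigned divide */
  return negate ? -(int64_t) q : (int64_t) q;     /* restore the sign */
}

int main (void)
{
  /* -1000000007 / 13 truncates toward zero: prints -76923077. */
  printf ("%lld\n", (long long) div64_sketch (-1000000007LL, 13));
  return 0;
}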