summaryrefslogtreecommitdiffstats
path: root/runtime/arith.c
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/arith.c')
-rw-r--r--runtime/arith.c440
1 files changed, 335 insertions, 105 deletions
diff --git a/runtime/arith.c b/runtime/arith.c
index 97121ae8..60576090 100644
--- a/runtime/arith.c
+++ b/runtime/arith.c
@@ -20,7 +20,7 @@
/* 64-bit division for 64-bit cpus and i386 */
/* Other 32-bit cpus will need to modify this file. */
-#ifdef __i386__
+#if defined (__i386__) || defined(__arm__)
long long _div64 (long long u, long long v);
long long _mod64 (long long u, long long v);
#endif
@@ -46,7 +46,7 @@ int64_t _stp_div64 (const char **error, int64_t x, int64_t y)
if (unlikely (y == -1))
return -x;
-#ifdef __LP64__
+#if defined (__LP64__)
return x/y;
#else
if (likely ((x >= LONG_MIN && x <= LONG_MAX) &&
@@ -72,7 +72,7 @@ int64_t _stp_mod64 (const char **error, int64_t x, int64_t y)
if (unlikely (y == 1 || y == -1))
return 0;
-#ifdef __LP64__
+#if defined (__LP64__)
return x%y;
#else
if (likely ((x >= LONG_MIN && x <= LONG_MAX) &&
@@ -106,14 +106,15 @@ int _stp_random_pm (int n)
#endif /* _STP_TEST_ */
+#if defined (__i386__) || defined (__arm__)
-#ifdef __i386__
/* 64-bit division functions extracted from libgcc */
typedef long long DWtype;
typedef unsigned long long UDWtype;
typedef unsigned long UWtype;
typedef long Wtype;
typedef unsigned int USItype;
+typedef unsigned int UQItype __attribute__ ((mode (QI)));
#ifdef _BIG_ENDIAN
struct DWstruct {Wtype high, low;};
@@ -121,8 +122,15 @@ struct DWstruct {Wtype high, low;};
struct DWstruct {Wtype low, high;};
#endif
+#define __CLOBBER_CC : "cc"
+
#define W_TYPE_SIZE 32
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
typedef union
{
struct DWstruct s;
@@ -130,6 +138,7 @@ typedef union
} DWunion;
+#if defined (__i386__)
/* these are the i386 versions of these macros from gcc/longlong.h */
#define umul_ppmm(w1, w0, u, v) \
@@ -164,111 +173,330 @@ typedef union
(count) = __cbtmp ^ 31; \
} while (0)
-inline UDWtype _stp_udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
+#elif defined (__arm__)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
+ : "=r" ((USItype) (sh)), \
+ "=&r" ((USItype) (sl)) \
+ : "r" ((USItype) (ah)), \
+ "rI" ((USItype) (bh)), \
+ "r" ((USItype) (al)), \
+ "rI" ((USItype) (bl)) __CLOBBER_CC)
+#define umul_ppmm(xh, xl, a, b) \
+{register USItype __t0, __t1, __t2; \
+ __asm__ ("%@ Inlined umul_ppmm\n" \
+ " mov %2, %5, lsr #16\n" \
+ " mov %0, %6, lsr #16\n" \
+ " bic %3, %5, %2, lsl #16\n" \
+ " bic %4, %6, %0, lsl #16\n" \
+ " mul %1, %3, %4\n" \
+ " mul %4, %2, %4\n" \
+ " mul %3, %0, %3\n" \
+ " mul %0, %2, %0\n" \
+ " adds %3, %4, %3\n" \
+ " addcs %0, %0, #65536\n" \
+ " adds %1, %1, %3, lsl #16\n" \
+ " adc %0, %0, %3, lsr #16" \
+ : "=&r" ((USItype) (xh)), \
+ "=r" ((USItype) (xl)), \
+ "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
+ : "r" ((USItype) (a)), \
+ "r" ((USItype) (b)) __CLOBBER_CC );}
+
+#endif
+
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+ do { \
+ UWtype __d1, __d0, __q1, __q0; \
+ UWtype __r1, __r0, __m; \
+ __d1 = __ll_highpart (d); \
+ __d0 = __ll_lowpart (d); \
+ \
+ __r1 = (n1) % __d1; \
+ __q1 = (n1) / __d1; \
+ __m = (UWtype) __q1 * __d0; \
+ __r1 = __r1 * __ll_B | __ll_highpart (n0); \
+ if (__r1 < __m) \
+ { \
+ __q1--, __r1 += (d); \
+ if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+ if (__r1 < __m) \
+ __q1--, __r1 += (d); \
+ } \
+ __r1 -= __m; \
+ \
+ __r0 = __r1 % __d1; \
+ __q0 = __r1 / __d1; \
+ __m = (UWtype) __q0 * __d0; \
+ __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
+ if (__r0 < __m) \
+ { \
+ __q0--, __r0 += (d); \
+ if (__r0 >= (d)) \
+ if (__r0 < __m) \
+ __q0--, __r0 += (d); \
+ } \
+ __r0 -= __m; \
+ \
+ (q) = (UWtype) __q1 * __ll_B | __q0; \
+ (r) = __r0; \
+ } while (0)
+
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#else
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
+
+#if !defined (count_leading_zeros)
+const UQItype _stp_clz_tab[256] =
{
- const DWunion nn = {.ll = n};
- const DWunion dd = {.ll = d};
- DWunion ww,rr;
- UWtype d0, d1, n0, n1, n2;
- UWtype q0, q1;
- UWtype b, bm;
-
- d0 = dd.s.low;
- d1 = dd.s.high;
- n0 = nn.s.low;
- n1 = nn.s.high;
-
- if (d1 == 0) {
- if (d0 > n1) {
- /* 0q = nn / 0D */
- udiv_qrnnd (q0, n0, n1, n0, d0);
- q1 = 0;
- /* Remainder in n0. */
- } else {
- /* qq = NN / 0d */
- if (d0 == 0)
- d0 = 1 / d0; /* Divide intentionally by zero. */
- udiv_qrnnd (q1, n1, 0, n1, d0);
- udiv_qrnnd (q0, n0, n1, n0, d0);
- /* Remainder in n0. */
+ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+};
+
+#define count_leading_zeros(count, x) \
+ do { \
+ UWtype __xr = (x); \
+ UWtype __a; \
+ \
+ if (W_TYPE_SIZE <= 32) \
+ { \
+ __a = __xr < ((UWtype)1<<2*__BITS4) \
+ ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \
+ : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
+ } \
+ else \
+ { \
+ for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+ if (((__xr >> __a) & 0xff) != 0) \
+ break; \
+ } \
+ \
+ (count) = W_TYPE_SIZE - (_stp_clz_tab[__xr >> __a] + __a); \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+UDWtype
+_stp_udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
+{
+ const DWunion nn = {.ll = n};
+ const DWunion dd = {.ll = d};
+ DWunion ww, rr;
+ UWtype d0, d1, n0, n1, n2;
+ UWtype q0, q1;
+ UWtype b, bm;
+
+ d0 = dd.s.low;
+ d1 = dd.s.high;
+ n0 = nn.s.low;
+ n1 = nn.s.high;
+
+#if !UDIV_NEEDS_NORMALIZATION
+ if (d1 == 0)
+ {
+ if (d0 > n1)
+ {
+ /* 0q = nn / 0D */
+
+ udiv_qrnnd (q0, n0, n1, n0, d0);
+ q1 = 0;
+
+ /* Remainder in n0. */
+ }
+ else
+ {
+ /* qq = NN / 0d */
+
+ if (d0 == 0)
+ d0 = 1 / d0; /* Divide intentionally by zero. */
+
+ udiv_qrnnd (q1, n1, 0, n1, d0);
+ udiv_qrnnd (q0, n0, n1, n0, d0);
+
+ /* Remainder in n0. */
+ }
+
+ if (rp != 0)
+ {
+ rr.s.low = n0;
+ rr.s.high = 0;
+ *rp = rr.ll;
+ }
+ }
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+ if (d1 == 0)
+ {
+ if (d0 > n1)
+ {
+ /* 0q = nn / 0D */
+
+ count_leading_zeros (bm, d0);
+
+ if (bm != 0)
+ {
+ /* Normalize, i.e. make the most significant bit of the
+ denominator set. */
+
+ d0 = d0 << bm;
+ n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
+ n0 = n0 << bm;
+ }
+
+ udiv_qrnnd (q0, n0, n1, n0, d0);
+ q1 = 0;
+
+ /* Remainder in n0 >> bm. */
+ }
+ else
+ {
+ /* qq = NN / 0d */
+
+ if (d0 == 0)
+ d0 = 1 / d0; /* Divide intentionally by zero. */
+
+ count_leading_zeros (bm, d0);
+
+ if (bm == 0)
+ {
+ /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+ conclude (the most significant bit of n1 is set) /\ (the
+ leading quotient digit q1 = 1).
+
+ This special case is necessary, not an optimization.
+ (Shifts counts of W_TYPE_SIZE are undefined.) */
+
+ n1 -= d0;
+ q1 = 1;
+ }
+ else
+ {
+ /* Normalize. */
+
+ b = W_TYPE_SIZE - bm;
+
+ d0 = d0 << bm;
+ n2 = n1 >> b;
+ n1 = (n1 << bm) | (n0 >> b);
+ n0 = n0 << bm;
+
+ udiv_qrnnd (q1, n1, n2, n1, d0);
+ }
+
+ /* n1 != d0... */
+
+ udiv_qrnnd (q0, n0, n1, n0, d0);
+
+ /* Remainder in n0 >> bm. */
+ }
+
+ if (rp != 0)
+ {
+ rr.s.low = n0 >> bm;
+ rr.s.high = 0;
+ *rp = rr.ll;
+ }
+ }
+#endif /* UDIV_NEEDS_NORMALIZATION */
+
+ else
+ {
+ if (d1 > n1)
+ {
+ /* 00 = nn / DD */
+
+ q0 = 0;
+ q1 = 0;
+
+ /* Remainder in n1n0. */
+ if (rp != 0)
+ {
+ rr.s.low = n0;
+ rr.s.high = n1;
+ *rp = rr.ll;
+ }
+ }
+ else
+ {
+ /* 0q = NN / dd */
+
+ count_leading_zeros (bm, d1);
+ if (bm == 0)
+ {
+ /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+ conclude (the most significant bit of n1 is set) /\ (the
+ quotient digit q0 = 0 or 1).
+
+ This special case is necessary, not an optimization. */
+
+ /* The condition on the next line takes advantage of that
+ n1 >= d1 (true due to program flow). */
+ if (n1 > d1 || n0 >= d0)
+ {
+ q0 = 1;
+ sub_ddmmss (n1, n0, n1, n0, d1, d0);
}
-
- if (rp != 0) {
- rr.s.low = n0;
- rr.s.high = 0;
- *rp = rr.ll;
+ else
+ q0 = 0;
+
+ q1 = 0;
+
+ if (rp != 0)
+ {
+ rr.s.low = n0;
+ rr.s.high = n1;
+ *rp = rr.ll;
}
- } else {
- if (d1 > n1) {
- /* 00 = nn / DD */
- q0 = 0;
- q1 = 0;
-
- /* Remainder in n1n0. */
- if (rp != 0) {
- rr.s.low = n0;
- rr.s.high = n1;
- *rp = rr.ll;
- }
- } else {
- /* 0q = NN / dd */
- count_leading_zeros (bm, d1);
- if (bm == 0) {
- /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
- conclude (the most significant bit of n1 is set) /\ (the
- quotient digit q0 = 0 or 1).
- This special case is necessary, not an optimization. */
-
- /* The condition on the next line takes advantage of that
- n1 >= d1 (true due to program flow). */
- if (n1 > d1 || n0 >= d0) {
- q0 = 1;
- sub_ddmmss (n1, n0, n1, n0, d1, d0);
- } else
- q0 = 0;
-
- q1 = 0;
-
- if (rp != 0) {
- rr.s.low = n0;
- rr.s.high = n1;
- *rp = rr.ll;
- }
- } else {
- UWtype m1, m0;
- /* Normalize. */
-
- b = W_TYPE_SIZE - bm;
-
- d1 = (d1 << bm) | (d0 >> b);
- d0 = d0 << bm;
- n2 = n1 >> b;
- n1 = (n1 << bm) | (n0 >> b);
- n0 = n0 << bm;
-
- udiv_qrnnd (q0, n1, n2, n1, d1);
- umul_ppmm (m1, m0, q0, d0);
-
- if (m1 > n1 || (m1 == n1 && m0 > n0)) {
- q0--;
- sub_ddmmss (m1, m0, m1, m0, d1, d0);
- }
-
- q1 = 0;
-
- /* Remainder in (n1n0 - m1m0) >> bm. */
- if (rp != 0) {
- sub_ddmmss (n1, n0, n1, n0, m1, m0);
- rr.s.low = (n1 << b) | (n0 >> bm);
- rr.s.high = n1 >> bm;
- *rp = rr.ll;
- }
- }
+ }
+ else
+ {
+ UWtype m1, m0;
+ /* Normalize. */
+
+ b = W_TYPE_SIZE - bm;
+
+ d1 = (d1 << bm) | (d0 >> b);
+ d0 = d0 << bm;
+ n2 = n1 >> b;
+ n1 = (n1 << bm) | (n0 >> b);
+ n0 = n0 << bm;
+
+ udiv_qrnnd (q0, n1, n2, n1, d1);
+ umul_ppmm (m1, m0, q0, d0);
+
+ if (m1 > n1 || (m1 == n1 && m0 > n0))
+ {
+ q0--;
+ sub_ddmmss (m1, m0, m1, m0, d1, d0);
+ }
+
+ q1 = 0;
+
+ /* Remainder in (n1n0 - m1m0) >> bm. */
+ if (rp != 0)
+ {
+ sub_ddmmss (n1, n0, n1, n0, m1, m0);
+ rr.s.low = (n1 << b) | (n0 >> bm);
+ rr.s.high = n1 >> bm;
+ *rp = rr.ll;
}
+ }
}
-
- ww.s.low = q0; ww.s.high = q1;
- return ww.ll;
+ }
+
+ ww.s.low = q0; ww.s.high = q1;
+ return ww.ll;
}
long long _div64 (long long u, long long v)
@@ -311,5 +539,7 @@ long long _mod64 (long long u, long long v)
return w;
}
-#endif /* __i386__ */
+
+#endif /* __i386__ || __arm__ */
+
#endif /* _ARITH_C_ */