فهرست منبع

Auto merge of #192 - est31:master, r=alexcrichton

Refactor float implementation

Refactors the float implementation. Fixes #169. Parts of the PR were inspired by a previous PR by @mattico.
bors 7 سال پیش
والد
کامیت
35dec6bd8a
5 فایلهای تغییر یافته به همراه 391 افزوده شده و 346 حذف شده
  1. 159 159
      src/float/add.rs
  2. 133 128
      src/float/conv.rs
  3. 18 1
      src/float/mod.rs
  4. 13 9
      src/float/pow.rs
  5. 68 49
      src/int/mod.rs

+ 159 - 159
src/float/add.rs

@@ -1,196 +1,196 @@
-use core::num::Wrapping;
-
+use int::{Int, CastInto};
 use float::Float;
 
 /// Returns `a + b`
-macro_rules! add {
-    ($a:expr, $b:expr, $ty:ty) => ({
-        let a = $a;
-        let b = $b;
-        let one = Wrapping(1 as <$ty as Float>::Int);
-        let zero = Wrapping(0 as <$ty as Float>::Int);
-
-        let bits =             Wrapping(<$ty>::BITS as <$ty as Float>::Int);
-        let significand_bits = Wrapping(<$ty>::SIGNIFICAND_BITS as <$ty as Float>::Int);
-        let exponent_bits =    bits - significand_bits - one;
-        let max_exponent =     (one << exponent_bits.0 as usize) - one;
-
-        let implicit_bit =     one << significand_bits.0 as usize;
-        let significand_mask = implicit_bit - one;
-        let sign_bit =         one << (significand_bits + exponent_bits).0 as usize;
-        let abs_mask =         sign_bit - one;
-        let exponent_mask =    abs_mask ^ significand_mask;
-        let inf_rep =          exponent_mask;
-        let quiet_bit =        implicit_bit >> 1;
-        let qnan_rep =         exponent_mask | quiet_bit;
-
-        let mut a_rep = Wrapping(a.repr());
-        let mut b_rep = Wrapping(b.repr());
-        let a_abs = a_rep & abs_mask;
-        let b_abs = b_rep & abs_mask;
-
-        // Detect if a or b is zero, infinity, or NaN.
-        if a_abs - one >= inf_rep - one ||
-            b_abs - one >= inf_rep - one {
-            // NaN + anything = qNaN
-            if a_abs > inf_rep {
-                return <$ty as Float>::from_repr((a_abs | quiet_bit).0);
-            }
-            // anything + NaN = qNaN
-            if b_abs > inf_rep {
-                return <$ty as Float>::from_repr((b_abs | quiet_bit).0);
-            }
-
-            if a_abs == inf_rep {
-                // +/-infinity + -/+infinity = qNaN
-                if (a.repr() ^ b.repr()) == sign_bit.0 {
-                    return <$ty as Float>::from_repr(qnan_rep.0);
-                } else {
-                    // +/-infinity + anything remaining = +/- infinity
-                    return a;
-                }
-            }
+fn add<F: Float>(a: F, b: F) -> F where
+    u32: CastInto<F::Int>,
+    F::Int: CastInto<u32>,
+    i32: CastInto<F::Int>,
+    F::Int: CastInto<i32>,
+{
+    let one = F::Int::ONE;
+    let zero = F::Int::ZERO;
+
+    let bits =             F::BITS.cast();
+    let significand_bits = F::SIGNIFICAND_BITS;
+    let max_exponent =     F::EXPONENT_MAX;
+
+    let implicit_bit =     F::IMPLICIT_BIT;
+    let significand_mask = F::SIGNIFICAND_MASK;
+    let sign_bit =         F::SIGN_MASK as F::Int;
+    let abs_mask =         sign_bit - one;
+    let exponent_mask =    F::EXPONENT_MASK;
+    let inf_rep =          exponent_mask;
+    let quiet_bit =        implicit_bit >> 1;
+    let qnan_rep =         exponent_mask | quiet_bit;
+
+    let mut a_rep = a.repr();
+    let mut b_rep = b.repr();
+    let a_abs = a_rep & abs_mask;
+    let b_abs = b_rep & abs_mask;
+
+    // Detect if a or b is zero, infinity, or NaN.
+    if a_abs.wrapping_sub(one) >= inf_rep - one ||
+        b_abs.wrapping_sub(one) >= inf_rep - one {
+        // NaN + anything = qNaN
+        if a_abs > inf_rep {
+            return F::from_repr(a_abs | quiet_bit);
+        }
+        // anything + NaN = qNaN
+        if b_abs > inf_rep {
+            return F::from_repr(b_abs | quiet_bit);
+        }
 
-            // anything remaining + +/-infinity = +/-infinity
-            if b_abs == inf_rep {
-                return b;
+        if a_abs == inf_rep {
+            // +/-infinity + -/+infinity = qNaN
+            if (a.repr() ^ b.repr()) == sign_bit {
+                return F::from_repr(qnan_rep);
+            } else {
+                // +/-infinity + anything remaining = +/- infinity
+                return a;
             }
+        }
 
-            // zero + anything = anything
-            if a_abs.0 == 0 {
-                // but we need to get the sign right for zero + zero
-                if b_abs.0 == 0 {
-                    return <$ty as Float>::from_repr(a.repr() & b.repr());
-                } else {
-                    return b;
-                }
-            }
+        // anything remaining + +/-infinity = +/-infinity
+        if b_abs == inf_rep {
+            return b;
+        }
 
-            // anything + zero = anything
-            if b_abs.0 == 0 {
-                 return a;
+        // zero + anything = anything
+        if a_abs == Int::ZERO {
+            // but we need to get the sign right for zero + zero
+            if b_abs == Int::ZERO {
+                return F::from_repr(a.repr() & b.repr());
+            } else {
+                return b;
             }
         }
 
-        // Swap a and b if necessary so that a has the larger absolute value.
-        if b_abs > a_abs {
-            // Don't use mem::swap because it may generate references to memcpy in unoptimized code.
-            let tmp = a_rep;
-            a_rep = b_rep;
-            b_rep = tmp;
+        // anything + zero = anything
+        if b_abs == Int::ZERO {
+             return a;
         }
+    }
+
+    // Swap a and b if necessary so that a has the larger absolute value.
+    if b_abs > a_abs {
+        // Don't use mem::swap because it may generate references to memcpy in unoptimized code.
+        let tmp = a_rep;
+        a_rep = b_rep;
+        b_rep = tmp;
+    }
+
+    // Extract the exponent and significand from the (possibly swapped) a and b.
+    let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits).cast();
+    let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits).cast();
+    let mut a_significand = a_rep & significand_mask;
+    let mut b_significand = b_rep & significand_mask;
+
+    // normalize any denormals, and adjust the exponent accordingly.
+    if a_exponent == 0 {
+        let (exponent, significand) = F::normalize(a_significand);
+        a_exponent = exponent;
+        a_significand = significand;
+    }
+    if b_exponent == 0 {
+        let (exponent, significand) = F::normalize(b_significand);
+        b_exponent = exponent;
+        b_significand = significand;
+    }
 
-        // Extract the exponent and significand from the (possibly swapped) a and b.
-        let mut a_exponent = Wrapping((a_rep >> significand_bits.0 as usize & max_exponent).0 as i32);
-        let mut b_exponent = Wrapping((b_rep >> significand_bits.0 as usize & max_exponent).0 as i32);
-        let mut a_significand = a_rep & significand_mask;
-        let mut b_significand = b_rep & significand_mask;
-
-        // normalize any denormals, and adjust the exponent accordingly.
-        if a_exponent.0 == 0 {
-            let (exponent, significand) = <$ty>::normalize(a_significand.0);
-            a_exponent = Wrapping(exponent);
-            a_significand = Wrapping(significand);
+    // The sign of the result is the sign of the larger operand, a.  If they
+    // have opposite signs, we are performing a subtraction; otherwise addition.
+    let result_sign = a_rep & sign_bit;
+    let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero;
+
+    // Shift the significands to give us round, guard and sticky, and or in the
+    // implicit significand bit.  (If we fell through from the denormal path it
+    // was already set by normalize(), but setting it twice won't hurt
+    // anything.)
+    a_significand = (a_significand | implicit_bit) << 3;
+    b_significand = (b_significand | implicit_bit) << 3;
+
+    // Shift the significand of b by the difference in exponents, with a sticky
+    // bottom bit to get rounding correct.
+    let align = a_exponent.wrapping_sub(b_exponent).cast();
+    if align != Int::ZERO {
+        if align < bits {
+            let sticky = F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO);
+            b_significand = (b_significand >> align.cast()) | sticky;
+        } else {
+            b_significand = one; // sticky; b is known to be non-zero.
         }
-        if b_exponent.0 == 0 {
-            let (exponent, significand) = <$ty>::normalize(b_significand.0);
-            b_exponent = Wrapping(exponent);
-            b_significand = Wrapping(significand);
+    }
+    if subtraction {
+        a_significand = a_significand.wrapping_sub(b_significand);
+        // If a == -b, return +zero.
+        if a_significand == Int::ZERO {
+            return F::from_repr(Int::ZERO);
         }
 
-        // The sign of the result is the sign of the larger operand, a.  If they
-        // have opposite signs, we are performing a subtraction; otherwise addition.
-        let result_sign = a_rep & sign_bit;
-        let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero;
-
-        // Shift the significands to give us round, guard and sticky, and or in the
-        // implicit significand bit.  (If we fell through from the denormal path it
-        // was already set by normalize(), but setting it twice won't hurt
-        // anything.)
-        a_significand = (a_significand | implicit_bit) << 3;
-        b_significand = (b_significand | implicit_bit) << 3;
-
-        // Shift the significand of b by the difference in exponents, with a sticky
-        // bottom bit to get rounding correct.
-        let align = Wrapping((a_exponent - b_exponent).0 as <$ty as Float>::Int);
-        if align.0 != 0 {
-            if align < bits {
-                let sticky = ((b_significand << (bits - align).0 as usize).0 != 0) as <$ty as Float>::Int;
-                b_significand = (b_significand >> align.0 as usize) | Wrapping(sticky);
-            } else {
-                b_significand = one; // sticky; b is known to be non-zero.
-            }
+        // If partial cancellation occured, we need to left-shift the result
+        // and adjust the exponent:
+        if a_significand < implicit_bit << 3 {
+            let shift = a_significand.leading_zeros() as i32
+                - (implicit_bit << 3).leading_zeros() as i32;
+            a_significand <<= shift;
+            a_exponent -= shift;
         }
-        if subtraction {
-            a_significand -= b_significand;
-            // If a == -b, return +zero.
-            if a_significand.0 == 0 {
-                return <$ty as Float>::from_repr(0);
-            }
-
-            // If partial cancellation occured, we need to left-shift the result
-            // and adjust the exponent:
-            if a_significand < implicit_bit << 3 {
-                let shift = a_significand.0.leading_zeros() as i32
-                    - (implicit_bit << 3).0.leading_zeros() as i32;
-                a_significand <<= shift as usize;
-                a_exponent -= Wrapping(shift);
-            }
-        } else /* addition */ {
-            a_significand += b_significand;
-
-            // If the addition carried up, we need to right-shift the result and
-            // adjust the exponent:
-            if (a_significand & implicit_bit << 4).0 != 0 {
-                let sticky = ((a_significand & one).0 != 0) as <$ty as Float>::Int;
-                a_significand = a_significand >> 1 | Wrapping(sticky);
-                a_exponent += Wrapping(1);
-            }
+    } else /* addition */ {
+        a_significand += b_significand;
+
+        // If the addition carried up, we need to right-shift the result and
+        // adjust the exponent:
+        if a_significand & implicit_bit << 4 != Int::ZERO {
+            let sticky = F::Int::from_bool(a_significand & one != Int::ZERO);
+            a_significand = a_significand >> 1 | sticky;
+            a_exponent += 1;
         }
+    }
 
-        // If we have overflowed the type, return +/- infinity:
-        if a_exponent >= Wrapping(max_exponent.0 as i32) {
-            return <$ty>::from_repr((inf_rep | result_sign).0);
-        }
+    // If we have overflowed the type, return +/- infinity:
+    if a_exponent >= max_exponent as i32 {
+        return F::from_repr(inf_rep | result_sign);
+    }
 
-        if a_exponent.0 <= 0 {
-            // Result is denormal before rounding; the exponent is zero and we
-            // need to shift the significand.
-            let shift = Wrapping((Wrapping(1) - a_exponent).0 as <$ty as Float>::Int);
-            let sticky = ((a_significand << (bits - shift).0 as usize).0 != 0) as <$ty as Float>::Int;
-            a_significand = a_significand >> shift.0 as usize | Wrapping(sticky);
-            a_exponent = Wrapping(0);
-        }
+    if a_exponent <= 0 {
+        // Result is denormal before rounding; the exponent is zero and we
+        // need to shift the significand.
+        let shift = (1 - a_exponent).cast();
+        let sticky = F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO);
+        a_significand = a_significand >> shift.cast() | sticky;
+        a_exponent = 0;
+    }
 
-        // Low three bits are round, guard, and sticky.
-        let round_guard_sticky: i32 = (a_significand.0 & 0x7) as i32;
+    // Low three bits are round, guard, and sticky.
+    let a_significand_i32: i32 = a_significand.cast();
+    let round_guard_sticky: i32 = a_significand_i32 & 0x7;
 
-        // Shift the significand into place, and mask off the implicit bit.
-        let mut result = a_significand >> 3 & significand_mask;
+    // Shift the significand into place, and mask off the implicit bit.
+    let mut result = a_significand >> 3 & significand_mask;
 
-        // Insert the exponent and sign.
-        result |= Wrapping(a_exponent.0 as <$ty as Float>::Int) << significand_bits.0 as usize;
-        result |= result_sign;
+    // Insert the exponent and sign.
+    result |= a_exponent.cast() << significand_bits;
+    result |= result_sign;
 
-        // Final rounding.  The result may overflow to infinity, but that is the
-        // correct result in that case.
-        if round_guard_sticky > 0x4 { result += one; }
-        if round_guard_sticky == 0x4 { result += result & one; }
+    // Final rounding.  The result may overflow to infinity, but that is the
+    // correct result in that case.
+    if round_guard_sticky > 0x4 { result += one; }
+    if round_guard_sticky == 0x4 { result += result & one; }
 
-        <$ty>::from_repr(result.0)
-    })
+    F::from_repr(result)
 }
 
 intrinsics! {
     #[aapcs_on_arm]
     #[arm_aeabi_alias = __aeabi_fadd]
     pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 {
-        add!(a, b, f32)
+        add(a, b)
     }
 
     #[aapcs_on_arm]
     #[arm_aeabi_alias = __aeabi_dadd]
     pub extern "C" fn __adddf3(a: f64, b: f64) -> f64 {
-        add!(a, b, f64)
+        add(a, b)
     }
 }

+ 133 - 128
src/float/conv.rs

@@ -1,83 +1,87 @@
 use float::Float;
-use int::Int;
-
-macro_rules! int_to_float {
-    ($i:expr, $ity:ty, $fty:ty) => ({
-        let i = $i;
-        if i == 0 {
-            return 0.0
-        }
+use int::{Int, CastInto};
+
+fn int_to_float<I: Int, F: Float>(i: I) -> F where
+    F::Int: CastInto<u32>,
+    F::Int: CastInto<I>,
+    I::UnsignedInt: CastInto<F::Int>,
+    u32: CastInto<F::Int>,
+{
+    if i == I::ZERO {
+        return F::ZERO;
+    }
 
-        let mant_dig = <$fty>::SIGNIFICAND_BITS + 1;
-        let exponent_bias = <$fty>::EXPONENT_BIAS;
+    let two = I::UnsignedInt::ONE + I::UnsignedInt::ONE;
+    let four = two + two;
+    let mant_dig = F::SIGNIFICAND_BITS + 1;
+    let exponent_bias = F::EXPONENT_BIAS;
 
-        let n = <$ity>::BITS;
-        let (s, a) = i.extract_sign();
-        let mut a = a;
+    let n = I::BITS;
+    let (s, a) = i.extract_sign();
+    let mut a = a;
 
-        // number of significant digits
-        let sd = n - a.leading_zeros();
+    // number of significant digits
+    let sd = n - a.leading_zeros();
 
-        // exponent
-        let mut e = sd - 1;
+    // exponent
+    let mut e = sd - 1;
 
-        if <$ity>::BITS < mant_dig {
-            return <$fty>::from_parts(s,
-                (e + exponent_bias) as <$fty as Float>::Int,
-                (a as <$fty as Float>::Int) << (mant_dig - e - 1))
-        }
+    if I::BITS < mant_dig {
+        return F::from_parts(s,
+            (e + exponent_bias).cast(),
+            a.cast() << (mant_dig - e - 1));
+    }
 
-        a = if sd > mant_dig {
-            /* start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
-            *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
-            *                                                12345678901234567890123456
-            *  1 = msb 1 bit
-            *  P = bit MANT_DIG-1 bits to the right of 1
-            *  Q = bit MANT_DIG bits to the right of 1
-            *  R = "or" of all bits to the right of Q
-            */
-            let mant_dig_plus_one = mant_dig + 1;
-            let mant_dig_plus_two = mant_dig + 2;
-            a = if sd == mant_dig_plus_one {
-                a << 1
-            } else if sd == mant_dig_plus_two {
-                a
-            } else {
-                (a >> (sd - mant_dig_plus_two)) as <$ity as Int>::UnsignedInt |
-                ((a & <$ity as Int>::UnsignedInt::max_value()).wrapping_shl((n + mant_dig_plus_two) - sd) != 0) as <$ity as Int>::UnsignedInt
-            };
-
-            /* finish: */
-            a |= ((a & 4) != 0) as <$ity as Int>::UnsignedInt; /* Or P into R */
-            a += 1; /* round - this step may add a significant bit */
-            a >>= 2; /* dump Q and R */
-
-            /* a is now rounded to mant_dig or mant_dig+1 bits */
-            if (a & (1 << mant_dig)) != 0 {
-                a >>= 1; e += 1;
-            }
+    a = if sd > mant_dig {
+        /* start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+        *  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+        *                                                12345678901234567890123456
+        *  1 = msb 1 bit
+        *  P = bit MANT_DIG-1 bits to the right of 1
+        *  Q = bit MANT_DIG bits to the right of 1
+        *  R = "or" of all bits to the right of Q
+        */
+        let mant_dig_plus_one = mant_dig + 1;
+        let mant_dig_plus_two = mant_dig + 2;
+        a = if sd == mant_dig_plus_one {
+            a << 1
+        } else if sd == mant_dig_plus_two {
             a
-            /* a is now rounded to mant_dig bits */
         } else {
-            a.wrapping_shl(mant_dig - sd)
-            /* a is now rounded to mant_dig bits */
+            (a >> (sd - mant_dig_plus_two)) |
+            Int::from_bool((a & I::UnsignedInt::max_value()).wrapping_shl((n + mant_dig_plus_two) - sd) != Int::ZERO)
         };
 
-        <$fty>::from_parts(s,
-            (e + exponent_bias) as <$fty as Float>::Int,
-            a as <$fty as Float>::Int)
-    })
+        /* finish: */
+        a |= Int::from_bool((a & four) != I::UnsignedInt::ZERO); /* Or P into R */
+        a += Int::ONE; /* round - this step may add a significant bit */
+        a >>= 2; /* dump Q and R */
+
+        /* a is now rounded to mant_dig or mant_dig+1 bits */
+        if (a & (I::UnsignedInt::ONE << mant_dig)) != Int::ZERO {
+            a >>= 1; e += 1;
+        }
+        a
+        /* a is now rounded to mant_dig bits */
+    } else {
+        a.wrapping_shl(mant_dig - sd)
+        /* a is now rounded to mant_dig bits */
+    };
+
+    F::from_parts(s,
+                 (e + exponent_bias).cast(),
+                 a.cast())
 }
 
 intrinsics! {
     #[arm_aeabi_alias = __aeabi_i2f]
     pub extern "C" fn __floatsisf(i: i32) -> f32 {
-        int_to_float!(i, i32, f32)
+        int_to_float(i)
     }
 
     #[arm_aeabi_alias = __aeabi_i2d]
     pub extern "C" fn __floatsidf(i: i32) -> f64 {
-        int_to_float!(i, i32, f64)
+        int_to_float(i)
     }
 
     #[use_c_shim_if(all(target_arch = "x86", not(target_env = "msvc")))]
@@ -88,28 +92,28 @@ intrinsics! {
         if cfg!(target_arch = "x86_64") {
             i as f64
         } else {
-            int_to_float!(i, i64, f64)
+            int_to_float(i)
         }
     }
 
     #[unadjusted_on_win64]
     pub extern "C" fn __floattisf(i: i128) -> f32 {
-        int_to_float!(i, i128, f32)
+        int_to_float(i)
     }
 
     #[unadjusted_on_win64]
     pub extern "C" fn __floattidf(i: i128) -> f64 {
-        int_to_float!(i, i128, f64)
+        int_to_float(i)
     }
 
     #[arm_aeabi_alias = __aeabi_ui2f]
     pub extern "C" fn __floatunsisf(i: u32) -> f32 {
-        int_to_float!(i, u32, f32)
+        int_to_float(i)
     }
 
     #[arm_aeabi_alias = __aeabi_ui2d]
     pub extern "C" fn __floatunsidf(i: u32) -> f64 {
-        int_to_float!(i, u32, f64)
+        int_to_float(i)
     }
 
     #[use_c_shim_if(all(not(target_env = "msvc"),
@@ -117,17 +121,17 @@ intrinsics! {
                             all(not(windows), target_arch = "x86_64"))))]
     #[arm_aeabi_alias = __aeabi_ul2d]
     pub extern "C" fn __floatundidf(i: u64) -> f64 {
-        int_to_float!(i, u64, f64)
+        int_to_float(i)
     }
 
     #[unadjusted_on_win64]
     pub extern "C" fn __floatuntisf(i: u128) -> f32 {
-        int_to_float!(i, u128, f32)
+        int_to_float(i)
     }
 
     #[unadjusted_on_win64]
     pub extern "C" fn __floatuntidf(i: u128) -> f64 {
-        int_to_float!(i, u128, f64)
+        int_to_float(i)
     }
 }
 
@@ -137,115 +141,116 @@ enum Sign {
     Negative
 }
 
-macro_rules! float_to_int {
-    ($f:expr, $fty:ty, $ity:ty) => ({
-        let f = $f;
-        let fixint_min = <$ity>::min_value();
-        let fixint_max = <$ity>::max_value();
-        let fixint_bits = <$ity>::BITS as usize;
-        let fixint_unsigned = fixint_min == 0;
-
-        let sign_bit = <$fty>::SIGN_MASK;
-        let significand_bits = <$fty>::SIGNIFICAND_BITS as usize;
-        let exponent_bias = <$fty>::EXPONENT_BIAS as usize;
-        //let exponent_max = <$fty>::exponent_max() as usize;
-
-        // Break a into sign, exponent, significand
-        let a_rep = <$fty>::repr(f);
-        let a_abs = a_rep & !sign_bit;
-
-        // this is used to work around -1 not being available for unsigned
-        let sign = if (a_rep & sign_bit) == 0 { Sign::Positive } else { Sign::Negative };
-        let mut exponent = (a_abs >> significand_bits) as usize;
-        let significand = (a_abs & <$fty>::SIGNIFICAND_MASK) | <$fty>::IMPLICIT_BIT;
-
-        // if < 1 or unsigned & negative
-        if  exponent < exponent_bias ||
-            fixint_unsigned && sign == Sign::Negative {
-            return 0
-        }
-        exponent -= exponent_bias;
-
-        // If the value is infinity, saturate.
-        // If the value is too large for the integer type, 0.
-        if exponent >= (if fixint_unsigned {fixint_bits} else {fixint_bits -1}) {
-            return if sign == Sign::Positive {fixint_max} else {fixint_min}
-        }
-        // If 0 <= exponent < significand_bits, right shift to get the result.
-        // Otherwise, shift left.
-        // (sign - 1) will never overflow as negative signs are already returned as 0 for unsigned
-        let r = if exponent < significand_bits {
-            (significand >> (significand_bits - exponent)) as $ity
-        } else {
-            (significand as $ity) << (exponent - significand_bits)
-        };
+fn float_to_int<F: Float, I: Int>(f: F) -> I where
+    F::Int: CastInto<u32>,
+    F::Int: CastInto<I>,
+{
+    let f = f;
+    let fixint_min = I::min_value();
+    let fixint_max = I::max_value();
+    let fixint_bits = I::BITS;
+    let fixint_unsigned = fixint_min == I::ZERO;
+
+    let sign_bit = F::SIGN_MASK;
+    let significand_bits = F::SIGNIFICAND_BITS;
+    let exponent_bias = F::EXPONENT_BIAS;
+    //let exponent_max = F::exponent_max() as usize;
+
+    // Break a into sign, exponent, significand
+    let a_rep = F::repr(f);
+    let a_abs = a_rep & !sign_bit;
+
+    // this is used to work around -1 not being available for unsigned
+    let sign = if (a_rep & sign_bit) == F::Int::ZERO { Sign::Positive } else { Sign::Negative };
+    let mut exponent: u32 = (a_abs >> significand_bits).cast();
+    let significand = (a_abs & F::SIGNIFICAND_MASK) | F::IMPLICIT_BIT;
+
+    // if < 1 or unsigned & negative
+    if exponent < exponent_bias ||
+        fixint_unsigned && sign == Sign::Negative {
+        return I::ZERO;
+    }
+    exponent -= exponent_bias;
 
-        if sign == Sign::Negative {
-            (!r).wrapping_add(1)
-        } else {
-            r
-        }
-    })
+    // If the value is infinity, saturate.
+    // If the value is too large for the integer type, 0.
+    if exponent >= (if fixint_unsigned {fixint_bits} else {fixint_bits -1}) {
+        return if sign == Sign::Positive {fixint_max} else {fixint_min}
+    }
+    // If 0 <= exponent < significand_bits, right shift to get the result.
+    // Otherwise, shift left.
+    // (sign - 1) will never overflow as negative signs are already returned as 0 for unsigned
+    let r: I = if exponent < significand_bits {
+        (significand >> (significand_bits - exponent)).cast()
+    } else {
+        (significand << (exponent - significand_bits)).cast()
+    };
+
+    if sign == Sign::Negative {
+        (!r).wrapping_add(I::ONE)
+    } else {
+        r
+    }
 }
 
 intrinsics! {
     #[arm_aeabi_alias = __aeabi_f2iz]
     pub extern "C" fn __fixsfsi(f: f32) -> i32 {
-        float_to_int!(f, f32, i32)
+        float_to_int(f)
     }
 
     #[arm_aeabi_alias = __aeabi_f2lz]
     pub extern "C" fn __fixsfdi(f: f32) -> i64 {
-        float_to_int!(f, f32, i64)
+        float_to_int(f)
     }
 
     #[unadjusted_on_win64]
     pub extern "C" fn __fixsfti(f: f32) -> i128 {
-        float_to_int!(f, f32, i128)
+        float_to_int(f)
     }
 
     #[arm_aeabi_alias = __aeabi_d2iz]
     pub extern "C" fn __fixdfsi(f: f64) -> i32 {
-        float_to_int!(f, f64, i32)
+        float_to_int(f)
     }
 
     #[arm_aeabi_alias = __aeabi_d2lz]
     pub extern "C" fn __fixdfdi(f: f64) -> i64 {
-        float_to_int!(f, f64, i64)
+        float_to_int(f)
     }
 
     #[unadjusted_on_win64]
     pub extern "C" fn __fixdfti(f: f64) -> i128 {
-        float_to_int!(f, f64, i128)
+        float_to_int(f)
     }
 
     #[arm_aeabi_alias = __aeabi_f2uiz]
     pub extern "C" fn __fixunssfsi(f: f32) -> u32 {
-        float_to_int!(f, f32, u32)
+        float_to_int(f)
     }
 
     #[arm_aeabi_alias = __aeabi_f2ulz]
     pub extern "C" fn __fixunssfdi(f: f32) -> u64 {
-        float_to_int!(f, f32, u64)
+        float_to_int(f)
     }
 
     #[unadjusted_on_win64]
     pub extern "C" fn __fixunssfti(f: f32) -> u128 {
-        float_to_int!(f, f32, u128)
+        float_to_int(f)
     }
 
     #[arm_aeabi_alias = __aeabi_d2uiz]
     pub extern "C" fn __fixunsdfsi(f: f64) -> u32 {
-        float_to_int!(f, f64, u32)
+        float_to_int(f)
     }
 
     #[arm_aeabi_alias = __aeabi_d2ulz]
     pub extern "C" fn __fixunsdfdi(f: f64) -> u64 {
-        float_to_int!(f, f64, u64)
+        float_to_int(f)
     }
 
     #[unadjusted_on_win64]
     pub extern "C" fn __fixunsdfti(f: f64) -> u128 {
-        float_to_int!(f, f64, u128)
+        float_to_int(f)
     }
 }

+ 18 - 1
src/float/mod.rs

@@ -1,4 +1,5 @@
 use core::mem;
+use core::ops;
 
 use super::int::Int;
 
@@ -8,10 +9,23 @@ pub mod pow;
 pub mod sub;
 
 /// Trait for some basic operations on floats
-pub trait Float: Sized + Copy {
+pub trait Float:
+    Copy +
+    PartialEq +
+    PartialOrd +
+    ops::AddAssign +
+    ops::MulAssign +
+    ops::Add<Output = Self> +
+    ops::Sub<Output = Self> +
+    ops::Div<Output = Self> +
+    ops::Rem<Output = Self> +
+{
     /// A uint of the same with as the float
     type Int: Int;
 
+    const ZERO: Self;
+    const ONE: Self;
+
     /// The bitwidth of the float type
     const BITS: u32;
 
@@ -64,6 +78,9 @@ macro_rules! float_impl {
     ($ty:ident, $ity:ident, $bits:expr, $significand_bits:expr) => {
         impl Float for $ty {
             type Int = $ity;
+            const ZERO: Self = 0.0;
+            const ONE: Self = 1.0;
+
             const BITS: u32 = $bits;
             const SIGNIFICAND_BITS: u32 = $significand_bits;
 

+ 13 - 9
src/float/pow.rs

@@ -1,11 +1,12 @@
 use int::Int;
+use float::Float;
 
-/// Returns `a` raised to the power `b`
-macro_rules! pow {
-    ($a: expr, $b: expr) => ({
-        let (mut a, mut b) = ($a, $b);
+trait Pow: Float {
+    /// Returns `a` raised to the power `b`
+    fn pow(self, mut b: i32) -> Self {
+        let mut a = self;
         let recip = b < 0;
-        let mut r = 1.0;
+        let mut r = Self::ONE;
         loop {
             if (b & 1) != 0 {
                 r *= a;
@@ -18,19 +19,22 @@ macro_rules! pow {
         }
 
         if recip {
-            1.0 / r
+            Self::ONE / r
         } else {
             r
         }
-    })
+    }
 }
 
+impl Pow for f32 {}
+impl Pow for f64 {}
+
 intrinsics! {
     pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 {
-        pow!(a, b)
+        a.pow(b)
     }
 
     pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 {
-        pow!(a, b)
+        a.pow(b)
     }
 }

+ 68 - 49
src/int/mod.rs

@@ -23,6 +23,10 @@ pub trait Int:
     PartialEq +
     PartialOrd +
     ops::AddAssign +
+    ops::BitAndAssign +
+    ops::BitOrAssign +
+    ops::ShlAssign<i32> +
+    ops::ShrAssign<u32> +
     ops::Add<Output = Self> +
     ops::Sub<Output = Self> +
     ops::Div<Output = Self> +
@@ -31,7 +35,6 @@ pub trait Int:
     ops::BitOr<Output = Self> +
     ops::BitXor<Output = Self> +
     ops::BitAnd<Output = Self> +
-    ops::BitAndAssign +
     ops::Not<Output = Self> +
 {
     /// Type with the same width but other signedness
@@ -60,14 +63,18 @@ pub trait Int:
     fn unsigned(self) -> Self::UnsignedInt;
     fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
 
+    fn from_bool(b: bool) -> Self;
+
     // copied from primitive integers, but put in a trait
     fn max_value() -> Self;
     fn min_value() -> Self;
     fn wrapping_add(self, other: Self) -> Self;
     fn wrapping_mul(self, other: Self) -> Self;
     fn wrapping_sub(self, other: Self) -> Self;
+    fn wrapping_shl(self, other: u32) -> Self;
     fn aborting_div(self, other: Self) -> Self;
     fn aborting_rem(self, other: Self) -> Self;
+    fn leading_zeros(self) -> u32;
 }
 
 fn unwrap<T>(t: Option<T>) -> T {
@@ -77,27 +84,15 @@ fn unwrap<T>(t: Option<T>) -> T {
     }
 }
 
-macro_rules! int_impl {
-    ($ity:ty, $uty:ty, $bits:expr) => {
-        impl Int for $uty {
-            type OtherSign = $ity;
-            type UnsignedInt = $uty;
-
+macro_rules! int_impl_common {
+    ($ty:ty, $bits:expr) => {
             const BITS: u32 = $bits;
 
             const ZERO: Self = 0;
             const ONE: Self = 1;
 
-            fn extract_sign(self) -> (bool, $uty) {
-                (false, self)
-            }
-
-            fn unsigned(self) -> $uty {
-                self
-            }
-
-            fn from_unsigned(me: $uty) -> Self {
-                me
+            fn from_bool(b: bool) -> Self {
+                b as $ty
             }
 
             fn max_value() -> Self {
@@ -120,6 +115,10 @@ macro_rules! int_impl {
                 <Self>::wrapping_sub(self, other)
             }
 
+            fn wrapping_shl(self, other: u32) -> Self {
+                <Self>::wrapping_shl(self, other)
+            }
+
             fn aborting_div(self, other: Self) -> Self {
                 unwrap(<Self>::checked_div(self, other))
             }
@@ -127,17 +126,38 @@ macro_rules! int_impl {
             fn aborting_rem(self, other: Self) -> Self {
                 unwrap(<Self>::checked_rem(self, other))
             }
+
+            fn leading_zeros(self) -> u32 {
+                <Self>::leading_zeros(self)
+            }
+    }
+}
+
+macro_rules! int_impl {
+    ($ity:ty, $uty:ty, $bits:expr) => {
+        impl Int for $uty {
+            type OtherSign = $ity;
+            type UnsignedInt = $uty;
+
+            fn extract_sign(self) -> (bool, $uty) {
+                (false, self)
+            }
+
+            fn unsigned(self) -> $uty {
+                self
+            }
+
+            fn from_unsigned(me: $uty) -> Self {
+                me
+            }
+
+            int_impl_common!($uty, $bits);
         }
 
         impl Int for $ity {
             type OtherSign = $uty;
             type UnsignedInt = $uty;
 
-            const BITS: u32 = $bits;
-
-            const ZERO: Self = 0;
-            const ONE: Self = 1;
-
             fn extract_sign(self) -> (bool, $uty) {
                 if self < 0 {
                     (true, (!(self as $uty)).wrapping_add(1))
@@ -154,33 +174,7 @@ macro_rules! int_impl {
                 me as $ity
             }
 
-            fn max_value() -> Self {
-                <Self>::max_value()
-            }
-
-            fn min_value() -> Self {
-                <Self>::min_value()
-            }
-
-            fn wrapping_add(self, other: Self) -> Self {
-                <Self>::wrapping_add(self, other)
-            }
-
-            fn wrapping_mul(self, other: Self) -> Self {
-                <Self>::wrapping_mul(self, other)
-            }
-
-            fn wrapping_sub(self, other: Self) -> Self {
-                <Self>::wrapping_sub(self, other)
-            }
-
-            fn aborting_div(self, other: Self) -> Self {
-                unwrap(<Self>::checked_div(self, other))
-            }
-
-            fn aborting_rem(self, other: Self) -> Self {
-                unwrap(<Self>::checked_rem(self, other))
-            }
+            int_impl_common!($ity, $bits);
         }
     }
 }
@@ -230,3 +224,28 @@ large_int!(u64, u32, u32, 32);
 large_int!(i64, u32, i32, 32);
 large_int!(u128, u64, u64, 64);
 large_int!(i128, u64, i64, 64);
+
+/// Trait to express (possibly lossy) casting of integers
+pub trait CastInto<T: Copy>: Copy {
+    fn cast(self) -> T;
+}
+
+macro_rules! cast_into {
+    ($ty:ty) => {
+        cast_into!($ty; usize, isize, u32, i32, u64, i64, u128, i128);
+    };
+    ($ty:ty; $($into:ty),*) => {$(
+        impl CastInto<$into> for $ty {
+            fn cast(self) -> $into {
+                self as $into
+            }
+        }
+    )*};
+}
+
+cast_into!(u32);
+cast_into!(i32);
+cast_into!(u64);
+cast_into!(i64);
+cast_into!(u128);
+cast_into!(i128);