|
@@ -116,6 +116,62 @@ pub mod big_digit {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Generic functions for add/subtract/multiply with carry/borrow:
|
|
|
+ */
|
|
|
+
|
|
|
+// Add with carry:
|
|
|
+#[inline]
|
|
|
+fn adc(a: BigDigit, b: BigDigit, carry: &mut BigDigit) -> BigDigit {
|
|
|
+ let (hi, lo) = big_digit::from_doublebigdigit(
|
|
|
+ (a as DoubleBigDigit) +
|
|
|
+ (b as DoubleBigDigit) +
|
|
|
+ (*carry as DoubleBigDigit));
|
|
|
+
|
|
|
+ *carry = hi;
|
|
|
+ lo
|
|
|
+}
|
|
|
+
|
|
|
+// Subtract with borrow:
|
|
|
+#[inline]
|
|
|
+fn sbb(a: BigDigit, b: BigDigit, borrow: &mut BigDigit) -> BigDigit {
|
|
|
+ let (hi, lo) = big_digit::from_doublebigdigit(
|
|
|
+ big_digit::BASE
|
|
|
+ + (a as DoubleBigDigit)
|
|
|
+ - (b as DoubleBigDigit)
|
|
|
+ - (*borrow as DoubleBigDigit));
|
|
|
+ /*
|
|
|
+ hi * (base) + lo == 1*(base) + ai - bi - borrow
|
|
|
+ => ai - bi - borrow < 0 <=> hi == 0
|
|
|
+ */
|
|
|
+ *borrow = if hi == 0 { 1 } else { 0 };
|
|
|
+ lo
|
|
|
+}
|
|
|
+
|
|
|
+#[inline]
|
|
|
+fn mac_with_carry(a: BigDigit, b: BigDigit, c: BigDigit, carry: &mut BigDigit) -> BigDigit {
|
|
|
+ let (hi, lo) = big_digit::from_doublebigdigit(
|
|
|
+ (a as DoubleBigDigit) +
|
|
|
+ (b as DoubleBigDigit) * (c as DoubleBigDigit) +
|
|
|
+ (*carry as DoubleBigDigit));
|
|
|
+ *carry = hi;
|
|
|
+ lo
|
|
|
+}
|
|
|
+
|
|
|
+/// Divide a two digit numerator by a one digit divisor, returns quotient and remainder:
|
|
|
+///
|
|
|
+/// Note: the caller must ensure that both the quotient and remainder will fit into a single digit.
|
|
|
+/// This is _not_ true for an arbitrary numerator/denominator.
|
|
|
+///
|
|
|
+/// (This function also matches what the x86 divide instruction does).
|
|
|
+#[inline]
|
|
|
+fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
|
|
|
+ debug_assert!(hi < divisor);
|
|
|
+
|
|
|
+ let lhs = big_digit::to_doublebigdigit(hi, lo);
|
|
|
+ let rhs = divisor as DoubleBigDigit;
|
|
|
+ ((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit)
|
|
|
+}
|
|
|
/// A big unsigned integer type.
|
|
|
///
|
|
|
/// A `BigUint`-typed value `BigUint { data: vec!(a, b, c) }` represents a number
|
|
@@ -140,18 +196,25 @@ impl PartialOrd for BigUint {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+fn cmp_slice(a: &[BigDigit], b: &[BigDigit]) -> Ordering {
|
|
|
+ debug_assert!(a.last() != Some(&0));
|
|
|
+ debug_assert!(b.last() != Some(&0));
|
|
|
+
|
|
|
+ let (a_len, b_len) = (a.len(), b.len());
|
|
|
+ if a_len < b_len { return Less; }
|
|
|
+ if a_len > b_len { return Greater; }
|
|
|
+
|
|
|
+ for (&ai, &bi) in a.iter().rev().zip(b.iter().rev()) {
|
|
|
+ if ai < bi { return Less; }
|
|
|
+ if ai > bi { return Greater; }
|
|
|
+ }
|
|
|
+ return Equal;
|
|
|
+}
|
|
|
+
|
|
|
impl Ord for BigUint {
|
|
|
#[inline]
|
|
|
fn cmp(&self, other: &BigUint) -> Ordering {
|
|
|
- let (s_len, o_len) = (self.data.len(), other.data.len());
|
|
|
- if s_len < o_len { return Less; }
|
|
|
- if s_len > o_len { return Greater; }
|
|
|
-
|
|
|
- for (&self_i, &other_i) in self.data.iter().rev().zip(other.data.iter().rev()) {
|
|
|
- if self_i < other_i { return Less; }
|
|
|
- if self_i > other_i { return Greater; }
|
|
|
- }
|
|
|
- return Equal;
|
|
|
+ cmp_slice(&self.data[..], &other.data[..])
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -467,146 +530,323 @@ impl Unsigned for BigUint {}
|
|
|
|
|
|
forward_all_binop_to_val_ref_commutative!(impl Add for BigUint, add);
|
|
|
|
|
|
-impl<'a> Add<&'a BigUint> for BigUint {
|
|
|
- type Output = BigUint;
|
|
|
+// Only for the Add impl:
|
|
|
+#[must_use]
|
|
|
+#[inline]
|
|
|
+fn __add2(a: &mut [BigDigit], b: &[BigDigit]) -> BigDigit {
|
|
|
+ let mut b_iter = b.iter();
|
|
|
+ let mut carry = 0;
|
|
|
+
|
|
|
+ for ai in a.iter_mut() {
|
|
|
+ if let Some(bi) = b_iter.next() {
|
|
|
+ *ai = adc(*ai, *bi, &mut carry);
|
|
|
+ } else if carry != 0 {
|
|
|
+ *ai = adc(*ai, 0, &mut carry);
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ debug_assert!(b_iter.next() == None);
|
|
|
+ carry
|
|
|
+}
|
|
|
+
|
|
|
+/// /Two argument addition of raw slices:
|
|
|
+/// a += b
|
|
|
+///
|
|
|
+/// The caller _must_ ensure that a is big enough to store the result - typically this means
|
|
|
+/// resizing a to max(a.len(), b.len()) + 1, to fit a possible carry.
|
|
|
+fn add2(a: &mut [BigDigit], b: &[BigDigit]) {
|
|
|
+ let carry = __add2(a, b);
|
|
|
|
|
|
- fn add(self, other: &BigUint) -> BigUint {
|
|
|
- let mut sum = self.data;
|
|
|
+ debug_assert!(carry == 0);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * We'd really prefer to avoid using add2/sub2 directly as much as possible - since they make the
|
|
|
+ * caller entirely responsible for ensuring a's vector is big enough, and that the result is
|
|
|
+ * normalized, they're rather error prone and verbose:
|
|
|
+ *
|
|
|
+ * We could implement the Add and Sub traits for BigUint + BigDigit slices, like below - this works
|
|
|
+ * great, except that then it becomes the module's public interface, which we probably don't want:
|
|
|
+ *
|
|
|
+ * I'm keeping the code commented out, because I think this is worth revisiting:
|
|
|
+
|
|
|
+impl<'a> Add<&'a [BigDigit]> for BigUint {
|
|
|
+ type Output = BigUint;
|
|
|
|
|
|
- if other.data.len() > sum.len() {
|
|
|
- let additional = other.data.len() - sum.len();
|
|
|
- sum.reserve(additional);
|
|
|
- sum.extend(repeat(ZERO_BIG_DIGIT).take(additional));
|
|
|
+ fn add(mut self, other: &[BigDigit]) -> BigUint {
|
|
|
+ if self.data.len() < other.len() {
|
|
|
+ let extra = other.len() - self.data.len();
|
|
|
+ self.data.extend(repeat(0).take(extra));
|
|
|
}
|
|
|
- let other_iter = other.data.iter().cloned().chain(repeat(ZERO_BIG_DIGIT));
|
|
|
|
|
|
- let mut carry = 0;
|
|
|
- for (a, b) in sum.iter_mut().zip(other_iter) {
|
|
|
- let d = (*a as DoubleBigDigit)
|
|
|
- + (b as DoubleBigDigit)
|
|
|
- + (carry as DoubleBigDigit);
|
|
|
- let (hi, lo) = big_digit::from_doublebigdigit(d);
|
|
|
- carry = hi;
|
|
|
- *a = lo;
|
|
|
+ let carry = __add2(&mut self.data[..], other);
|
|
|
+ if carry != 0 {
|
|
|
+ self.data.push(carry);
|
|
|
}
|
|
|
|
|
|
- if carry != 0 { sum.push(carry); }
|
|
|
- BigUint::new(sum)
|
|
|
+ self
|
|
|
}
|
|
|
}
|
|
|
+ */
|
|
|
|
|
|
-forward_all_binop_to_val_ref!(impl Sub for BigUint, sub);
|
|
|
-
|
|
|
-impl<'a> Sub<&'a BigUint> for BigUint {
|
|
|
+impl<'a> Add<&'a BigUint> for BigUint {
|
|
|
type Output = BigUint;
|
|
|
|
|
|
- fn sub(self, other: &BigUint) -> BigUint {
|
|
|
- let mut diff = self.data;
|
|
|
- let other = &other.data;
|
|
|
- assert!(diff.len() >= other.len(), "arithmetic operation overflowed");
|
|
|
-
|
|
|
- let mut borrow: DoubleBigDigit = 0;
|
|
|
- for (a, &b) in diff.iter_mut().zip(other.iter()) {
|
|
|
- let d = big_digit::BASE - borrow
|
|
|
- + (*a as DoubleBigDigit)
|
|
|
- - (b as DoubleBigDigit);
|
|
|
- let (hi, lo) = big_digit::from_doublebigdigit(d);
|
|
|
- /*
|
|
|
- hi * (base) + lo == 1*(base) + ai - bi - borrow
|
|
|
- => ai - bi - borrow < 0 <=> hi == 0
|
|
|
- */
|
|
|
- borrow = if hi == 0 { 1 } else { 0 };
|
|
|
- *a = lo;
|
|
|
+ fn add(mut self, other: &BigUint) -> BigUint {
|
|
|
+ if self.data.len() < other.data.len() {
|
|
|
+ let extra = other.data.len() - self.data.len();
|
|
|
+ self.data.extend(repeat(0).take(extra));
|
|
|
}
|
|
|
|
|
|
- for a in &mut diff[other.len()..] {
|
|
|
- if borrow == 0 { break }
|
|
|
- let d = big_digit::BASE - borrow
|
|
|
- + (*a as DoubleBigDigit);
|
|
|
- let (hi, lo) = big_digit::from_doublebigdigit(d);
|
|
|
- borrow = if hi == 0 { 1 } else { 0 };
|
|
|
- *a = lo;
|
|
|
+ let carry = __add2(&mut self.data[..], &other.data[..]);
|
|
|
+ if carry != 0 {
|
|
|
+ self.data.push(carry);
|
|
|
}
|
|
|
|
|
|
- assert!(borrow == 0, "arithmetic operation overflowed");
|
|
|
- BigUint::new(diff)
|
|
|
+ self
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+forward_all_binop_to_val_ref!(impl Sub for BigUint, sub);
|
|
|
|
|
|
-forward_all_binop_to_val_ref_commutative!(impl Mul for BigUint, mul);
|
|
|
+fn sub2(a: &mut [BigDigit], b: &[BigDigit]) {
|
|
|
+ let mut b_iter = b.iter();
|
|
|
+ let mut borrow = 0;
|
|
|
|
|
|
-impl<'a> Mul<&'a BigUint> for BigUint {
|
|
|
+ for ai in a.iter_mut() {
|
|
|
+ if let Some(bi) = b_iter.next() {
|
|
|
+ *ai = sbb(*ai, *bi, &mut borrow);
|
|
|
+ } else if borrow != 0 {
|
|
|
+ *ai = sbb(*ai, 0, &mut borrow);
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* note: we're _required_ to fail on underflow */
|
|
|
+ assert!(borrow == 0 && b_iter.all(|x| *x == 0),
|
|
|
+ "Cannot subtract b from a because b is larger than a.");
|
|
|
+}
|
|
|
+
|
|
|
+impl<'a> Sub<&'a BigUint> for BigUint {
|
|
|
type Output = BigUint;
|
|
|
|
|
|
- fn mul(self, other: &BigUint) -> BigUint {
|
|
|
- if self.is_zero() || other.is_zero() { return Zero::zero(); }
|
|
|
-
|
|
|
- let (s_len, o_len) = (self.data.len(), other.data.len());
|
|
|
- if s_len == 1 { return mul_digit(other.clone(), self.data[0]); }
|
|
|
- if o_len == 1 { return mul_digit(self, other.data[0]); }
|
|
|
-
|
|
|
- // Using Karatsuba multiplication
|
|
|
- // (a1 * base + a0) * (b1 * base + b0)
|
|
|
- // = a1*b1 * base^2 +
|
|
|
- // (a1*b1 + a0*b0 - (a1-b0)*(b1-a0)) * base +
|
|
|
- // a0*b0
|
|
|
- let half_len = cmp::max(s_len, o_len) / 2;
|
|
|
- let (s_hi, s_lo) = cut_at(self, half_len);
|
|
|
- let (o_hi, o_lo) = cut_at(other.clone(), half_len);
|
|
|
-
|
|
|
- let ll = &s_lo * &o_lo;
|
|
|
- let hh = &s_hi * &o_hi;
|
|
|
- let mm = {
|
|
|
- let (s1, n1) = sub_sign(s_hi, s_lo);
|
|
|
- let (s2, n2) = sub_sign(o_hi, o_lo);
|
|
|
- match (s1, s2) {
|
|
|
- (Equal, _) | (_, Equal) => &hh + &ll,
|
|
|
- (Less, Greater) | (Greater, Less) => &hh + &ll + (n1 * n2),
|
|
|
- (Less, Less) | (Greater, Greater) => &hh + &ll - (n1 * n2)
|
|
|
- }
|
|
|
- };
|
|
|
+ fn sub(mut self, other: &BigUint) -> BigUint {
|
|
|
+ sub2(&mut self.data[..], &other.data[..]);
|
|
|
+ self.normalize()
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
- return ll + mm.shl_unit(half_len) + hh.shl_unit(half_len * 2);
|
|
|
+fn sub_sign(a: &[BigDigit], b: &[BigDigit]) -> BigInt {
|
|
|
+ // Normalize:
|
|
|
+ let a = &a[..a.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
|
|
|
+ let b = &b[..b.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)];
|
|
|
|
|
|
+ match cmp_slice(a, b) {
|
|
|
+ Greater => {
|
|
|
+ let mut ret = BigUint::from_slice(a);
|
|
|
+ sub2(&mut ret.data[..], b);
|
|
|
+ BigInt::from_biguint(Plus, ret.normalize())
|
|
|
+ },
|
|
|
+ Less => {
|
|
|
+ let mut ret = BigUint::from_slice(b);
|
|
|
+ sub2(&mut ret.data[..], a);
|
|
|
+ BigInt::from_biguint(Minus, ret.normalize())
|
|
|
+ },
|
|
|
+ _ => Zero::zero(),
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
- fn mul_digit(a: BigUint, n: BigDigit) -> BigUint {
|
|
|
- if n == 0 { return Zero::zero(); }
|
|
|
- if n == 1 { return a; }
|
|
|
+forward_all_binop_to_ref_ref!(impl Mul for BigUint, mul);
|
|
|
|
|
|
- let mut carry = 0;
|
|
|
- let mut prod = a.data;
|
|
|
- for a in &mut prod {
|
|
|
- let d = (*a as DoubleBigDigit)
|
|
|
- * (n as DoubleBigDigit)
|
|
|
- + (carry as DoubleBigDigit);
|
|
|
- let (hi, lo) = big_digit::from_doublebigdigit(d);
|
|
|
- carry = hi;
|
|
|
- *a = lo;
|
|
|
- }
|
|
|
- if carry != 0 { prod.push(carry); }
|
|
|
- BigUint::new(prod)
|
|
|
- }
|
|
|
+/// Three argument multiply accumulate:
|
|
|
+/// acc += b * c
|
|
|
+fn mac_digit(acc: &mut [BigDigit], b: &[BigDigit], c: BigDigit) {
|
|
|
+ if c == 0 { return; }
|
|
|
+
|
|
|
+ let mut b_iter = b.iter();
|
|
|
+ let mut carry = 0;
|
|
|
|
|
|
- #[inline]
|
|
|
- fn cut_at(mut a: BigUint, n: usize) -> (BigUint, BigUint) {
|
|
|
- let mid = cmp::min(a.data.len(), n);
|
|
|
- let hi = BigUint::from_slice(&a.data[mid ..]);
|
|
|
- a.data.truncate(mid);
|
|
|
- (hi, BigUint::new(a.data))
|
|
|
+ for ai in acc.iter_mut() {
|
|
|
+ if let Some(bi) = b_iter.next() {
|
|
|
+ *ai = mac_with_carry(*ai, *bi, c, &mut carry);
|
|
|
+ } else if carry != 0 {
|
|
|
+ *ai = mac_with_carry(*ai, 0, c, &mut carry);
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
- #[inline]
|
|
|
- fn sub_sign(a: BigUint, b: BigUint) -> (Ordering, BigUint) {
|
|
|
- match a.cmp(&b) {
|
|
|
- Less => (Less, b - a),
|
|
|
- Greater => (Greater, a - b),
|
|
|
- _ => (Equal, Zero::zero())
|
|
|
- }
|
|
|
+ assert!(carry == 0);
|
|
|
+}
|
|
|
+
|
|
|
+/// Three argument multiply accumulate:
|
|
|
+/// acc += b * c
|
|
|
+fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) {
|
|
|
+ let (x, y) = if b.len() < c.len() { (b, c) } else { (c, b) };
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Karatsuba multiplication is slower than long multiplication for small x and y:
|
|
|
+ */
|
|
|
+ if x.len() <= 4 {
|
|
|
+ for (i, xi) in x.iter().enumerate() {
|
|
|
+ mac_digit(&mut acc[i..], y, *xi);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ /*
|
|
|
+ * Karatsuba multiplication:
|
|
|
+ *
|
|
|
+ * The idea is that we break x and y up into two smaller numbers that each have about half
|
|
|
+ * as many digits, like so (note that multiplying by b is just a shift):
|
|
|
+ *
|
|
|
+ * x = x0 + x1 * b
|
|
|
+ * y = y0 + y1 * b
|
|
|
+ *
|
|
|
+ * With some algebra, we can compute x * y with three smaller products, where the inputs to
|
|
|
+ * each of the smaller products have only about half as many digits as x and y:
|
|
|
+ *
|
|
|
+ * x * y = (x0 + x1 * b) * (y0 + y1 * b)
|
|
|
+ *
|
|
|
+ * x * y = x0 * y0
|
|
|
+ * + x0 * y1 * b
|
|
|
+ * + x1 * y0 * b
|
|
|
+ * + x1 * y1 * b^2
|
|
|
+ *
|
|
|
+ * Let p0 = x0 * y0 and p2 = x1 * y1:
|
|
|
+ *
|
|
|
+ * x * y = p0
|
|
|
+ * + (x0 * y1 + x1 * p0) * b
|
|
|
+ * + p2 * b^2
|
|
|
+ *
|
|
|
+ * The real trick is that middle term:
|
|
|
+ *
|
|
|
+ * x0 * y1 + x1 * y0
|
|
|
+ *
|
|
|
+ * = x0 * y1 + x1 * y0 - p0 + p0 - p2 + p2
|
|
|
+ *
|
|
|
+ * = x0 * y1 + x1 * y0 - x0 * y0 - x1 * y1 + p0 + p2
|
|
|
+ *
|
|
|
+ * Now we complete the square:
|
|
|
+ *
|
|
|
+ * = -(x0 * y0 - x0 * y1 - x1 * y0 + x1 * y1) + p0 + p2
|
|
|
+ *
|
|
|
+ * = -((x1 - x0) * (y1 - y0)) + p0 + p2
|
|
|
+ *
|
|
|
+ * Let p1 = (x1 - x0) * (y1 - y0), and substitute back into our original formula:
|
|
|
+ *
|
|
|
+ * x * y = p0
|
|
|
+ * + (p0 + p2 - p1) * b
|
|
|
+ * + p2 * b^2
|
|
|
+ *
|
|
|
+ * Where the three intermediate products are:
|
|
|
+ *
|
|
|
+ * p0 = x0 * y0
|
|
|
+ * p1 = (x1 - x0) * (y1 - y0)
|
|
|
+ * p2 = x1 * y1
|
|
|
+ *
|
|
|
+ * In doing the computation, we take great care to avoid unnecessary temporary variables
|
|
|
+ * (since creating a BigUint requires a heap allocation): thus, we rearrange the formula a
|
|
|
+ * bit so we can use the same temporary variable for all the intermediate products:
|
|
|
+ *
|
|
|
+ * x * y = p2 * b^2 + p2 * b
|
|
|
+ * + p0 * b + p0
|
|
|
+ * - p1 * b
|
|
|
+ *
|
|
|
+ * The other trick we use is instead of doing explicit shifts, we slice acc at the
|
|
|
+ * appropriate offset when doing the add.
|
|
|
+ */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * When x is smaller than y, it's significantly faster to pick b such that x is split in
|
|
|
+ * half, not y:
|
|
|
+ */
|
|
|
+ let b = x.len() / 2;
|
|
|
+ let (x0, x1) = x.split_at(b);
|
|
|
+ let (y0, y1) = y.split_at(b);
|
|
|
+
|
|
|
+ /* We reuse the same BigUint for all the intermediate multiplies: */
|
|
|
+
|
|
|
+ let len = y.len() + 1;
|
|
|
+ let mut p: BigUint = BigUint { data: Vec::with_capacity(len) };
|
|
|
+ p.data.extend(repeat(0).take(len));
|
|
|
+
|
|
|
+ // p2 = x1 * y1
|
|
|
+ mac3(&mut p.data[..], x1, y1);
|
|
|
+
|
|
|
+ // Not required, but the adds go faster if we drop any unneeded 0s from the end:
|
|
|
+ p = p.normalize();
|
|
|
+
|
|
|
+ add2(&mut acc[b..], &p.data[..]);
|
|
|
+ add2(&mut acc[b * 2..], &p.data[..]);
|
|
|
+
|
|
|
+ // Zero out p before the next multiply:
|
|
|
+ p.data.truncate(0);
|
|
|
+ p.data.extend(repeat(0).take(len));
|
|
|
+
|
|
|
+ // p0 = x0 * y0
|
|
|
+ mac3(&mut p.data[..], x0, y0);
|
|
|
+ p = p.normalize();
|
|
|
+
|
|
|
+ add2(&mut acc[..], &p.data[..]);
|
|
|
+ add2(&mut acc[b..], &p.data[..]);
|
|
|
+
|
|
|
+ // p1 = (x1 - x0) * (y1 - y0)
|
|
|
+ // We do this one last, since it may be negative and acc can't ever be negative:
|
|
|
+ let j0 = sub_sign(x1, x0);
|
|
|
+ let j1 = sub_sign(y1, y0);
|
|
|
+
|
|
|
+ match j0.sign * j1.sign {
|
|
|
+ Plus => {
|
|
|
+ p.data.truncate(0);
|
|
|
+ p.data.extend(repeat(0).take(len));
|
|
|
+
|
|
|
+ mac3(&mut p.data[..], &j0.data.data[..], &j1.data.data[..]);
|
|
|
+ p = p.normalize();
|
|
|
+
|
|
|
+ sub2(&mut acc[b..], &p.data[..]);
|
|
|
+ },
|
|
|
+ Minus => {
|
|
|
+ mac3(&mut acc[b..], &j0.data.data[..], &j1.data.data[..]);
|
|
|
+ },
|
|
|
+ NoSign => (),
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+fn mul3(x: &[BigDigit], y: &[BigDigit]) -> BigUint {
|
|
|
+ let len = x.len() + y.len() + 1;
|
|
|
+ let mut prod: BigUint = BigUint { data: Vec::with_capacity(len) };
|
|
|
+
|
|
|
+ // resize isn't stable yet:
|
|
|
+ //prod.data.resize(len, 0);
|
|
|
+ prod.data.extend(repeat(0).take(len));
|
|
|
+
|
|
|
+ mac3(&mut prod.data[..], x, y);
|
|
|
+ prod.normalize()
|
|
|
+}
|
|
|
+
|
|
|
+impl<'a, 'b> Mul<&'b BigUint> for &'a BigUint {
|
|
|
+ type Output = BigUint;
|
|
|
+
|
|
|
+ #[inline]
|
|
|
+ fn mul(self, other: &BigUint) -> BigUint {
|
|
|
+ mul3(&self.data[..], &other.data[..])
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+fn div_rem_digit(mut a: BigUint, b: BigDigit) -> (BigUint, BigDigit) {
|
|
|
+ let mut rem = 0;
|
|
|
+
|
|
|
+ for d in a.data.iter_mut().rev() {
|
|
|
+ let (q, r) = div_wide(rem, *d, b);
|
|
|
+ *d = q;
|
|
|
+ rem = r;
|
|
|
+ }
|
|
|
+
|
|
|
+ (a.normalize(), rem)
|
|
|
+}
|
|
|
|
|
|
forward_all_binop_to_ref_ref!(impl Div for BigUint, div);
|
|
|
|
|
@@ -703,83 +943,94 @@ impl Integer for BigUint {
|
|
|
if self.is_zero() { return (Zero::zero(), Zero::zero()); }
|
|
|
if *other == One::one() { return (self.clone(), Zero::zero()); }
|
|
|
|
|
|
+ /* Required or the q_len calculation below can underflow: */
|
|
|
match self.cmp(other) {
|
|
|
Less => return (Zero::zero(), self.clone()),
|
|
|
Equal => return (One::one(), Zero::zero()),
|
|
|
Greater => {} // Do nothing
|
|
|
}
|
|
|
|
|
|
- let mut shift = 0;
|
|
|
- let mut n = *other.data.last().unwrap();
|
|
|
- while n < (1 << big_digit::BITS - 2) {
|
|
|
- n <<= 1;
|
|
|
- shift += 1;
|
|
|
- }
|
|
|
- assert!(shift < big_digit::BITS);
|
|
|
- let (d, m) = div_mod_floor_inner(self << shift, other << shift);
|
|
|
- return (d, m >> shift);
|
|
|
+ /*
|
|
|
+ * This algorithm is from Knuth, TAOCP vol 2 section 4.3, algorithm D:
|
|
|
+ *
|
|
|
+ * First, normalize the arguments so the highest bit in the highest digit of the divisor is
|
|
|
+ * set: the main loop uses the highest digit of the divisor for generating guesses, so we
|
|
|
+ * want it to be the largest number we can efficiently divide by.
|
|
|
+ */
|
|
|
+ let shift = other.data.last().unwrap().leading_zeros() as usize;
|
|
|
+ let mut a = self << shift;
|
|
|
+ let b = other << shift;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * The algorithm works by incrementally calculating "guesses", q0, for part of the
|
|
|
+ * remainder. Once we have any number q0 such that q0 * b <= a, we can set
|
|
|
+ *
|
|
|
+ * q += q0
|
|
|
+ * a -= q0 * b
|
|
|
+ *
|
|
|
+ * and then iterate until a < b. Then, (q, a) will be our desired quotient and remainder.
|
|
|
+ *
|
|
|
+ * q0, our guess, is calculated by dividing the last few digits of a by the last digit of b
|
|
|
+ * - this should give us a guess that is "close" to the actual quotient, but is possibly
|
|
|
+ * greater than the actual quotient. If q0 * b > a, we simply use iterated subtraction
|
|
|
+ * until we have a guess such that q0 & b <= a.
|
|
|
+ */
|
|
|
+
|
|
|
+ let bn = *b.data.last().unwrap();
|
|
|
+ let q_len = a.data.len() - b.data.len() + 1;
|
|
|
+ let mut q: BigUint = BigUint { data: Vec::with_capacity(q_len) };
|
|
|
+
|
|
|
+ q.data.extend(repeat(0).take(q_len));
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We reuse the same temporary to avoid hitting the allocator in our inner loop - this is
|
|
|
+ * sized to hold a0 (in the common case; if a particular digit of the quotient is zero a0
|
|
|
+ * can be bigger).
|
|
|
+ */
|
|
|
+ let mut tmp: BigUint = BigUint { data: Vec::with_capacity(2) };
|
|
|
+
|
|
|
+ for j in (0..q_len).rev() {
|
|
|
+ /*
|
|
|
+ * When calculating our next guess q0, we don't need to consider the digits below j
|
|
|
+ * + b.data.len() - 1: we're guessing digit j of the quotient (i.e. q0 << j) from
|
|
|
+ * digit bn of the divisor (i.e. bn << (b.data.len() - 1) - so the product of those
|
|
|
+ * two numbers will be zero in all digits up to (j + b.data.len() - 1).
|
|
|
+ */
|
|
|
+ let offset = j + b.data.len() - 1;
|
|
|
+ if offset >= a.data.len() {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
|
|
|
+ /* just avoiding a heap allocation: */
|
|
|
+ let mut a0 = tmp;
|
|
|
+ a0.data.truncate(0);
|
|
|
+ a0.data.extend(a.data[offset..].iter().cloned());
|
|
|
|
|
|
- fn div_mod_floor_inner(a: BigUint, b: BigUint) -> (BigUint, BigUint) {
|
|
|
- let mut m = a;
|
|
|
- let mut d: BigUint = Zero::zero();
|
|
|
- let mut n = 1;
|
|
|
- while m >= b {
|
|
|
- let (d0, d_unit, b_unit) = div_estimate(&m, &b, n);
|
|
|
- let mut d0 = d0;
|
|
|
- let mut prod = &b * &d0;
|
|
|
- while prod > m {
|
|
|
- // FIXME(#5992): assignment operator overloads
|
|
|
- // d0 -= &d_unit
|
|
|
- d0 = d0 - &d_unit;
|
|
|
- // FIXME(#5992): assignment operator overloads
|
|
|
- // prod -= &b_unit;
|
|
|
- prod = prod - &b_unit
|
|
|
- }
|
|
|
- if d0.is_zero() {
|
|
|
- n = 2;
|
|
|
- continue;
|
|
|
- }
|
|
|
- n = 1;
|
|
|
- // FIXME(#5992): assignment operator overloads
|
|
|
- // d += d0;
|
|
|
- d = d + d0;
|
|
|
- // FIXME(#5992): assignment operator overloads
|
|
|
- // m -= prod;
|
|
|
- m = m - prod;
|
|
|
+ /*
|
|
|
+ * q0 << j * big_digit::BITS is our actual quotient estimate - we do the shifts
|
|
|
+ * implicitly at the end, when adding and subtracting to a and q. Not only do we
|
|
|
+ * save the cost of the shifts, the rest of the arithmetic gets to work with
|
|
|
+ * smaller numbers.
|
|
|
+ */
|
|
|
+ let (mut q0, _) = div_rem_digit(a0, bn);
|
|
|
+ let mut prod = &b * &q0;
|
|
|
+
|
|
|
+ while cmp_slice(&prod.data[..], &a.data[j..]) == Greater {
|
|
|
+ let one: BigUint = One::one();
|
|
|
+ q0 = q0 - one;
|
|
|
+ prod = prod - &b;
|
|
|
}
|
|
|
- return (d, m);
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- fn div_estimate(a: &BigUint, b: &BigUint, n: usize)
|
|
|
- -> (BigUint, BigUint, BigUint) {
|
|
|
- if a.data.len() < n {
|
|
|
- return (Zero::zero(), Zero::zero(), (*a).clone());
|
|
|
- }
|
|
|
-
|
|
|
- let an = &a.data[a.data.len() - n ..];
|
|
|
- let bn = *b.data.last().unwrap();
|
|
|
- let mut d = Vec::with_capacity(an.len());
|
|
|
- let mut carry = 0;
|
|
|
- for elt in an.iter().rev() {
|
|
|
- let ai = big_digit::to_doublebigdigit(carry, *elt);
|
|
|
- let di = ai / (bn as DoubleBigDigit);
|
|
|
- assert!(di < big_digit::BASE);
|
|
|
- carry = (ai % (bn as DoubleBigDigit)) as BigDigit;
|
|
|
- d.push(di as BigDigit)
|
|
|
- }
|
|
|
- d.reverse();
|
|
|
-
|
|
|
- let shift = (a.data.len() - an.len()) - (b.data.len() - 1);
|
|
|
- if shift == 0 {
|
|
|
- return (BigUint::new(d), One::one(), (*b).clone());
|
|
|
- }
|
|
|
- let one: BigUint = One::one();
|
|
|
- return (BigUint::new(d).shl_unit(shift),
|
|
|
- one.shl_unit(shift),
|
|
|
- b.shl_unit(shift));
|
|
|
- }
|
|
|
+
|
|
|
+ add2(&mut q.data[j..], &q0.data[..]);
|
|
|
+ sub2(&mut a.data[j..], &prod.data[..]);
|
|
|
+ a = a.normalize();
|
|
|
+
|
|
|
+ tmp = q0;
|
|
|
+ }
|
|
|
+
|
|
|
+ debug_assert!(a < b);
|
|
|
+
|
|
|
+ (q.normalize(), a >> shift)
|
|
|
}
|
|
|
|
|
|
/// Calculates the Greatest Common Divisor (GCD) of the number and `other`.
|
|
@@ -932,43 +1183,18 @@ fn to_str_radix_reversed(u: &BigUint, radix: u32) -> Vec<u8> {
|
|
|
vec![b'0']
|
|
|
} else {
|
|
|
let mut res = Vec::new();
|
|
|
- let mut digits = u.data.to_vec();
|
|
|
-
|
|
|
- while !digits.is_empty() {
|
|
|
- let rem = div_rem_in_place(&mut digits, radix);
|
|
|
- res.push(to_digit(rem as u8));
|
|
|
+ let mut digits = u.clone();
|
|
|
|
|
|
- // If we finished the most significant digit, drop it
|
|
|
- if let Some(&0) = digits.last() {
|
|
|
- digits.pop();
|
|
|
- }
|
|
|
+ while digits != Zero::zero() {
|
|
|
+ let (q, r) = div_rem_digit(digits, radix as BigDigit);
|
|
|
+ res.push(to_digit(r as u8));
|
|
|
+ digits = q;
|
|
|
}
|
|
|
|
|
|
res
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-fn div_rem_in_place(digits: &mut [BigDigit], divisor: BigDigit) -> BigDigit {
|
|
|
- let mut rem = 0;
|
|
|
-
|
|
|
- for d in digits.iter_mut().rev() {
|
|
|
- let (q, r) = full_div_rem(*d, divisor, rem);
|
|
|
- *d = q;
|
|
|
- rem = r;
|
|
|
- }
|
|
|
-
|
|
|
- rem
|
|
|
-}
|
|
|
-
|
|
|
-fn full_div_rem(a: BigDigit, b: BigDigit, borrow: BigDigit) -> (BigDigit, BigDigit) {
|
|
|
- let lo = a as DoubleBigDigit;
|
|
|
- let hi = borrow as DoubleBigDigit;
|
|
|
-
|
|
|
- let lhs = lo | (hi << big_digit::BITS);
|
|
|
- let rhs = b as DoubleBigDigit;
|
|
|
- ((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit)
|
|
|
-}
|
|
|
-
|
|
|
fn to_digit(b: u8) -> u8 {
|
|
|
match b {
|
|
|
0 ... 9 => b'0' + b,
|
|
@@ -982,12 +1208,8 @@ impl BigUint {
|
|
|
///
|
|
|
/// The digits are in little-endian base 2^32.
|
|
|
#[inline]
|
|
|
- pub fn new(mut digits: Vec<BigDigit>) -> BigUint {
|
|
|
- // omit trailing zeros
|
|
|
- while let Some(&0) = digits.last() {
|
|
|
- digits.pop();
|
|
|
- }
|
|
|
- BigUint { data: digits }
|
|
|
+ pub fn new(digits: Vec<BigDigit>) -> BigUint {
|
|
|
+ BigUint { data: digits }.normalize()
|
|
|
}
|
|
|
|
|
|
/// Creates and initializes a `BigUint`.
|
|
@@ -1176,6 +1398,16 @@ impl BigUint {
|
|
|
let zeros = self.data.last().unwrap().leading_zeros();
|
|
|
return self.data.len()*big_digit::BITS - zeros as usize;
|
|
|
}
|
|
|
+
|
|
|
+ /// Strips off trailing zero bigdigits - comparisons require the last element in the vector to
|
|
|
+ /// be nonzero.
|
|
|
+ #[inline]
|
|
|
+ fn normalize(mut self) -> BigUint {
|
|
|
+ while let Some(&0) = self.data.last() {
|
|
|
+ self.data.pop();
|
|
|
+ }
|
|
|
+ self
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
// `DoubleBigDigit` size dependent
|
|
@@ -3050,6 +3282,52 @@ mod biguint_tests {
|
|
|
// Switching u and l should fail:
|
|
|
let _n: BigUint = rng.gen_biguint_range(&u, &l);
|
|
|
}
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn test_sub_sign() {
|
|
|
+ use super::sub_sign;
|
|
|
+ let a = BigInt::from_str_radix("265252859812191058636308480000000", 10).unwrap();
|
|
|
+ let b = BigInt::from_str_radix("26525285981219105863630848000000", 10).unwrap();
|
|
|
+
|
|
|
+ assert_eq!(sub_sign(&a.data.data[..], &b.data.data[..]), &a - &b);
|
|
|
+ assert_eq!(sub_sign(&b.data.data[..], &a.data.data[..]), &b - &a);
|
|
|
+ }
|
|
|
+
|
|
|
+ fn test_mul_divide_torture_count(count: usize) {
|
|
|
+ use rand::{SeedableRng, StdRng, Rng};
|
|
|
+
|
|
|
+ let bits_max = 1 << 12;
|
|
|
+ let seed: &[_] = &[1, 2, 3, 4];
|
|
|
+ let mut rng: StdRng = SeedableRng::from_seed(seed);
|
|
|
+
|
|
|
+ for _ in 0..count {
|
|
|
+ /* Test with numbers of random sizes: */
|
|
|
+ let xbits = rng.gen_range(0, bits_max);
|
|
|
+ let ybits = rng.gen_range(0, bits_max);
|
|
|
+
|
|
|
+ let x = rng.gen_biguint(xbits);
|
|
|
+ let y = rng.gen_biguint(ybits);
|
|
|
+
|
|
|
+ if x.is_zero() || y.is_zero() {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ let prod = &x * &y;
|
|
|
+ assert_eq!(&prod / &x, y);
|
|
|
+ assert_eq!(&prod / &y, x);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn test_mul_divide_torture() {
|
|
|
+ test_mul_divide_torture_count(1000);
|
|
|
+ }
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ #[ignore]
|
|
|
+ fn test_mul_divide_torture_long() {
|
|
|
+ test_mul_divide_torture_count(1000000);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
#[cfg(test)]
|