|
@@ -185,3 +185,133 @@ macro_rules! impl_delegate {
|
|
|
}
|
|
|
};
|
|
|
}
|
|
|
+
|
|
|
+/// Returns `n / d` and sets `*rem = n % d`.
|
|
|
+///
|
|
|
+/// This specialization exists because:
|
|
|
+/// - The LLVM backend for 32-bit SPARC cannot compile functions that return `(u128, u128)`,
|
|
|
+/// so we have to use an old fashioned `&mut u128` argument to return the remainder.
|
|
|
+/// - 64-bit SPARC does not have u64 * u64 => u128 widening multiplication, which makes the
|
|
|
+/// delegate algorithm strategy the only reasonably fast way to perform `u128` division.
|
|
|
+#[doc(hidden)]
|
|
|
+pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
|
|
|
+ use super::*;
|
|
|
+ let duo_lo = duo as u64;
|
|
|
+ let duo_hi = (duo >> 64) as u64;
|
|
|
+ let div_lo = div as u64;
|
|
|
+ let div_hi = (div >> 64) as u64;
|
|
|
+
|
|
|
+ match (div_lo == 0, div_hi == 0, duo_hi == 0) {
|
|
|
+ (true, true, _) => zero_div_fn(),
|
|
|
+ (_, false, true) => {
|
|
|
+ *rem = duo;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ (false, true, true) => {
|
|
|
+ let tmp = u64_by_u64_div_rem(duo_lo, div_lo);
|
|
|
+ *rem = tmp.1 as u128;
|
|
|
+ return tmp.0 as u128;
|
|
|
+ }
|
|
|
+ (false, true, false) => {
|
|
|
+ if duo_hi < div_lo {
|
|
|
+ let norm_shift = u64_normalization_shift(div_lo, duo_hi, false);
|
|
|
+ let shl = if norm_shift == 0 {
|
|
|
+ 64 - 1
|
|
|
+ } else {
|
|
|
+ 64 - norm_shift
|
|
|
+ };
|
|
|
+
|
|
|
+ let mut div: u128 = div << shl;
|
|
|
+ let mut pow_lo: u64 = 1 << shl;
|
|
|
+ let mut quo_lo: u64 = 0;
|
|
|
+ let mut duo = duo;
|
|
|
+ loop {
|
|
|
+ let sub = duo.wrapping_sub(div);
|
|
|
+ if 0 <= (sub as i128) {
|
|
|
+ duo = sub;
|
|
|
+ quo_lo |= pow_lo;
|
|
|
+ let duo_hi = (duo >> 64) as u64;
|
|
|
+ if duo_hi == 0 {
|
|
|
+ let tmp = u64_by_u64_div_rem(duo as u64, div_lo);
|
|
|
+ *rem = tmp.1 as u128;
|
|
|
+ return (quo_lo | tmp.0) as u128;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ div >>= 1;
|
|
|
+ pow_lo >>= 1;
|
|
|
+ }
|
|
|
+ } else if duo_hi == div_lo {
|
|
|
+ let tmp = u64_by_u64_div_rem(duo as u64, div as u64);
|
|
|
+ *rem = tmp.1 as u128;
|
|
|
+ return (1 << 64) | (tmp.0 as u128);
|
|
|
+ } else {
|
|
|
+ if (div_lo >> 32) == 0 {
|
|
|
+ let div_0 = div_lo as u32 as u64;
|
|
|
+ let (quo_hi, rem_3) = u64_by_u64_div_rem(duo_hi, div_0);
|
|
|
+
|
|
|
+ let duo_mid = ((duo >> 32) as u32 as u64) | (rem_3 << 32);
|
|
|
+ let (quo_1, rem_2) = u64_by_u64_div_rem(duo_mid, div_0);
|
|
|
+
|
|
|
+ let duo_lo = (duo as u32 as u64) | (rem_2 << 32);
|
|
|
+ let (quo_0, rem_1) = u64_by_u64_div_rem(duo_lo, div_0);
|
|
|
+
|
|
|
+ *rem = rem_1 as u128;
|
|
|
+ return (quo_0 as u128) | ((quo_1 as u128) << 32) | ((quo_hi as u128) << 64);
|
|
|
+ }
|
|
|
+
|
|
|
+ let duo_lo = duo as u64;
|
|
|
+ let tmp = u64_by_u64_div_rem(duo_hi, div_lo);
|
|
|
+ let quo_hi = tmp.0;
|
|
|
+ let mut duo = (duo_lo as u128) | ((tmp.1 as u128) << 64);
|
|
|
+ if duo < div {
|
|
|
+ *rem = duo;
|
|
|
+ return (quo_hi as u128) << 64;
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut div: u128 = div << (64 - 1);
|
|
|
+ let mut pow_lo: u64 = 1 << (64 - 1);
|
|
|
+ let mut quo_lo: u64 = 0;
|
|
|
+ loop {
|
|
|
+ let sub = duo.wrapping_sub(div);
|
|
|
+ if 0 <= (sub as i128) {
|
|
|
+ duo = sub;
|
|
|
+ quo_lo |= pow_lo;
|
|
|
+ let duo_hi = (duo >> 64) as u64;
|
|
|
+ if duo_hi == 0 {
|
|
|
+ let tmp = u64_by_u64_div_rem(duo as u64, div_lo);
|
|
|
+ *rem = tmp.1 as u128;
|
|
|
+ return (tmp.0) as u128 | (quo_lo as u128) | ((quo_hi as u128) << 64);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ div >>= 1;
|
|
|
+ pow_lo >>= 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ (_, false, false) => {
|
|
|
+ if duo < div {
|
|
|
+ *rem = duo;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ let div_original = div;
|
|
|
+ let shl = u64_normalization_shift(duo_hi, div_hi, false);
|
|
|
+ let mut duo = duo;
|
|
|
+ let mut div: u128 = div << shl;
|
|
|
+ let mut pow_lo: u64 = 1 << shl;
|
|
|
+ let mut quo_lo: u64 = 0;
|
|
|
+ loop {
|
|
|
+ let sub = duo.wrapping_sub(div);
|
|
|
+ if 0 <= (sub as i128) {
|
|
|
+ duo = sub;
|
|
|
+ quo_lo |= pow_lo;
|
|
|
+ if duo < div_original {
|
|
|
+ *rem = duo;
|
|
|
+ return quo_lo as u128;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ div >>= 1;
|
|
|
+ pow_lo >>= 1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|