Browse Source

Merge pull request #380 from AaronKutch/division-tweaks

Amanieu d'Antras 4 years ago
parent
commit
3a769f6332

+ 2 - 0
src/int/leading_zeros.rs

@@ -4,6 +4,7 @@
 // Compilers will insert the check for zero in cases where it is needed.
 
 /// Returns the number of leading binary zeros in `x`.
+#[doc(hidden)]
 pub fn usize_leading_zeros_default(x: usize) -> usize {
     // The basic idea is to test if the higher bits of `x` are zero and bisect the number
     // of leading zeros. It is possible for all branches of the bisection to use the same
@@ -75,6 +76,7 @@ pub fn usize_leading_zeros_default(x: usize) -> usize {
 // RISC-V that allows `(x >= power-of-two) as usize` to be branchless.
 
 /// Returns the number of leading binary zeros in `x`.
+#[doc(hidden)]
 pub fn usize_leading_zeros_riscv(x: usize) -> usize {
     let mut x = x;
     // the number of potential leading zeros

+ 154 - 53
src/int/sdiv.rs

@@ -1,65 +1,166 @@
-use int::specialized_div_rem::*;
+use int::udiv::*;
 
-intrinsics! {
-    #[maybe_use_optimized_c_shim]
-    #[arm_aeabi_alias = __aeabi_idiv]
-    /// Returns `n / d`
-    pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 {
-        i32_div_rem(a, b).0
-    }
-
-    #[maybe_use_optimized_c_shim]
-    /// Returns `n % d`
-    pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 {
-        i32_div_rem(a, b).1
-    }
-
-    #[maybe_use_optimized_c_shim]
-    /// Returns `n / d` and sets `*rem = n % d`
-    pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 {
-        let quo_rem = i32_div_rem(a, b);
-        *rem = quo_rem.1;
-        quo_rem.0
+macro_rules! sdivmod {
+    (
+        $unsigned_fn:ident, // name of the unsigned division function
+        $signed_fn:ident, // name of the signed division function
+        $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_fn`
+        $iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
+        $($attr:tt),* // attributes
+    ) => {
+        intrinsics! {
+            $(
+                #[$attr]
+            )*
+            /// Returns `n / d` and sets `*rem = n % d`
+            pub extern "C" fn $signed_fn(a: $iX, b: $iX, rem: &mut $iX) -> $iX {
+                let a_neg = a < 0;
+                let b_neg = b < 0;
+                let mut a = a;
+                let mut b = b;
+                if a_neg {
+                    a = a.wrapping_neg();
+                }
+                if b_neg {
+                    b = b.wrapping_neg();
+                }
+                let mut r = *rem as $uX;
+                let t = $unsigned_fn(a as $uX, b as $uX, Some(&mut r)) as $iX;
+                let mut r = r as $iX;
+                if a_neg {
+                    r = r.wrapping_neg();
+                }
+                *rem = r;
+                if a_neg != b_neg {
+                    t.wrapping_neg()
+                } else {
+                    t
+                }
+            }
+        }
     }
+}
 
-    #[maybe_use_optimized_c_shim]
-    /// Returns `n / d`
-    pub extern "C" fn __divdi3(a: i64, b: i64) -> i64 {
-        i64_div_rem(a, b).0
+macro_rules! sdiv {
+    (
+        $unsigned_fn:ident, // name of the unsigned division function
+        $signed_fn:ident, // name of the signed division function
+        $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_fn`
+        $iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
+        $($attr:tt),* // attributes
+    ) => {
+        intrinsics! {
+            $(
+                #[$attr]
+            )*
+            /// Returns `n / d`
+            pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX {
+                let a_neg = a < 0;
+                let b_neg = b < 0;
+                let mut a = a;
+                let mut b = b;
+                if a_neg {
+                    a = a.wrapping_neg();
+                }
+                if b_neg {
+                    b = b.wrapping_neg();
+                }
+                let t = $unsigned_fn(a as $uX, b as $uX) as $iX;
+                if a_neg != b_neg {
+                    t.wrapping_neg()
+                } else {
+                    t
+                }
+            }
+        }
     }
+}
 
-    #[maybe_use_optimized_c_shim]
-    /// Returns `n % d`
-    pub extern "C" fn __moddi3(a: i64, b: i64) -> i64 {
-        i64_div_rem(a, b).1
+macro_rules! smod {
+    (
+        $unsigned_fn:ident, // name of the unsigned division function
+        $signed_fn:ident, // name of the signed division function
+        $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_fn`
+        $iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
+        $($attr:tt),* // attributes
+    ) => {
+        intrinsics! {
+            $(
+                #[$attr]
+            )*
+            /// Returns `n % d`
+            pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX {
+                let a_neg = a < 0;
+                let b_neg = b < 0;
+                let mut a = a;
+                let mut b = b;
+                if a_neg {
+                    a = a.wrapping_neg();
+                }
+                if b_neg {
+                    b = b.wrapping_neg();
+                }
+                let r = $unsigned_fn(a as $uX, b as $uX) as $iX;
+                if a_neg {
+                    r.wrapping_neg()
+                } else {
+                    r
+                }
+            }
+        }
     }
+}
 
+sdivmod!(
+    __udivmodsi4,
+    __divmodsi4,
+    u32,
+    i32,
+    maybe_use_optimized_c_shim
+);
+// The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` in macros
+intrinsics! {
     #[maybe_use_optimized_c_shim]
-    /// Returns `n / d` and sets `*rem = n % d`
-    pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 {
-        let quo_rem = i64_div_rem(a, b);
-        *rem = quo_rem.1;
-        quo_rem.0
-    }
-
-    #[win64_128bit_abi_hack]
+    #[arm_aeabi_alias = __aeabi_idiv]
     /// Returns `n / d`
-    pub extern "C" fn __divti3(a: i128, b: i128) -> i128 {
-        i128_div_rem(a, b).0
+    pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 {
+        let a_neg = a < 0;
+        let b_neg = b < 0;
+        let mut a = a;
+        let mut b = b;
+        if a_neg {
+            a = a.wrapping_neg();
+        }
+        if b_neg {
+            b = b.wrapping_neg();
+        }
+        let t = __udivsi3(a as u32, b as u32) as i32;
+        if a_neg != b_neg {
+            t.wrapping_neg()
+        } else {
+            t
+        }
     }
+}
+smod!(__umodsi3, __modsi3, u32, i32, maybe_use_optimized_c_shim);
 
-    #[win64_128bit_abi_hack]
-    /// Returns `n % d`
-    pub extern "C" fn __modti3(a: i128, b: i128) -> i128 {
-        i128_div_rem(a, b).1
-    }
+sdivmod!(
+    __udivmoddi4,
+    __divmoddi4,
+    u64,
+    i64,
+    maybe_use_optimized_c_shim
+);
+sdiv!(__udivdi3, __divdi3, u64, i64, maybe_use_optimized_c_shim);
+smod!(__umoddi3, __moddi3, u64, i64, maybe_use_optimized_c_shim);
 
-    // LLVM does not currently have a `__divmodti4` function, but GCC does
-    #[maybe_use_optimized_c_shim]
-    /// Returns `n / d` and sets `*rem = n % d`
-    pub extern "C" fn __divmodti4(a: i128, b: i128, rem: &mut i128) -> i128 {
-        let quo_rem = i128_div_rem(a, b);
-        *rem = quo_rem.1;
-        quo_rem.0
-    }
-}
+// LLVM does not currently have a `__divmodti4` function, but GCC does
+sdivmod!(
+    __udivmodti4,
+    __divmodti4,
+    u128,
+    i128,
+    maybe_use_optimized_c_shim
+);
+sdiv!(__udivti3, __divti3, u128, i128, win64_128bit_abi_hack);
+smod!(__umodti3, __modti3, u128, i128, win64_128bit_abi_hack);

+ 25 - 124
src/int/specialized_div_rem/asymmetric.rs

@@ -1,44 +1,26 @@
-/// Creates unsigned and signed division functions optimized for dividing integers with the same
+/// Creates an unsigned division function optimized for dividing integers with the same
 /// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an
 /// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits
 /// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to
 /// construct a full 128 bit by 128 bit division.
+#[doc(hidden)]
 #[macro_export]
 macro_rules! impl_asymmetric {
     (
-        $unsigned_name:ident, // name of the unsigned division function
-        $signed_name:ident, // name of the signed division function
+        $fn:ident, // name of the unsigned division function
         $zero_div_fn:ident, // function called when division by zero is attempted
         $half_division:ident, // function for division of a $uX by a $uX
         $asymmetric_division:ident, // function for division of a $uD by a $uX
         $n_h:expr, // the number of bits in a $iH or $uH
         $uH:ident, // unsigned integer with half the bit width of $uX
         $uX:ident, // unsigned integer with half the bit width of $uD
-        $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
-        $iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
-        $($unsigned_attr:meta),*; // attributes for the unsigned function
-        $($signed_attr:meta),* // attributes for the signed function
+        $uD:ident // unsigned integer type for the inputs and outputs of `$fn`
     ) => {
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        $(
-            #[$unsigned_attr]
-        )*
-        pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD,$uD) {
-            fn carrying_mul(lhs: $uX, rhs: $uX) -> ($uX, $uX) {
-                let tmp = (lhs as $uD).wrapping_mul(rhs as $uD);
-                (tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
-            }
-            fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) {
-                let tmp = (lhs as $uD).wrapping_mul(mul as $uD).wrapping_add(add as $uD);
-                (tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
-            }
-
+        pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
             let n: u32 = $n_h * 2;
 
-            // Many of these subalgorithms are taken from trifecta.rs, see that for better
-            // documentation.
-
             let duo_lo = duo as $uX;
             let duo_hi = (duo >> n) as $uX;
             let div_lo = div as $uX;
@@ -50,120 +32,39 @@ macro_rules! impl_asymmetric {
                 if duo_hi < div_lo {
                     // `$uD` by `$uX` division with a quotient that will fit into a `$uX`
                     let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) };
-                    return (quo as $uD, rem as $uD)
-                } else if (div_lo >> $n_h) == 0 {
-                    // Short division of $uD by a $uH.
-
-                    // Some x86_64 CPUs have bad division implementations that make specializing
-                    // this case faster.
-                    let div_0 = div_lo as $uH as $uX;
-                    let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
-
-                    let duo_mid =
-                        ((duo >> $n_h) as $uH as $uX)
-                        | (rem_3 << $n_h);
-                    let (quo_1, rem_2) = $half_division(duo_mid, div_0);
-
-                    let duo_lo =
-                        (duo as $uH as $uX)
-                        | (rem_2 << $n_h);
-                    let (quo_0, rem_1) = $half_division(duo_lo, div_0);
-
-                    return (
-                        (quo_0 as $uD)
-                        | ((quo_1 as $uD) << $n_h)
-                        | ((quo_hi as $uD) << n),
-                        rem_1 as $uD
-                    )
+                    return (quo as $uD, rem as $uD);
                 } else {
                     // Short division using the $uD by $uX division
                     let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo);
                     let tmp = unsafe {
                         $asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo)
                     };
-                    return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD)
+                    return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD);
                 }
             }
 
-            let duo_lz = duo_hi.leading_zeros();
+            // This has been adapted from
+            // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn
+            // adapted from Hacker's Delight. This is similar to the two possibility algorithm
+            // in that it uses only more significant parts of `duo` and `div` to divide a large
+            // integer with a smaller division instruction.
             let div_lz = div_hi.leading_zeros();
-            let rel_leading_sb = div_lz.wrapping_sub(duo_lz);
-            if rel_leading_sb < $n_h {
-                // Some x86_64 CPUs have bad hardware division implementations that make putting
-                // a two possibility algorithm here beneficial. We also avoid a full `$uD`
-                // multiplication.
-                let shift = n - duo_lz;
-                let duo_sig_n = (duo >> shift) as $uX;
-                let div_sig_n = (div >> shift) as $uX;
-                let quo = $half_division(duo_sig_n, div_sig_n).0;
-                let div_lo = div as $uX;
-                let div_hi = (div >> n) as $uX;
-                let (tmp_lo, carry) = carrying_mul(quo, div_lo);
-                let (tmp_hi, overflow) = carrying_mul_add(quo, div_hi, carry);
-                let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n);
-                if (overflow != 0) || (duo < tmp) {
-                    return (
-                        (quo - 1) as $uD,
-                        duo.wrapping_add(div).wrapping_sub(tmp)
-                    )
-                } else {
-                    return (
-                        quo as $uD,
-                        duo - tmp
-                    )
-                }
-            } else {
-                // This has been adapted from
-                // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn
-                // adapted from Hacker's Delight. This is similar to the two possibility algorithm
-                // in that it uses only more significant parts of `duo` and `div` to divide a large
-                // integer with a smaller division instruction.
-
-                let div_extra = n - div_lz;
-                let div_sig_n = (div >> div_extra) as $uX;
-                let tmp = unsafe {
-                    $asymmetric_division(duo >> 1, div_sig_n)
-                };
-
-                let mut quo = tmp.0 >> ((n - 1) - div_lz);
-                if quo != 0 {
-                    quo -= 1;
-                }
+            let div_extra = n - div_lz;
+            let div_sig_n = (div >> div_extra) as $uX;
+            let tmp = unsafe { $asymmetric_division(duo >> 1, div_sig_n) };
 
-                // Note that this is a full `$uD` multiplication being used here
-                let mut rem = duo - (quo as $uD).wrapping_mul(div);
-                if div <= rem {
-                    quo += 1;
-                    rem -= div;
-                }
-                return (quo as $uD, rem)
+            let mut quo = tmp.0 >> ((n - 1) - div_lz);
+            if quo != 0 {
+                quo -= 1;
             }
-        }
 
-        /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
-        /// tuple.
-        $(
-            #[$signed_attr]
-        )*
-        pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
-            match (duo < 0, div < 0) {
-                (false, false) => {
-                    let t = $unsigned_name(duo as $uD, div as $uD);
-                    (t.0 as $iD, t.1 as $iD)
-                },
-                (true, false) => {
-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
-                    ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
-                },
-                (false, true) => {
-                    let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
-                    ((t.0 as $iD).wrapping_neg(), t.1 as $iD)
-                },
-                (true, true) => {
-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
-                    (t.0 as $iD, (t.1 as $iD).wrapping_neg())
-                },
+            // Note that this is a full `$uD` multiplication being used here
+            let mut rem = duo - (quo as $uD).wrapping_mul(div);
+            if div <= rem {
+                quo += 1;
+                rem -= div;
             }
+            return (quo as $uD, rem);
         }
-    }
+    };
 }

+ 9 - 56
src/int/specialized_div_rem/binary_long.rs

@@ -1,35 +1,30 @@
-/// Creates unsigned and signed division functions that use binary long division, designed for
+/// Creates an unsigned division function that uses binary long division, designed for
 /// computer architectures without division instructions. These functions have good performance for
 /// microarchitectures with large branch miss penalties and architectures without the ability to
 /// predicate instructions. For architectures with predicated instructions, one of the algorithms
 /// described in the documentation of these functions probably has higher performance, and a custom
 /// assembly routine should be used instead.
+#[doc(hidden)]
 #[macro_export]
 macro_rules! impl_binary_long {
     (
-        $unsigned_name:ident, // name of the unsigned division function
-        $signed_name:ident, // name of the signed division function
+        $fn:ident, // name of the unsigned division function
         $zero_div_fn:ident, // function called when division by zero is attempted
         $normalization_shift:ident, // function for finding the normalization shift
         $n:tt, // the number of bits in a $iX or $uX
-        $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
-        $iX:ident, // signed integer type for the inputs and outputs of `$signed_name`
-        $($unsigned_attr:meta),*; // attributes for the unsigned function
-        $($signed_attr:meta),* // attributes for the signed function
+        $uX:ident, // unsigned integer type for the inputs and outputs of `$fn`
+        $iX:ident // signed integer type with the same bitwidth as `$uX`
     ) => {
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        $(
-            #[$unsigned_attr]
-        )*
-        pub fn $unsigned_name(duo: $uX, div: $uX) -> ($uX, $uX) {
+        pub fn $fn(duo: $uX, div: $uX) -> ($uX, $uX) {
             let mut duo = duo;
             // handle edge cases before calling `$normalization_shift`
             if div == 0 {
                 $zero_div_fn()
             }
             if duo < div {
-                return (0, duo)
+                return (0, duo);
             }
 
             // There are many variations of binary division algorithm that could be used. This
@@ -430,7 +425,7 @@ macro_rules! impl_binary_long {
             let mut i = shl;
             loop {
                 if i == 0 {
-                    break
+                    break;
                 }
                 i -= 1;
                 // shift left 1 and subtract
@@ -550,47 +545,5 @@ macro_rules! impl_binary_long {
             return ((duo & mask) | quo, duo >> shl);
             */
         }
-
-        /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
-        /// tuple.
-        $(
-            #[$signed_attr]
-        )*
-        pub fn $signed_name(duo: $iX, div: $iX) -> ($iX, $iX) {
-            // There is a way of doing this without any branches, but requires too many extra
-            // operations to be faster.
-            /*
-            let duo_s = duo >> ($n - 1);
-            let div_s = div >> ($n - 1);
-            let duo = (duo ^ duo_s).wrapping_sub(duo_s);
-            let div = (div ^ div_s).wrapping_sub(div_s);
-            let quo_s = duo_s ^ div_s;
-            let rem_s = duo_s;
-            let tmp = $unsigned_name(duo as $uX, div as $uX);
-            (
-                ((tmp.0 as $iX) ^ quo_s).wrapping_sub(quo_s),
-                ((tmp.1 as $iX) ^ rem_s).wrapping_sub(rem_s),
-            )
-            */
-
-            match (duo < 0, div < 0) {
-                (false, false) => {
-                    let t = $unsigned_name(duo as $uX, div as $uX);
-                    (t.0 as $iX, t.1 as $iX)
-                },
-                (true, false) => {
-                    let t = $unsigned_name(duo.wrapping_neg() as $uX, div as $uX);
-                    ((t.0 as $iX).wrapping_neg(), (t.1 as $iX).wrapping_neg())
-                },
-                (false, true) => {
-                    let t = $unsigned_name(duo as $uX, div.wrapping_neg() as $uX);
-                    ((t.0 as $iX).wrapping_neg(), t.1 as $iX)
-                },
-                (true, true) => {
-                    let t = $unsigned_name(duo.wrapping_neg() as $uX, div.wrapping_neg() as $uX);
-                    (t.0 as $iX, (t.1 as $iX).wrapping_neg())
-                },
-            }
-        }
-    }
+    };
 }

+ 19 - 58
src/int/specialized_div_rem/delegate.rs

@@ -1,29 +1,24 @@
-/// Creates unsigned and signed division functions that use a combination of hardware division and
+/// Creates an unsigned division function that uses a combination of hardware division and
 /// binary long division to divide integers larger than what hardware division by itself can do. This
 /// function is intended for microarchitectures that have division hardware, but not fast enough
 /// multiplication hardware for `impl_trifecta` to be faster.
+#[doc(hidden)]
 #[macro_export]
 macro_rules! impl_delegate {
     (
-        $unsigned_name:ident, // name of the unsigned division function
-        $signed_name:ident, // name of the signed division function
+        $fn:ident, // name of the unsigned division function
         $zero_div_fn:ident, // function called when division by zero is attempted
         $half_normalization_shift:ident, // function for finding the normalization shift of $uX
         $half_division:ident, // function for division of a $uX by a $uX
         $n_h:expr, // the number of bits in $iH or $uH
         $uH:ident, // unsigned integer with half the bit width of $uX
         $uX:ident, // unsigned integer with half the bit width of $uD.
-        $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
-        $iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
-        $($unsigned_attr:meta),*; // attributes for the unsigned function
-        $($signed_attr:meta),* // attributes for the signed function
+        $uD:ident, // unsigned integer type for the inputs and outputs of `$fn`
+        $iD:ident // signed integer type with the same bitwidth as `$uD`
     ) => {
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        $(
-            #[$unsigned_attr]
-        )*
-        pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) {
+        pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
             // The two possibility algorithm, undersubtracting long division algorithm, or any kind
             // of reciprocal based algorithm will not be fastest, because they involve large
             // multiplications that we assume to not be fast enough relative to the divisions to
@@ -38,17 +33,15 @@ macro_rules! impl_delegate {
             let div_hi = (div >> n) as $uX;
 
             match (div_lo == 0, div_hi == 0, duo_hi == 0) {
-                (true, true, _) => {
-                    $zero_div_fn()
-                }
+                (true, true, _) => $zero_div_fn(),
                 (_, false, true) => {
                     // `duo` < `div`
-                    return (0, duo)
+                    return (0, duo);
                 }
                 (false, true, true) => {
                     // delegate to smaller division
                     let tmp = $half_division(duo_lo, div_lo);
-                    return (tmp.0 as $uD, tmp.1 as $uD)
+                    return (tmp.0 as $uD, tmp.1 as $uD);
                 }
                 (false, true, false) => {
                     if duo_hi < div_lo {
@@ -96,7 +89,7 @@ macro_rules! impl_delegate {
                                     // Delegate to get the rest of the quotient. Note that the
                                     // `div_lo` here is the original unshifted `div`.
                                     let tmp = $half_division(duo as $uX, div_lo);
-                                    return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD)
+                                    return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD);
                                 }
                             }
                             div >>= 1;
@@ -105,7 +98,7 @@ macro_rules! impl_delegate {
                     } else if duo_hi == div_lo {
                         // `quo_hi == 1`. This branch is cheap and helps with edge cases.
                         let tmp = $half_division(duo as $uX, div as $uX);
-                        return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD)
+                        return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD);
                     } else {
                         // `div_lo < duo_hi`
                         // `rem_hi == 0`
@@ -114,22 +107,16 @@ macro_rules! impl_delegate {
                             let div_0 = div_lo as $uH as $uX;
                             let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
 
-                            let duo_mid =
-                                ((duo >> $n_h) as $uH as $uX)
-                                | (rem_3 << $n_h);
+                            let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h);
                             let (quo_1, rem_2) = $half_division(duo_mid, div_0);
 
-                            let duo_lo =
-                                (duo as $uH as $uX)
-                                | (rem_2 << $n_h);
+                            let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h);
                             let (quo_0, rem_1) = $half_division(duo_lo, div_0);
 
                             return (
-                                (quo_0 as $uD)
-                                | ((quo_1 as $uD) << $n_h)
-                                | ((quo_hi as $uD) << n),
-                                rem_1 as $uD
-                            )
+                                (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n),
+                                rem_1 as $uD,
+                            );
                         }
 
                         // This is basically a short division composed of a half division for the hi
@@ -161,7 +148,7 @@ macro_rules! impl_delegate {
                                     let tmp = $half_division(duo as $uX, div_lo);
                                     return (
                                         (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n),
-                                        tmp.1 as $uD
+                                        tmp.1 as $uD,
                                     );
                                 }
                             }
@@ -187,7 +174,7 @@ macro_rules! impl_delegate {
                             duo = sub;
                             quo_lo |= pow_lo;
                             if duo < div_original {
-                                return (quo_lo as $uD, duo)
+                                return (quo_lo as $uD, duo);
                             }
                         }
                         div >>= 1;
@@ -196,31 +183,5 @@ macro_rules! impl_delegate {
                 }
             }
         }
-
-        /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
-        /// tuple.
-        $(
-            #[$signed_attr]
-        )*
-        pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
-            match (duo < 0, div < 0) {
-                (false, false) => {
-                    let t = $unsigned_name(duo as $uD, div as $uD);
-                    (t.0 as $iD, t.1 as $iD)
-                },
-                (true, false) => {
-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
-                    ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
-                },
-                (false, true) => {
-                    let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
-                    ((t.0 as $iD).wrapping_neg(), t.1 as $iD)
-                },
-                (true, true) => {
-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
-                    (t.0 as $iD, (t.1 as $iD).wrapping_neg())
-                },
-            }
-        }
-    }
+    };
 }

+ 7 - 38
src/int/specialized_div_rem/mod.rs

@@ -111,13 +111,6 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
     zero_div_fn()
 }
 
-// `inline(never)` is placed on unsigned division functions so that there are just three division
-// functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins`
-// division functions. The signed functions like `i32_div_rem` will get inlined into the
-// `compiler-builtins` signed division functions, so that they directly call the three division
-// functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the
-// signed division functions, which results in an explosion in code size.
-
 // Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a
 // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
 // faster if the target pointer width is at least 64.
@@ -127,16 +120,12 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
 ))]
 impl_trifecta!(
     u128_div_rem,
-    i128_div_rem,
     zero_div_fn,
     u64_by_u64_div_rem,
     32,
     u32,
     u64,
-    u128,
-    i128,
-    inline(never);
-    inline
+    u128
 );
 
 // If the pointer width is less than 64, then the target architecture almost certainly does not have
@@ -147,7 +136,6 @@ impl_trifecta!(
 ))]
 impl_delegate!(
     u128_div_rem,
-    i128_div_rem,
     zero_div_fn,
     u64_normalization_shift,
     u64_by_u64_div_rem,
@@ -155,9 +143,7 @@ impl_delegate!(
     u32,
     u64,
     u128,
-    i128,
-    inline(never);
-    inline
+    i128
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -191,17 +177,13 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
 #[cfg(all(feature = "asm", target_arch = "x86_64"))]
 impl_asymmetric!(
     u128_div_rem,
-    i128_div_rem,
     zero_div_fn,
     u64_by_u64_div_rem,
     u128_by_u64_div_rem,
     32,
     u32,
     u64,
-    u128,
-    i128,
-    inline(never);
-    inline
+    u128
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -226,7 +208,6 @@ fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) {
 ))]
 impl_delegate!(
     u64_div_rem,
-    i64_div_rem,
     zero_div_fn,
     u32_normalization_shift,
     u32_by_u32_div_rem,
@@ -234,9 +215,7 @@ impl_delegate!(
     u16,
     u32,
     u64,
-    i64,
-    inline(never);
-    inline
+    i64
 );
 
 // When not on x86 and the pointer width is 64, use `binary_long`.
@@ -246,14 +225,11 @@ impl_delegate!(
 ))]
 impl_binary_long!(
     u64_div_rem,
-    i64_div_rem,
     zero_div_fn,
     u64_normalization_shift,
     64,
     u64,
-    i64,
-    inline(never);
-    inline
+    i64
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -287,28 +263,21 @@ unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) {
 #[cfg(all(feature = "asm", target_arch = "x86"))]
 impl_asymmetric!(
     u64_div_rem,
-    i64_div_rem,
     zero_div_fn,
     u32_by_u32_div_rem,
     u64_by_u32_div_rem,
     16,
     u16,
     u32,
-    u64,
-    i64,
-    inline(never);
-    inline
+    u64
 );
 
 // 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
 impl_binary_long!(
     u32_div_rem,
-    i32_div_rem,
     zero_div_fn,
     u32_normalization_shift,
     32,
     u32,
-    i32,
-    inline(never);
-    inline
+    i32
 );

+ 1 - 0
src/int/specialized_div_rem/norm_shift.rs

@@ -1,4 +1,5 @@
 /// Creates a function used by some division algorithms to compute the "normalization shift".
+#[doc(hidden)]
 #[macro_export]
 macro_rules! impl_normalization_shift {
     (

+ 26 - 80
src/int/specialized_div_rem/trifecta.rs

@@ -1,28 +1,22 @@
-/// Creates unsigned and signed division functions optimized for division of integers with bitwidths
+/// Creates an unsigned division function optimized for division of integers with bitwidths
 /// larger than the largest hardware integer division supported. These functions use large radix
 /// division algorithms that require both fast division and very fast widening multiplication on the
 /// target microarchitecture. Otherwise, `impl_delegate` should be used instead.
+#[doc(hidden)]
 #[macro_export]
 macro_rules! impl_trifecta {
     (
-        $unsigned_name:ident, // name of the unsigned division function
-        $signed_name:ident, // name of the signed division function
+        $fn:ident, // name of the unsigned division function
         $zero_div_fn:ident, // function called when division by zero is attempted
         $half_division:ident, // function for division of a $uX by a $uX
         $n_h:expr, // the number of bits in $iH or $uH
         $uH:ident, // unsigned integer with half the bit width of $uX
         $uX:ident, // unsigned integer with half the bit width of $uD
-        $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
-        $iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
-        $($unsigned_attr:meta),*; // attributes for the unsigned function
-        $($signed_attr:meta),* // attributes for the signed function
+        $uD:ident // unsigned integer type for the inputs and outputs of `$fn`
     ) => {
         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
         /// tuple.
-        $(
-            #[$unsigned_attr]
-        )*
-        pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) {
+        pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
             // This is called the trifecta algorithm because it uses three main algorithms: short
             // division for small divisors, the two possibility algorithm for large divisors, and an
             // undersubtracting long division algorithm for intermediate cases.
@@ -34,7 +28,9 @@ macro_rules! impl_trifecta {
                 (tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
             }
             fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) {
-                let tmp = (lhs as $uD).wrapping_mul(mul as $uD).wrapping_add(add as $uD);
+                let tmp = (lhs as $uD)
+                    .wrapping_mul(mul as $uD)
+                    .wrapping_add(add as $uD);
                 (tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
             }
 
@@ -62,9 +58,9 @@ macro_rules! impl_trifecta {
                 // The quotient cannot be more than 1. The highest set bit of `duo` needs to be at
                 // least one place higher than `div` for the quotient to be more than 1.
                 if duo >= div {
-                    return (1, duo - div)
+                    return (1, duo - div);
                 } else {
-                    return (0, duo)
+                    return (0, duo);
                 }
             }
 
@@ -76,10 +72,7 @@ macro_rules! impl_trifecta {
                 // `duo < 2^n` so it will fit in a $uX. `div` will also fit in a $uX (because of the
                 // `div_lz <= duo_lz` branch) so no numerical error.
                 let (quo, rem) = $half_division(duo as $uX, div as $uX);
-                return (
-                    quo as $uD,
-                    rem as $uD
-                )
+                return (quo as $uD, rem as $uD);
             }
 
             // `{2^n, 2^div_sb} <= duo < 2^n_d`
@@ -99,22 +92,16 @@ macro_rules! impl_trifecta {
                 let div_0 = div as $uH as $uX;
                 let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
 
-                let duo_mid =
-                    ((duo >> $n_h) as $uH as $uX)
-                    | (rem_3 << $n_h);
+                let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h);
                 let (quo_1, rem_2) = $half_division(duo_mid, div_0);
 
-                let duo_lo =
-                    (duo as $uH as $uX)
-                    | (rem_2 << $n_h);
+                let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h);
                 let (quo_0, rem_1) = $half_division(duo_lo, div_0);
 
                 return (
-                    (quo_0 as $uD)
-                    | ((quo_1 as $uD) << $n_h)
-                    | ((quo_hi as $uD) << n),
-                    rem_1 as $uD
-                )
+                    (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n),
+                    rem_1 as $uD,
+                );
             }
 
             // relative leading significant bits, cannot overflow because of above branches
@@ -237,13 +224,10 @@ macro_rules! impl_trifecta {
                         (quo - 1) as $uD,
                         // Both the addition and subtraction can overflow, but when combined end up
                         // as a correct positive number.
-                        duo.wrapping_add(div).wrapping_sub(tmp)
-                    )
+                        duo.wrapping_add(div).wrapping_sub(tmp),
+                    );
                 } else {
-                    return (
-                        quo as $uD,
-                        duo - tmp
-                    )
+                    return (quo as $uD, duo - tmp);
                 }
             }
 
@@ -372,13 +356,10 @@ macro_rules! impl_trifecta {
                     if duo < tmp {
                         return (
                             quo + ((quo_part - 1) as $uD),
-                            duo.wrapping_add(div).wrapping_sub(tmp)
-                        )
+                            duo.wrapping_add(div).wrapping_sub(tmp),
+                        );
                     } else {
-                        return (
-                            quo + (quo_part as $uD),
-                            duo - tmp
-                        )
+                        return (quo + (quo_part as $uD), duo - tmp);
                     }
                 }
 
@@ -387,15 +368,9 @@ macro_rules! impl_trifecta {
                 if div_lz <= duo_lz {
                     // quotient can have 0 or 1 added to it
                     if div <= duo {
-                        return (
-                            quo + 1,
-                            duo - div
-                        )
+                        return (quo + 1, duo - div);
                     } else {
-                        return (
-                            quo,
-                            duo
-                        )
+                        return (quo, duo);
                     }
                 }
 
@@ -404,38 +379,9 @@ macro_rules! impl_trifecta {
                 if n <= duo_lz {
                     // simple division and addition
                     let tmp = $half_division(duo as $uX, div as $uX);
-                    return (
-                        quo + (tmp.0 as $uD),
-                        tmp.1 as $uD
-                    )
+                    return (quo + (tmp.0 as $uD), tmp.1 as $uD);
                 }
             }
         }
-
-        /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
-        /// tuple.
-        $(
-            #[$signed_attr]
-        )*
-        pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
-            match (duo < 0, div < 0) {
-                (false, false) => {
-                    let t = $unsigned_name(duo as $uD, div as $uD);
-                    (t.0 as $iD, t.1 as $iD)
-                },
-                (true, false) => {
-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
-                    ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
-                },
-                (false, true) => {
-                    let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
-                    ((t.0 as $iD).wrapping_neg(), t.1 as $iD)
-                },
-                (true, true) => {
-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
-                    (t.0 as $iD, (t.1 as $iD).wrapping_neg())
-                },
-            }
-        }
-    }
+    };
 }