4 年之前 · 1d15e4e504
--- a/src/int/sdiv.rs
+++ b/src/int/sdiv.rs
@@ -1,65 +1,166 @@
 
				-use int::specialized_div_rem::*;
			
 
				+use int::udiv::*;
			
 
				 
			
 
				-intrinsics! {
			
 
				-    #[maybe_use_optimized_c_shim]
			
 
				-    #[arm_aeabi_alias = __aeabi_idiv]
			
 
				-    /// Returns `n / d`
			
 
				-    pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 {
			
 
				-        i32_div_rem(a, b).0
			
 
				-    }
			
 
				-
			
 
				-    #[maybe_use_optimized_c_shim]
			
 
				-    /// Returns `n % d`
			
 
				-    pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 {
			
 
				-        i32_div_rem(a, b).1
			
 
				-    }
			
 
				-
			
 
				-    #[maybe_use_optimized_c_shim]
			
 
				-    /// Returns `n / d` and sets `*rem = n % d`
			
 
				-    pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 {
			
 
				-        let quo_rem = i32_div_rem(a, b);
			
 
				-        *rem = quo_rem.1;
			
 
				-        quo_rem.0
			
 
				+macro_rules! sdivmod {
			
 
				+    (
			
 
				+        $unsigned_fn:ident, // name of the unsigned division function
			
 
				+        $signed_fn:ident, // name of the signed division function
			
 
				+        $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_fn`
			
 
				+        $iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
			
 
				+        $($attr:tt),* // attributes
			
 
				+    ) => {
			
 
				+        intrinsics! {
			
 
				+            $(
			
 
				+                #[$attr]
			
 
				+            )*
			
 
				+            /// Returns `n / d` and sets `*rem = n % d`
			
 
				+            pub extern "C" fn $signed_fn(a: $iX, b: $iX, rem: &mut $iX) -> $iX {
			
 
				+                let a_neg = a < 0;
			
 
				+                let b_neg = b < 0;
			
 
				+                let mut a = a;
			
 
				+                let mut b = b;
			
 
				+                if a_neg {
			
 
				+                    a = a.wrapping_neg();
			
 
				+                }
			
 
				+                if b_neg {
			
 
				+                    b = b.wrapping_neg();
			
 
				+                }
			
 
				+                let mut r = *rem as $uX;
			
 
				+                let t = $unsigned_fn(a as $uX, b as $uX, Some(&mut r)) as $iX;
			
 
				+                let mut r = r as $iX;
			
 
				+                if a_neg {
			
 
				+                    r = r.wrapping_neg();
			
 
				+                }
			
 
				+                *rem = r;
			
 
				+                if a_neg != b_neg {
			
 
				+                    t.wrapping_neg()
			
 
				+                } else {
			
 
				+                    t
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				     }
			
 
				+}
			
 
				 
			
 
				-    #[maybe_use_optimized_c_shim]
			
 
				-    /// Returns `n / d`
			
 
				-    pub extern "C" fn __divdi3(a: i64, b: i64) -> i64 {
			
 
				-        i64_div_rem(a, b).0
			
 
				+macro_rules! sdiv {
			
 
				+    (
			
 
				+        $unsigned_fn:ident, // name of the unsigned division function
			
 
				+        $signed_fn:ident, // name of the signed division function
			
 
				+        $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_fn`
			
 
				+        $iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
			
 
				+        $($attr:tt),* // attributes
			
 
				+    ) => {
			
 
				+        intrinsics! {
			
 
				+            $(
			
 
				+                #[$attr]
			
 
				+            )*
			
 
				+            /// Returns `n / d`
			
 
				+            pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX {
			
 
				+                let a_neg = a < 0;
			
 
				+                let b_neg = b < 0;
			
 
				+                let mut a = a;
			
 
				+                let mut b = b;
			
 
				+                if a_neg {
			
 
				+                    a = a.wrapping_neg();
			
 
				+                }
			
 
				+                if b_neg {
			
 
				+                    b = b.wrapping_neg();
			
 
				+                }
			
 
				+                let t = $unsigned_fn(a as $uX, b as $uX) as $iX;
			
 
				+                if a_neg != b_neg {
			
 
				+                    t.wrapping_neg()
			
 
				+                } else {
			
 
				+                    t
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				     }
			
 
				+}
			
 
				 
			
 
				-    #[maybe_use_optimized_c_shim]
			
 
				-    /// Returns `n % d`
			
 
				-    pub extern "C" fn __moddi3(a: i64, b: i64) -> i64 {
			
 
				-        i64_div_rem(a, b).1
			
 
				+macro_rules! smod {
			
 
				+    (
			
 
				+        $unsigned_fn:ident, // name of the unsigned remainder function
			
 
				+        $signed_fn:ident, // name of the signed remainder function
			
 
				+        $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_fn`
			
 
				+        $iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
			
 
				+        $($attr:tt),* // attributes
			
 
				+    ) => {
			
 
				+        intrinsics! {
			
 
				+            $(
			
 
				+                #[$attr]
			
 
				+            )*
			
 
				+            /// Returns `n % d`
			
 
				+            pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX {
			
 
				+                let a_neg = a < 0;
			
 
				+                let b_neg = b < 0;
			
 
				+                let mut a = a;
			
 
				+                let mut b = b;
			
 
				+                if a_neg {
			
 
				+                    a = a.wrapping_neg();
			
 
				+                }
			
 
				+                if b_neg {
			
 
				+                    b = b.wrapping_neg();
			
 
				+                }
			
 
				+                let r = $unsigned_fn(a as $uX, b as $uX) as $iX;
			
 
				+                if a_neg {
			
 
				+                    r.wrapping_neg()
			
 
				+                } else {
			
 
				+                    r
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				     }
			
 
				+}
			
 
				 
			
 
				+sdivmod!(
			
 
				+    __udivmodsi4,
			
 
				+    __divmodsi4,
			
 
				+    u32,
			
 
				+    i32,
			
 
				+    maybe_use_optimized_c_shim
			
 
				+);
			
 
				+// The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` in macros
			
 
				+intrinsics! {
			
 
				     #[maybe_use_optimized_c_shim]
			
 
				-    /// Returns `n / d` and sets `*rem = n % d`
			
 
				-    pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 {
			
 
				-        let quo_rem = i64_div_rem(a, b);
			
 
				-        *rem = quo_rem.1;
			
 
				-        quo_rem.0
			
 
				-    }
			
 
				-
			
 
				-    #[win64_128bit_abi_hack]
			
 
				+    #[arm_aeabi_alias = __aeabi_idiv]
			
 
				     /// Returns `n / d`
			
 
				-    pub extern "C" fn __divti3(a: i128, b: i128) -> i128 {
			
 
				-        i128_div_rem(a, b).0
			
 
				+    pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 {
			
 
				+        let a_neg = a < 0;
			
 
				+        let b_neg = b < 0;
			
 
				+        let mut a = a;
			
 
				+        let mut b = b;
			
 
				+        if a_neg {
			
 
				+            a = a.wrapping_neg();
			
 
				+        }
			
 
				+        if b_neg {
			
 
				+            b = b.wrapping_neg();
			
 
				+        }
			
 
				+        let t = __udivsi3(a as u32, b as u32) as i32;
			
 
				+        if a_neg != b_neg {
			
 
				+            t.wrapping_neg()
			
 
				+        } else {
			
 
				+            t
			
 
				+        }
			
 
				     }
			
 
				+}
			
 
				+smod!(__umodsi3, __modsi3, u32, i32, maybe_use_optimized_c_shim);
			
 
				 
			
 
				-    #[win64_128bit_abi_hack]
			
 
				-    /// Returns `n % d`
			
 
				-    pub extern "C" fn __modti3(a: i128, b: i128) -> i128 {
			
 
				-        i128_div_rem(a, b).1
			
 
				-    }
			
 
				+sdivmod!(
			
 
				+    __udivmoddi4,
			
 
				+    __divmoddi4,
			
 
				+    u64,
			
 
				+    i64,
			
 
				+    maybe_use_optimized_c_shim
			
 
				+);
			
 
				+sdiv!(__udivdi3, __divdi3, u64, i64, maybe_use_optimized_c_shim);
			
 
				+smod!(__umoddi3, __moddi3, u64, i64, maybe_use_optimized_c_shim);
			
 
				 
			
 
				-    // LLVM does not currently have a `__divmodti4` function, but GCC does
			
 
				-    #[maybe_use_optimized_c_shim]
			
 
				-    /// Returns `n / d` and sets `*rem = n % d`
			
 
				-    pub extern "C" fn __divmodti4(a: i128, b: i128, rem: &mut i128) -> i128 {
			
 
				-        let quo_rem = i128_div_rem(a, b);
			
 
				-        *rem = quo_rem.1;
			
 
				-        quo_rem.0
			
 
				-    }
			
 
				-}
			
 
				+// LLVM does not currently have a `__divmodti4` function, but GCC does
			
 
				+sdivmod!(
			
 
				+    __udivmodti4,
			
 
				+    __divmodti4,
			
 
				+    u128,
			
 
				+    i128,
			
 
				+    maybe_use_optimized_c_shim
			
 
				+);
			
 
				+sdiv!(__udivti3, __divti3, u128, i128, win64_128bit_abi_hack);
			
 
				+smod!(__umodti3, __modti3, u128, i128, win64_128bit_abi_hack);
			
--- a/src/int/specialized_div_rem/asymmetric.rs
+++ b/src/int/specialized_div_rem/asymmetric.rs
@@ -1,4 +1,4 @@
 
				-/// Creates unsigned and signed division functions optimized for dividing integers with the same
			
 
				+/// Creates an unsigned division function optimized for dividing integers with the same
			
 
				 /// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an
			
 
				 /// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits
			
 
				 /// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to
			
@@ -6,25 +6,18 @@
 
				 #[macro_export]
			
 
				 macro_rules! impl_asymmetric {
			
 
				     (
			
 
				-        $unsigned_name:ident, // name of the unsigned division function
			
 
				-        $signed_name:ident, // name of the signed division function
			
 
				+        $fn:ident, // name of the unsigned division function
			
 
				         $zero_div_fn:ident, // function called when division by zero is attempted
			
 
				         $half_division:ident, // function for division of a $uX by a $uX
			
 
				         $asymmetric_division:ident, // function for division of a $uD by a $uX
			
 
				         $n_h:expr, // the number of bits in a $iH or $uH
			
 
				         $uH:ident, // unsigned integer with half the bit width of $uX
			
 
				         $uX:ident, // unsigned integer with half the bit width of $uD
			
 
				-        $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
			
 
				-        $iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
			
 
				-        $($unsigned_attr:meta),*; // attributes for the unsigned function
			
 
				-        $($signed_attr:meta),* // attributes for the signed function
			
 
				+        $uD:ident // unsigned integer type for the inputs and outputs of `$fn`
			
 
				     ) => {
			
 
				         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
			
 
				         /// tuple.
			
 
				-        $(
			
 
				-            #[$unsigned_attr]
			
 
				-        )*
			
 
				-        pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD,$uD) {
			
 
				+        pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
			
 
				             let n: u32 = $n_h * 2;
			
 
				 
			
 
				             let duo_lo = duo as $uX;
			
@@ -38,14 +31,14 @@ macro_rules! impl_asymmetric {
 
				                 if duo_hi < div_lo {
			
 
				                     // `$uD` by `$uX` division with a quotient that will fit into a `$uX`
			
 
				                     let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) };
			
 
				-                    return (quo as $uD, rem as $uD)
			
 
				+                    return (quo as $uD, rem as $uD);
			
 
				                 } else {
			
 
				                     // Short division using the $uD by $uX division
			
 
				                     let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo);
			
 
				                     let tmp = unsafe {
			
 
				                         $asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo)
			
 
				                     };
			
 
				-                    return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD)
			
 
				+                    return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD);
			
 
				                 }
			
 
				             }
			
 
				 
			
@@ -57,9 +50,7 @@ macro_rules! impl_asymmetric {
 
				             let div_lz = div_hi.leading_zeros();
			
 
				             let div_extra = n - div_lz;
			
 
				             let div_sig_n = (div >> div_extra) as $uX;
			
 
				-            let tmp = unsafe {
			
 
				-                $asymmetric_division(duo >> 1, div_sig_n)
			
 
				-            };
			
 
				+            let tmp = unsafe { $asymmetric_division(duo >> 1, div_sig_n) };
			
 
				 
			
 
				             let mut quo = tmp.0 >> ((n - 1) - div_lz);
			
 
				             if quo != 0 {
			
@@ -72,33 +63,7 @@ macro_rules! impl_asymmetric {
 
				                 quo += 1;
			
 
				                 rem -= div;
			
 
				             }
			
 
				-            return (quo as $uD, rem)
			
 
				+            return (quo as $uD, rem);
			
 
				         }
			
 
				-
			
 
				-        /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
			
 
				-        /// tuple.
			
 
				-        $(
			
 
				-            #[$signed_attr]
			
 
				-        )*
			
 
				-        pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
			
 
				-            match (duo < 0, div < 0) {
			
 
				-                (false, false) => {
			
 
				-                    let t = $unsigned_name(duo as $uD, div as $uD);
			
 
				-                    (t.0 as $iD, t.1 as $iD)
			
 
				-                },
			
 
				-                (true, false) => {
			
 
				-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
			
 
				-                    ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
			
 
				-                },
			
 
				-                (false, true) => {
			
 
				-                    let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
			
 
				-                    ((t.0 as $iD).wrapping_neg(), t.1 as $iD)
			
 
				-                },
			
 
				-                (true, true) => {
			
 
				-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
			
 
				-                    (t.0 as $iD, (t.1 as $iD).wrapping_neg())
			
 
				-                },
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				+    };
			
 
				 }
			
--- a/src/int/specialized_div_rem/binary_long.rs
+++ b/src/int/specialized_div_rem/binary_long.rs
@@ -1,4 +1,4 @@
 
				-/// Creates unsigned and signed division functions that use binary long division, designed for
			
 
				+/// Creates an unsigned division function that uses binary long division, designed for
			
 
				 /// computer architectures without division instructions. These functions have good performance for
			
 
				 /// microarchitectures with large branch miss penalties and architectures without the ability to
			
 
				 /// predicate instructions. For architectures with predicated instructions, one of the algorithms
			
@@ -7,29 +7,23 @@
 
				 #[macro_export]
			
 
				 macro_rules! impl_binary_long {
			
 
				     (
			
 
				-        $unsigned_name:ident, // name of the unsigned division function
			
 
				-        $signed_name:ident, // name of the signed division function
			
 
				+        $fn:ident, // name of the unsigned division function
			
 
				         $zero_div_fn:ident, // function called when division by zero is attempted
			
 
				         $normalization_shift:ident, // function for finding the normalization shift
			
 
				         $n:tt, // the number of bits in a $iX or $uX
			
 
				-        $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
			
 
				-        $iX:ident, // signed integer type for the inputs and outputs of `$signed_name`
			
 
				-        $($unsigned_attr:meta),*; // attributes for the unsigned function
			
 
				-        $($signed_attr:meta),* // attributes for the signed function
			
 
				+        $uX:ident, // unsigned integer type for the inputs and outputs of `$fn`
			
 
				+        $iX:ident // signed integer type with same bitwidth as `$uX`
			
 
				     ) => {
			
 
				         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
			
 
				         /// tuple.
			
 
				-        $(
			
 
				-            #[$unsigned_attr]
			
 
				-        )*
			
 
				-        pub fn $unsigned_name(duo: $uX, div: $uX) -> ($uX, $uX) {
			
 
				+        pub fn $fn(duo: $uX, div: $uX) -> ($uX, $uX) {
			
 
				             let mut duo = duo;
			
 
				             // handle edge cases before calling `$normalization_shift`
			
 
				             if div == 0 {
			
 
				                 $zero_div_fn()
			
 
				             }
			
 
				             if duo < div {
			
 
				-                return (0, duo)
			
 
				+                return (0, duo);
			
 
				             }
			
 
				 
			
 
				             // There are many variations of binary division algorithm that could be used. This
			
@@ -430,7 +424,7 @@ macro_rules! impl_binary_long {
 
				             let mut i = shl;
			
 
				             loop {
			
 
				                 if i == 0 {
			
 
				-                    break
			
 
				+                    break;
			
 
				                 }
			
 
				                 i -= 1;
			
 
				                 // shift left 1 and subtract
			
@@ -550,47 +544,5 @@ macro_rules! impl_binary_long {
 
				             return ((duo & mask) | quo, duo >> shl);
			
 
				             */
			
 
				         }
			
 
				-
			
 
				-        /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
			
 
				-        /// tuple.
			
 
				-        $(
			
 
				-            #[$signed_attr]
			
 
				-        )*
			
 
				-        pub fn $signed_name(duo: $iX, div: $iX) -> ($iX, $iX) {
			
 
				-            // There is a way of doing this without any branches, but requires too many extra
			
 
				-            // operations to be faster.
			
 
				-            /*
			
 
				-            let duo_s = duo >> ($n - 1);
			
 
				-            let div_s = div >> ($n - 1);
			
 
				-            let duo = (duo ^ duo_s).wrapping_sub(duo_s);
			
 
				-            let div = (div ^ div_s).wrapping_sub(div_s);
			
 
				-            let quo_s = duo_s ^ div_s;
			
 
				-            let rem_s = duo_s;
			
 
				-            let tmp = $unsigned_name(duo as $uX, div as $uX);
			
 
				-            (
			
 
				-                ((tmp.0 as $iX) ^ quo_s).wrapping_sub(quo_s),
			
 
				-                ((tmp.1 as $iX) ^ rem_s).wrapping_sub(rem_s),
			
 
				-            )
			
 
				-            */
			
 
				-
			
 
				-            match (duo < 0, div < 0) {
			
 
				-                (false, false) => {
			
 
				-                    let t = $unsigned_name(duo as $uX, div as $uX);
			
 
				-                    (t.0 as $iX, t.1 as $iX)
			
 
				-                },
			
 
				-                (true, false) => {
			
 
				-                    let t = $unsigned_name(duo.wrapping_neg() as $uX, div as $uX);
			
 
				-                    ((t.0 as $iX).wrapping_neg(), (t.1 as $iX).wrapping_neg())
			
 
				-                },
			
 
				-                (false, true) => {
			
 
				-                    let t = $unsigned_name(duo as $uX, div.wrapping_neg() as $uX);
			
 
				-                    ((t.0 as $iX).wrapping_neg(), t.1 as $iX)
			
 
				-                },
			
 
				-                (true, true) => {
			
 
				-                    let t = $unsigned_name(duo.wrapping_neg() as $uX, div.wrapping_neg() as $uX);
			
 
				-                    (t.0 as $iX, (t.1 as $iX).wrapping_neg())
			
 
				-                },
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				+    };
			
 
				 }
			
--- a/src/int/specialized_div_rem/delegate.rs
+++ b/src/int/specialized_div_rem/delegate.rs
@@ -1,29 +1,23 @@
 
				-/// Creates unsigned and signed division functions that use a combination of hardware division and
			
 
				+/// Creates an unsigned division function that uses a combination of hardware division and
			
 
				 /// binary long division to divide integers larger than what hardware division by itself can do. This
			
 
				 /// function is intended for microarchitectures that have division hardware, but not fast enough
			
 
				 /// multiplication hardware for `impl_trifecta` to be faster.
			
 
				 #[macro_export]
			
 
				 macro_rules! impl_delegate {
			
 
				     (
			
 
				-        $unsigned_name:ident, // name of the unsigned division function
			
 
				-        $signed_name:ident, // name of the signed division function
			
 
				+        $fn:ident, // name of the unsigned division function
			
 
				         $zero_div_fn:ident, // function called when division by zero is attempted
			
 
				         $half_normalization_shift:ident, // function for finding the normalization shift of $uX
			
 
				         $half_division:ident, // function for division of a $uX by a $uX
			
 
				         $n_h:expr, // the number of bits in $iH or $uH
			
 
				         $uH:ident, // unsigned integer with half the bit width of $uX
			
 
				         $uX:ident, // unsigned integer with half the bit width of $uD.
			
 
				-        $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
			
 
				-        $iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
			
 
				-        $($unsigned_attr:meta),*; // attributes for the unsigned function
			
 
				-        $($signed_attr:meta),* // attributes for the signed function
			
 
				+        $uD:ident, // unsigned integer type for the inputs and outputs of `$fn`
			
 
				+        $iD:ident // signed integer type with the same bitwidth as `$uD`
			
 
				     ) => {
			
 
				         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
			
 
				         /// tuple.
			
 
				-        $(
			
 
				-            #[$unsigned_attr]
			
 
				-        )*
			
 
				-        pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) {
			
 
				+        pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
			
 
				             // The two possibility algorithm, undersubtracting long division algorithm, or any kind
			
 
				             // of reciprocal based algorithm will not be fastest, because they involve large
			
 
				             // multiplications that we assume to not be fast enough relative to the divisions to
			
@@ -38,17 +32,15 @@ macro_rules! impl_delegate {
 
				             let div_hi = (div >> n) as $uX;
			
 
				 
			
 
				             match (div_lo == 0, div_hi == 0, duo_hi == 0) {
			
 
				-                (true, true, _) => {
			
 
				-                    $zero_div_fn()
			
 
				-                }
			
 
				+                (true, true, _) => $zero_div_fn(),
			
 
				                 (_, false, true) => {
			
 
				                     // `duo` < `div`
			
 
				-                    return (0, duo)
			
 
				+                    return (0, duo);
			
 
				                 }
			
 
				                 (false, true, true) => {
			
 
				                     // delegate to smaller division
			
 
				                     let tmp = $half_division(duo_lo, div_lo);
			
 
				-                    return (tmp.0 as $uD, tmp.1 as $uD)
			
 
				+                    return (tmp.0 as $uD, tmp.1 as $uD);
			
 
				                 }
			
 
				                 (false, true, false) => {
			
 
				                     if duo_hi < div_lo {
			
@@ -96,7 +88,7 @@ macro_rules! impl_delegate {
 
				                                     // Delegate to get the rest of the quotient. Note that the
			
 
				                                     // `div_lo` here is the original unshifted `div`.
			
 
				                                     let tmp = $half_division(duo as $uX, div_lo);
			
 
				-                                    return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD)
			
 
				+                                    return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD);
			
 
				                                 }
			
 
				                             }
			
 
				                             div >>= 1;
			
@@ -105,7 +97,7 @@ macro_rules! impl_delegate {
 
				                     } else if duo_hi == div_lo {
			
 
				                         // `quo_hi == 1`. This branch is cheap and helps with edge cases.
			
 
				                         let tmp = $half_division(duo as $uX, div as $uX);
			
 
				-                        return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD)
			
 
				+                        return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD);
			
 
				                     } else {
			
 
				                         // `div_lo < duo_hi`
			
 
				                         // `rem_hi == 0`
			
@@ -114,22 +106,16 @@ macro_rules! impl_delegate {
 
				                             let div_0 = div_lo as $uH as $uX;
			
 
				                             let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
			
 
				 
			
 
				-                            let duo_mid =
			
 
				-                                ((duo >> $n_h) as $uH as $uX)
			
 
				-                                | (rem_3 << $n_h);
			
 
				+                            let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h);
			
 
				                             let (quo_1, rem_2) = $half_division(duo_mid, div_0);
			
 
				 
			
 
				-                            let duo_lo =
			
 
				-                                (duo as $uH as $uX)
			
 
				-                                | (rem_2 << $n_h);
			
 
				+                            let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h);
			
 
				                             let (quo_0, rem_1) = $half_division(duo_lo, div_0);
			
 
				 
			
 
				                             return (
			
 
				-                                (quo_0 as $uD)
			
 
				-                                | ((quo_1 as $uD) << $n_h)
			
 
				-                                | ((quo_hi as $uD) << n),
			
 
				-                                rem_1 as $uD
			
 
				-                            )
			
 
				+                                (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n),
			
 
				+                                rem_1 as $uD,
			
 
				+                            );
			
 
				                         }
			
 
				 
			
 
				                         // This is basically a short division composed of a half division for the hi
			
@@ -161,7 +147,7 @@ macro_rules! impl_delegate {
 
				                                     let tmp = $half_division(duo as $uX, div_lo);
			
 
				                                     return (
			
 
				                                         (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n),
			
 
				-                                        tmp.1 as $uD
			
 
				+                                        tmp.1 as $uD,
			
 
				                                     );
			
 
				                                 }
			
 
				                             }
			
@@ -187,7 +173,7 @@ macro_rules! impl_delegate {
 
				                             duo = sub;
			
 
				                             quo_lo |= pow_lo;
			
 
				                             if duo < div_original {
			
 
				-                                return (quo_lo as $uD, duo)
			
 
				+                                return (quo_lo as $uD, duo);
			
 
				                             }
			
 
				                         }
			
 
				                         div >>= 1;
			
@@ -196,31 +182,5 @@ macro_rules! impl_delegate {
 
				                 }
			
 
				             }
			
 
				         }
			
 
				-
			
 
				-        /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
			
 
				-        /// tuple.
			
 
				-        $(
			
 
				-            #[$signed_attr]
			
 
				-        )*
			
 
				-        pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
			
 
				-            match (duo < 0, div < 0) {
			
 
				-                (false, false) => {
			
 
				-                    let t = $unsigned_name(duo as $uD, div as $uD);
			
 
				-                    (t.0 as $iD, t.1 as $iD)
			
 
				-                },
			
 
				-                (true, false) => {
			
 
				-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
			
 
				-                    ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
			
 
				-                },
			
 
				-                (false, true) => {
			
 
				-                    let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
			
 
				-                    ((t.0 as $iD).wrapping_neg(), t.1 as $iD)
			
 
				-                },
			
 
				-                (true, true) => {
			
 
				-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
			
 
				-                    (t.0 as $iD, (t.1 as $iD).wrapping_neg())
			
 
				-                },
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				+    };
			
 
				 }
			
--- a/src/int/specialized_div_rem/mod.rs
+++ b/src/int/specialized_div_rem/mod.rs
@@ -111,13 +111,6 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
 
				     zero_div_fn()
			
 
				 }
			
 
				 
			
 
				-// `inline(never)` is placed on unsigned division functions so that there are just three division
			
 
				-// functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins`
			
 
				-// division functions. The signed functions like `i32_div_rem` will get inlined into the
			
 
				-// `compiler-builtins` signed division functions, so that they directly call the three division
			
 
				-// functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the
			
 
				-// signed division functions, which results in an explosion in code size.
			
 
				-
			
 
				 // Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a
			
 
				 // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
			
 
				 // faster if the target pointer width is at least 64.
			
@@ -127,16 +120,12 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
 
				 ))]
			
 
				 impl_trifecta!(
			
 
				     u128_div_rem,
			
 
				-    i128_div_rem,
			
 
				     zero_div_fn,
			
 
				     u64_by_u64_div_rem,
			
 
				     32,
			
 
				     u32,
			
 
				     u64,
			
 
				-    u128,
			
 
				-    i128,
			
 
				-    inline(never);
			
 
				-    inline
			
 
				+    u128
			
 
				 );
			
 
				 
			
 
				 // If the pointer width less than 64, then the target architecture almost certainly does not have
			
@@ -147,7 +136,6 @@ impl_trifecta!(
 
				 ))]
			
 
				 impl_delegate!(
			
 
				     u128_div_rem,
			
 
				-    i128_div_rem,
			
 
				     zero_div_fn,
			
 
				     u64_normalization_shift,
			
 
				     u64_by_u64_div_rem,
			
@@ -155,9 +143,7 @@ impl_delegate!(
 
				     u32,
			
 
				     u64,
			
 
				     u128,
			
 
				-    i128,
			
 
				-    inline(never);
			
 
				-    inline
			
 
				+    i128
			
 
				 );
			
 
				 
			
 
				 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
			
@@ -191,17 +177,13 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
 
				 #[cfg(all(feature = "asm", target_arch = "x86_64"))]
			
 
				 impl_asymmetric!(
			
 
				     u128_div_rem,
			
 
				-    i128_div_rem,
			
 
				     zero_div_fn,
			
 
				     u64_by_u64_div_rem,
			
 
				     u128_by_u64_div_rem,
			
 
				     32,
			
 
				     u32,
			
 
				     u64,
			
 
				-    u128,
			
 
				-    i128,
			
 
				-    inline(never);
			
 
				-    inline
			
 
				+    u128
			
 
				 );
			
 
				 
			
 
				 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
			
@@ -226,7 +208,6 @@ fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) {
 
				 ))]
			
 
				 impl_delegate!(
			
 
				     u64_div_rem,
			
 
				-    i64_div_rem,
			
 
				     zero_div_fn,
			
 
				     u32_normalization_shift,
			
 
				     u32_by_u32_div_rem,
			
@@ -234,9 +215,7 @@ impl_delegate!(
 
				     u16,
			
 
				     u32,
			
 
				     u64,
			
 
				-    i64,
			
 
				-    inline(never);
			
 
				-    inline
			
 
				+    i64
			
 
				 );
			
 
				 
			
 
				 // When not on x86 and the pointer width is 64, use `binary_long`.
			
@@ -246,14 +225,11 @@ impl_delegate!(
 
				 ))]
			
 
				 impl_binary_long!(
			
 
				     u64_div_rem,
			
 
				-    i64_div_rem,
			
 
				     zero_div_fn,
			
 
				     u64_normalization_shift,
			
 
				     64,
			
 
				     u64,
			
 
				-    i64,
			
 
				-    inline(never);
			
 
				-    inline
			
 
				+    i64
			
 
				 );
			
 
				 
			
 
				 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
			
@@ -287,28 +263,21 @@ unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) {
 
				 #[cfg(all(feature = "asm", target_arch = "x86"))]
			
 
				 impl_asymmetric!(
			
 
				     u64_div_rem,
			
 
				-    i64_div_rem,
			
 
				     zero_div_fn,
			
 
				     u32_by_u32_div_rem,
			
 
				     u64_by_u32_div_rem,
			
 
				     16,
			
 
				     u16,
			
 
				     u32,
			
 
				-    u64,
			
 
				-    i64,
			
 
				-    inline(never);
			
 
				-    inline
			
 
				+    u64
			
 
				 );
			
 
				 
			
 
				 // 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
			
 
				 impl_binary_long!(
			
 
				     u32_div_rem,
			
 
				-    i32_div_rem,
			
 
				     zero_div_fn,
			
 
				     u32_normalization_shift,
			
 
				     32,
			
 
				     u32,
			
 
				-    i32,
			
 
				-    inline(never);
			
 
				-    inline
			
 
				+    i32
			
 
				 );
			
--- a/src/int/specialized_div_rem/trifecta.rs
+++ b/src/int/specialized_div_rem/trifecta.rs
@@ -1,28 +1,21 @@
 
				-/// Creates unsigned and signed division functions optimized for division of integers with bitwidths
			
 
				+/// Creates an unsigned division function optimized for division of integers with bitwidths
			
 
				 /// larger than the largest hardware integer division supported. These functions use large radix
			
 
				 /// division algorithms that require both fast division and very fast widening multiplication on the
			
 
				 /// target microarchitecture. Otherwise, `impl_delegate` should be used instead.
			
 
				 #[macro_export]
			
 
				 macro_rules! impl_trifecta {
			
 
				     (
			
 
				-        $unsigned_name:ident, // name of the unsigned division function
			
 
				-        $signed_name:ident, // name of the signed division function
			
 
				+        $fn:ident, // name of the unsigned division function
			
 
				         $zero_div_fn:ident, // function called when division by zero is attempted
			
 
				         $half_division:ident, // function for division of a $uX by a $uX
			
 
				         $n_h:expr, // the number of bits in $iH or $uH
			
 
				         $uH:ident, // unsigned integer with half the bit width of $uX
			
 
				         $uX:ident, // unsigned integer with half the bit width of $uD
			
 
				-        $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
			
 
				-        $iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
			
 
				-        $($unsigned_attr:meta),*; // attributes for the unsigned function
			
 
				-        $($signed_attr:meta),* // attributes for the signed function
			
 
				+        $uD:ident // unsigned integer type for the inputs and outputs of `$fn`
			
 
				     ) => {
			
 
				         /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
			
 
				         /// tuple.
			
 
				-        $(
			
 
				-            #[$unsigned_attr]
			
 
				-        )*
			
 
				-        pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) {
			
 
				+        pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
			
 
				             // This is called the trifecta algorithm because it uses three main algorithms: short
			
 
				             // division for small divisors, the two possibility algorithm for large divisors, and an
			
 
				             // undersubtracting long division algorithm for intermediate cases.
			
@@ -34,7 +27,9 @@ macro_rules! impl_trifecta {
 
				                 (tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
			
 
				             }
			
 
				             fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) {
			
 
				-                let tmp = (lhs as $uD).wrapping_mul(mul as $uD).wrapping_add(add as $uD);
			
 
				+                let tmp = (lhs as $uD)
			
 
				+                    .wrapping_mul(mul as $uD)
			
 
				+                    .wrapping_add(add as $uD);
			
 
				                 (tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
			
 
				             }
			
 
				 
			
@@ -62,9 +57,9 @@ macro_rules! impl_trifecta {
 
				                 // The quotient cannot be more than 1. The highest set bit of `duo` needs to be at
			
 
				                 // least one place higher than `div` for the quotient to be more than 1.
			
 
				                 if duo >= div {
			
 
				-                    return (1, duo - div)
			
 
				+                    return (1, duo - div);
			
 
				                 } else {
			
 
				-                    return (0, duo)
			
 
				+                    return (0, duo);
			
 
				                 }
			
 
				             }
			
 
				 
			
@@ -76,10 +71,7 @@ macro_rules! impl_trifecta {
 
				                 // `duo < 2^n` so it will fit in a $uX. `div` will also fit in a $uX (because of the
			
 
				                 // `div_lz <= duo_lz` branch) so no numerical error.
			
 
				                 let (quo, rem) = $half_division(duo as $uX, div as $uX);
			
 
				-                return (
			
 
				-                    quo as $uD,
			
 
				-                    rem as $uD
			
 
				-                )
			
 
				+                return (quo as $uD, rem as $uD);
			
 
				             }
			
 
				 
			
 
				             // `{2^n, 2^div_sb} <= duo < 2^n_d`
			
@@ -99,22 +91,16 @@ macro_rules! impl_trifecta {
 
				                 let div_0 = div as $uH as $uX;
			
 
				                 let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
			
 
				 
			
 
				-                let duo_mid =
			
 
				-                    ((duo >> $n_h) as $uH as $uX)
			
 
				-                    | (rem_3 << $n_h);
			
 
				+                let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h);
			
 
				                 let (quo_1, rem_2) = $half_division(duo_mid, div_0);
			
 
				 
			
 
				-                let duo_lo =
			
 
				-                    (duo as $uH as $uX)
			
 
				-                    | (rem_2 << $n_h);
			
 
				+                let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h);
			
 
				                 let (quo_0, rem_1) = $half_division(duo_lo, div_0);
			
 
				 
			
 
				                 return (
			
 
				-                    (quo_0 as $uD)
			
 
				-                    | ((quo_1 as $uD) << $n_h)
			
 
				-                    | ((quo_hi as $uD) << n),
			
 
				-                    rem_1 as $uD
			
 
				-                )
			
 
				+                    (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n),
			
 
				+                    rem_1 as $uD,
			
 
				+                );
			
 
				             }
			
 
				 
			
 
				             // relative leading significant bits, cannot overflow because of above branches
			
@@ -237,13 +223,10 @@ macro_rules! impl_trifecta {
 
				                         (quo - 1) as $uD,
			
 
				                         // Both the addition and subtraction can overflow, but when combined end up
			
 
				                         // as a correct positive number.
			
 
				-                        duo.wrapping_add(div).wrapping_sub(tmp)
			
 
				-                    )
			
 
				+                        duo.wrapping_add(div).wrapping_sub(tmp),
			
 
				+                    );
			
 
				                 } else {
			
 
				-                    return (
			
 
				-                        quo as $uD,
			
 
				-                        duo - tmp
			
 
				-                    )
			
 
				+                    return (quo as $uD, duo - tmp);
			
 
				                 }
			
 
				             }
			
 
				 
			
@@ -372,13 +355,10 @@ macro_rules! impl_trifecta {
 
				                     if duo < tmp {
			
 
				                         return (
			
 
				                             quo + ((quo_part - 1) as $uD),
			
 
				-                            duo.wrapping_add(div).wrapping_sub(tmp)
			
 
				-                        )
			
 
				+                            duo.wrapping_add(div).wrapping_sub(tmp),
			
 
				+                        );
			
 
				                     } else {
			
 
				-                        return (
			
 
				-                            quo + (quo_part as $uD),
			
 
				-                            duo - tmp
			
 
				-                        )
			
 
				+                        return (quo + (quo_part as $uD), duo - tmp);
			
 
				                     }
			
 
				                 }
			
 
				 
			
@@ -387,15 +367,9 @@ macro_rules! impl_trifecta {
 
				                 if div_lz <= duo_lz {
			
 
				                     // quotient can have 0 or 1 added to it
			
 
				                     if div <= duo {
			
 
				-                        return (
			
 
				-                            quo + 1,
			
 
				-                            duo - div
			
 
				-                        )
			
 
				+                        return (quo + 1, duo - div);
			
 
				                     } else {
			
 
				-                        return (
			
 
				-                            quo,
			
 
				-                            duo
			
 
				-                        )
			
 
				+                        return (quo, duo);
			
 
				                     }
			
 
				                 }
			
 
				 
			
@@ -404,38 +378,9 @@ macro_rules! impl_trifecta {
 
				                 if n <= duo_lz {
			
 
				                     // simple division and addition
			
 
				                     let tmp = $half_division(duo as $uX, div as $uX);
			
 
				-                    return (
			
 
				-                        quo + (tmp.0 as $uD),
			
 
				-                        tmp.1 as $uD
			
 
				-                    )
			
 
				+                    return (quo + (tmp.0 as $uD), tmp.1 as $uD);
			
 
				                 }
			
 
				             }
			
 
				         }
			
 
				-
			
 
				-        /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
			
 
				-        /// tuple.
			
 
				-        $(
			
 
				-            #[$signed_attr]
			
 
				-        )*
			
 
				-        pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
			
 
				-            match (duo < 0, div < 0) {
			
 
				-                (false, false) => {
			
 
				-                    let t = $unsigned_name(duo as $uD, div as $uD);
			
 
				-                    (t.0 as $iD, t.1 as $iD)
			
 
				-                },
			
 
				-                (true, false) => {
			
 
				-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
			
 
				-                    ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
			
 
				-                },
			
 
				-                (false, true) => {
			
 
				-                    let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
			
 
				-                    ((t.0 as $iD).wrapping_neg(), t.1 as $iD)
			
 
				-                },
			
 
				-                (true, true) => {
			
 
				-                    let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
			
 
				-                    (t.0 as $iD, (t.1 as $iD).wrapping_neg())
			
 
				-                },
			
 
				-            }
			
 
				-        }
			
 
				-    }
			
 
				+    };
			
 
				 }