@@ -113,6 +113,13 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
     zero_div_fn()
 }
 
+// `inline(never)` is placed on unsigned division functions so that there are just three division
+// functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins`
+// division functions. The signed functions like `i32_div_rem` will get inlined into the
+// `compiler-builtins` signed division functions, so that they directly call the three division
+// functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the
+// signed division functions, which results in an explosion in code size.
+
 // Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a
 // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
 // faster if the target pointer width is at least 64.
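In the macro invocations below, the two trailing argument lists separated by `;` are attribute lists: the first is applied to the generated unsigned function, the second to the generated signed function. As a hedged sketch only, assuming the macros splice the lists on as `$(#[$attr])*` and using stand-in bodies rather than the real algorithms, the `u128` pair would come out roughly like this:

```rust
// Hedged sketch: assumes the macro applies the first attribute list to the
// unsigned function and the second to the signed wrapper. Bodies are stand-ins.
#[inline(never)] // one outlined copy backs all `compiler-builtins` callers
pub fn u128_div_rem(duo: u128, div: u128) -> (u128, u128) {
    // stand-in body; the real function runs the algorithm selected for the target
    (duo / div, duo % div)
}

#[inline] // thin wrapper, meant to disappear into its signed callers
pub fn i128_div_rem(duo: i128, div: i128) -> (i128, i128) {
    // handle signs, then call the single unsigned division function directly
    let (uquo, urem) = u128_div_rem(duo.unsigned_abs(), div.unsigned_abs());
    let quo = if (duo < 0) != (div < 0) {
        (uquo as i128).wrapping_neg()
    } else {
        uquo as i128
    };
    let rem = if duo < 0 {
        (urem as i128).wrapping_neg()
    } else {
        urem as i128
    };
    (quo, rem)
}

fn main() {
    // Rust division truncates toward zero; the remainder takes the dividend's sign
    assert_eq!(i128_div_rem(-7, 2), (-3, -1));
}
```

With this shape, every signed entry point inlines its thin sign-handling wrapper and ends up calling the one outlined unsigned function directly, which is the code-size behavior the comment above describes.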
@@ -129,7 +136,9 @@ impl_trifecta!(
     u32,
     u64,
     u128,
-    i128,;
+    i128,
+    inline(never);
+    inline
 );
 
 // If the pointer width is less than 64, then the target architecture almost certainly does not have
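The pointer-width selection itself happens outside these hunks. Purely as an illustration of the mechanism (the actual cfg predicates in the file may differ), compile-time gating of this kind looks like:

```rust
// Illustration only: choose a 128-bit division strategy at compile time.
// The predicate here is an assumption, not this file's actual cfg.
#[cfg(target_pointer_width = "64")]
const WIDE_DIV_STRATEGY: &str = "trifecta"; // fast widening multiply assumed

#[cfg(not(target_pointer_width = "64"))]
const WIDE_DIV_STRATEGY: &str = "delegate"; // fall back to smaller divisions

fn main() {
    println!("128-bit division backend: {WIDE_DIV_STRATEGY}");
}
```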
@@ -148,7 +157,9 @@ impl_delegate!(
     u32,
     u64,
     u128,
-    i128,;
+    i128,
+    inline(never);
+    inline
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
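That doc comment is the contract every function in this file shares. A quick sanity check of the quotient/remainder identity, with a stand-in for the generated `u128_div_rem` (the real one is produced by the macros):

```rust
// Stand-in with the same signature as the generated function; illustration only.
fn u128_div_rem(duo: u128, div: u128) -> (u128, u128) {
    (duo / div, duo % div)
}

fn main() {
    let duo = (0x1234_5678_9abc_def0_u128 << 64) | 0xfedc_ba98;
    let div = 0xdead_beef_u128;
    let (quo, rem) = u128_div_rem(duo, div);
    // the defining identity of unsigned division with remainder
    assert_eq!(duo, quo * div + rem);
    assert!(rem < div);
}
```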
@@ -190,7 +201,9 @@ impl_asymmetric!(
     u32,
     u64,
     u128,
-    i128,;
+    i128,
+    inline(never);
+    inline
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -223,7 +236,9 @@ impl_delegate!(
     u16,
     u32,
     u64,
-    i64,;
+    i64,
+    inline(never);
+    inline
 );
 
 // When not on x86 and the pointer width is 64, use `binary_long`.
@@ -238,7 +253,9 @@ impl_binary_long!(
     u64_normalization_shift,
     64,
     u64,
-    i64,;
+    i64,
+    inline(never);
+    inline
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -280,7 +297,9 @@ impl_asymmetric!(
     u16,
     u32,
     u64,
-    i64,;
+    i64,
+    inline(never);
+    inline
 );
 
 // 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
@@ -291,5 +310,7 @@ impl_binary_long!(
     u32_normalization_shift,
     32,
     u32,
-    i32,;
+    i32,
+    inline(never);
+    inline
 );
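Since the final hunks wire up `impl_binary_long!`: a minimal, hedged sketch of the idea behind that macro family is restoring shift-subtract division, which processes the dividend one bit at a time. The macro's real output is more refined; for instance, it uses the `u32_normalization_shift` argument to skip leading zero bits rather than always looping 32 times.

```rust
// Minimal restoring shift-subtract division: the idea behind `binary_long`.
// Simplified sketch, not the macro's actual code.
fn u32_div_rem_binary_long(duo: u32, div: u32) -> (u32, u32) {
    assert!(div != 0, "division by zero");
    let div = div as u64; // widen so the working remainder cannot overflow
    let mut quo: u32 = 0;
    let mut rem: u64 = 0;
    // bring in the dividend one bit at a time, most significant bit first
    for i in (0..32).rev() {
        rem = (rem << 1) | ((duo >> i) & 1) as u64;
        if rem >= div {
            rem -= div; // the "restoring" subtract succeeds, record a quotient bit
            quo |= 1 << i;
        }
    }
    (quo, rem as u32)
}

fn main() {
    assert_eq!(u32_div_rem_binary_long(1000, 7), (142, 6));
}
```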