@@ -113,6 +113,13 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
     zero_div_fn()
 }
 
+// `inline(never)` is placed on unsigned division functions so that there are just three division
+// functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins`
+// division functions. The signed functions like `i32_div_rem` will get inlined into the
+// `compiler-builtins` signed division functions, so that they directly call the three division
+// functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the
+// signed division functions, which results in an explosion in code size.
+
 // Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a
 // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
 // faster if the target pointer width is at least 64.
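In the macro invocations below, the two trailing argument lists separated by `;` are attribute lists: the first is applied to the generated unsigned function, the second to the generated signed function. As a hedged sketch only, assuming the macros splice the lists on as `$(#[$attr])*` and using stand-in bodies rather than the real algorithms, the `u128` pair would come out roughly like this:

```rust
// Hedged sketch: assumes the macro applies the first attribute list to the
// unsigned function and the second to the signed wrapper. Bodies are stand-ins.
#[inline(never)] // one outlined copy backs all `compiler-builtins` callers
pub fn u128_div_rem(duo: u128, div: u128) -> (u128, u128) {
    // stand-in body; the real function runs the algorithm selected for the target
    (duo / div, duo % div)
}

#[inline] // thin wrapper, meant to disappear into its signed callers
pub fn i128_div_rem(duo: i128, div: i128) -> (i128, i128) {
    // handle signs, then call the single unsigned division function directly
    let (uquo, urem) = u128_div_rem(duo.unsigned_abs(), div.unsigned_abs());
    let quo = if (duo < 0) != (div < 0) {
        (uquo as i128).wrapping_neg()
    } else {
        uquo as i128
    };
    let rem = if duo < 0 {
        (urem as i128).wrapping_neg()
    } else {
        urem as i128
    };
    (quo, rem)
}

fn main() {
    // Rust division truncates toward zero; the remainder takes the dividend's sign
    assert_eq!(i128_div_rem(-7, 2), (-3, -1));
}
```

With this shape, every signed entry point inlines its thin sign-handling wrapper and ends up calling the one outlined unsigned function directly, which is the code-size behavior the comment above describes.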
@@ -129,7 +136,9 @@ impl_trifecta!(
     u32,
     u64,
     u128,
-    i128,;
+    i128,
+    inline(never);
+    inline
 );
 
 // If the pointer width is less than 64, then the target architecture almost certainly does not have
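The pointer-width selection itself happens outside these hunks. Purely as an illustration of the mechanism (the actual cfg predicates in the file may differ), compile-time gating of this kind looks like:

```rust
// Illustration only: choose a 128-bit division strategy at compile time.
// The predicate here is an assumption, not this file's actual cfg.
#[cfg(target_pointer_width = "64")]
const WIDE_DIV_STRATEGY: &str = "trifecta"; // fast widening multiply assumed

#[cfg(not(target_pointer_width = "64"))]
const WIDE_DIV_STRATEGY: &str = "delegate"; // fall back to smaller divisions

fn main() {
    println!("128-bit division backend: {WIDE_DIV_STRATEGY}");
}
```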
@@ -148,7 +157,9 @@ impl_delegate!(
     u32,
     u64,
     u128,
-    i128,;
+    i128,
+    inline(never);
+    inline
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
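That doc comment is the contract every function in this file shares. A quick sanity check of the quotient/remainder identity, with a stand-in for the generated `u128_div_rem` (the real one is produced by the macros):

```rust
// Stand-in with the same signature as the generated function; illustration only.
fn u128_div_rem(duo: u128, div: u128) -> (u128, u128) {
    (duo / div, duo % div)
}

fn main() {
    let duo = (0x1234_5678_9abc_def0_u128 << 64) | 0xfedc_ba98;
    let div = 0xdead_beef_u128;
    let (quo, rem) = u128_div_rem(duo, div);
    // the defining identity of unsigned division with remainder
    assert_eq!(duo, quo * div + rem);
    assert!(rem < div);
}
```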
@@ -190,7 +201,9 @@ impl_asymmetric!(
     u32,
     u64,
     u128,
-    i128,;
+    i128,
+    inline(never);
+    inline
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -223,7 +236,9 @@ impl_delegate!(
     u16,
     u32,
     u64,
-    i64,;
+    i64,
+    inline(never);
+    inline
 );
 
 // When not on x86 and the pointer width is 64, use `binary_long`.
@@ -238,7 +253,9 @@ impl_binary_long!(
     u64_normalization_shift,
     64,
     u64,
-    i64,;
+    i64,
+    inline(never);
+    inline
 );
 
 /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@@ -280,7 +297,9 @@ impl_asymmetric!(
     u16,
     u32,
     u64,
-    i64,;
+    i64,
+    inline(never);
+    inline
 );
 
 // 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
@@ -291,5 +310,7 @@ impl_binary_long!(
     u32_normalization_shift,
     32,
     u32,
-    i32,;
+    i32,
+    inline(never);
+    inline
 );
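Since the final hunks wire up `impl_binary_long!`: a minimal, hedged sketch of the idea behind that macro family is restoring shift-subtract division, which processes the dividend one bit at a time. The macro's real output is more refined; for instance, it uses the `u32_normalization_shift` argument to skip leading zero bits rather than always looping 32 times.

```rust
// Minimal restoring shift-subtract division: the idea behind `binary_long`.
// Simplified sketch, not the macro's actual code.
fn u32_div_rem_binary_long(duo: u32, div: u32) -> (u32, u32) {
    assert!(div != 0, "division by zero");
    let div = div as u64; // widen so the working remainder cannot overflow
    let mut quo: u32 = 0;
    let mut rem: u64 = 0;
    // bring in the dividend one bit at a time, most significant bit first
    for i in (0..32).rev() {
        rem = (rem << 1) | ((duo >> i) & 1) as u64;
        if rem >= div {
            rem -= div; // the "restoring" subtract succeeds, record a quotient bit
            quo |= 1 << i;
        }
    }
    (quo, rem as u32)
}

fn main() {
    assert_eq!(u32_div_rem_binary_long(1000, 7), (142, 6));
}
```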