delegate.rs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /// Creates unsigned and signed division functions that use a combination of hardware division and
  2. /// binary long division to divide integers larger than what hardware division by itself can do. This
  3. /// function is intended for microarchitectures that have division hardware, but not fast enough
  4. /// multiplication hardware for `impl_trifecta` to be faster.
  5. #[macro_export]
  6. macro_rules! impl_delegate {
  7. (
  8. $unsigned_name:ident, // name of the unsigned division function
  9. $signed_name:ident, // name of the signed division function
  10. $zero_div_fn:ident, // function called when division by zero is attempted
  11. $half_normalization_shift:ident, // function for finding the normalization shift of $uX
  12. $half_division:ident, // function for division of a $uX by a $uX
  13. $n_h:expr, // the number of bits in $iH or $uH
  14. $uH:ident, // unsigned integer with half the bit width of $uX
  15. $uX:ident, // unsigned integer with half the bit width of $uD.
  16. $uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
  17. $iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
  18. $($unsigned_attr:meta),*; // attributes for the unsigned function
  19. $($signed_attr:meta),* // attributes for the signed function
  20. ) => {
  21. /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
  22. /// tuple.
  23. $(
  24. #[$unsigned_attr]
  25. )*
  26. pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) {
  27. // The two possibility algorithm, undersubtracting long division algorithm, or any kind
  28. // of reciprocal based algorithm will not be fastest, because they involve large
  29. // multiplications that we assume to not be fast enough relative to the divisions to
  30. // outweigh setup times.
  31. // the number of bits in a $uX
  32. let n = $n_h * 2;
  33. let duo_lo = duo as $uX;
  34. let duo_hi = (duo >> n) as $uX;
  35. let div_lo = div as $uX;
  36. let div_hi = (div >> n) as $uX;
  37. match (div_lo == 0, div_hi == 0, duo_hi == 0) {
  38. (true, true, _) => {
  39. $zero_div_fn()
  40. }
  41. (_, false, true) => {
  42. // `duo` < `div`
  43. return (0, duo)
  44. }
  45. (false, true, true) => {
  46. // delegate to smaller division
  47. let tmp = $half_division(duo_lo, div_lo);
  48. return (tmp.0 as $uD, tmp.1 as $uD)
  49. }
  50. (false, true, false) => {
  51. if duo_hi < div_lo {
  52. // `quo_hi` will always be 0. This performs a binary long division algorithm
  53. // to zero `duo_hi` followed by a half division.
  54. // We can calculate the normalization shift using only `$uX` size functions.
  55. // If we calculated the normalization shift using
  56. // `$half_normalization_shift(duo_hi, div_lo false)`, it would break the
  57. // assumption the function has that the first argument is more than the
  58. // second argument. If the arguments are switched, the assumption holds true
  59. // since `duo_hi < div_lo`.
  60. let norm_shift = $half_normalization_shift(div_lo, duo_hi, false);
  61. let shl = if norm_shift == 0 {
  62. // Consider what happens if the msbs of `duo_hi` and `div_lo` align with
  63. // no shifting. The normalization shift will always return
  64. // `norm_shift == 0` regardless of whether it is fully normalized,
  65. // because `duo_hi < div_lo`. In that edge case, `n - norm_shift` would
  66. // result in shift overflow down the line. For the edge case, because
  67. // both `duo_hi < div_lo` and we are comparing all the significant bits
  68. // of `duo_hi` and `div`, we can make `shl = n - 1`.
  69. n - 1
  70. } else {
  71. // We also cannot just use `shl = n - norm_shift - 1` in the general
  72. // case, because when we are not in the edge case comparing all the
  73. // significant bits, then the full `duo < div` may not be true and thus
  74. // breaks the division algorithm.
  75. n - norm_shift
  76. };
  77. // The 3 variable restoring division algorithm (see binary_long.rs) is ideal
  78. // for this task, since `pow` and `quo` can be `$uX` and the delegation
  79. // check is simple.
  80. let mut div: $uD = div << shl;
  81. let mut pow_lo: $uX = 1 << shl;
  82. let mut quo_lo: $uX = 0;
  83. let mut duo = duo;
  84. loop {
  85. let sub = duo.wrapping_sub(div);
  86. if 0 <= (sub as $iD) {
  87. duo = sub;
  88. quo_lo |= pow_lo;
  89. let duo_hi = (duo >> n) as $uX;
  90. if duo_hi == 0 {
  91. // Delegate to get the rest of the quotient. Note that the
  92. // `div_lo` here is the original unshifted `div`.
  93. let tmp = $half_division(duo as $uX, div_lo);
  94. return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD)
  95. }
  96. }
  97. div >>= 1;
  98. pow_lo >>= 1;
  99. }
  100. } else if duo_hi == div_lo {
  101. // `quo_hi == 1`. This branch is cheap and helps with edge cases.
  102. let tmp = $half_division(duo as $uX, div as $uX);
  103. return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD)
  104. } else {
  105. // `div_lo < duo_hi`
  106. // `rem_hi == 0`
  107. if (div_lo >> $n_h) == 0 {
  108. // Short division of $uD by a $uH, using $uX by $uX division
  109. let div_0 = div_lo as $uH as $uX;
  110. let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
  111. let duo_mid =
  112. ((duo >> $n_h) as $uH as $uX)
  113. | (rem_3 << $n_h);
  114. let (quo_1, rem_2) = $half_division(duo_mid, div_0);
  115. let duo_lo =
  116. (duo as $uH as $uX)
  117. | (rem_2 << $n_h);
  118. let (quo_0, rem_1) = $half_division(duo_lo, div_0);
  119. return (
  120. (quo_0 as $uD)
  121. | ((quo_1 as $uD) << $n_h)
  122. | ((quo_hi as $uD) << n),
  123. rem_1 as $uD
  124. )
  125. }
  126. // This is basically a short division composed of a half division for the hi
  127. // part, specialized 3 variable binary long division in the middle, and
  128. // another half division for the lo part.
  129. let duo_lo = duo as $uX;
  130. let tmp = $half_division(duo_hi, div_lo);
  131. let quo_hi = tmp.0;
  132. let mut duo = (duo_lo as $uD) | ((tmp.1 as $uD) << n);
  133. // This check is required to avoid breaking the long division below.
  134. if duo < div {
  135. return ((quo_hi as $uD) << n, duo);
  136. }
  137. // The half division handled all shift alignments down to `n`, so this
  138. // division can continue with a shift of `n - 1`.
  139. let mut div: $uD = div << (n - 1);
  140. let mut pow_lo: $uX = 1 << (n - 1);
  141. let mut quo_lo: $uX = 0;
  142. loop {
  143. let sub = duo.wrapping_sub(div);
  144. if 0 <= (sub as $iD) {
  145. duo = sub;
  146. quo_lo |= pow_lo;
  147. let duo_hi = (duo >> n) as $uX;
  148. if duo_hi == 0 {
  149. // Delegate to get the rest of the quotient. Note that the
  150. // `div_lo` here is the original unshifted `div`.
  151. let tmp = $half_division(duo as $uX, div_lo);
  152. return (
  153. (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n),
  154. tmp.1 as $uD
  155. );
  156. }
  157. }
  158. div >>= 1;
  159. pow_lo >>= 1;
  160. }
  161. }
  162. }
  163. (_, false, false) => {
  164. // Full $uD by $uD binary long division. `quo_hi` will always be 0.
  165. if duo < div {
  166. return (0, duo);
  167. }
  168. let div_original = div;
  169. let shl = $half_normalization_shift(duo_hi, div_hi, false);
  170. let mut duo = duo;
  171. let mut div: $uD = div << shl;
  172. let mut pow_lo: $uX = 1 << shl;
  173. let mut quo_lo: $uX = 0;
  174. loop {
  175. let sub = duo.wrapping_sub(div);
  176. if 0 <= (sub as $iD) {
  177. duo = sub;
  178. quo_lo |= pow_lo;
  179. if duo < div_original {
  180. return (quo_lo as $uD, duo)
  181. }
  182. }
  183. div >>= 1;
  184. pow_lo >>= 1;
  185. }
  186. }
  187. }
  188. }
  189. /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
  190. /// tuple.
  191. $(
  192. #[$signed_attr]
  193. )*
  194. pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
  195. match (duo < 0, div < 0) {
  196. (false, false) => {
  197. let t = $unsigned_name(duo as $uD, div as $uD);
  198. (t.0 as $iD, t.1 as $iD)
  199. },
  200. (true, false) => {
  201. let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
  202. ((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
  203. },
  204. (false, true) => {
  205. let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
  206. ((t.0 as $iD).wrapping_neg(), t.1 as $iD)
  207. },
  208. (true, true) => {
  209. let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
  210. (t.0 as $iD, (t.1 as $iD).wrapping_neg())
  211. },
  212. }
  213. }
  214. }
  215. }