add.rs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. use core::mem;
  2. use core::num::Wrapping;
  3. use float::Float;
  4. macro_rules! add {
  5. ($intrinsic:ident: $ty:ty) => {
  6. /// Returns `a + b`
  7. #[allow(unused_parens)]
  8. #[cfg_attr(not(test), no_mangle)]
  9. pub extern fn $intrinsic(a: $ty, b: $ty) -> $ty {
  10. let one = Wrapping(1 as <$ty as Float>::Int);
  11. let zero = Wrapping(0 as <$ty as Float>::Int);
  12. let bits = Wrapping(<$ty>::bits() as <$ty as Float>::Int);
  13. let significand_bits = Wrapping(<$ty>::significand_bits() as <$ty as Float>::Int);
  14. let exponent_bits = bits - significand_bits - one;
  15. let max_exponent = (one << exponent_bits.0 as usize) - one;
  16. let implicit_bit = one << significand_bits.0 as usize;
  17. let significand_mask = implicit_bit - one;
  18. let sign_bit = one << (significand_bits + exponent_bits).0 as usize;
  19. let abs_mask = sign_bit - one;
  20. let exponent_mask = abs_mask ^ significand_mask;
  21. let inf_rep = exponent_mask;
  22. let quiet_bit = implicit_bit >> 1;
  23. let qnan_rep = exponent_mask | quiet_bit;
  24. let mut a_rep = Wrapping(a.repr());
  25. let mut b_rep = Wrapping(b.repr());
  26. let a_abs = a_rep & abs_mask;
  27. let b_abs = b_rep & abs_mask;
  28. // Detect if a or b is zero, infinity, or NaN.
  29. if a_abs - one >= inf_rep - one ||
  30. b_abs - one >= inf_rep - one {
  31. // NaN + anything = qNaN
  32. if a_abs > inf_rep {
  33. return (<$ty as Float>::from_repr((a_abs | quiet_bit).0));
  34. }
  35. // anything + NaN = qNaN
  36. if b_abs > inf_rep {
  37. return (<$ty as Float>::from_repr((b_abs | quiet_bit).0));
  38. }
  39. if a_abs == inf_rep {
  40. // +/-infinity + -/+infinity = qNaN
  41. if (a.repr() ^ b.repr()) == sign_bit.0 {
  42. return (<$ty as Float>::from_repr(qnan_rep.0));
  43. } else {
  44. // +/-infinity + anything remaining = +/- infinity
  45. return a;
  46. }
  47. }
  48. // anything remaining + +/-infinity = +/-infinity
  49. if b_abs == inf_rep {
  50. return b;
  51. }
  52. // zero + anything = anything
  53. if a_abs.0 == 0 {
  54. // but we need to get the sign right for zero + zero
  55. if b_abs.0 == 0 {
  56. return (<$ty as Float>::from_repr(a.repr() & b.repr()));
  57. } else {
  58. return b;
  59. }
  60. }
  61. // anything + zero = anything
  62. if b_abs.0 == 0 {
  63. return a;
  64. }
  65. }
  66. // Swap a and b if necessary so that a has the larger absolute value.
  67. if b_abs > a_abs {
  68. mem::swap(&mut a_rep, &mut b_rep);
  69. }
  70. // Extract the exponent and significand from the (possibly swapped) a and b.
  71. let mut a_exponent = Wrapping((a_rep >> significand_bits.0 as usize & max_exponent).0 as i32);
  72. let mut b_exponent = Wrapping((b_rep >> significand_bits.0 as usize & max_exponent).0 as i32);
  73. let mut a_significand = a_rep & significand_mask;
  74. let mut b_significand = b_rep & significand_mask;
  75. // normalize any denormals, and adjust the exponent accordingly.
  76. if a_exponent.0 == 0 {
  77. let (exponent, significand) = <$ty>::normalize(a_significand.0);
  78. a_exponent = Wrapping(exponent);
  79. a_significand = Wrapping(significand);
  80. }
  81. if b_exponent.0 == 0 {
  82. let (exponent, significand) = <$ty>::normalize(b_significand.0);
  83. b_exponent = Wrapping(exponent);
  84. b_significand = Wrapping(significand);
  85. }
  86. // The sign of the result is the sign of the larger operand, a. If they
  87. // have opposite signs, we are performing a subtraction; otherwise addition.
  88. let result_sign = a_rep & sign_bit;
  89. let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero;
  90. // Shift the significands to give us round, guard and sticky, and or in the
  91. // implicit significand bit. (If we fell through from the denormal path it
  92. // was already set by normalize(), but setting it twice won't hurt
  93. // anything.)
  94. a_significand = (a_significand | implicit_bit) << 3;
  95. b_significand = (b_significand | implicit_bit) << 3;
  96. // Shift the significand of b by the difference in exponents, with a sticky
  97. // bottom bit to get rounding correct.
  98. let align = Wrapping((a_exponent - b_exponent).0 as <$ty as Float>::Int);
  99. if align.0 != 0 {
  100. if align < bits {
  101. let sticky = ((b_significand << (bits - align).0 as usize).0 != 0) as <$ty as Float>::Int;
  102. b_significand = (b_significand >> align.0 as usize) | Wrapping(sticky);
  103. } else {
  104. b_significand = one; // sticky; b is known to be non-zero.
  105. }
  106. }
  107. if subtraction {
  108. a_significand -= b_significand;
  109. // If a == -b, return +zero.
  110. if a_significand.0 == 0 {
  111. return (<$ty as Float>::from_repr(0));
  112. }
  113. // If partial cancellation occured, we need to left-shift the result
  114. // and adjust the exponent:
  115. if a_significand < implicit_bit << 3 {
  116. let shift = a_significand.0.leading_zeros() as i32
  117. - (implicit_bit << 3).0.leading_zeros() as i32;
  118. a_significand <<= shift as usize;
  119. a_exponent -= Wrapping(shift);
  120. }
  121. } else /* addition */ {
  122. a_significand += b_significand;
  123. // If the addition carried up, we need to right-shift the result and
  124. // adjust the exponent:
  125. if (a_significand & implicit_bit << 4).0 != 0 {
  126. let sticky = ((a_significand & one).0 != 0) as <$ty as Float>::Int;
  127. a_significand = a_significand >> 1 | Wrapping(sticky);
  128. a_exponent += Wrapping(1);
  129. }
  130. }
  131. // If we have overflowed the type, return +/- infinity:
  132. if a_exponent >= Wrapping(max_exponent.0 as i32) {
  133. return (<$ty>::from_repr((inf_rep | result_sign).0));
  134. }
  135. if a_exponent.0 <= 0 {
  136. // Result is denormal before rounding; the exponent is zero and we
  137. // need to shift the significand.
  138. let shift = Wrapping((Wrapping(1) - a_exponent).0 as <$ty as Float>::Int);
  139. let sticky = ((a_significand << (bits - shift).0 as usize).0 != 0) as <$ty as Float>::Int;
  140. a_significand = a_significand >> shift.0 as usize | Wrapping(sticky);
  141. a_exponent = Wrapping(0);
  142. }
  143. // Low three bits are round, guard, and sticky.
  144. let round_guard_sticky: i32 = (a_significand.0 & 0x7) as i32;
  145. // Shift the significand into place, and mask off the implicit bit.
  146. let mut result = a_significand >> 3 & significand_mask;
  147. // Insert the exponent and sign.
  148. result |= Wrapping(a_exponent.0 as <$ty as Float>::Int) << significand_bits.0 as usize;
  149. result |= result_sign;
  150. // Final rounding. The result may overflow to infinity, but that is the
  151. // correct result in that case.
  152. if round_guard_sticky > 0x4 { result += one; }
  153. if round_guard_sticky == 0x4 { result += result & one; }
  154. return (<$ty>::from_repr(result.0));
  155. }
  156. }
  157. }
  158. add!(__addsf3: f32);
  159. add!(__adddf3: f64);
  160. // FIXME: Implement these using aliases
  161. #[cfg(target_arch = "arm")]
  162. #[cfg_attr(not(test), no_mangle)]
  163. pub extern fn __aeabi_dadd(a: f64, b: f64) -> f64 {
  164. __adddf3(a, b)
  165. }
  166. #[cfg(target_arch = "arm")]
  167. #[cfg_attr(not(test), no_mangle)]
  168. pub extern fn __aeabi_fadd(a: f32, b: f32) -> f32 {
  169. __addsf3(a, b)
  170. }
  171. #[cfg(test)]
  172. mod tests {
  173. use core::{f32, f64};
  174. use qc::{U32, U64};
  175. use float::Float;
  176. // NOTE The tests below have special handing for NaN values.
  177. // Because NaN != NaN, the floating-point representations must be used
  178. // Because there are many diffferent values of NaN, and the implementation
  179. // doesn't care about calculating the 'correct' one, if both values are NaN
  180. // the values are considered equivalent.
  181. // TODO: Add F32/F64 to qc so that they print the right values (at the very least)
  182. quickcheck! {
  183. fn addsf3(a: U32, b: U32) -> bool {
  184. let (a, b) = (f32::from_repr(a.0), f32::from_repr(b.0));
  185. let x = super::__addsf3(a, b);
  186. let y = a + b;
  187. x.eq_repr(y)
  188. }
  189. fn adddf3(a: U64, b: U64) -> bool {
  190. let (a, b) = (f64::from_repr(a.0), f64::from_repr(b.0));
  191. let x = super::__adddf3(a, b);
  192. let y = a + b;
  193. x.eq_repr(y)
  194. }
  195. }
  196. // More tests for special float values
  197. #[test]
  198. fn test_float_tiny_plus_tiny() {
  199. let tiny = f32::from_repr(1);
  200. let r = super::__addsf3(tiny, tiny);
  201. assert!(r.eq_repr(tiny + tiny));
  202. }
  203. #[test]
  204. fn test_double_tiny_plus_tiny() {
  205. let tiny = f64::from_repr(1);
  206. let r = super::__adddf3(tiny, tiny);
  207. assert!(r.eq_repr(tiny + tiny));
  208. }
  209. #[test]
  210. fn test_float_small_plus_small() {
  211. let a = f32::from_repr(327);
  212. let b = f32::from_repr(256);
  213. let r = super::__addsf3(a, b);
  214. assert!(r.eq_repr(a + b));
  215. }
  216. #[test]
  217. fn test_double_small_plus_small() {
  218. let a = f64::from_repr(327);
  219. let b = f64::from_repr(256);
  220. let r = super::__adddf3(a, b);
  221. assert!(r.eq_repr(a + b));
  222. }
  223. #[test]
  224. fn test_float_one_plus_one() {
  225. let r = super::__addsf3(1f32, 1f32);
  226. assert!(r.eq_repr(1f32 + 1f32));
  227. }
  228. #[test]
  229. fn test_double_one_plus_one() {
  230. let r = super::__adddf3(1f64, 1f64);
  231. assert!(r.eq_repr(1f64 + 1f64));
  232. }
  233. #[test]
  234. fn test_float_different_nan() {
  235. let a = f32::from_repr(1);
  236. let b = f32::from_repr(0b11111111100100010001001010101010);
  237. let x = super::__addsf3(a, b);
  238. let y = a + b;
  239. assert!(x.eq_repr(y));
  240. }
  241. #[test]
  242. fn test_double_different_nan() {
  243. let a = f64::from_repr(1);
  244. let b = f64::from_repr(
  245. 0b1111111111110010001000100101010101001000101010000110100011101011);
  246. let x = super::__adddf3(a, b);
  247. let y = a + b;
  248. assert!(x.eq_repr(y));
  249. }
  250. #[test]
  251. fn test_float_nan() {
  252. let r = super::__addsf3(f32::NAN, 1.23);
  253. assert_eq!(r.repr(), f32::NAN.repr());
  254. }
  255. #[test]
  256. fn test_double_nan() {
  257. let r = super::__adddf3(f64::NAN, 1.23);
  258. assert_eq!(r.repr(), f64::NAN.repr());
  259. }
  260. #[test]
  261. fn test_float_inf() {
  262. let r = super::__addsf3(f32::INFINITY, -123.4);
  263. assert_eq!(r, f32::INFINITY);
  264. }
  265. #[test]
  266. fn test_double_inf() {
  267. let r = super::__adddf3(f64::INFINITY, -123.4);
  268. assert_eq!(r, f64::INFINITY);
  269. }
  270. }