s_expl.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. /*-
  2. * Copyright (c) 2009-2013 Steven G. Kargl
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice unmodified, this list of conditions, and the following
  10. * disclaimer.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  16. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  17. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  19. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  20. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  24. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. *
  26. * Optimized by Bruce D. Evans.
  27. */
  28. #include <openlibm_compat.h>
  29. __FBSDID("$FreeBSD$");
  30. /**
  31. * Compute the exponential of x for Intel 80-bit format. This is based on:
  32. *
  33. * PTP Tang, "Table-driven implementation of the exponential function
  34. * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 15,
  35. * 144-157 (1989).
  36. *
  37. * where the 32 table entries have been expanded to INTERVALS (see below).
  38. */
  39. #include <float.h>
  40. #ifdef __i386__
  41. #include <ieeefp.h>
  42. #endif
  43. #include "fpmath.h"
  44. #include <openlibm_math.h>
  45. #include "math_private.h"
  46. #include "k_expl.h"
  47. /* XXX Prevent compilers from erroneously constant folding these: */
  48. static const volatile long double
  49. huge = 0x1p10000L,
  50. tiny = 0x1p-10000L;
  51. static const long double
  52. twom10000 = 0x1p-10000L;
  53. static const union IEEEl2bits
  54. /* log(2**16384 - 0.5) rounded towards zero: */
  55. /* log(2**16384 - 0.5 + 1) rounded towards zero for expm1l() is the same: */
  56. o_thresholdu = LD80C(0xb17217f7d1cf79ab, 13, 11356.5234062941439488L),
  57. #define o_threshold (o_thresholdu.e)
  58. /* log(2**(-16381-64-1)) rounded towards zero: */
  59. u_thresholdu = LD80C(0xb21dfe7f09e2baa9, 13, -11399.4985314888605581L);
  60. #define u_threshold (u_thresholdu.e)
  61. long double
  62. expl(long double x)
  63. {
  64. union IEEEl2bits u;
  65. long double hi, lo, t, twopk;
  66. int k;
  67. uint16_t hx, ix;
  68. DOPRINT_START(&x);
  69. /* Filter out exceptional cases. */
  70. u.e = x;
  71. hx = u.xbits.expsign;
  72. ix = hx & 0x7fff;
  73. if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */
  74. if (ix == BIAS + LDBL_MAX_EXP) {
  75. if (hx & 0x8000) /* x is -Inf, -NaN or unsupported */
  76. RETURNP(-1 / x);
  77. RETURNP(x + x); /* x is +Inf, +NaN or unsupported */
  78. }
  79. if (x > o_threshold)
  80. RETURNP(huge * huge);
  81. if (x < u_threshold)
  82. RETURNP(tiny * tiny);
  83. } else if (ix < BIAS - 75) { /* |x| < 0x1p-75 (includes pseudos) */
  84. RETURN2P(1, x); /* 1 with inexact iff x != 0 */
  85. }
  86. ENTERI();
  87. twopk = 1;
  88. __k_expl(x, &hi, &lo, &k);
  89. t = SUM2P(hi, lo);
  90. /* Scale by 2**k. */
  91. if (k >= LDBL_MIN_EXP) {
  92. if (k == LDBL_MAX_EXP)
  93. RETURNI(t * 2 * 0x1p16383L);
  94. SET_LDBL_EXPSIGN(twopk, BIAS + k);
  95. RETURNI(t * twopk);
  96. } else {
  97. SET_LDBL_EXPSIGN(twopk, BIAS + k + 10000);
  98. RETURNI(t * twopk * twom10000);
  99. }
  100. }
  101. /**
  102. * Compute expm1l(x) for Intel 80-bit format. This is based on:
  103. *
  104. * PTP Tang, "Table-driven implementation of the Expm1 function
  105. * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 18,
  106. * 211-222 (1992).
  107. */
  108. /*
  109. * Our T1 and T2 are chosen to be approximately the points where method
  110. * A and method B have the same accuracy. Tang's T1 and T2 are the
  111. * points where method A's accuracy changes by a full bit. For Tang,
  112. * this drop in accuracy makes method A immediately less accurate than
  113. * method B, but our larger INTERVALS makes method A 2 bits more
  114. * accurate so it remains the most accurate method significantly
  115. * closer to the origin despite losing the full bit in our extended
  116. * range for it.
  117. */
  118. static const double
  119. T1 = -0.1659, /* ~-30.625/128 * log(2) */
  120. T2 = 0.1659; /* ~30.625/128 * log(2) */
  121. /*
  122. * Domain [-0.1659, 0.1659], range ~[-2.6155e-22, 2.5507e-23]:
  123. * |(exp(x)-1-x-x**2/2)/x - p(x)| < 2**-71.6
  124. *
  125. * XXX the coeffs aren't very carefully rounded, and I get 2.8 more bits,
  126. * but unlike for ld128 we can't drop any terms.
  127. */
  128. static const union IEEEl2bits
  129. B3 = LD80C(0xaaaaaaaaaaaaaaab, -3, 1.66666666666666666671e-1L),
  130. B4 = LD80C(0xaaaaaaaaaaaaaaac, -5, 4.16666666666666666712e-2L);
  131. static const double
  132. B5 = 8.3333333333333245e-3, /* 0x1.111111111110cp-7 */
  133. B6 = 1.3888888888888861e-3, /* 0x1.6c16c16c16c0ap-10 */
  134. B7 = 1.9841269841532042e-4, /* 0x1.a01a01a0319f9p-13 */
  135. B8 = 2.4801587302069236e-5, /* 0x1.a01a01a03cbbcp-16 */
  136. B9 = 2.7557316558468562e-6, /* 0x1.71de37fd33d67p-19 */
  137. B10 = 2.7557315829785151e-7, /* 0x1.27e4f91418144p-22 */
  138. B11 = 2.5063168199779829e-8, /* 0x1.ae94fabdc6b27p-26 */
  139. B12 = 2.0887164654459567e-9; /* 0x1.1f122d6413fe1p-29 */
  140. long double
  141. expm1l(long double x)
  142. {
  143. union IEEEl2bits u, v;
  144. long double fn, hx2_hi, hx2_lo, q, r, r1, r2, t, twomk, twopk, x_hi;
  145. long double x_lo, x2, z;
  146. long double x4;
  147. int k, n, n2;
  148. uint16_t hx, ix;
  149. DOPRINT_START(&x);
  150. /* Filter out exceptional cases. */
  151. u.e = x;
  152. hx = u.xbits.expsign;
  153. ix = hx & 0x7fff;
  154. if (ix >= BIAS + 6) { /* |x| >= 64 or x is NaN */
  155. if (ix == BIAS + LDBL_MAX_EXP) {
  156. if (hx & 0x8000) /* x is -Inf, -NaN or unsupported */
  157. RETURNP(-1 / x - 1);
  158. RETURNP(x + x); /* x is +Inf, +NaN or unsupported */
  159. }
  160. if (x > o_threshold)
  161. RETURNP(huge * huge);
  162. /*
  163. * expm1l() never underflows, but it must avoid
  164. * unrepresentable large negative exponents. We used a
  165. * much smaller threshold for large |x| above than in
  166. * expl() so as to handle not so large negative exponents
  167. * in the same way as large ones here.
  168. */
  169. if (hx & 0x8000) /* x <= -64 */
  170. RETURN2P(tiny, -1); /* good for x < -65ln2 - eps */
  171. }
  172. ENTERI();
  173. if (T1 < x && x < T2) {
  174. if (ix < BIAS - 74) { /* |x| < 0x1p-74 (includes pseudos) */
  175. /* x (rounded) with inexact if x != 0: */
  176. RETURNPI(x == 0 ? x :
  177. (0x1p100 * x + fabsl(x)) * 0x1p-100);
  178. }
  179. x2 = x * x;
  180. x4 = x2 * x2;
  181. q = x4 * (x2 * (x4 *
  182. /*
  183. * XXX the number of terms is no longer good for
  184. * pairwise grouping of all except B3, and the
  185. * grouping is no longer from highest down.
  186. */
  187. (x2 * B12 + (x * B11 + B10)) +
  188. (x2 * (x * B9 + B8) + (x * B7 + B6))) +
  189. (x * B5 + B4.e)) + x2 * x * B3.e;
  190. x_hi = (float)x;
  191. x_lo = x - x_hi;
  192. hx2_hi = x_hi * x_hi / 2;
  193. hx2_lo = x_lo * (x + x_hi) / 2;
  194. if (ix >= BIAS - 7)
  195. RETURN2PI(hx2_hi + x_hi, hx2_lo + x_lo + q);
  196. else
  197. RETURN2PI(x, hx2_lo + q + hx2_hi);
  198. }
  199. /* Reduce x to (k*ln2 + endpoint[n2] + r1 + r2). */
  200. /* Use a specialized rint() to get fn. Assume round-to-nearest. */
  201. fn = x * INV_L + 0x1.8p63 - 0x1.8p63;
  202. #if defined(HAVE_EFFICIENT_IRINTL)
  203. n = irintl(fn);
  204. #elif defined(HAVE_EFFICIENT_IRINT)
  205. n = irint(fn);
  206. #else
  207. n = (int)fn;
  208. #endif
  209. n2 = (unsigned)n % INTERVALS;
  210. k = n >> LOG2_INTERVALS;
  211. r1 = x - fn * L1;
  212. r2 = fn * -L2;
  213. r = r1 + r2;
  214. /* Prepare scale factor. */
  215. v.e = 1;
  216. v.xbits.expsign = BIAS + k;
  217. twopk = v.e;
  218. /*
  219. * Evaluate lower terms of
  220. * expl(endpoint[n2] + r1 + r2) = tbl[n2] * expl(r1 + r2).
  221. */
  222. z = r * r;
  223. q = r2 + z * (A2 + r * A3) + z * z * (A4 + r * A5) + z * z * z * A6;
  224. t = (long double)tbl[n2].lo + tbl[n2].hi;
  225. if (k == 0) {
  226. t = SUM2P(tbl[n2].hi - 1, tbl[n2].lo * (r1 + 1) + t * q +
  227. tbl[n2].hi * r1);
  228. RETURNI(t);
  229. }
  230. if (k == -1) {
  231. t = SUM2P(tbl[n2].hi - 2, tbl[n2].lo * (r1 + 1) + t * q +
  232. tbl[n2].hi * r1);
  233. RETURNI(t / 2);
  234. }
  235. if (k < -7) {
  236. t = SUM2P(tbl[n2].hi, tbl[n2].lo + t * (q + r1));
  237. RETURNI(t * twopk - 1);
  238. }
  239. if (k > 2 * LDBL_MANT_DIG - 1) {
  240. t = SUM2P(tbl[n2].hi, tbl[n2].lo + t * (q + r1));
  241. if (k == LDBL_MAX_EXP)
  242. RETURNI(t * 2 * 0x1p16383L - 1);
  243. RETURNI(t * twopk - 1);
  244. }
  245. v.xbits.expsign = BIAS - k;
  246. twomk = v.e;
  247. if (k > LDBL_MANT_DIG - 1)
  248. t = SUM2P(tbl[n2].hi, tbl[n2].lo - twomk + t * (q + r1));
  249. else
  250. t = SUM2P(tbl[n2].hi - twomk, tbl[n2].lo + t * (q + r1));
  251. RETURNI(t * twopk);
  252. }