s_erfl.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. /* @(#)s_erf.c 5.1 93/09/24 */
  2. /*
  3. * ====================================================
  4. * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  5. *
  6. * Developed at SunPro, a Sun Microsystems, Inc. business.
  7. * Permission to use, copy, modify, and distribute this
  8. * software is freely granted, provided that this notice
  9. * is preserved.
  10. * ====================================================
  11. */
  12. #include <openlibm_compat.h>
  13. __FBSDID("$FreeBSD$");
  14. /*
  15. * See s_erf.c for complete comments.
  16. *
  17. * Converted to long double by Steven G. Kargl.
  18. */
  19. #include <float.h>
  20. #ifdef __i386__
  21. #include <ieeefp.h>
  22. #endif
  23. #include "fpmath.h"
  24. #include <openlibm_math.h>
  25. #include "math_private.h"
  26. /* XXX Prevent compilers from erroneously constant folding: */
  27. static const volatile long double tiny = 0x1p-10000L;
  28. static const double
  29. half= 0.5,
  30. one = 1,
  31. two = 2;
  32. /*
  33. * In the domain [0, 2**-34], only the first term in the power series
  34. * expansion of erf(x) is used. The magnitude of the first neglected
  35. * terms is less than 2**-102.
  36. */
  37. static const union IEEEl2bits
  38. efxu = LD80C(0x8375d410a6db446c, -3, 1.28379167095512573902e-1L),
  39. efx8u = LD80C(0x8375d410a6db446c, 0, 1.02703333676410059122e+0L),
  40. /*
  41. * Domain [0, 0.84375], range ~[-1.423e-22, 1.423e-22]:
  42. * |(erf(x) - x)/x - pp(x)/qq(x)| < 2**-72.573
  43. */
  44. pp0u = LD80C(0x8375d410a6db446c, -3, 1.28379167095512573902e-1L),
  45. pp1u = LD80C(0xa46c7d09ec3d0cec, -2, -3.21140201054840180596e-1L),
  46. pp2u = LD80C(0x9b31e66325576f86, -5, -3.78893851760347812082e-2L),
  47. pp3u = LD80C(0x804ac72c9a0b97dd, -7, -7.83032847030604679616e-3L),
  48. pp4u = LD80C(0x9f42bcbc3d5a601d, -12, -3.03765663857082048459e-4L),
  49. pp5u = LD80C(0x9ec4ad6193470693, -16, -1.89266527398167917502e-5L),
  50. qq1u = LD80C(0xdb4b8eb713188d6b, -2, 4.28310832832310510579e-1L),
  51. qq2u = LD80C(0xa5750835b2459bd1, -4, 8.07896272074540216658e-2L),
  52. qq3u = LD80C(0x8b85d6bd6a90b51c, -7, 8.51579638189385354266e-3L),
  53. qq4u = LD80C(0x87332f82cff4ff96, -11, 5.15746855583604912827e-4L),
  54. qq5u = LD80C(0x83466cb6bf9dca00, -16, 1.56492109706256700009e-5L),
  55. qq6u = LD80C(0xf5bf98c2f996bf63, -24, 1.14435527803073879724e-7L);
  56. #define efx (efxu.e)
  57. #define efx8 (efx8u.e)
  58. #define pp0 (pp0u.e)
  59. #define pp1 (pp1u.e)
  60. #define pp2 (pp2u.e)
  61. #define pp3 (pp3u.e)
  62. #define pp4 (pp4u.e)
  63. #define pp5 (pp5u.e)
  64. #define qq1 (qq1u.e)
  65. #define qq2 (qq2u.e)
  66. #define qq3 (qq3u.e)
  67. #define qq4 (qq4u.e)
  68. #define qq5 (qq5u.e)
  69. #define qq6 (qq6u.e)
  70. static const union IEEEl2bits
  71. erxu = LD80C(0xd7bb3d0000000000, -1, 8.42700779438018798828e-1L),
  72. /*
  73. * Domain [0.84375, 1.25], range ~[-8.132e-22, 8.113e-22]:
  74. * |(erf(x) - erx) - pa(x)/qa(x)| < 2**-71.762
  75. */
  76. pa0u = LD80C(0xe8211158da02c692, -27, 1.35116960705131296711e-8L),
  77. pa1u = LD80C(0xd488f89f36988618, -2, 4.15107507167065612570e-1L),
  78. pa2u = LD80C(0xece74f8c63fa3942, -4, -1.15675565215949226989e-1L),
  79. pa3u = LD80C(0xc8d31e020727c006, -4, 9.80589241379624665791e-2L),
  80. pa4u = LD80C(0x985d5d5fafb0551f, -5, 3.71984145558422368847e-2L),
  81. pa5u = LD80C(0xa5b6c4854d2f5452, -8, -5.05718799340957673661e-3L),
  82. pa6u = LD80C(0x85c8d58fe3993a47, -8, 4.08277919612202243721e-3L),
  83. pa7u = LD80C(0xddbfbc23677b35cf, -13, 2.11476292145347530794e-4L),
  84. qa1u = LD80C(0xb8a977896f5eff3f, -1, 7.21335860303380361298e-1L),
  85. qa2u = LD80C(0x9fcd662c3d4eac86, -1, 6.24227891731886593333e-1L),
  86. qa3u = LD80C(0x9d0b618eac67ba07, -2, 3.06727455774491855801e-1L),
  87. qa4u = LD80C(0x881a4293f6d6c92d, -3, 1.32912674218195890535e-1L),
  88. qa5u = LD80C(0xbab144f07dea45bf, -5, 4.55792134233613027584e-2L),
  89. qa6u = LD80C(0xa6c34ba438bdc900, -7, 1.01783980070527682680e-2L),
  90. qa7u = LD80C(0x8fa866dc20717a91, -9, 2.19204436518951438183e-3L);
  91. #define erx (erxu.e)
  92. #define pa0 (pa0u.e)
  93. #define pa1 (pa1u.e)
  94. #define pa2 (pa2u.e)
  95. #define pa3 (pa3u.e)
  96. #define pa4 (pa4u.e)
  97. #define pa5 (pa5u.e)
  98. #define pa6 (pa6u.e)
  99. #define pa7 (pa7u.e)
  100. #define qa1 (qa1u.e)
  101. #define qa2 (qa2u.e)
  102. #define qa3 (qa3u.e)
  103. #define qa4 (qa4u.e)
  104. #define qa5 (qa5u.e)
  105. #define qa6 (qa6u.e)
  106. #define qa7 (qa7u.e)
  107. static const union IEEEl2bits
  108. /*
  109. * Domain [1.25,2.85715], range ~[-2.334e-22,2.334e-22]:
  110. * |log(x*erfc(x)) + x**2 + 0.5625 - ra(x)/sa(x)| < 2**-71.860
  111. */
  112. ra0u = LD80C(0xa1a091e0fb4f335a, -7, -9.86494298915814308249e-3L),
  113. ra1u = LD80C(0xc2b0d045ae37df6b, -1, -7.60510460864878271275e-1L),
  114. ra2u = LD80C(0xf2cec3ee7da636c5, 3, -1.51754798236892278250e+1L),
  115. ra3u = LD80C(0x813cc205395adc7d, 7, -1.29237335516455333420e+2L),
  116. ra4u = LD80C(0x8737c8b7b4062c2f, 9, -5.40871625829510494776e+2L),
  117. ra5u = LD80C(0x8ffe5383c08d4943, 10, -1.15194769466026108551e+3L),
  118. ra6u = LD80C(0x983573e64d5015a9, 10, -1.21767039790249025544e+3L),
  119. ra7u = LD80C(0x92a794e763a6d4db, 9, -5.86618463370624636688e+2L),
  120. ra8u = LD80C(0xd5ad1fae77c3d9a3, 6, -1.06838132335777049840e+2L),
  121. ra9u = LD80C(0x934c1a247807bb9c, 2, -4.60303980944467334806e+0L),
  122. sa1u = LD80C(0xd342f90012bb1189, 4, 2.64077014928547064865e+1L),
  123. sa2u = LD80C(0x839be13d9d5da883, 8, 2.63217811300123973067e+2L),
  124. sa3u = LD80C(0x9f8cba6d1ae1b24b, 10, 1.27639775710344617587e+3L),
  125. sa4u = LD80C(0xcaa83f403713e33e, 11, 3.24251544209971162003e+3L),
  126. sa5u = LD80C(0x8796aff2f3c47968, 12, 4.33883591261332837874e+3L),
  127. sa6u = LD80C(0xb6ef97f9c753157b, 11, 2.92697460344182158454e+3L),
  128. sa7u = LD80C(0xe02aee5f83773d1c, 9, 8.96670799139389559818e+2L),
  129. sa8u = LD80C(0xc82b83855b88e07e, 6, 1.00084987800048510018e+2L),
  130. sa9u = LD80C(0x92f030aefadf28ad, 1, 2.29591004455459083843e+0L);
  131. #define ra0 (ra0u.e)
  132. #define ra1 (ra1u.e)
  133. #define ra2 (ra2u.e)
  134. #define ra3 (ra3u.e)
  135. #define ra4 (ra4u.e)
  136. #define ra5 (ra5u.e)
  137. #define ra6 (ra6u.e)
  138. #define ra7 (ra7u.e)
  139. #define ra8 (ra8u.e)
  140. #define ra9 (ra9u.e)
  141. #define sa1 (sa1u.e)
  142. #define sa2 (sa2u.e)
  143. #define sa3 (sa3u.e)
  144. #define sa4 (sa4u.e)
  145. #define sa5 (sa5u.e)
  146. #define sa6 (sa6u.e)
  147. #define sa7 (sa7u.e)
  148. #define sa8 (sa8u.e)
  149. #define sa9 (sa9u.e)
  150. /*
  151. * Domain [2.85715,7], range ~[-8.323e-22,8.390e-22]:
  152. * |log(x*erfc(x)) + x**2 + 0.5625 - rb(x)/sb(x)| < 2**-70.326
  153. */
  154. static const union IEEEl2bits
  155. rb0u = LD80C(0xa1a091cf43abcd26, -7, -9.86494292470284646962e-3L),
  156. rb1u = LD80C(0xd19d2df1cbb8da0a, -1, -8.18804618389296662837e-1L),
  157. rb2u = LD80C(0x9a4dd1383e5daf5b, 4, -1.92879967111618594779e+1L),
  158. rb3u = LD80C(0xbff0ae9fc0751de6, 7, -1.91940164551245394969e+2L),
  159. rb4u = LD80C(0xdde08465310b472b, 9, -8.87508080766577324539e+2L),
  160. rb5u = LD80C(0xe796e1d38c8c70a9, 10, -1.85271506669474503781e+3L),
  161. rb6u = LD80C(0xbaf655a76e0ab3b5, 10, -1.49569795581333675349e+3L),
  162. rb7u = LD80C(0x95d21e3e75503c21, 8, -2.99641547972948019157e+2L),
  163. sb1u = LD80C(0x814487ed823c8cbd, 5, 3.23169247732868256569e+1L),
  164. sb2u = LD80C(0xbe4bfbb1301304be, 8, 3.80593618534539961773e+2L),
  165. sb3u = LD80C(0x809c4ade46b927c7, 11, 2.05776827838541292848e+3L),
  166. sb4u = LD80C(0xa55284359f3395a8, 12, 5.29031455540062116327e+3L),
  167. sb5u = LD80C(0xbcfa72da9b820874, 12, 6.04730608102312640462e+3L),
  168. sb6u = LD80C(0x9d09a35988934631, 11, 2.51260238030767176221e+3L),
  169. sb7u = LD80C(0xd675bbe542c159fa, 7, 2.14459898308561015684e+2L);
  170. #define rb0 (rb0u.e)
  171. #define rb1 (rb1u.e)
  172. #define rb2 (rb2u.e)
  173. #define rb3 (rb3u.e)
  174. #define rb4 (rb4u.e)
  175. #define rb5 (rb5u.e)
  176. #define rb6 (rb6u.e)
  177. #define rb7 (rb7u.e)
  178. #define sb1 (sb1u.e)
  179. #define sb2 (sb2u.e)
  180. #define sb3 (sb3u.e)
  181. #define sb4 (sb4u.e)
  182. #define sb5 (sb5u.e)
  183. #define sb6 (sb6u.e)
  184. #define sb7 (sb7u.e)
  185. /*
  186. * Domain [7,108], range ~[-4.422e-22,4.422e-22]:
  187. * |log(x*erfc(x)) + x**2 + 0.5625 - rc(x)/sc(x)| < 2**-70.938
  188. */
  189. static const union IEEEl2bits
  190. /* err = -4.422092275318925082e-22 -70.937689 */
  191. rc0u = LD80C(0xa1a091cf437a17ad, -7, -9.86494292470008707260e-3L),
  192. rc1u = LD80C(0xbe79c5a978122b00, -1, -7.44045595049165939261e-1L),
  193. rc2u = LD80C(0xdb26f9bbe31a2794, 3, -1.36970155085888424425e+1L),
  194. rc3u = LD80C(0xb5f69a38f5747ac8, 6, -9.09816453742625888546e+1L),
  195. rc4u = LD80C(0xd79676d970d0a21a, 7, -2.15587750997584074147e+2L),
  196. rc5u = LD80C(0xfe528153c45ec97c, 6, -1.27161142938347796666e+2L),
  197. sc1u = LD80C(0xc5e8cd46d5604a96, 4, 2.47386727842204312937e+1L),
  198. sc2u = LD80C(0xc5f0f5a5484520eb, 7, 1.97941248254913378865e+2L),
  199. sc3u = LD80C(0x964e3c7b34db9170, 9, 6.01222441484087787522e+2L),
  200. sc4u = LD80C(0x99be1b89faa0596a, 9, 6.14970430845978077827e+2L),
  201. sc5u = LD80C(0xf80dfcbf37ffc5ea, 6, 1.24027318931184605891e+2L);
  202. #define rc0 (rc0u.e)
  203. #define rc1 (rc1u.e)
  204. #define rc2 (rc2u.e)
  205. #define rc3 (rc3u.e)
  206. #define rc4 (rc4u.e)
  207. #define rc5 (rc5u.e)
  208. #define sc1 (sc1u.e)
  209. #define sc2 (sc2u.e)
  210. #define sc3 (sc3u.e)
  211. #define sc4 (sc4u.e)
  212. #define sc5 (sc5u.e)
  213. long double
  214. erfl(long double x)
  215. {
  216. long double ax,R,S,P,Q,s,y,z,r;
  217. uint64_t lx;
  218. int32_t i;
  219. uint16_t hx;
  220. EXTRACT_LDBL80_WORDS(hx, lx, x);
  221. if((hx & 0x7fff) == 0x7fff) { /* erfl(nan)=nan */
  222. i = (hx>>15)<<1;
  223. return (1-i)+one/x; /* erfl(+-inf)=+-1 */
  224. }
  225. ENTERI();
  226. ax = fabsl(x);
  227. if(ax < 0.84375) {
  228. if(ax < 0x1p-34L) {
  229. if(ax < 0x1p-16373L)
  230. RETURNI((8*x+efx8*x)/8); /* avoid spurious underflow */
  231. RETURNI(x + efx*x);
  232. }
  233. z = x*x;
  234. r = pp0+z*(pp1+z*(pp2+z*(pp3+z*(pp4+z*pp5))));
  235. s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*(qq5+z*qq6)))));
  236. y = r/s;
  237. RETURNI(x + x*y);
  238. }
  239. if(ax < 1.25) {
  240. s = ax-one;
  241. P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*(pa6+s*pa7))))));
  242. Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*(qa6+s*qa7))))));
  243. if(x>=0) RETURNI(erx + P/Q); else RETURNI(-erx - P/Q);
  244. }
  245. if(ax >= 7) { /* inf>|x|>= 7 */
  246. if(x>=0) RETURNI(one-tiny); else RETURNI(tiny-one);
  247. }
  248. s = one/(ax*ax);
  249. if(ax < 2.85715) { /* |x| < 2.85715 */
  250. R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*(ra7+
  251. s*(ra8+s*ra9))))))));
  252. S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+
  253. s*(sa8+s*sa9))))))));
  254. } else { /* |x| >= 2.85715 */
  255. R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*(rb6+s*rb7))))));
  256. S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))));
  257. }
  258. z=(float)ax;
  259. r=expl(-z*z-0.5625)*expl((z-ax)*(z+ax)+R/S);
  260. if(x>=0) RETURNI(one-r/ax); else RETURNI(r/ax-one);
  261. }
  262. long double
  263. erfcl(long double x)
  264. {
  265. long double ax,R,S,P,Q,s,y,z,r;
  266. uint64_t lx;
  267. uint16_t hx;
  268. EXTRACT_LDBL80_WORDS(hx, lx, x);
  269. if((hx & 0x7fff) == 0x7fff) { /* erfcl(nan)=nan */
  270. /* erfcl(+-inf)=0,2 */
  271. return ((hx>>15)<<1)+one/x;
  272. }
  273. ENTERI();
  274. ax = fabsl(x);
  275. if(ax < 0.84375L) {
  276. if(ax < 0x1p-34L)
  277. RETURNI(one-x);
  278. z = x*x;
  279. r = pp0+z*(pp1+z*(pp2+z*(pp3+z*(pp4+z*pp5))));
  280. s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*(qq5+z*qq6)))));
  281. y = r/s;
  282. if(ax < 0.25L) { /* x<1/4 */
  283. RETURNI(one-(x+x*y));
  284. } else {
  285. r = x*y;
  286. r += (x-half);
  287. RETURNI(half - r);
  288. }
  289. }
  290. if(ax < 1.25L) {
  291. s = ax-one;
  292. P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*(pa6+s*pa7))))));
  293. Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*(qa6+s*qa7))))));
  294. if(x>=0) {
  295. z = one-erx; RETURNI(z - P/Q);
  296. } else {
  297. z = (erx+P/Q); RETURNI(one+z);
  298. }
  299. }
  300. if(ax < 108) { /* |x| < 108 */
  301. s = one/(ax*ax);
  302. if(ax < 2.85715) { /* |x| < 2.85715 */
  303. R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(ra5+s*(ra6+s*(ra7+
  304. s*(ra8+s*ra9))))))));
  305. S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(sa5+s*(sa6+s*(sa7+
  306. s*(sa8+s*sa9))))))));
  307. } else if(ax < 7) { /* | |x| < 7 */
  308. R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(rb5+s*(rb6+s*rb7))))));
  309. S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(sb5+s*(sb6+s*sb7))))));
  310. } else {
  311. if(x < -7) RETURNI(two-tiny);/* x < -7 */
  312. R=rc0+s*(rc1+s*(rc2+s*(rc3+s*(rc4+s*rc5))));
  313. S=one+s*(sc1+s*(sc2+s*(sc3+s*(sc4+s*sc5))));
  314. }
  315. z = (float)ax;
  316. r = expl(-z*z-0.5625)*expl((z-ax)*(z+ax)+R/S);
  317. if(x>0) RETURNI(r/ax); else RETURNI(two-r/ax);
  318. } else {
  319. if(x>0) RETURNI(tiny*tiny); else RETURNI(two-tiny);
  320. }
  321. }