s_erfl.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. /*
  2. * ====================================================
  3. * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  4. *
  5. * Developed at SunPro, a Sun Microsystems, Inc. business.
  6. * Permission to use, copy, modify, and distribute this
  7. * software is freely granted, provided that this notice
  8. * is preserved.
  9. * ====================================================
  10. */
  11. /*
  12. * Copyright (c) 2008 Stephen L. Moshier <[email protected]>
  13. *
  14. * Permission to use, copy, modify, and distribute this software for any
  15. * purpose with or without fee is hereby granted, provided that the above
  16. * copyright notice and this permission notice appear in all copies.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  19. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  20. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  21. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  22. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  23. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  24. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  25. */
  26. /* double erf(double x)
  27. * double erfc(double x)
  28. * x
  29. * 2 |\
  30. * erf(x) = --------- | exp(-t*t)dt
  31. * sqrt(pi) \|
  32. * 0
  33. *
  34. * erfc(x) = 1-erf(x)
  35. * Note that
  36. * erf(-x) = -erf(x)
  37. * erfc(-x) = 2 - erfc(x)
  38. *
  39. * Method:
  40. * 1. For |x| in [0, 0.84375]
  41. * erf(x) = x + x*R(x^2)
  42. * erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
  43. * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
  44. * Remark. The formula is derived by noting
  45. * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
  46. * and that
  47. * 2/sqrt(pi) = 1.128379167095512573896158903121545171688
  48. * is close to one. The interval is chosen because the fix
  49. * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
  50. * near 0.6174), and by some experiment, 0.84375 is chosen to
  51. * guarantee the error is less than one ulp for erf.
  52. *
  53. * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
  54. * c = 0.84506291151 rounded to single (24 bits)
  55. * erf(x) = sign(x) * (c + P1(s)/Q1(s))
  56. * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
  57. * 1+(c+P1(s)/Q1(s)) if x < 0
  58. * Remark: here we use the taylor series expansion at x=1.
  59. * erf(1+s) = erf(1) + s*Poly(s)
  60. * = 0.845.. + P1(s)/Q1(s)
  61. * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
  62. *
  63. * 3. For x in [1.25,1/0.35(~2.857143)],
  64. * erfc(x) = (1/x)*exp(-x*x-0.5625+R1(z)/S1(z))
  65. * z=1/x^2
  66. * erf(x) = 1 - erfc(x)
  67. *
  68. * 4. For x in [1/0.35,107]
  69. * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
  70. * = 2.0 - (1/x)*exp(-x*x-0.5625+R2(z)/S2(z))
  71. * if -6.666<x<0
  72. * = 2.0 - tiny (if x <= -6.666)
  73. * z=1/x^2
  74. * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6.666, else
  75. * erf(x) = sign(x)*(1.0 - tiny)
  76. * Note1:
  77. * To compute exp(-x*x-0.5625+R/S), let s be a single
  78. * precision number and s := x; then
  79. * -x*x = -s*s + (s-x)*(s+x)
  80. * exp(-x*x-0.5626+R/S) =
  81. * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
  82. * Note2:
  83. * Here 4 and 5 make use of the asymptotic series
  84. * exp(-x*x)
  85. * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
  86. * x*sqrt(pi)
  87. *
  88. * 5. For inf > x >= 107
  89. * erf(x) = sign(x) *(1 - tiny) (raise inexact)
  90. * erfc(x) = tiny*tiny (raise underflow) if x > 0
  91. * = 2 - tiny if x<0
  92. *
  93. * 7. Special case:
  94. * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
  95. * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
  96. * erfc/erf(NaN) is NaN
  97. */
  98. #include <openlibm_math.h>
  99. #include "math_private.h"
  100. static const long double
  101. tiny = 1e-4931L,
  102. half = 0.5L,
  103. one = 1.0L,
  104. two = 2.0L,
  105. /* c = (float)0.84506291151 */
  106. erx = 0.845062911510467529296875L,
  107. /*
  108. * Coefficients for approximation to erf on [0,0.84375]
  109. */
  110. /* 2/sqrt(pi) - 1 */
  111. efx = 1.2837916709551257389615890312154517168810E-1L,
  112. /* 8 * (2/sqrt(pi) - 1) */
  113. efx8 = 1.0270333367641005911692712249723613735048E0L,
  114. pp[6] = {
  115. 1.122751350964552113068262337278335028553E6L,
  116. -2.808533301997696164408397079650699163276E6L,
  117. -3.314325479115357458197119660818768924100E5L,
  118. -6.848684465326256109712135497895525446398E4L,
  119. -2.657817695110739185591505062971929859314E3L,
  120. -1.655310302737837556654146291646499062882E2L,
  121. },
  122. qq[6] = {
  123. 8.745588372054466262548908189000448124232E6L,
  124. 3.746038264792471129367533128637019611485E6L,
  125. 7.066358783162407559861156173539693900031E5L,
  126. 7.448928604824620999413120955705448117056E4L,
  127. 4.511583986730994111992253980546131408924E3L,
  128. 1.368902937933296323345610240009071254014E2L,
  129. /* 1.000000000000000000000000000000000000000E0 */
  130. },
  131. /*
  132. * Coefficients for approximation to erf in [0.84375,1.25]
  133. */
  134. /* erf(x+1) = 0.845062911510467529296875 + pa(x)/qa(x)
  135. -0.15625 <= x <= +.25
  136. Peak relative error 8.5e-22 */
  137. pa[8] = {
  138. -1.076952146179812072156734957705102256059E0L,
  139. 1.884814957770385593365179835059971587220E2L,
  140. -5.339153975012804282890066622962070115606E1L,
  141. 4.435910679869176625928504532109635632618E1L,
  142. 1.683219516032328828278557309642929135179E1L,
  143. -2.360236618396952560064259585299045804293E0L,
  144. 1.852230047861891953244413872297940938041E0L,
  145. 9.394994446747752308256773044667843200719E-2L,
  146. },
  147. qa[7] = {
  148. 4.559263722294508998149925774781887811255E2L,
  149. 3.289248982200800575749795055149780689738E2L,
  150. 2.846070965875643009598627918383314457912E2L,
  151. 1.398715859064535039433275722017479994465E2L,
  152. 6.060190733759793706299079050985358190726E1L,
  153. 2.078695677795422351040502569964299664233E1L,
  154. 4.641271134150895940966798357442234498546E0L,
  155. /* 1.000000000000000000000000000000000000000E0 */
  156. },
  157. /*
  158. * Coefficients for approximation to erfc in [1.25,1/0.35]
  159. */
  160. /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + ra(x^2)/sa(x^2))
  161. 1/2.85711669921875 < 1/x < 1/1.25
  162. Peak relative error 3.1e-21 */
  163. ra[] = {
  164. 1.363566591833846324191000679620738857234E-1L,
  165. 1.018203167219873573808450274314658434507E1L,
  166. 1.862359362334248675526472871224778045594E2L,
  167. 1.411622588180721285284945138667933330348E3L,
  168. 5.088538459741511988784440103218342840478E3L,
  169. 8.928251553922176506858267311750789273656E3L,
  170. 7.264436000148052545243018622742770549982E3L,
  171. 2.387492459664548651671894725748959751119E3L,
  172. 2.220916652813908085449221282808458466556E2L,
  173. },
  174. sa[] = {
  175. -1.382234625202480685182526402169222331847E1L,
  176. -3.315638835627950255832519203687435946482E2L,
  177. -2.949124863912936259747237164260785326692E3L,
  178. -1.246622099070875940506391433635999693661E4L,
  179. -2.673079795851665428695842853070996219632E4L,
  180. -2.880269786660559337358397106518918220991E4L,
  181. -1.450600228493968044773354186390390823713E4L,
  182. -2.874539731125893533960680525192064277816E3L,
  183. -1.402241261419067750237395034116942296027E2L,
  184. /* 1.000000000000000000000000000000000000000E0 */
  185. },
  186. /*
  187. * Coefficients for approximation to erfc in [1/.35,107]
  188. */
  189. /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rb(x^2)/sb(x^2))
  190. 1/6.6666259765625 < 1/x < 1/2.85711669921875
  191. Peak relative error 4.2e-22 */
  192. rb[] = {
  193. -4.869587348270494309550558460786501252369E-5L,
  194. -4.030199390527997378549161722412466959403E-3L,
  195. -9.434425866377037610206443566288917589122E-2L,
  196. -9.319032754357658601200655161585539404155E-1L,
  197. -4.273788174307459947350256581445442062291E0L,
  198. -8.842289940696150508373541814064198259278E0L,
  199. -7.069215249419887403187988144752613025255E0L,
  200. -1.401228723639514787920274427443330704764E0L,
  201. },
  202. sb[] = {
  203. 4.936254964107175160157544545879293019085E-3L,
  204. 1.583457624037795744377163924895349412015E-1L,
  205. 1.850647991850328356622940552450636420484E0L,
  206. 9.927611557279019463768050710008450625415E0L,
  207. 2.531667257649436709617165336779212114570E1L,
  208. 2.869752886406743386458304052862814690045E1L,
  209. 1.182059497870819562441683560749192539345E1L,
  210. /* 1.000000000000000000000000000000000000000E0 */
  211. },
  212. /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rc(x^2)/sc(x^2))
  213. 1/107 <= 1/x <= 1/6.6666259765625
  214. Peak relative error 1.1e-21 */
  215. rc[] = {
  216. -8.299617545269701963973537248996670806850E-5L,
  217. -6.243845685115818513578933902532056244108E-3L,
  218. -1.141667210620380223113693474478394397230E-1L,
  219. -7.521343797212024245375240432734425789409E-1L,
  220. -1.765321928311155824664963633786967602934E0L,
  221. -1.029403473103215800456761180695263439188E0L,
  222. },
  223. sc[] = {
  224. 8.413244363014929493035952542677768808601E-3L,
  225. 2.065114333816877479753334599639158060979E-1L,
  226. 1.639064941530797583766364412782135680148E0L,
  227. 4.936788463787115555582319302981666347450E0L,
  228. 5.005177727208955487404729933261347679090E0L,
  229. /* 1.000000000000000000000000000000000000000E0 */
  230. };
  231. long double
  232. erfl(long double x)
  233. {
  234. long double R, S, P, Q, s, y, z, r;
  235. int32_t ix, i;
  236. u_int32_t se, i0, i1;
  237. GET_LDOUBLE_WORDS (se, i0, i1, x);
  238. ix = se & 0x7fff;
  239. if (ix >= 0x7fff)
  240. { /* erf(nan)=nan */
  241. i = ((se & 0xffff) >> 15) << 1;
  242. return (long double) (1 - i) + one / x; /* erf(+-inf)=+-1 */
  243. }
  244. ix = (ix << 16) | (i0 >> 16);
  245. if (ix < 0x3ffed800) /* |x|<0.84375 */
  246. {
  247. if (ix < 0x3fde8000) /* |x|<2**-33 */
  248. {
  249. if (ix < 0x00080000)
  250. return 0.125 * (8.0 * x + efx8 * x); /*avoid underflow */
  251. return x + efx * x;
  252. }
  253. z = x * x;
  254. r = pp[0] + z * (pp[1]
  255. + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
  256. s = qq[0] + z * (qq[1]
  257. + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
  258. y = r / s;
  259. return x + x * y;
  260. }
  261. if (ix < 0x3fffa000) /* 1.25 */
  262. { /* 0.84375 <= |x| < 1.25 */
  263. s = fabsl (x) - one;
  264. P = pa[0] + s * (pa[1] + s * (pa[2]
  265. + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
  266. Q = qa[0] + s * (qa[1] + s * (qa[2]
  267. + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
  268. if ((se & 0x8000) == 0)
  269. return erx + P / Q;
  270. else
  271. return -erx - P / Q;
  272. }
  273. if (ix >= 0x4001d555) /* 6.6666259765625 */
  274. { /* inf>|x|>=6.666 */
  275. if ((se & 0x8000) == 0)
  276. return one - tiny;
  277. else
  278. return tiny - one;
  279. }
  280. x = fabsl (x);
  281. s = one / (x * x);
  282. if (ix < 0x4000b6db) /* 2.85711669921875 */
  283. {
  284. R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
  285. s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
  286. S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
  287. s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
  288. }
  289. else
  290. { /* |x| >= 1/0.35 */
  291. R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
  292. s * (rb[5] + s * (rb[6] + s * rb[7]))))));
  293. S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
  294. s * (sb[5] + s * (sb[6] + s))))));
  295. }
  296. z = x;
  297. GET_LDOUBLE_WORDS (i, i0, i1, z);
  298. i1 = 0;
  299. SET_LDOUBLE_WORDS (z, i, i0, i1);
  300. r =
  301. expl (-z * z - 0.5625) * expl ((z - x) * (z + x) + R / S);
  302. if ((se & 0x8000) == 0)
  303. return one - r / x;
  304. else
  305. return r / x - one;
  306. }
  307. long double
  308. erfcl(long double x)
  309. {
  310. int32_t hx, ix;
  311. long double R, S, P, Q, s, y, z, r;
  312. u_int32_t se, i0, i1;
  313. GET_LDOUBLE_WORDS (se, i0, i1, x);
  314. ix = se & 0x7fff;
  315. if (ix >= 0x7fff)
  316. { /* erfc(nan)=nan */
  317. /* erfc(+-inf)=0,2 */
  318. return (long double) (((se & 0xffff) >> 15) << 1) + one / x;
  319. }
  320. ix = (ix << 16) | (i0 >> 16);
  321. if (ix < 0x3ffed800) /* |x|<0.84375 */
  322. {
  323. if (ix < 0x3fbe0000) /* |x|<2**-65 */
  324. return one - x;
  325. z = x * x;
  326. r = pp[0] + z * (pp[1]
  327. + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
  328. s = qq[0] + z * (qq[1]
  329. + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
  330. y = r / s;
  331. if (ix < 0x3ffd8000) /* x<1/4 */
  332. {
  333. return one - (x + x * y);
  334. }
  335. else
  336. {
  337. r = x * y;
  338. r += (x - half);
  339. return half - r;
  340. }
  341. }
  342. if (ix < 0x3fffa000) /* 1.25 */
  343. { /* 0.84375 <= |x| < 1.25 */
  344. s = fabsl (x) - one;
  345. P = pa[0] + s * (pa[1] + s * (pa[2]
  346. + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
  347. Q = qa[0] + s * (qa[1] + s * (qa[2]
  348. + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
  349. if ((se & 0x8000) == 0)
  350. {
  351. z = one - erx;
  352. return z - P / Q;
  353. }
  354. else
  355. {
  356. z = erx + P / Q;
  357. return one + z;
  358. }
  359. }
  360. if (ix < 0x4005d600) /* 107 */
  361. { /* |x|<107 */
  362. x = fabsl (x);
  363. s = one / (x * x);
  364. if (ix < 0x4000b6db) /* 2.85711669921875 */
  365. { /* |x| < 1/.35 ~ 2.857143 */
  366. R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
  367. s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
  368. S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
  369. s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
  370. }
  371. else if (ix < 0x4001d555) /* 6.6666259765625 */
  372. { /* 6.666 > |x| >= 1/.35 ~ 2.857143 */
  373. R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
  374. s * (rb[5] + s * (rb[6] + s * rb[7]))))));
  375. S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
  376. s * (sb[5] + s * (sb[6] + s))))));
  377. }
  378. else
  379. { /* |x| >= 6.666 */
  380. if (se & 0x8000)
  381. return two - tiny; /* x < -6.666 */
  382. R = rc[0] + s * (rc[1] + s * (rc[2] + s * (rc[3] +
  383. s * (rc[4] + s * rc[5]))));
  384. S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] +
  385. s * (sc[4] + s))));
  386. }
  387. z = x;
  388. GET_LDOUBLE_WORDS (hx, i0, i1, z);
  389. i1 = 0;
  390. i0 &= 0xffffff00;
  391. SET_LDOUBLE_WORDS (z, hx, i0, i1);
  392. r = expl (-z * z - 0.5625) *
  393. expl ((z - x) * (z + x) + R / S);
  394. if ((se & 0x8000) == 0)
  395. return r / x;
  396. else
  397. return two - r / x;
  398. }
  399. else
  400. {
  401. if ((se & 0x8000) == 0)
  402. return tiny * tiny;
  403. else
  404. return two - tiny;
  405. }
  406. }