e_sqrtl.c 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. /*-
  2. * Copyright (c) 2007 Steven G. Kargl
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice unmodified, this list of conditions, and the following
  10. * disclaimer.
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  16. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  17. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  19. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  20. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  24. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "cdefs-compat.h"
  27. //__FBSDID("$FreeBSD: src/lib/msun/src/e_sqrtl.c,v 1.1 2008/03/02 01:47:58 das Exp $");
  28. #include <float.h>
  29. #include <openlibm_fenv.h>
  30. #include <openlibm_math.h>
  31. #include "fpmath.h"
  32. #include "math_private.h"
  33. /* Return (x + ulp) for normal positive x. Assumes no overflow. */
  34. static inline long double
  35. inc(long double x)
  36. {
  37. union IEEEl2bits u;
  38. u.e = x;
  39. if (++u.bits.manl == 0) {
  40. if (++u.bits.manh == 0) {
  41. u.bits.exp++;
  42. u.bits.manh |= LDBL_NBIT;
  43. }
  44. }
  45. return (u.e);
  46. }
  47. /* Return (x - ulp) for normal positive x. Assumes no underflow. */
  48. static inline long double
  49. dec(long double x)
  50. {
  51. union IEEEl2bits u;
  52. u.e = x;
  53. if (u.bits.manl-- == 0) {
  54. if (u.bits.manh-- == LDBL_NBIT) {
  55. u.bits.exp--;
  56. u.bits.manh |= LDBL_NBIT;
  57. }
  58. }
  59. return (u.e);
  60. }
  61. #ifndef __GNUC__
  62. #pragma STDC FENV_ACCESS ON
  63. #endif
  64. /*
  65. * This is slow, but simple and portable. You should use hardware sqrt
  66. * if possible.
  67. */
  68. OLM_DLLEXPORT long double
  69. sqrtl(long double x)
  70. {
  71. union IEEEl2bits u;
  72. int k, r;
  73. long double lo, xn;
  74. fenv_t env;
  75. u.e = x;
  76. /* If x = NaN, then sqrt(x) = NaN. */
  77. /* If x = Inf, then sqrt(x) = Inf. */
  78. /* If x = -Inf, then sqrt(x) = NaN. */
  79. if (u.bits.exp == LDBL_MAX_EXP * 2 - 1)
  80. return (x * x + x);
  81. /* If x = +-0, then sqrt(x) = +-0. */
  82. if ((u.bits.manh | u.bits.manl | u.bits.exp) == 0)
  83. return (x);
  84. /* If x < 0, then raise invalid and return NaN */
  85. if (u.bits.sign)
  86. return ((x - x) / (x - x));
  87. feholdexcept(&env);
  88. if (u.bits.exp == 0) {
  89. /* Adjust subnormal numbers. */
  90. u.e *= 0x1.0p514;
  91. k = -514;
  92. } else {
  93. k = 0;
  94. }
  95. /*
  96. * u.e is a normal number, so break it into u.e = e*2^n where
  97. * u.e = (2*e)*2^2k for odd n and u.e = (4*e)*2^2k for even n.
  98. */
  99. if ((u.bits.exp - 0x3ffe) & 1) { /* n is odd. */
  100. k += u.bits.exp - 0x3fff; /* 2k = n - 1. */
  101. u.bits.exp = 0x3fff; /* u.e in [1,2). */
  102. } else {
  103. k += u.bits.exp - 0x4000; /* 2k = n - 2. */
  104. u.bits.exp = 0x4000; /* u.e in [2,4). */
  105. }
  106. /*
  107. * Newton's iteration.
  108. * Split u.e into a high and low part to achieve additional precision.
  109. */
  110. xn = sqrt(u.e); /* 53-bit estimate of sqrtl(x). */
  111. #if LDBL_MANT_DIG > 100
  112. xn = (xn + (u.e / xn)) * 0.5; /* 106-bit estimate. */
  113. #endif
  114. lo = u.e;
  115. u.bits.manl = 0; /* Zero out lower bits. */
  116. lo = (lo - u.e) / xn; /* Low bits divided by xn. */
  117. xn = xn + (u.e / xn); /* High portion of estimate. */
  118. u.e = xn + lo; /* Combine everything. */
  119. u.bits.exp += (k >> 1) - 1;
  120. feclearexcept(FE_INEXACT);
  121. r = fegetround();
  122. fesetround(FE_TOWARDZERO); /* Set to round-toward-zero. */
  123. xn = x / u.e; /* Chopped quotient (inexact?). */
  124. if (!fetestexcept(FE_INEXACT)) { /* Quotient is exact. */
  125. if (xn == u.e) {
  126. fesetenv(&env);
  127. return (u.e);
  128. }
  129. /* Round correctly for inputs like x = y**2 - ulp. */
  130. xn = dec(xn); /* xn = xn - ulp. */
  131. }
  132. if (r == FE_TONEAREST) {
  133. xn = inc(xn); /* xn = xn + ulp. */
  134. } else if (r == FE_UPWARD) {
  135. u.e = inc(u.e); /* u.e = u.e + ulp. */
  136. xn = inc(xn); /* xn = xn + ulp. */
  137. }
  138. u.e = u.e + xn; /* Chopped sum. */
  139. feupdateenv(&env); /* Restore env and raise inexact */
  140. u.bits.exp--;
  141. return (u.e);
  142. }