s_cbrtl.c 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. /*-
  2. * ====================================================
  3. * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  4. * Copyright (c) 2009-2011, Bruce D. Evans, Steven G. Kargl, David Schultz.
  5. *
  6. * Developed at SunPro, a Sun Microsystems, Inc. business.
  7. * Permission to use, copy, modify, and distribute this
  8. * software is freely granted, provided that this notice
  9. * is preserved.
  10. * ====================================================
  11. *
  12. * The argument reduction and testing for exceptional cases was
  13. * written by Steven G. Kargl with input from Bruce D. Evans
  14. * and David A. Schultz.
  15. */
  16. #include "cdefs-compat.h"
  17. //__FBSDID("$FreeBSD: src/lib/msun/src/s_cbrtl.c,v 1.1 2011/03/12 19:37:35 kargl Exp $");
  18. #include <float.h>
  19. // VBS
  20. //#include <ieeefp.h>
  21. #include "fpmath.h"
  22. #include "openlibm.h"
  23. #include "math_private.h"
  24. #if defined(_WIN32) && defined(__i386__)
  25. #include "i387/bsd_ieeefp.h"
  26. #endif
  27. #define BIAS (LDBL_MAX_EXP - 1)
  28. static const unsigned
  29. B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
  30. DLLEXPORT long double
  31. cbrtl(long double x)
  32. {
  33. union IEEEl2bits u, v;
  34. long double r, s, t, w;
  35. double dr, dt, dx;
  36. float ft, fx;
  37. u_int32_t hx;
  38. u_int16_t expsign;
  39. int k;
  40. u.e = x;
  41. expsign = u.xbits.expsign;
  42. k = expsign & 0x7fff;
  43. /*
  44. * If x = +-Inf, then cbrt(x) = +-Inf.
  45. * If x = NaN, then cbrt(x) = NaN.
  46. */
  47. if (k == BIAS + LDBL_MAX_EXP)
  48. return (x + x);
  49. #ifdef __i386__
  50. fp_prec_t oprec;
  51. oprec = fpgetprec();
  52. if (oprec != FP_PE)
  53. fpsetprec(FP_PE);
  54. #endif
  55. if (k == 0) {
  56. /* If x = +-0, then cbrt(x) = +-0. */
  57. if ((u.bits.manh | u.bits.manl) == 0) {
  58. #ifdef __i386__
  59. if (oprec != FP_PE)
  60. fpsetprec(oprec);
  61. #endif
  62. return (x);
  63. }
  64. /* Adjust subnormal numbers. */
  65. u.e *= 0x1.0p514;
  66. k = u.bits.exp;
  67. k -= BIAS + 514;
  68. } else
  69. k -= BIAS;
  70. u.xbits.expsign = BIAS;
  71. v.e = 1;
  72. x = u.e;
  73. switch (k % 3) {
  74. case 1:
  75. case -2:
  76. x = 2*x;
  77. k--;
  78. break;
  79. case 2:
  80. case -1:
  81. x = 4*x;
  82. k -= 2;
  83. break;
  84. }
  85. v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3);
  86. /*
  87. * The following is the guts of s_cbrtf, with the handling of
  88. * special values removed and extra care for accuracy not taken,
  89. * but with most of the extra accuracy not discarded.
  90. */
  91. /* ~5-bit estimate: */
  92. fx = x;
  93. GET_FLOAT_WORD(hx, fx);
  94. SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1));
  95. /* ~16-bit estimate: */
  96. dx = x;
  97. dt = ft;
  98. dr = dt * dt * dt;
  99. dt = dt * (dx + dx + dr) / (dx + dr + dr);
  100. /* ~47-bit estimate: */
  101. dr = dt * dt * dt;
  102. dt = dt * (dx + dx + dr) / (dx + dr + dr);
  103. #if LDBL_MANT_DIG == 64
  104. /*
  105. * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8).
  106. * Round it away from zero to 32 bits (32 so that t*t is exact, and
  107. * away from zero for technical reasons).
  108. */
  109. volatile double vd2 = 0x1.0p32;
  110. volatile double vd1 = 0x1.0p-31;
  111. #define vd ((long double)vd2 + vd1)
  112. t = dt + vd - 0x1.0p32;
  113. #elif LDBL_MANT_DIG == 113
  114. /*
  115. * Round dt away from zero to 47 bits. Since we don't trust the 47,
  116. * add 2 47-bit ulps instead of 1 to round up. Rounding is slow and
  117. * might be avoidable in this case, since on most machines dt will
  118. * have been evaluated in 53-bit precision and the technical reasons
  119. * for rounding up might not apply to either case in cbrtl() since
  120. * dt is much more accurate than needed.
  121. */
  122. t = dt + 0x2.0p-46 + 0x1.0p60L - 0x1.0p60;
  123. #else
  124. #error "Unsupported long double format"
  125. #endif
  126. /*
  127. * Final step Newton iteration to 64 or 113 bits with
  128. * error < 0.667 ulps
  129. */
  130. s=t*t; /* t*t is exact */
  131. r=x/s; /* error <= 0.5 ulps; |r| < |t| */
  132. w=t+t; /* t+t is exact */
  133. r=(r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
  134. t=t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */
  135. t *= v.e;
  136. #ifdef __i386__
  137. if (oprec != FP_PE)
  138. fpsetprec(oprec);
  139. #endif
  140. return (t);
  141. }