openlibm_fenv_x86.h 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. /*-
  2. * Copyright (c) 2004-2005 David Schultz <[email protected]>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions
  7. * are met:
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * 2. Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24. * SUCH DAMAGE.
  25. *
  26. * $FreeBSD$
  27. */
  28. #ifndef _OPENLIBM_FENV_AMD64_H_
  29. #define _OPENLIBM_FENV_AMD64_H_
  30. #include <openlibm_compat.h>
  31. #include <sys/cdefs.h>
  32. //#include <sys/_types.h>
  33. #ifndef __fenv_static
  34. #define __fenv_static static
  35. #endif
  36. typedef __uint16_t fexcept_t;
  37. /* Exception flags */
  38. #define FE_INVALID 0x01
  39. #define FE_DENORMAL 0x02
  40. #define FE_DIVBYZERO 0x04
  41. #define FE_OVERFLOW 0x08
  42. #define FE_UNDERFLOW 0x10
  43. #define FE_INEXACT 0x20
  44. #define FE_ALL_EXCEPT (FE_DIVBYZERO | FE_DENORMAL | FE_INEXACT | \
  45. FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW)
  46. /* Rounding modes */
  47. #define FE_TONEAREST 0x0000
  48. #define FE_DOWNWARD 0x0400
  49. #define FE_UPWARD 0x0800
  50. #define FE_TOWARDZERO 0x0c00
  51. #define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
  52. FE_UPWARD | FE_TOWARDZERO)
  53. /*
  54. * As compared to the x87 control word, the SSE unit's control word
  55. * has the rounding control bits offset by 3 and the exception mask
  56. * bits offset by 7.
  57. */
  58. #define _SSE_ROUND_SHIFT 3
  59. #define _SSE_EMASK_SHIFT 7
  60. #ifdef __i386__
  61. /*
  62. * To preserve binary compatibility with FreeBSD 5.3, we pack the
  63. * mxcsr into some reserved fields, rather than changing sizeof(fenv_t).
  64. */
  65. typedef struct {
  66. __uint16_t __control;
  67. __uint16_t __mxcsr_hi;
  68. __uint16_t __status;
  69. __uint16_t __mxcsr_lo;
  70. __uint32_t __tag;
  71. char __other[16];
  72. } fenv_t;
  73. #else /* __amd64__ */
  74. typedef struct {
  75. struct {
  76. __uint32_t __control;
  77. __uint32_t __status;
  78. __uint32_t __tag;
  79. char __other[16];
  80. } __x87;
  81. __uint32_t __mxcsr;
  82. } fenv_t;
  83. #endif /* __i386__ */
  84. __BEGIN_DECLS
  85. /* Default floating-point environment */
  86. extern const fenv_t __fe_dfl_env;
  87. #define FE_DFL_ENV (&__fe_dfl_env)
  88. #define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw))
  89. #define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env))
  90. #define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \
  91. : "st", "st(1)", "st(2)", "st(3)", "st(4)", \
  92. "st(5)", "st(6)", "st(7)")
  93. #define __fnclex() __asm __volatile("fnclex")
  94. #define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env)))
  95. #define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw)))
  96. #define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw)))
  97. #define __fwait() __asm __volatile("fwait")
  98. #define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr))
  99. #define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
  100. int fegetenv(fenv_t *__envp);
  101. int feholdexcept(fenv_t *__envp);
  102. int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
  103. int feraiseexcept(int __excepts);
  104. int feupdateenv(const fenv_t *__envp);
  105. __fenv_static inline int
  106. fegetround(void)
  107. {
  108. __uint16_t __control;
  109. /*
  110. * We assume that the x87 and the SSE unit agree on the
  111. * rounding mode. Reading the control word on the x87 turns
  112. * out to be about 5 times faster than reading it on the SSE
  113. * unit on an Opteron 244.
  114. */
  115. __fnstcw(&__control);
  116. return (__control & _ROUND_MASK);
  117. }
  118. #if __BSD_VISIBLE
  119. int feenableexcept(int __mask);
  120. int fedisableexcept(int __mask);
  121. /* We currently provide no external definition of fegetexcept(). */
  122. static inline int
  123. fegetexcept(void)
  124. {
  125. __uint16_t __control;
  126. /*
  127. * We assume that the masks for the x87 and the SSE unit are
  128. * the same.
  129. */
  130. __fnstcw(&__control);
  131. return (~__control & FE_ALL_EXCEPT);
  132. }
  133. #endif /* __BSD_VISIBLE */
  134. #ifdef __i386__
  135. /* After testing for SSE support once, we cache the result in __has_sse. */
  136. enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };
  137. extern enum __sse_support __has_sse;
  138. int __test_sse(void);
  139. #ifdef __SSE__
  140. #define __HAS_SSE() 1
  141. #else
  142. #define __HAS_SSE() (__has_sse == __SSE_YES || \
  143. (__has_sse == __SSE_UNK && __test_sse()))
  144. #endif
  145. #define __get_mxcsr(env) (((env).__mxcsr_hi << 16) | \
  146. ((env).__mxcsr_lo))
  147. #define __set_mxcsr(env, x) do { \
  148. (env).__mxcsr_hi = (__uint32_t)(x) >> 16; \
  149. (env).__mxcsr_lo = (__uint16_t)(x); \
  150. } while (0)
  151. __fenv_static inline int
  152. feclearexcept(int __excepts)
  153. {
  154. fenv_t __env;
  155. __uint32_t __mxcsr;
  156. if (__excepts == FE_ALL_EXCEPT) {
  157. __fnclex();
  158. } else {
  159. __fnstenv(&__env);
  160. __env.__status &= ~__excepts;
  161. __fldenv(__env);
  162. }
  163. if (__HAS_SSE()) {
  164. __stmxcsr(&__mxcsr);
  165. __mxcsr &= ~__excepts;
  166. __ldmxcsr(__mxcsr);
  167. }
  168. return (0);
  169. }
  170. __fenv_static inline int
  171. fegetexceptflag(fexcept_t *__flagp, int __excepts)
  172. {
  173. __uint32_t __mxcsr;
  174. __uint16_t __status;
  175. __fnstsw(&__status);
  176. if (__HAS_SSE())
  177. __stmxcsr(&__mxcsr);
  178. else
  179. __mxcsr = 0;
  180. *__flagp = (__mxcsr | __status) & __excepts;
  181. return (0);
  182. }
  183. __fenv_static inline int
  184. fetestexcept(int __excepts)
  185. {
  186. __uint32_t __mxcsr;
  187. __uint16_t __status;
  188. __fnstsw(&__status);
  189. if (__HAS_SSE())
  190. __stmxcsr(&__mxcsr);
  191. else
  192. __mxcsr = 0;
  193. return ((__status | __mxcsr) & __excepts);
  194. }
  195. __fenv_static inline int
  196. fesetround(int __round)
  197. {
  198. __uint32_t __mxcsr;
  199. __uint16_t __control;
  200. if (__round & ~_ROUND_MASK)
  201. return (-1);
  202. __fnstcw(&__control);
  203. __control &= ~_ROUND_MASK;
  204. __control |= __round;
  205. __fldcw(__control);
  206. if (__HAS_SSE()) {
  207. __stmxcsr(&__mxcsr);
  208. __mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
  209. __mxcsr |= __round << _SSE_ROUND_SHIFT;
  210. __ldmxcsr(__mxcsr);
  211. }
  212. return (0);
  213. }
  214. __fenv_static inline int
  215. fesetenv(const fenv_t *__envp)
  216. {
  217. fenv_t __env = *__envp;
  218. __uint32_t __mxcsr;
  219. __mxcsr = __get_mxcsr(__env);
  220. __set_mxcsr(__env, 0xffffffff);
  221. /*
  222. * XXX Using fldenvx() instead of fldenv() tells the compiler that this
  223. * instruction clobbers the i387 register stack. This happens because
  224. * we restore the tag word from the saved environment. Normally, this
  225. * would happen anyway and we wouldn't care, because the ABI allows
  226. * function calls to clobber the i387 regs. However, fesetenv() is
  227. * inlined, so we need to be more careful.
  228. */
  229. __fldenvx(__env);
  230. if (__HAS_SSE())
  231. __ldmxcsr(__mxcsr);
  232. return (0);
  233. }
  234. #else /* __amd64__ */
  235. __fenv_static inline int
  236. feclearexcept(int __excepts)
  237. {
  238. fenv_t __env;
  239. if (__excepts == FE_ALL_EXCEPT) {
  240. __fnclex();
  241. } else {
  242. __fnstenv(&__env.__x87);
  243. __env.__x87.__status &= ~__excepts;
  244. __fldenv(__env.__x87);
  245. }
  246. __stmxcsr(&__env.__mxcsr);
  247. __env.__mxcsr &= ~__excepts;
  248. __ldmxcsr(__env.__mxcsr);
  249. return (0);
  250. }
  251. __fenv_static inline int
  252. fegetexceptflag(fexcept_t *__flagp, int __excepts)
  253. {
  254. __uint32_t __mxcsr;
  255. __uint16_t __status;
  256. __stmxcsr(&__mxcsr);
  257. __fnstsw(&__status);
  258. *__flagp = (__mxcsr | __status) & __excepts;
  259. return (0);
  260. }
  261. __fenv_static inline int
  262. fetestexcept(int __excepts)
  263. {
  264. __uint32_t __mxcsr;
  265. __uint16_t __status;
  266. __stmxcsr(&__mxcsr);
  267. __fnstsw(&__status);
  268. return ((__status | __mxcsr) & __excepts);
  269. }
  270. __fenv_static inline int
  271. fesetround(int __round)
  272. {
  273. __uint32_t __mxcsr;
  274. __uint16_t __control;
  275. if (__round & ~_ROUND_MASK)
  276. return (-1);
  277. __fnstcw(&__control);
  278. __control &= ~_ROUND_MASK;
  279. __control |= __round;
  280. __fldcw(__control);
  281. __stmxcsr(&__mxcsr);
  282. __mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
  283. __mxcsr |= __round << _SSE_ROUND_SHIFT;
  284. __ldmxcsr(__mxcsr);
  285. return (0);
  286. }
  287. __fenv_static inline int
  288. fesetenv(const fenv_t *__envp)
  289. {
  290. /*
  291. * XXX Using fldenvx() instead of fldenv() tells the compiler that this
  292. * instruction clobbers the i387 register stack. This happens because
  293. * we restore the tag word from the saved environment. Normally, this
  294. * would happen anyway and we wouldn't care, because the ABI allows
  295. * function calls to clobber the i387 regs. However, fesetenv() is
  296. * inlined, so we need to be more careful.
  297. */
  298. __fldenvx(__envp->__x87);
  299. __ldmxcsr(__envp->__mxcsr);
  300. return (0);
  301. }
  302. #endif /* __i386__ */
  303. __END_DECLS
#endif /* !_OPENLIBM_FENV_AMD64_H_ */