فهرست منبع

Add in sincos(), an efficient method of computing the sine and cosine of an angle together

Elliot Saba 11 سال پیش
والد
کامیت
0cf89fad5d
4فایلهای تغییر یافته به همراه388 افزوده شده و 44 حذف شده
  1. 44 44
      src/Make.files
  2. 151 0
      src/s_sincos.c
  3. 162 0
      src/s_sincosf.c
  4. 31 0
      src/s_sincosl.c

+ 44 - 44
src/Make.files

@@ -1,37 +1,37 @@
 $(CUR_SRCS) = \
-        e_acos.c e_acosf.c e_acosh.c e_acoshf.c e_asin.c e_asinf.c \
-        e_atan2.c e_atan2f.c e_atanh.c e_atanhf.c e_cosh.c e_coshf.c e_exp.c \
-        e_expf.c e_fmod.c e_fmodf.c e_gamma.c e_gamma_r.c e_gammaf.c \
-        e_gammaf_r.c e_hypot.c e_hypotf.c e_j0.c e_j0f.c e_j1.c e_j1f.c \
-        e_jn.c e_jnf.c e_lgamma.c e_lgamma_r.c e_lgammaf.c e_lgammaf_r.c \
-        e_log.c e_log10.c e_log10f.c e_log2.c e_log2f.c e_logf.c \
-        e_pow.c e_powf.c e_remainder.c e_remainderf.c e_scalb.c e_scalbf.c \
-        e_rem_pio2.c e_rem_pio2f.c \
-        e_sinh.c e_sinhf.c e_sqrt.c e_sqrtf.c \
-        k_cos.c k_exp.c k_expf.c k_rem_pio2.c k_sin.c k_tan.c \
-        k_cosf.c k_sinf.c k_tanf.c \
-        s_asinh.c s_asinhf.c s_atan.c s_atanf.c s_carg.c s_cargf.c s_cargl.c \
-        s_cbrt.c s_cbrtf.c s_ceil.c s_ceilf.c \
-        s_copysign.c s_copysignf.c s_cos.c s_cosf.c \
-        s_csqrt.c s_csqrtf.c s_erf.c s_erff.c \
-        s_exp2.c s_exp2f.c s_expm1.c s_expm1f.c s_fabs.c s_fabsf.c s_fdim.c \
-        s_finite.c s_finitef.c \
-        s_floor.c s_floorf.c s_fma.c s_fmaf.c \
-        s_fmax.c s_fmaxf.c s_fmaxl.c s_fmin.c \
-        s_fminf.c s_fminl.c s_fpclassify.c \
-        s_frexp.c s_frexpf.c s_ilogb.c s_ilogbf.c \
-        s_ilogbl.c s_isinf.c s_isfinite.c s_isnormal.c s_isnan.c \
-        s_llrint.c s_llrintf.c s_llround.c s_llroundf.c s_llroundl.c \
-        s_log1p.c s_log1pf.c s_logb.c s_logbf.c s_lrint.c s_lrintf.c \
-        s_lround.c s_lroundf.c s_lroundl.c s_modf.c s_modff.c \
-        s_nearbyint.c s_nextafter.c s_nextafterf.c \
-        s_nexttowardf.c s_remquo.c s_remquof.c \
-        s_rint.c s_rintf.c s_round.c s_roundf.c s_roundl.c \
-        s_scalbln.c s_scalbn.c s_scalbnf.c s_signbit.c \
-        s_signgam.c s_significand.c s_significandf.c s_sin.c s_sinf.c \
-        s_tan.c s_tanf.c s_tanh.c s_tanhf.c s_tgammaf.c s_trunc.c s_truncf.c \
-	s_cpow.c  s_cpowf.c s_cpowl.c \
-        w_cabs.c w_cabsf.c w_drem.c w_dremf.c
+	e_acos.c e_acosf.c e_acosh.c e_acoshf.c e_asin.c e_asinf.c \
+	e_atan2.c e_atan2f.c e_atanh.c e_atanhf.c e_cosh.c e_coshf.c e_exp.c \
+	e_expf.c e_fmod.c e_fmodf.c e_gamma.c e_gamma_r.c e_gammaf.c \
+	e_gammaf_r.c e_hypot.c e_hypotf.c e_j0.c e_j0f.c e_j1.c e_j1f.c \
+	e_jn.c e_jnf.c e_lgamma.c e_lgamma_r.c e_lgammaf.c e_lgammaf_r.c \
+	e_log.c e_log10.c e_log10f.c e_log2.c e_log2f.c e_logf.c \
+	e_pow.c e_powf.c e_remainder.c e_remainderf.c e_scalb.c e_scalbf.c \
+	e_rem_pio2.c e_rem_pio2f.c \
+	e_sinh.c e_sinhf.c e_sqrt.c e_sqrtf.c \
+	k_cos.c k_exp.c k_expf.c k_rem_pio2.c k_sin.c k_tan.c \
+	k_cosf.c k_sinf.c k_tanf.c \
+	s_asinh.c s_asinhf.c s_atan.c s_atanf.c s_carg.c s_cargf.c s_cargl.c \
+	s_cbrt.c s_cbrtf.c s_ceil.c s_ceilf.c \
+	s_copysign.c s_copysignf.c s_cos.c s_cosf.c \
+	s_csqrt.c s_csqrtf.c s_erf.c s_erff.c \
+	s_exp2.c s_exp2f.c s_expm1.c s_expm1f.c s_fabs.c s_fabsf.c s_fdim.c \
+	s_finite.c s_finitef.c \
+	s_floor.c s_floorf.c s_fma.c s_fmaf.c \
+	s_fmax.c s_fmaxf.c s_fmaxl.c s_fmin.c \
+	s_fminf.c s_fminl.c s_fpclassify.c \
+	s_frexp.c s_frexpf.c s_ilogb.c s_ilogbf.c \
+	s_ilogbl.c s_isinf.c s_isfinite.c s_isnormal.c s_isnan.c \
+	s_llrint.c s_llrintf.c s_llround.c s_llroundf.c s_llroundl.c \
+	s_log1p.c s_log1pf.c s_logb.c s_logbf.c s_lrint.c s_lrintf.c \
+	s_lround.c s_lroundf.c s_lroundl.c s_modf.c s_modff.c \
+	s_nearbyint.c s_nextafter.c s_nextafterf.c \
+	s_nexttowardf.c s_remquo.c s_remquof.c \
+	s_rint.c s_rintf.c s_round.c s_roundf.c s_roundl.c \
+	s_scalbln.c s_scalbn.c s_scalbnf.c s_signbit.c \
+	s_signgam.c s_significand.c s_significandf.c s_sin.c s_sincos.c \
+	s_sinf.c s_sincosf.c s_tan.c s_tanf.c s_tanh.c s_tanhf.c s_tgammaf.c \
+	s_trunc.c s_truncf.c s_cpow.c  s_cpowf.c s_cpowl.c \
+	w_cabs.c w_cabsf.c w_drem.c w_dremf.c
 
 ifneq ($(OS), WINNT)
 $(CUR_SRCS) += s_nan.c
@@ -42,18 +42,18 @@ $(CUR_SRCS) +=	s_copysignl.c s_fabsl.c s_llrintl.c s_lrintl.c s_modfl.c
 
 # If long double != double use these; otherwise, we alias the double versions.
 $(CUR_SRCS) +=	e_acosl.c e_asinl.c e_atan2l.c e_fmodl.c \
-        e_hypotl.c e_remainderl.c e_sqrtl.c \
-        s_atanl.c s_ceill.c s_cosl.c s_cprojl.c \
-        s_csqrtl.c s_floorl.c s_fmal.c \
-        s_frexpl.c s_logbl.c s_nexttoward.c \
-        s_remquol.c \
-        s_sinl.c s_tanl.c s_truncl.c w_cabsl.c \
-        s_nextafterl.c s_rintl.c s_scalbnl.c
+	e_hypotl.c e_remainderl.c e_sqrtl.c \
+	s_atanl.c s_ceill.c s_cosl.c s_cprojl.c \
+	s_csqrtl.c s_floorl.c s_fmal.c \
+	s_frexpl.c s_logbl.c s_nexttoward.c \
+	s_remquol.c \
+	s_sinl.c s_sincosl.c s_tanl.c s_truncl.c w_cabsl.c \
+	s_nextafterl.c s_rintl.c s_scalbnl.c
 #	s_cbrtl.c
 
 # C99 complex functions
 $(CUR_SRCS) +=	s_ccosh.c s_ccoshf.c s_cexp.c s_cexpf.c \
-        s_cimag.c s_cimagf.c s_cimagl.c \
-        s_conj.c s_conjf.c s_conjl.c \
-        s_cproj.c s_cprojf.c s_creal.c s_crealf.c s_creall.c \
-        s_csinh.c s_csinhf.c s_ctanh.c s_ctanhf.c
+	s_cimag.c s_cimagf.c s_cimagl.c \
+	s_conj.c s_conjf.c s_conjl.c \
+	s_cproj.c s_cprojf.c s_creal.c s_crealf.c s_creall.c \
+	s_csinh.c s_csinhf.c s_ctanh.c s_ctanhf.c

+ 151 - 0
src/s_sincos.c

@@ -0,0 +1,151 @@
+/* @(#)s_sincos.c 5.1 13/07/15 */
+/*
+ * ====================================================
+ * Copyright (C) 2013 Elliot Saba. All rights reserved.
+ *
+ * Developed at the University of Washington.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+ #include "cdefs-compat.h"
+
+/* sincos(x, s, c)
+ * Several applications need sine and cosine of the same
+ * angle x. This function computes both at the same time,
+ * and stores the results in *sin and *cos.
+ *
+ * kernel function:
+ *	__kernel_sin		... sine function on [-pi/4,pi/4]
+ *	__kernel_cos		... cose function on [-pi/4,pi/4]
+ *	__ieee754_rem_pio2	... argument reduction routine
+ *
+ * Method.
+ *      Borrow liberally from s_sin.c and s_cos.c, merging
+ *  efforts where applicable and returning their values in
+ * appropriate variables, thereby slightly reducing the
+ * amount of work relative to just calling sin/cos(x) 
+ * separately
+ *
+ * Special cases:
+ *      Let trig be any of sin, cos, or tan.
+ *      sincos(+-INF, s, c)  is NaN, with signals;
+ *      sincos(NaN, s, c)    is that NaN;
+ */
+
+#include <float.h>
+
+#include "openlibm.h"
+//#define INLINE_REM_PIO2
+#include "math_private.h"
+//#include "e_rem_pio2.c"
+
+/* Constants used in polynomial approximation of sin/cos */
+static const double
+one =  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
+S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
+S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
+S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
+S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
+S6  =  1.58969099521155010221e-10, /* 0x3DE5D93A, 0x5ACFD57C */
+C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
+C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
+C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
+C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
+C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
+C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
+
+void
+__kernel_sincos( double x, double y, int iy, double * k_s, double * k_c )
+{
+    /* Inline calculation of sin/cos, as we can save
+    some work, and we will always need to calculate
+    both values, no matter the result of switch */
+    double z, w, r, v, hz;
+    z   = x*x;
+    w   = z*z;
+
+    /* cos-specific computation; equivalent to calling
+     __kernel_cos(x,y) and storing in k_c*/
+    r   = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
+    hz  = 0.5*z;
+    v   = one-hz;
+
+    *k_c = v + (((one-v)-hz) + (z*r-x*y));
+
+    /* sin-specific computation; equivalent to calling
+    __kernel_sin(x,y,1) and storing in k_s*/
+    r   = S2+z*(S3+z*S4) + z*w*(S5+z*S6);
+    v   = z*x;
+    if(iy == 0)
+        *k_s = x+v*(S1+z*r);
+    else
+        *k_s = x-((z*(half*y-v*r)-y)-v*S1);
+}
+
+void
+sincos(double x, double * s, double * c)
+{
+    double y[2];
+    int32_t ix;
+
+    /* Store high word of x in ix */
+    GET_HIGH_WORD(ix,x);
+
+    /* |x| ~< pi/4 */
+    ix &= 0x7fffffff;
+    if(ix <= 0x3fe921fb) {
+        /* Check for small x for sin and cos */
+        if(ix<0x3e46a09e) {
+            /* Check for exact zero */
+            if( (int)x==0 ) {
+                *s = x;
+                *c = 1.0;
+                return;
+            }
+        }
+        /* Call kernel function with 0 extra */
+        __kernel_sincos(x,0.0,0, s, c);
+    } else if( ix >= 0x7ff00000 ) {
+        /* sincos(Inf or NaN) is NaN */
+        *s = x-x;
+        *c = x-x;
+    }
+
+    /*argument reduction needed*/
+    else {
+        double k_c, k_s;
+        printf( "bleh?\n");
+
+        /* Calculate remainer, then sub out to kernel */
+        int32_t n = __ieee754_rem_pio2(x,y);
+        __kernel_sincos( y[0], y[1], 1, &k_s, &k_c );
+
+        /* Figure out permutation of sin/cos outputs to true outputs */
+        switch(n&3) {
+            case 0:
+                *c =  k_c;
+                *s =  k_s;
+                break;
+            case 1:
+                *c = -k_s;
+                *s =  k_c;
+                break;
+            case 2:
+                *c = -k_c;
+                *s = -k_s;
+                break;
+            default:
+                *c =  k_s;
+                *s = -k_c;
+                break;
+        }
+    }
+}
+
+#if (LDBL_MANT_DIG == 53)
+__weak_reference(sincos, sincosl);
+#endif

+ 162 - 0
src/s_sincosf.c

@@ -0,0 +1,162 @@
+/* s_sincosf.c -- float version of s_sincos.c
+ *
+ * Copyright (C) 2013 Elliot Saba
+ * Developed at the University of Washington
+ *
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+*/
+
+#include "cdefs-compat.h"
+#include <float.h>
+#include "openlibm.h"
+
+//#define	INLINE_KERNEL_COSDF
+//#define	INLINE_KERNEL_SINDF
+//#define INLINE_REM_PIO2F
+#include "math_private.h"
+//#include "e_rem_pio2f.c"
+//#include "k_cosf.c"
+//#include "k_sinf.c"
+
+
+/* Constants used in shortcircuits in sincosf */
+static const double
+sc1pio2 = 1*M_PI_2,			/* 0x3FF921FB, 0x54442D18 */
+sc2pio2 = 2*M_PI_2,			/* 0x400921FB, 0x54442D18 */
+sc3pio2 = 3*M_PI_2,			/* 0x4012D97C, 0x7F3321D2 */
+sc4pio2 = 4*M_PI_2,			/* 0x401921FB, 0x54442D18 */
+
+/* Constants used in polynomial approximation of sin/cos */
+one =  1.0,
+S1 = -0x15555554cbac77.0p-55,	/* -0.166666666416265235595 */
+S2 =  0x111110896efbb2.0p-59,	/*  0.0083333293858894631756 */
+S3 = -0x1a00f9e2cae774.0p-65,	/* -0.000198393348360966317347 */
+S4 =  0x16cd878c3b46a7.0p-71,	/*  0.0000027183114939898219064 */
+C0  = -0x1ffffffd0c5e81.0p-54,	/* -0.499999997251031003120 */
+C1  =  0x155553e1053a42.0p-57,	/*  0.0416666233237390631894 */
+C2  = -0x16c087e80f1e27.0p-62,	/* -0.00138867637746099294692 */
+C3  =  0x199342e0ee5069.0p-68;	/*  0.0000243904487962774090654 */
+
+void
+__kernel_sincosdf( double x, float * s, float * c )
+{
+	double r, w, z, v;
+	z = x*x;
+	w = z*z;
+
+	/* cos-specific computation; equivalent to calling
+     __kernel_cos(x,y) and storing in k_c*/
+	r = C2+z*C3;
+	double k_c = ((one+z*C0) + w*C1) + (w*z)*r;
+
+	/* sin-specific computation; equivalent to calling
+    __kernel_sin(x,y,1) and storing in k_s*/
+	r = S3+z*S4;
+	v = z*x;
+	double k_s = (x + v*(S1+z*S2)) + v*w*r;
+
+	*c = k_c;
+	*s = k_s;
+}
+
+void
+sincosf(float x, float * s, float * c) {
+	// Worst approximation of sin and cos NA
+	*s = x;
+	*c = x;
+
+	double y;
+	float k_c, k_s;
+	int32_t n, hx, ix;
+
+	GET_FLOAT_WORD(hx,x);
+	ix = hx & 0x7fffffff;
+
+	if(ix <= 0x3f490fda) {		/* |x| ~<= pi/4 */
+	    if(ix<0x39800000) {		/* |x| < 2**-12 */
+			/* Check if x is exactly zero */
+			if(((int)x)==0) {
+				*s = x;
+				*c = 1.0f;
+				return;
+			}
+		}
+		__kernel_sincosdf(x, s, c);
+		return;
+	}
+	/* |x| ~<= 5*pi/4 */
+	if (ix<=0x407b53d1) {
+		/* |x| ~<= 3pi/4 */
+	    if(ix<=0x4016cbe3) {
+			if(hx>0) {
+				__kernel_sincosdf( sc1pio2 - x, c, s );
+			}
+			else {
+				__kernel_sincosdf( sc1pio2 + x, c, &k_s );
+				*s = -k_s;
+		    }
+		} else {
+
+			if(hx>0) {
+				__kernel_sincosdf( sc2pio2 - x, s, &k_c );
+				*c = -k_c;
+			} else  {
+				__kernel_sincosdf( -sc2pio2 - x, s, &k_c );
+				*c = -k_c;
+			}
+		}
+		return;
+	}
+
+	/* |x| ~<= 9*pi/4 */
+	if(ix<=0x40e231d5) {
+		/* |x|  ~> 7*pi/4 */
+	    if(ix<=0x40afeddf) {	
+			if(hx>0) {
+				__kernel_sincosdf( x - sc3pio2, c, &k_s );
+				*s = -k_s;
+			} else {
+				__kernel_sincosdf( x + sc3pio2, &k_c, s );
+				*c = -k_c;
+		    } 
+		}
+		else {
+	    	if( hx > 0 ) {
+	    		__kernel_sincosdf( x - sc4pio2, s, c );
+	    	} else {
+	    		__kernel_sincosdf( x + sc4pio2, s, c );
+	    	}
+	    }
+		return;
+	}
+
+	/* cos(Inf or NaN) is NaN */
+	else if(ix>=0x7f800000) {
+		*c = *s = x-x;
+	} else {
+		/* general argument reduction needed */
+		n = __ieee754_rem_pio2f(x,&y);
+
+		switch(n&3) {
+			case 0:
+				__kernel_sincosdf( y, s, c );
+				break;
+			case 1:
+				__kernel_sincosdf( -y, c, s );
+				break;
+			case 2: 
+				__kernel_sincosdf( -y, s, &k_c);
+				*c = -k_c;
+				break;
+			default:
+				__kernel_sincosdf( -y, &k_c, &k_s );
+				*c = -k_c;
+				*s = -k_s;
+				break;
+	    }
+	}
+
+}

+ 31 - 0
src/s_sincosl.c

@@ -0,0 +1,31 @@
+/* s_sincosl.c -- long double version of s_sincos.c
+ *
+ * Copyright (C) 2013 Elliot Saba
+ * Developed at the University of Washington
+ *
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+*/
+
+ #include "cdefs-compat.h"
+
+ #include <float.h>
+
+#include "openlibm.h"
+#include "math_private.h"
+#if LDBL_MANT_DIG == 64
+#include "../ld80/e_rem_pio2l.h"
+#elif LDBL_MANT_DIG == 113
+#include "../ld128/e_rem_pio2l.h"
+#else
+#error "Unsupported long double format"
+#endif
+
+ void
+ sincosl( long double x, long double * s, long double * c )
+ {
+ 	*s = cosl( x );
+ 	*c = sinl( x );
+ }