math: bessel cleanup (jn.c and jnf.c)
authorSzabolcs Nagy <nsz@port70.net>
Tue, 1 Jan 2013 21:20:45 +0000 (22:20 +0100)
committerSzabolcs Nagy <nsz@port70.net>
Tue, 1 Jan 2013 21:20:45 +0000 (22:20 +0100)
both jn and yn functions had integer overflow issues for large
and small n

to handle these issues nm1 (== |n|-1) is used instead of n and -n
in the code and some loops are changed to make sure the iteration
counter does not overflow

(another solution could be to use larger integer type or even double
but that has more size and runtime cost, on x87 loading int64_t or
even uint32_t into an fpu register is more than two times slower than
loading int32_t, and using double for n slows down iteration logic)

yn(-1,0) now returns inf

posix2008 specifies that on overflow and at +-0 all y0,y1,yn functions
return -inf, this is not consistent with math when n<0 odd integer in yn
(eg. when x->0, yn(-1,x)->inf, but historically yn(-1,0) seems to be
special cased and returned -inf)

some threshold values in jnf and ynf were fixed that seems to be
incorrectly copy-pasted from the double version

src/math/jn.c
src/math/jnf.c

index d95af15d40ce62f61e1eda8c21f1499273186226..4878a54fedaebf474d1c3ee5c3993ea28e22998a 100644 (file)
@@ -20,7 +20,7 @@
  * Note 2. About jn(n,x), yn(n,x)
  *      For n=0, j0(x) is called,
  *      for n=1, j1(x) is called,
- *      for n<x, forward recursion us used starting
+ *      for n<=x, forward recursion is used starting
  *      from values of j0(x) and j1(x).
  *      for n>x, a continued fraction approximation to
  *      j(n,x)/j(n-1,x) is evaluated and then backward
@@ -32,7 +32,6 @@
  *      yn(n,x) is similar in all respects, except
  *      that forward recursion is used for all
  *      values of n>1.
- *
  */
 
 #include "libm.h"
@@ -41,33 +40,39 @@ static const double invsqrtpi = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x504
 
 double jn(int n, double x)
 {
-       int32_t i,hx,ix,lx,sgn;
-       double a, b, temp, di;
-       double z, w;
+       uint32_t ix, lx;
+       int nm1, i, sign;
+       double a, b, temp;
+
+       EXTRACT_WORDS(ix, lx, x);
+       sign = ix>>31;
+       ix &= 0x7fffffff;
+
+       if ((ix | (lx|-lx)>>31) > 0x7ff00000) /* nan */
+               return x;
 
        /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
         * Thus, J(-n,x) = J(n,-x)
         */
-       EXTRACT_WORDS(hx, lx, x);
-       ix = 0x7fffffff & hx;
-       /* if J(n,NaN) is NaN */
-       if ((ix|((uint32_t)(lx|-lx))>>31) > 0x7ff00000)
-               return x+x;
+       /* nm1 = |n|-1 is used instead of |n| to handle n==INT_MIN */
+       if (n == 0)
+               return j0(x);
        if (n < 0) {
-               n = -n;
+               nm1 = -(n+1);
                x = -x;
-               hx ^= 0x80000000;
-       }
-       if (n == 0) return j0(x);
-       if (n == 1) return j1(x);
+               sign ^= 1;
+       } else
+               nm1 = n-1;
+       if (nm1 == 0)
+               return j1(x);
 
-       sgn = (n&1)&(hx>>31);  /* even n -- 0, odd n -- sign(x) */
+       sign &= n;  /* even n: 0, odd n: signbit(x) */
        x = fabs(x);
-       if ((ix|lx) == 0 || ix >= 0x7ff00000)  /* if x is 0 or inf */
+       if ((ix|lx) == 0 || ix == 0x7ff00000)  /* if x is 0 or inf */
                b = 0.0;
-       else if ((double)n <= x) {
+       else if (nm1 < x) {
                /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
-               if (ix >= 0x52D00000) { /* x > 2**302 */
+               if (ix >= 0x52d00000) { /* x > 2**302 */
                        /* (x >> n**2)
                         *      Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
                         *      Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
@@ -81,19 +86,21 @@ double jn(int n, double x)
                         *             2    -s+c            -c-s
                         *             3     s+c             c-s
                         */
-                       switch(n&3) {
-                       case 0: temp =  cos(x)+sin(x); break;
-                       case 1: temp = -cos(x)+sin(x); break;
-                       case 2: temp = -cos(x)-sin(x); break;
-                       case 3: temp =  cos(x)-sin(x); break;
+                       switch(nm1&3) {
+                       case 0: temp = -cos(x)+sin(x); break;
+                       case 1: temp = -cos(x)-sin(x); break;
+                       case 2: temp =  cos(x)-sin(x); break;
+                       default:
+                       case 3: temp =  cos(x)+sin(x); break;
                        }
                        b = invsqrtpi*temp/sqrt(x);
                } else {
                        a = j0(x);
                        b = j1(x);
-                       for (i=1; i<n; i++){
+                       for (i=0; i<nm1; ) {
+                               i++;
                                temp = b;
-                               b = b*((double)(i+i)/x) - a; /* avoid underflow */
+                               b = b*(2.0*i/x) - a; /* avoid underflow */
                                a = temp;
                        }
                }
@@ -102,12 +109,13 @@ double jn(int n, double x)
                        /* x is tiny, return the first Taylor expansion of J(n,x)
                         * J(n,x) = 1/n!*(x/2)^n  - ...
                         */
-                       if (n > 33)  /* underflow */
+                       if (nm1 > 32)  /* underflow */
                                b = 0.0;
                        else {
                                temp = x*0.5;
                                b = temp;
-                               for (a=1.0,i=2; i<=n; i++) {
+                               a = 1.0;
+                               for (i=2; i<=nm1+1; i++) {
                                        a *= (double)i; /* a = n! */
                                        b *= temp;      /* b = (x/2)^n */
                                }
@@ -143,13 +151,14 @@ double jn(int n, double x)
                         * When Q(k) > 1e17     good for quadruple
                         */
                        /* determine k */
-                       double t,v;
-                       double q0,q1,h,tmp;
-                       int32_t k,m;
+                       double t,q0,q1,w,h,z,tmp,nf;
+                       int k;
 
-                       w  = (n+n)/(double)x; h = 2.0/(double)x;
-                       q0 = w;
+                       nf = nm1 + 1.0;
+                       w = 2*nf/x;
+                       h = 2/x;
                        z = w+h;
+                       q0 = w;
                        q1 = w*z - 1.0;
                        k = 1;
                        while (q1 < 1.0e9) {
@@ -159,9 +168,8 @@ double jn(int n, double x)
                                q0 = q1;
                                q1 = tmp;
                        }
-                       m = n+n;
-                       for (t=0.0, i = 2*(n+k); i>=m; i -= 2)
-                               t = 1.0/(i/x-t);
+                       for (t=0.0, i=k; i>=0; i--)
+                               t = 1/(2*(i+nf)/x - t);
                        a = t;
                        b = 1.0;
                        /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
@@ -172,26 +180,20 @@ double jn(int n, double x)
                         *  then recurrent value may overflow and the result is
                         *  likely underflow to zero
                         */
-                       tmp = n;
-                       v = 2.0/x;
-                       tmp = tmp*log(fabs(v*tmp));
+                       tmp = nf*log(fabs(w));
                        if (tmp < 7.09782712893383973096e+02) {
-                               for (i=n-1,di=(double)(i+i); i>0; i--) {
+                               for (i=nm1; i>0; i--) {
                                        temp = b;
-                                       b *= di;
-                                       b = b/x - a;
+                                       b = b*(2.0*i)/x - a;
                                        a = temp;
-                                       di -= 2.0;
                                }
                        } else {
-                               for (i=n-1,di=(double)(i+i); i>0; i--) {
+                               for (i=nm1; i>0; i--) {
                                        temp = b;
-                                       b *= di;
-                                       b = b/x - a;
+                                       b = b*(2.0*i)/x - a;
                                        a = temp;
-                                       di -= 2.0;
                                        /* scale b to avoid spurious overflow */
-                                       if (b > 1e100) {
+                                       if (b > 0x1p500) {
                                                a /= b;
                                                t /= b;
                                                b  = 1.0;
@@ -206,39 +208,40 @@ double jn(int n, double x)
                                b = t*w/a;
                }
        }
-       if (sgn==1) return -b;
-       return b;
+       return sign ? -b : b;
 }
 
 
-
 double yn(int n, double x)
 {
-       int32_t i,hx,ix,lx;
-       int32_t sign;
+       uint32_t ix, lx, ib;
+       int nm1, sign, i;
        double a, b, temp;
 
-       EXTRACT_WORDS(hx, lx, x);
-       ix = 0x7fffffff & hx;
-       /* if Y(n,NaN) is NaN */
-       if ((ix|((uint32_t)(lx|-lx))>>31) > 0x7ff00000)
-               return x+x;
-       if ((ix|lx) == 0)
-               return -1.0/0.0;
-       if (hx < 0)
-               return 0.0/0.0;
-       sign = 1;
-       if (n < 0) {
-               n = -n;
-               sign = 1 - ((n&1)<<1);
-       }
-       if (n == 0)
-               return y0(x);
-       if (n == 1)
-               return sign*y1(x);
+       EXTRACT_WORDS(ix, lx, x);
+       sign = ix>>31;
+       ix &= 0x7fffffff;
+
+       if ((ix | (lx|-lx)>>31) > 0x7ff00000) /* nan */
+               return x;
+       if (sign && (ix|lx)!=0) /* x < 0 */
+               return 0/0.0;
        if (ix == 0x7ff00000)
                return 0.0;
-       if (ix >= 0x52D00000) { /* x > 2**302 */
+
+       if (n == 0)
+               return y0(x);
+       if (n < 0) {
+               nm1 = -(n+1);
+               sign = n&1;
+       } else {
+               nm1 = n-1;
+               sign = 0;
+       }
+       if (nm1 == 0)
+               return sign ? -y1(x) : y1(x);
+
+       if (ix >= 0x52d00000) { /* x > 2**302 */
                /* (x >> n**2)
                 *      Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
                 *      Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
@@ -252,26 +255,26 @@ double yn(int n, double x)
                 *             2    -s+c            -c-s
                 *             3     s+c             c-s
                 */
-               switch(n&3) {
-               case 0: temp =  sin(x)-cos(x); break;
-               case 1: temp = -sin(x)-cos(x); break;
-               case 2: temp = -sin(x)+cos(x); break;
-               case 3: temp =  sin(x)+cos(x); break;
+               switch(nm1&3) {
+               case 0: temp = -sin(x)-cos(x); break;
+               case 1: temp = -sin(x)+cos(x); break;
+               case 2: temp =  sin(x)+cos(x); break;
+               default:
+               case 3: temp =  sin(x)-cos(x); break;
                }
                b = invsqrtpi*temp/sqrt(x);
        } else {
-               uint32_t high;
                a = y0(x);
                b = y1(x);
                /* quit if b is -inf */
-               GET_HIGH_WORD(high, b);
-               for (i=1; i<n && high!=0xfff00000; i++){
+               GET_HIGH_WORD(ib, b);
+               for (i=0; i<nm1 && ib!=0xfff00000; ){
+                       i++;
                        temp = b;
-                       b = ((double)(i+i)/x)*b - a;
-                       GET_HIGH_WORD(high, b);
+                       b = (2.0*i/x)*b - a;
+                       GET_HIGH_WORD(ib, b);
                        a = temp;
                }
        }
-       if (sign > 0) return b;
-       return -b;
+       return sign ? -b : b;
 }
index fd291103945507f86f044b5165c8b2c383c0b1d0..f63c062f32caf74b724f11e0ac97ece8da4d75b9 100644 (file)
 
 float jnf(int n, float x)
 {
-       int32_t i,hx,ix, sgn;
-       float a, b, temp, di;
-       float z, w;
+       uint32_t ix;
+       int nm1, sign, i;
+       float a, b, temp;
+
+       GET_FLOAT_WORD(ix, x);
+       sign = ix>>31;
+       ix &= 0x7fffffff;
+       if (ix > 0x7f800000) /* nan */
+               return x;
 
-       /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
-        * Thus, J(-n,x) = J(n,-x)
-        */
-       GET_FLOAT_WORD(hx, x);
-       ix = 0x7fffffff & hx;
-       /* if J(n,NaN) is NaN */
-       if (ix > 0x7f800000)
-               return x+x;
+       /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */
+       if (n == 0)
+               return j0f(x);
        if (n < 0) {
-               n = -n;
+               nm1 = -(n+1);
                x = -x;
-               hx ^= 0x80000000;
-       }
-       if (n == 0) return j0f(x);
-       if (n == 1) return j1f(x);
+               sign ^= 1;
+       } else
+               nm1 = n-1;
+       if (nm1 == 0)
+               return j1f(x);
 
-       sgn = (n&1)&(hx>>31);  /* even n -- 0, odd n -- sign(x) */
+       sign &= n;  /* even n: 0, odd n: signbit(x) */
        x = fabsf(x);
-       if (ix == 0 || ix >= 0x7f800000)  /* if x is 0 or inf */
+       if (ix == 0 || ix == 0x7f800000)  /* if x is 0 or inf */
                b = 0.0f;
-       else if((float)n <= x) {
+       else if (nm1 < x) {
                /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
                a = j0f(x);
                b = j1f(x);
-               for (i=1; i<n; i++){
+               for (i=0; i<nm1; ){
+                       i++;
                        temp = b;
-                       b = b*((float)(i+i)/x) - a; /* avoid underflow */
+                       b = b*(2.0f*i/x) - a;
                        a = temp;
                }
        } else {
-               if (ix < 0x30800000) { /* x < 2**-29 */
+               if (ix < 0x35800000) { /* x < 2**-20 */
                        /* x is tiny, return the first Taylor expansion of J(n,x)
                         * J(n,x) = 1/n!*(x/2)^n  - ...
                         */
-                       if (n > 33)  /* underflow */
-                               b = 0.0f;
-                       else {
-                               temp = 0.5f * x;
-                               b = temp;
-                               for (a=1.0f,i=2; i<=n; i++) {
-                                       a *= (float)i;    /* a = n! */
-                                       b *= temp;        /* b = (x/2)^n */
-                               }
-                               b = b/a;
+                       if (nm1 > 8)  /* underflow */
+                               nm1 = 8;
+                       temp = 0.5f * x;
+                       b = temp;
+                       a = 1.0f;
+                       for (i=2; i<=nm1+1; i++) {
+                               a *= (float)i;    /* a = n! */
+                               b *= temp;        /* b = (x/2)^n */
                        }
+                       b = b/a;
                } else {
                        /* use backward recurrence */
                        /*                      x      x^2      x^2
@@ -97,26 +99,25 @@ float jnf(int n, float x)
                         * When Q(k) > 1e17     good for quadruple
                         */
                        /* determine k */
-                       float t,v;
-                       float q0,q1,h,tmp;
-                       int32_t k,m;
+                       float t,q0,q1,w,h,z,tmp,nf;
+                       int k;
 
-                       w = (n+n)/x;
-                       h = 2.0f/x;
+                       nf = nm1+1.0f;
+                       w = 2*nf/x;
+                       h = 2/x;
                        z = w+h;
                        q0 = w;
                        q1 = w*z - 1.0f;
                        k = 1;
-                       while (q1 < 1.0e9f) {
+                       while (q1 < 1.0e4f) {
                                k += 1;
                                z += h;
                                tmp = z*q1 - q0;
                                q0 = q1;
                                q1 = tmp;
                        }
-                       m = n+n;
-                       for (t=0.0f, i = 2*(n+k); i>=m; i -= 2)
-                               t = 1.0f/(i/x-t);
+                       for (t=0.0f, i=k; i>=0; i--)
+                               t = 1.0f/(2*(i+nf)/x-t);
                        a = t;
                        b = 1.0f;
                        /*  estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
@@ -127,26 +128,20 @@ float jnf(int n, float x)
                         *  then recurrent value may overflow and the result is
                         *  likely underflow to zero
                         */
-                       tmp = n;
-                       v = 2.0f/x;
-                       tmp = tmp*logf(fabsf(v*tmp));
+                       tmp = nf*logf(fabsf(w));
                        if (tmp < 88.721679688f) {
-                               for (i=n-1,di=(float)(i+i); i>0; i--) {
+                               for (i=nm1; i>0; i--) {
                                        temp = b;
-                                       b *= di;
-                                       b = b/x - a;
+                                       b = 2.0f*i*b/x - a;
                                        a = temp;
-                                       di -= 2.0f;
                                }
                        } else {
-                               for (i=n-1,di=(float)(i+i); i>0; i--){
+                               for (i=nm1; i>0; i--){
                                        temp = b;
-                                       b *= di;
-                                       b = b/x - a;
+                                       b = 2.0f*i*b/x - a;
                                        a = temp;
-                                       di -= 2.0f;
                                        /* scale b to avoid spurious overflow */
-                                       if (b > 1e10f) {
+                                       if (b > 0x1p60f) {
                                                a /= b;
                                                t /= b;
                                                b = 1.0f;
@@ -161,48 +156,47 @@ float jnf(int n, float x)
                                b = t*w/a;
                }
        }
-       if (sgn == 1) return -b;
-       return b;
+       return sign ? -b : b;
 }
 
 float ynf(int n, float x)
 {
-       int32_t i,hx,ix,ib;
-       int32_t sign;
+       uint32_t ix, ib;
+       int nm1, sign, i;
        float a, b, temp;
 
-       GET_FLOAT_WORD(hx, x);
-       ix = 0x7fffffff & hx;
-       /* if Y(n,NaN) is NaN */
-       if (ix > 0x7f800000)
-               return x+x;
-       if (ix == 0)
-               return -1.0f/0.0f;
-       if (hx < 0)
-               return 0.0f/0.0f;
-       sign = 1;
-       if (n < 0) {
-               n = -n;
-               sign = 1 - ((n&1)<<1);
-       }
-       if (n == 0)
-               return y0f(x);
-       if (n == 1)
-               return sign*y1f(x);
+       GET_FLOAT_WORD(ix, x);
+       sign = ix>>31;
+       ix &= 0x7fffffff;
+       if (ix > 0x7f800000) /* nan */
+               return x;
+       if (sign && ix != 0) /* x < 0 */
+               return 0/0.0f;
        if (ix == 0x7f800000)
                return 0.0f;
 
+       if (n == 0)
+               return y0f(x);
+       if (n < 0) {
+               nm1 = -(n+1);
+               sign = n&1;
+       } else {
+               nm1 = n-1;
+               sign = 0;
+       }
+       if (nm1 == 0)
+               return sign ? -y1f(x) : y1f(x);
+
        a = y0f(x);
        b = y1f(x);
        /* quit if b is -inf */
        GET_FLOAT_WORD(ib,b);
-       for (i = 1; i < n && ib != 0xff800000; i++){
+       for (i = 0; i < nm1 && ib != 0xff800000; ) {
+               i++;
                temp = b;
-               b = ((float)(i+i)/x)*b - a;
+               b = (2.0f*i/x)*b - a;
                GET_FLOAT_WORD(ib, b);
                a = temp;
        }
-       if (sign > 0)
-               return b;
-       return -b;
+       return sign ? -b : b;
 }