src/share/vm/runtime/sharedRuntimeTrig.cpp

Sat, 01 Dec 2007 00:00:00 +0000

author
duke
date
Sat, 01 Dec 2007 00:00:00 +0000
changeset 435
a61af66fc99e
child 1840
fb57d4cf76c2
permissions
-rw-r--r--

Initial load

     1 /*
     2  * Copyright 2001-2005 Sun Microsystems, Inc.  All Rights Reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    20  * CA 95054 USA or visit www.sun.com if you need additional information or
    21  * have any questions.
    22  *
    23  */
    25 #include "incls/_precompiled.incl"
    26 #include "incls/_sharedRuntimeTrig.cpp.incl"
    28 // This file contains copies of the fdlibm routines used by
    29 // StrictMath. It turns out that it is almost always required to use
    30 // these runtime routines; the Intel CPU doesn't meet the Java
    31 // specification for sin/cos outside a certain limited argument range,
    32 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
    33 // also turns out that avoiding the indirect call through function
    34 // pointer out to libjava.so in SharedRuntime speeds these routines up
    35 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
    37 // Enabling optimizations in this file causes incorrect code to be
    38 // generated; can not figure out how to turn down optimization for one
    39 // file in the IDE on Windows
    40 #ifdef WIN32
    41 # pragma optimize ( "", off )
    42 #endif
    44 #include <math.h>
    46 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
    47 // [jk] this is not 100% correct because the float word order may different
    48 // from the byte order (e.g. on ARM)
    49 #ifdef VM_LITTLE_ENDIAN
    50 # define __HI(x) *(1+(int*)&x)
    51 # define __LO(x) *(int*)&x
    52 #else
    53 # define __HI(x) *(int*)&x
    54 # define __LO(x) *(1+(int*)&x)
    55 #endif
    57 static double copysignA(double x, double y) {
    58   __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
    59   return x;
    60 }
    62 /*
    63  * scalbn (double x, int n)
    64  * scalbn(x,n) returns x* 2**n  computed by  exponent
    65  * manipulation rather than by actually performing an
    66  * exponentiation or a multiplication.
    67  */
    69 static const double
    70 two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
    71 twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
    72 hugeX  = 1.0e+300,
    73 tiny   = 1.0e-300;
    75 static double scalbnA (double x, int n) {
    76   int  k,hx,lx;
    77   hx = __HI(x);
    78   lx = __LO(x);
    79   k = (hx&0x7ff00000)>>20;              /* extract exponent */
    80   if (k==0) {                           /* 0 or subnormal x */
    81     if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
    82     x *= two54;
    83     hx = __HI(x);
    84     k = ((hx&0x7ff00000)>>20) - 54;
    85     if (n< -50000) return tiny*x;       /*underflow*/
    86   }
    87   if (k==0x7ff) return x+x;             /* NaN or Inf */
    88   k = k+n;
    89   if (k >  0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */
    90   if (k > 0)                            /* normal result */
    91     {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
    92   if (k <= -54) {
    93     if (n > 50000)      /* in case integer overflow in n+k */
    94       return hugeX*copysignA(hugeX,x);  /*overflow*/
    95     else return tiny*copysignA(tiny,x); /*underflow*/
    96   }
    97   k += 54;                              /* subnormal result */
    98   __HI(x) = (hx&0x800fffff)|(k<<20);
    99   return x*twom54;
   100 }
   102 /*
   103  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
   104  * double x[],y[]; int e0,nx,prec; int ipio2[];
   105  *
   106  * __kernel_rem_pio2 return the last three digits of N with
   107  *              y = x - N*pi/2
   108  * so that |y| < pi/2.
   109  *
   110  * The method is to compute the integer (mod 8) and fraction parts of
   111  * (2/pi)*x without doing the full multiplication. In general we
   112  * skip the part of the product that are known to be a huge integer (
   113  * more accurately, = 0 mod 8 ). Thus the number of operations are
   114  * independent of the exponent of the input.
   115  *
   116  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
   117  *
   118  * Input parameters:
   119  *      x[]     The input value (must be positive) is broken into nx
   120  *              pieces of 24-bit integers in double precision format.
   121  *              x[i] will be the i-th 24 bit of x. The scaled exponent
   122  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
   123  *              match x's up to 24 bits.
   124  *
   125  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
   126  *                      e0 = ilogb(z)-23
   127  *                      z  = scalbn(z,-e0)
   128  *              for i = 0,1,2
   129  *                      x[i] = floor(z)
   130  *                      z    = (z-x[i])*2**24
   131  *
   132  *
   133  *      y[]     ouput result in an array of double precision numbers.
   134  *              The dimension of y[] is:
   135  *                      24-bit  precision       1
   136  *                      53-bit  precision       2
   137  *                      64-bit  precision       2
   138  *                      113-bit precision       3
   139  *              The actual value is the sum of them. Thus for 113-bit
   140  *              precsion, one may have to do something like:
   141  *
   142  *              long double t,w,r_head, r_tail;
   143  *              t = (long double)y[2] + (long double)y[1];
   144  *              w = (long double)y[0];
   145  *              r_head = t+w;
   146  *              r_tail = w - (r_head - t);
   147  *
   148  *      e0      The exponent of x[0]
   149  *
   150  *      nx      dimension of x[]
   151  *
   152  *      prec    an interger indicating the precision:
   153  *                      0       24  bits (single)
   154  *                      1       53  bits (double)
   155  *                      2       64  bits (extended)
   156  *                      3       113 bits (quad)
   157  *
   158  *      ipio2[]
   159  *              integer array, contains the (24*i)-th to (24*i+23)-th
   160  *              bit of 2/pi after binary point. The corresponding
   161  *              floating value is
   162  *
   163  *                      ipio2[i] * 2^(-24(i+1)).
   164  *
   165  * External function:
   166  *      double scalbn(), floor();
   167  *
   168  *
   169  * Here is the description of some local variables:
   170  *
   171  *      jk      jk+1 is the initial number of terms of ipio2[] needed
   172  *              in the computation. The recommended value is 2,3,4,
   173  *              6 for single, double, extended,and quad.
   174  *
   175  *      jz      local integer variable indicating the number of
   176  *              terms of ipio2[] used.
   177  *
   178  *      jx      nx - 1
   179  *
   180  *      jv      index for pointing to the suitable ipio2[] for the
   181  *              computation. In general, we want
   182  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
   183  *              is an integer. Thus
   184  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
   185  *              Hence jv = max(0,(e0-3)/24).
   186  *
   187  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
   188  *
   189  *      q[]     double array with integral value, representing the
   190  *              24-bits chunk of the product of x and 2/pi.
   191  *
   192  *      q0      the corresponding exponent of q[0]. Note that the
   193  *              exponent for q[i] would be q0-24*i.
   194  *
   195  *      PIo2[]  double precision array, obtained by cutting pi/2
   196  *              into 24 bits chunks.
   197  *
   198  *      f[]     ipio2[] in floating point
   199  *
   200  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
   201  *
   202  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
   203  *
   204  *      ih      integer. If >0 it indicats q[] is >= 0.5, hence
   205  *              it also indicates the *sign* of the result.
   206  *
   207  */
   210 /*
   211  * Constants:
   212  * The hexadecimal values are the intended ones for the following
   213  * constants. The decimal values may be used, provided that the
   214  * compiler will convert from decimal to binary accurately enough
   215  * to produce the hexadecimal values shown.
   216  */
   219 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
   221 static const double PIo2[] = {
   222   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
   223   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
   224   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
   225   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
   226   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
   227   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
   228   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
   229   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
   230 };
   232 static const double
   233 zeroB   = 0.0,
   234 one     = 1.0,
   235 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   236 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
   238 static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
   239   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
   240   double z,fw,f[20],fq[20],q[20];
   242   /* initialize jk*/
   243   jk = init_jk[prec];
   244   jp = jk;
   246   /* determine jx,jv,q0, note that 3>q0 */
   247   jx =  nx-1;
   248   jv = (e0-3)/24; if(jv<0) jv=0;
   249   q0 =  e0-24*(jv+1);
   251   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
   252   j = jv-jx; m = jx+jk;
   253   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
   255   /* compute q[0],q[1],...q[jk] */
   256   for (i=0;i<=jk;i++) {
   257     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
   258   }
   260   jz = jk;
   261 recompute:
   262   /* distill q[] into iq[] reversingly */
   263   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
   264     fw    =  (double)((int)(twon24* z));
   265     iq[i] =  (int)(z-two24B*fw);
   266     z     =  q[j-1]+fw;
   267   }
   269   /* compute n */
   270   z  = scalbnA(z,q0);           /* actual value of z */
   271   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */
   272   n  = (int) z;
   273   z -= (double)n;
   274   ih = 0;
   275   if(q0>0) {    /* need iq[jz-1] to determine n */
   276     i  = (iq[jz-1]>>(24-q0)); n += i;
   277     iq[jz-1] -= i<<(24-q0);
   278     ih = iq[jz-1]>>(23-q0);
   279   }
   280   else if(q0==0) ih = iq[jz-1]>>23;
   281   else if(z>=0.5) ih=2;
   283   if(ih>0) {    /* q > 0.5 */
   284     n += 1; carry = 0;
   285     for(i=0;i<jz ;i++) {        /* compute 1-q */
   286       j = iq[i];
   287       if(carry==0) {
   288         if(j!=0) {
   289           carry = 1; iq[i] = 0x1000000- j;
   290         }
   291       } else  iq[i] = 0xffffff - j;
   292     }
   293     if(q0>0) {          /* rare case: chance is 1 in 12 */
   294       switch(q0) {
   295       case 1:
   296         iq[jz-1] &= 0x7fffff; break;
   297       case 2:
   298         iq[jz-1] &= 0x3fffff; break;
   299       }
   300     }
   301     if(ih==2) {
   302       z = one - z;
   303       if(carry!=0) z -= scalbnA(one,q0);
   304     }
   305   }
   307   /* check if recomputation is needed */
   308   if(z==zeroB) {
   309     j = 0;
   310     for (i=jz-1;i>=jk;i--) j |= iq[i];
   311     if(j==0) { /* need recomputation */
   312       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
   314       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
   315         f[jx+i] = (double) ipio2[jv+i];
   316         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
   317         q[i] = fw;
   318       }
   319       jz += k;
   320       goto recompute;
   321     }
   322   }
   324   /* chop off zero terms */
   325   if(z==0.0) {
   326     jz -= 1; q0 -= 24;
   327     while(iq[jz]==0) { jz--; q0-=24;}
   328   } else { /* break z into 24-bit if neccessary */
   329     z = scalbnA(z,-q0);
   330     if(z>=two24B) {
   331       fw = (double)((int)(twon24*z));
   332       iq[jz] = (int)(z-two24B*fw);
   333       jz += 1; q0 += 24;
   334       iq[jz] = (int) fw;
   335     } else iq[jz] = (int) z ;
   336   }
   338   /* convert integer "bit" chunk to floating-point value */
   339   fw = scalbnA(one,q0);
   340   for(i=jz;i>=0;i--) {
   341     q[i] = fw*(double)iq[i]; fw*=twon24;
   342   }
   344   /* compute PIo2[0,...,jp]*q[jz,...,0] */
   345   for(i=jz;i>=0;i--) {
   346     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
   347     fq[jz-i] = fw;
   348   }
   350   /* compress fq[] into y[] */
   351   switch(prec) {
   352   case 0:
   353     fw = 0.0;
   354     for (i=jz;i>=0;i--) fw += fq[i];
   355     y[0] = (ih==0)? fw: -fw;
   356     break;
   357   case 1:
   358   case 2:
   359     fw = 0.0;
   360     for (i=jz;i>=0;i--) fw += fq[i];
   361     y[0] = (ih==0)? fw: -fw;
   362     fw = fq[0]-fw;
   363     for (i=1;i<=jz;i++) fw += fq[i];
   364     y[1] = (ih==0)? fw: -fw;
   365     break;
   366   case 3:       /* painful */
   367     for (i=jz;i>0;i--) {
   368       fw      = fq[i-1]+fq[i];
   369       fq[i]  += fq[i-1]-fw;
   370       fq[i-1] = fw;
   371     }
   372     for (i=jz;i>1;i--) {
   373       fw      = fq[i-1]+fq[i];
   374       fq[i]  += fq[i-1]-fw;
   375       fq[i-1] = fw;
   376     }
   377     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
   378     if(ih==0) {
   379       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
   380     } else {
   381       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
   382     }
   383   }
   384   return n&7;
   385 }
   388 /*
   389  * ====================================================
   390  * Copyright 13 Dec 1993 Sun Microsystems, Inc.  All Rights Reserved.
   391  *
   392  * Developed at SunPro, a Sun Microsystems, Inc. business.
   393  * Permission to use, copy, modify, and distribute this
   394  * software is freely granted, provided that this notice
   395  * is preserved.
   396  * ====================================================
   397  *
   398  */
   400 /* __ieee754_rem_pio2(x,y)
   401  *
   402  * return the remainder of x rem pi/2 in y[0]+y[1]
   403  * use __kernel_rem_pio2()
   404  */
   406 /*
   407  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
   408  */
   409 static const int two_over_pi[] = {
   410   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
   411   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
   412   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
   413   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
   414   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
   415   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
   416   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
   417   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
   418   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
   419   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
   420   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
   421 };
   423 static const int npio2_hw[] = {
   424   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
   425   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
   426   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
   427   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
   428   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
   429   0x404858EB, 0x404921FB,
   430 };
   432 /*
   433  * invpio2:  53 bits of 2/pi
   434  * pio2_1:   first  33 bit of pi/2
   435  * pio2_1t:  pi/2 - pio2_1
   436  * pio2_2:   second 33 bit of pi/2
   437  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
   438  * pio2_3:   third  33 bit of pi/2
   439  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
   440  */
   442 static const double
   443 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
   444 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
   445 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   446 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
   447 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
   448 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
   449 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
   450 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
   451 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
   452 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
   454 static int __ieee754_rem_pio2(double x, double *y) {
   455   double z,w,t,r,fn;
   456   double tx[3];
   457   int e0,i,j,nx,n,ix,hx,i0;
   459   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
   460   hx = *(i0+(int*)&x);          /* high word of x */
   461   ix = hx&0x7fffffff;
   462   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
   463     {y[0] = x; y[1] = 0; return 0;}
   464   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
   465     if(hx>0) {
   466       z = x - pio2_1;
   467       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   468         y[0] = z - pio2_1t;
   469         y[1] = (z-y[0])-pio2_1t;
   470       } else {                /* near pi/2, use 33+33+53 bit pi */
   471         z -= pio2_2;
   472         y[0] = z - pio2_2t;
   473         y[1] = (z-y[0])-pio2_2t;
   474       }
   475       return 1;
   476     } else {    /* negative x */
   477       z = x + pio2_1;
   478       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   479         y[0] = z + pio2_1t;
   480         y[1] = (z-y[0])+pio2_1t;
   481       } else {                /* near pi/2, use 33+33+53 bit pi */
   482         z += pio2_2;
   483         y[0] = z + pio2_2t;
   484         y[1] = (z-y[0])+pio2_2t;
   485       }
   486       return -1;
   487     }
   488   }
   489   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
   490     t  = fabsd(x);
   491     n  = (int) (t*invpio2+half);
   492     fn = (double)n;
   493     r  = t-fn*pio2_1;
   494     w  = fn*pio2_1t;    /* 1st round good to 85 bit */
   495     if(n<32&&ix!=npio2_hw[n-1]) {
   496       y[0] = r-w;       /* quick check no cancellation */
   497     } else {
   498       j  = ix>>20;
   499       y[0] = r-w;
   500       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   501       if(i>16) {  /* 2nd iteration needed, good to 118 */
   502         t  = r;
   503         w  = fn*pio2_2;
   504         r  = t-w;
   505         w  = fn*pio2_2t-((t-r)-w);
   506         y[0] = r-w;
   507         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   508         if(i>49)  {     /* 3rd iteration need, 151 bits acc */
   509           t  = r;       /* will cover all possible cases */
   510           w  = fn*pio2_3;
   511           r  = t-w;
   512           w  = fn*pio2_3t-((t-r)-w);
   513           y[0] = r-w;
   514         }
   515       }
   516     }
   517     y[1] = (r-y[0])-w;
   518     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   519     else         return n;
   520   }
   521   /*
   522    * all other (large) arguments
   523    */
   524   if(ix>=0x7ff00000) {          /* x is inf or NaN */
   525     y[0]=y[1]=x-x; return 0;
   526   }
   527   /* set z = scalbn(|x|,ilogb(x)-23) */
   528   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
   529   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
   530   *(i0+(int*)&z) = ix - (e0<<20);
   531   for(i=0;i<2;i++) {
   532     tx[i] = (double)((int)(z));
   533     z     = (z-tx[i])*two24A;
   534   }
   535   tx[2] = z;
   536   nx = 3;
   537   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */
   538   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
   539   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   540   return n;
   541 }
   544 /* __kernel_sin( x, y, iy)
   545  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
   546  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   547  * Input y is the tail of x.
   548  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
   549  *
   550  * Algorithm
   551  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
   552  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
   553  *      3. sin(x) is approximated by a polynomial of degree 13 on
   554  *         [0,pi/4]
   555  *                               3            13
   556  *              sin(x) ~ x + S1*x + ... + S6*x
   557  *         where
   558  *
   559  *      |sin(x)         2     4     6     8     10     12  |     -58
   560  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
   561  *      |  x                                               |
   562  *
   563  *      4. sin(x+y) = sin(x) + sin'(x')*y
   564  *                  ~ sin(x) + (1-x*x/2)*y
   565  *         For better accuracy, let
   566  *                   3      2      2      2      2
   567  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
   568  *         then                   3    2
   569  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
   570  */
   572 static const double
   573 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
   574 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
   575 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
   576 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
   577 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
   578 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
   580 static double __kernel_sin(double x, double y, int iy)
   581 {
   582         double z,r,v;
   583         int ix;
   584         ix = __HI(x)&0x7fffffff;        /* high word of x */
   585         if(ix<0x3e400000)                       /* |x| < 2**-27 */
   586            {if((int)x==0) return x;}            /* generate inexact */
   587         z       =  x*x;
   588         v       =  z*x;
   589         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
   590         if(iy==0) return x+v*(S1+z*r);
   591         else      return x-((z*(half*y-v*r)-y)-v*S1);
   592 }
   594 /*
   595  * __kernel_cos( x,  y )
   596  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
   597  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   598  * Input y is the tail of x.
   599  *
   600  * Algorithm
   601  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
   602  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
   603  *      3. cos(x) is approximated by a polynomial of degree 14 on
   604  *         [0,pi/4]
   605  *                                       4            14
   606  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
   607  *         where the remez error is
   608  *
   609  *      |              2     4     6     8     10    12     14 |     -58
   610  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
   611  *      |                                                      |
   612  *
   613  *                     4     6     8     10    12     14
   614  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
   615  *             cos(x) = 1 - x*x/2 + r
   616  *         since cos(x+y) ~ cos(x) - sin(x)*y
   617  *                        ~ cos(x) - x*y,
   618  *         a correction term is necessary in cos(x) and hence
   619  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
   620  *         For better accuracy when x > 0.3, let qx = |x|/4 with
   621  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
   622  *         Then
   623  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
   624  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
   625  *         magnitude of the latter is at least a quarter of x*x/2,
   626  *         thus, reducing the rounding error in the subtraction.
   627  */
   629 static const double
   630 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
   631 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
   632 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
   633 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
   634 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
   635 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
   637 static double __kernel_cos(double x, double y)
   638 {
   639   double a,hz,z,r,qx;
   640   int ix;
   641   ix = __HI(x)&0x7fffffff;      /* ix = |x|'s high word*/
   642   if(ix<0x3e400000) {                   /* if x < 2**27 */
   643     if(((int)x)==0) return one;         /* generate inexact */
   644   }
   645   z  = x*x;
   646   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
   647   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */
   648     return one - (0.5*z - (z*r - x*y));
   649   else {
   650     if(ix > 0x3fe90000) {               /* x > 0.78125 */
   651       qx = 0.28125;
   652     } else {
   653       __HI(qx) = ix-0x00200000; /* x/4 */
   654       __LO(qx) = 0;
   655     }
   656     hz = 0.5*z-qx;
   657     a  = one-qx;
   658     return a - (hz - (z*r-x*y));
   659   }
   660 }
   662 /* __kernel_tan( x, y, k )
   663  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
   664  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   665  * Input y is the tail of x.
   666  * Input k indicates whether tan (if k=1) or
   667  * -1/tan (if k= -1) is returned.
   668  *
   669  * Algorithm
   670  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
   671  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
   672  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
   673  *         [0,0.67434]
   674  *                               3             27
   675  *              tan(x) ~ x + T1*x + ... + T13*x
   676  *         where
   677  *
   678  *              |tan(x)         2     4            26   |     -59.2
   679  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
   680  *              |  x                                    |
   681  *
   682  *         Note: tan(x+y) = tan(x) + tan'(x)*y
   683  *                        ~ tan(x) + (1+x*x)*y
   684  *         Therefore, for better accuracy in computing tan(x+y), let
   685  *                   3      2      2       2       2
   686  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
   687  *         then
   688  *                                  3    2
   689  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
   690  *
   691  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
   692  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
   693  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
   694  */
   696 static const double
   697 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
   698 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
   699 T[] =  {
   700   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
   701   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
   702   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
   703   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
   704   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
   705   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
   706   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
   707   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
   708   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
   709   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
   710   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
   711  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
   712   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
   713 };
   715 static double __kernel_tan(double x, double y, int iy)
   716 {
   717   double z,r,v,w,s;
   718   int ix,hx;
   719   hx = __HI(x);   /* high word of x */
   720   ix = hx&0x7fffffff;     /* high word of |x| */
   721   if(ix<0x3e300000) {                     /* x < 2**-28 */
   722     if((int)x==0) {                       /* generate inexact */
   723       if (((ix | __LO(x)) | (iy + 1)) == 0)
   724         return one / fabsd(x);
   725       else {
   726         if (iy == 1)
   727           return x;
   728         else {    /* compute -1 / (x+y) carefully */
   729           double a, t;
   731           z = w = x + y;
   732           __LO(z) = 0;
   733           v = y - (z - x);
   734           t = a = -one / w;
   735           __LO(t) = 0;
   736           s = one + t * z;
   737           return t + a * (s + t * v);
   738         }
   739       }
   740     }
   741   }
   742   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */
   743     if(hx<0) {x = -x; y = -y;}
   744     z = pio4-x;
   745     w = pio4lo-y;
   746     x = z+w; y = 0.0;
   747   }
   748   z       =  x*x;
   749   w       =  z*z;
   750   /* Break x^5*(T[1]+x^2*T[2]+...) into
   751    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
   752    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
   753    */
   754   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
   755   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
   756   s = z*x;
   757   r = y + z*(s*(r+v)+y);
   758   r += T[0]*s;
   759   w = x+r;
   760   if(ix>=0x3FE59428) {
   761     v = (double)iy;
   762     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
   763   }
   764   if(iy==1) return w;
   765   else {          /* if allow error up to 2 ulp,
   766                      simply return -1.0/(x+r) here */
   767     /*  compute -1.0/(x+r) accurately */
   768     double a,t;
   769     z  = w;
   770     __LO(z) = 0;
   771     v  = r-(z - x);     /* z+v = r+x */
   772     t = a  = -1.0/w;    /* a = -1.0/w */
   773     __LO(t) = 0;
   774     s  = 1.0+t*z;
   775     return t+a*(s+t*v);
   776   }
   777 }
   780 //----------------------------------------------------------------------
   781 //
   782 // Routines for new sin/cos implementation
   783 //
   784 //----------------------------------------------------------------------
   786 /* sin(x)
   787  * Return sine function of x.
   788  *
   789  * kernel function:
   790  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   791  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
   792  *      __ieee754_rem_pio2      ... argument reduction routine
   793  *
   794  * Method.
   795  *      Let S,C and T denote the sin, cos and tan respectively on
   796  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   797  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   798  *      We have
   799  *
   800  *          n        sin(x)      cos(x)        tan(x)
   801  *     ----------------------------------------------------------
   802  *          0          S           C             T
   803  *          1          C          -S            -1/T
   804  *          2         -S          -C             T
   805  *          3         -C           S            -1/T
   806  *     ----------------------------------------------------------
   807  *
   808  * Special cases:
   809  *      Let trig be any of sin, cos, or tan.
   810  *      trig(+-INF)  is NaN, with signals;
   811  *      trig(NaN)    is that NaN;
   812  *
   813  * Accuracy:
   814  *      TRIG(x) returns trig(x) nearly rounded
   815  */
   817 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
   818   double y[2],z=0.0;
   819   int n, ix;
   821   /* High word of x. */
   822   ix = __HI(x);
   824   /* |x| ~< pi/4 */
   825   ix &= 0x7fffffff;
   826   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
   828   /* sin(Inf or NaN) is NaN */
   829   else if (ix>=0x7ff00000) return x-x;
   831   /* argument reduction needed */
   832   else {
   833     n = __ieee754_rem_pio2(x,y);
   834     switch(n&3) {
   835     case 0: return  __kernel_sin(y[0],y[1],1);
   836     case 1: return  __kernel_cos(y[0],y[1]);
   837     case 2: return -__kernel_sin(y[0],y[1],1);
   838     default:
   839       return -__kernel_cos(y[0],y[1]);
   840     }
   841   }
   842 JRT_END
   844 /* cos(x)
   845  * Return cosine function of x.
   846  *
   847  * kernel function:
   848  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   849  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
   850  *      __ieee754_rem_pio2      ... argument reduction routine
   851  *
   852  * Method.
   853  *      Let S,C and T denote the sin, cos and tan respectively on
   854  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   855  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   856  *      We have
   857  *
   858  *          n        sin(x)      cos(x)        tan(x)
   859  *     ----------------------------------------------------------
   860  *          0          S           C             T
   861  *          1          C          -S            -1/T
   862  *          2         -S          -C             T
   863  *          3         -C           S            -1/T
   864  *     ----------------------------------------------------------
   865  *
   866  * Special cases:
   867  *      Let trig be any of sin, cos, or tan.
   868  *      trig(+-INF)  is NaN, with signals;
   869  *      trig(NaN)    is that NaN;
   870  *
   871  * Accuracy:
   872  *      TRIG(x) returns trig(x) nearly rounded
   873  */
   875 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
   876   double y[2],z=0.0;
   877   int n, ix;
   879   /* High word of x. */
   880   ix = __HI(x);
   882   /* |x| ~< pi/4 */
   883   ix &= 0x7fffffff;
   884   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
   886   /* cos(Inf or NaN) is NaN */
   887   else if (ix>=0x7ff00000) return x-x;
   889   /* argument reduction needed */
   890   else {
   891     n = __ieee754_rem_pio2(x,y);
   892     switch(n&3) {
   893     case 0: return  __kernel_cos(y[0],y[1]);
   894     case 1: return -__kernel_sin(y[0],y[1],1);
   895     case 2: return -__kernel_cos(y[0],y[1]);
   896     default:
   897       return  __kernel_sin(y[0],y[1],1);
   898     }
   899   }
   900 JRT_END
   902 /* tan(x)
   903  * Return tangent function of x.
   904  *
   905  * kernel function:
   906  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
   907  *      __ieee754_rem_pio2      ... argument reduction routine
   908  *
   909  * Method.
   910  *      Let S,C and T denote the sin, cos and tan respectively on
   911  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   912  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   913  *      We have
   914  *
   915  *          n        sin(x)      cos(x)        tan(x)
   916  *     ----------------------------------------------------------
   917  *          0          S           C             T
   918  *          1          C          -S            -1/T
   919  *          2         -S          -C             T
   920  *          3         -C           S            -1/T
   921  *     ----------------------------------------------------------
   922  *
   923  * Special cases:
   924  *      Let trig be any of sin, cos, or tan.
   925  *      trig(+-INF)  is NaN, with signals;
   926  *      trig(NaN)    is that NaN;
   927  *
   928  * Accuracy:
   929  *      TRIG(x) returns trig(x) nearly rounded
   930  */
   932 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
   933   double y[2],z=0.0;
   934   int n, ix;
   936   /* High word of x. */
   937   ix = __HI(x);
   939   /* |x| ~< pi/4 */
   940   ix &= 0x7fffffff;
   941   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
   943   /* tan(Inf or NaN) is NaN */
   944   else if (ix>=0x7ff00000) return x-x;            /* NaN */
   946   /* argument reduction needed */
   947   else {
   948     n = __ieee754_rem_pio2(x,y);
   949     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
   950                                                      -1 -- n odd */
   951   }
   952 JRT_END
   955 #ifdef WIN32
   956 # pragma optimize ( "", on )
   957 #endif

mercurial