src/share/vm/runtime/sharedRuntimeTrig.cpp

Tue, 23 Nov 2010 13:22:55 -0800

author
stefank
date
Tue, 23 Nov 2010 13:22:55 -0800
changeset 2314
f95d63e2154a
parent 1907
c18cbe5936b8
child 6461
bdd155477289
permissions
-rw-r--r--

6989984: Use standard include model for Hospot
Summary: Replaced MakeDeps and the includeDB files with more standardized solutions.
Reviewed-by: coleenp, kvn, kamg

     1 /*
     2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "prims/jni.h"
    27 #include "runtime/interfaceSupport.hpp"
    28 #include "runtime/sharedRuntime.hpp"
    30 // This file contains copies of the fdlibm routines used by
    31 // StrictMath. It turns out that it is almost always required to use
    32 // these runtime routines; the Intel CPU doesn't meet the Java
    33 // specification for sin/cos outside a certain limited argument range,
    34 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
    35 // also turns out that avoiding the indirect call through function
    36 // pointer out to libjava.so in SharedRuntime speeds these routines up
    37 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
    39 // Enabling optimizations in this file causes incorrect code to be
    40 // generated; can not figure out how to turn down optimization for one
    41 // file in the IDE on Windows
    42 #ifdef WIN32
    43 # pragma optimize ( "", off )
    44 #endif
    46 /* The above workaround now causes more problems with the latest MS compiler.
    47  * Visual Studio 2010's /GS option tries to guard against buffer overruns.
    48  * /GS is on by default if you specify optimizations, which we do globally
    49  * via /W3 /O2. However the above selective turning off of optimizations means
    50  * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)
    51  * then the compilation fails. There are several possible solutions
    52  * (1) Remove that pragma above as obsolete with VS2010 - requires testing.
    53  * (2) Stop treating warnings as errors - would be a backward step
    54  * (3) Disable /GS - may help performance but you lose the security checks
    55  * (4) Disable the warning with "#pragma warning( disable : 4748 )"
    56  * (5) Disable planting the code with  __declspec(safebuffers)
    57  * I've opted for (5) although we should investigate the local performance
    58  * benefits of (1) and global performance benefit of (3).
    59  */
    60 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))
    61 #define SAFEBUF __declspec(safebuffers)
    62 #else
    63 #define SAFEBUF
    64 #endif
    66 #include <math.h>
    68 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
    69 // [jk] this is not 100% correct because the float word order may different
    70 // from the byte order (e.g. on ARM)
    71 #ifdef VM_LITTLE_ENDIAN
    72 # define __HI(x) *(1+(int*)&x)
    73 # define __LO(x) *(int*)&x
    74 #else
    75 # define __HI(x) *(int*)&x
    76 # define __LO(x) *(1+(int*)&x)
    77 #endif
    79 static double copysignA(double x, double y) {
    80   __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
    81   return x;
    82 }
    84 /*
    85  * scalbn (double x, int n)
    86  * scalbn(x,n) returns x* 2**n  computed by  exponent
    87  * manipulation rather than by actually performing an
    88  * exponentiation or a multiplication.
    89  */
    91 static const double
    92 two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
    93 twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
    94 hugeX  = 1.0e+300,
    95 tiny   = 1.0e-300;
    97 static double scalbnA (double x, int n) {
    98   int  k,hx,lx;
    99   hx = __HI(x);
   100   lx = __LO(x);
   101   k = (hx&0x7ff00000)>>20;              /* extract exponent */
   102   if (k==0) {                           /* 0 or subnormal x */
   103     if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
   104     x *= two54;
   105     hx = __HI(x);
   106     k = ((hx&0x7ff00000)>>20) - 54;
   107     if (n< -50000) return tiny*x;       /*underflow*/
   108   }
   109   if (k==0x7ff) return x+x;             /* NaN or Inf */
   110   k = k+n;
   111   if (k >  0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */
   112   if (k > 0)                            /* normal result */
   113     {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
   114   if (k <= -54) {
   115     if (n > 50000)      /* in case integer overflow in n+k */
   116       return hugeX*copysignA(hugeX,x);  /*overflow*/
   117     else return tiny*copysignA(tiny,x); /*underflow*/
   118   }
   119   k += 54;                              /* subnormal result */
   120   __HI(x) = (hx&0x800fffff)|(k<<20);
   121   return x*twom54;
   122 }
   124 /*
   125  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
   126  * double x[],y[]; int e0,nx,prec; int ipio2[];
   127  *
   128  * __kernel_rem_pio2 return the last three digits of N with
   129  *              y = x - N*pi/2
   130  * so that |y| < pi/2.
   131  *
   132  * The method is to compute the integer (mod 8) and fraction parts of
   133  * (2/pi)*x without doing the full multiplication. In general we
   134  * skip the part of the product that are known to be a huge integer (
   135  * more accurately, = 0 mod 8 ). Thus the number of operations are
   136  * independent of the exponent of the input.
   137  *
   138  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
   139  *
   140  * Input parameters:
   141  *      x[]     The input value (must be positive) is broken into nx
   142  *              pieces of 24-bit integers in double precision format.
   143  *              x[i] will be the i-th 24 bit of x. The scaled exponent
   144  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
   145  *              match x's up to 24 bits.
   146  *
   147  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
   148  *                      e0 = ilogb(z)-23
   149  *                      z  = scalbn(z,-e0)
   150  *              for i = 0,1,2
   151  *                      x[i] = floor(z)
   152  *                      z    = (z-x[i])*2**24
   153  *
   154  *
   155  *      y[]     ouput result in an array of double precision numbers.
   156  *              The dimension of y[] is:
   157  *                      24-bit  precision       1
   158  *                      53-bit  precision       2
   159  *                      64-bit  precision       2
   160  *                      113-bit precision       3
   161  *              The actual value is the sum of them. Thus for 113-bit
   162  *              precsion, one may have to do something like:
   163  *
   164  *              long double t,w,r_head, r_tail;
   165  *              t = (long double)y[2] + (long double)y[1];
   166  *              w = (long double)y[0];
   167  *              r_head = t+w;
   168  *              r_tail = w - (r_head - t);
   169  *
   170  *      e0      The exponent of x[0]
   171  *
   172  *      nx      dimension of x[]
   173  *
   174  *      prec    an interger indicating the precision:
   175  *                      0       24  bits (single)
   176  *                      1       53  bits (double)
   177  *                      2       64  bits (extended)
   178  *                      3       113 bits (quad)
   179  *
   180  *      ipio2[]
   181  *              integer array, contains the (24*i)-th to (24*i+23)-th
   182  *              bit of 2/pi after binary point. The corresponding
   183  *              floating value is
   184  *
   185  *                      ipio2[i] * 2^(-24(i+1)).
   186  *
   187  * External function:
   188  *      double scalbn(), floor();
   189  *
   190  *
   191  * Here is the description of some local variables:
   192  *
   193  *      jk      jk+1 is the initial number of terms of ipio2[] needed
   194  *              in the computation. The recommended value is 2,3,4,
   195  *              6 for single, double, extended,and quad.
   196  *
   197  *      jz      local integer variable indicating the number of
   198  *              terms of ipio2[] used.
   199  *
   200  *      jx      nx - 1
   201  *
   202  *      jv      index for pointing to the suitable ipio2[] for the
   203  *              computation. In general, we want
   204  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
   205  *              is an integer. Thus
   206  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
   207  *              Hence jv = max(0,(e0-3)/24).
   208  *
   209  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
   210  *
   211  *      q[]     double array with integral value, representing the
   212  *              24-bits chunk of the product of x and 2/pi.
   213  *
   214  *      q0      the corresponding exponent of q[0]. Note that the
   215  *              exponent for q[i] would be q0-24*i.
   216  *
   217  *      PIo2[]  double precision array, obtained by cutting pi/2
   218  *              into 24 bits chunks.
   219  *
   220  *      f[]     ipio2[] in floating point
   221  *
   222  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
   223  *
   224  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
   225  *
   226  *      ih      integer. If >0 it indicats q[] is >= 0.5, hence
   227  *              it also indicates the *sign* of the result.
   228  *
   229  */
   232 /*
   233  * Constants:
   234  * The hexadecimal values are the intended ones for the following
   235  * constants. The decimal values may be used, provided that the
   236  * compiler will convert from decimal to binary accurately enough
   237  * to produce the hexadecimal values shown.
   238  */
   241 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
   243 static const double PIo2[] = {
   244   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
   245   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
   246   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
   247   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
   248   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
   249   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
   250   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
   251   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
   252 };
   254 static const double
   255 zeroB   = 0.0,
   256 one     = 1.0,
   257 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   258 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
   260 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
   261   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
   262   double z,fw,f[20],fq[20],q[20];
   264   /* initialize jk*/
   265   jk = init_jk[prec];
   266   jp = jk;
   268   /* determine jx,jv,q0, note that 3>q0 */
   269   jx =  nx-1;
   270   jv = (e0-3)/24; if(jv<0) jv=0;
   271   q0 =  e0-24*(jv+1);
   273   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
   274   j = jv-jx; m = jx+jk;
   275   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
   277   /* compute q[0],q[1],...q[jk] */
   278   for (i=0;i<=jk;i++) {
   279     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
   280   }
   282   jz = jk;
   283 recompute:
   284   /* distill q[] into iq[] reversingly */
   285   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
   286     fw    =  (double)((int)(twon24* z));
   287     iq[i] =  (int)(z-two24B*fw);
   288     z     =  q[j-1]+fw;
   289   }
   291   /* compute n */
   292   z  = scalbnA(z,q0);           /* actual value of z */
   293   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */
   294   n  = (int) z;
   295   z -= (double)n;
   296   ih = 0;
   297   if(q0>0) {    /* need iq[jz-1] to determine n */
   298     i  = (iq[jz-1]>>(24-q0)); n += i;
   299     iq[jz-1] -= i<<(24-q0);
   300     ih = iq[jz-1]>>(23-q0);
   301   }
   302   else if(q0==0) ih = iq[jz-1]>>23;
   303   else if(z>=0.5) ih=2;
   305   if(ih>0) {    /* q > 0.5 */
   306     n += 1; carry = 0;
   307     for(i=0;i<jz ;i++) {        /* compute 1-q */
   308       j = iq[i];
   309       if(carry==0) {
   310         if(j!=0) {
   311           carry = 1; iq[i] = 0x1000000- j;
   312         }
   313       } else  iq[i] = 0xffffff - j;
   314     }
   315     if(q0>0) {          /* rare case: chance is 1 in 12 */
   316       switch(q0) {
   317       case 1:
   318         iq[jz-1] &= 0x7fffff; break;
   319       case 2:
   320         iq[jz-1] &= 0x3fffff; break;
   321       }
   322     }
   323     if(ih==2) {
   324       z = one - z;
   325       if(carry!=0) z -= scalbnA(one,q0);
   326     }
   327   }
   329   /* check if recomputation is needed */
   330   if(z==zeroB) {
   331     j = 0;
   332     for (i=jz-1;i>=jk;i--) j |= iq[i];
   333     if(j==0) { /* need recomputation */
   334       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
   336       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
   337         f[jx+i] = (double) ipio2[jv+i];
   338         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
   339         q[i] = fw;
   340       }
   341       jz += k;
   342       goto recompute;
   343     }
   344   }
   346   /* chop off zero terms */
   347   if(z==0.0) {
   348     jz -= 1; q0 -= 24;
   349     while(iq[jz]==0) { jz--; q0-=24;}
   350   } else { /* break z into 24-bit if neccessary */
   351     z = scalbnA(z,-q0);
   352     if(z>=two24B) {
   353       fw = (double)((int)(twon24*z));
   354       iq[jz] = (int)(z-two24B*fw);
   355       jz += 1; q0 += 24;
   356       iq[jz] = (int) fw;
   357     } else iq[jz] = (int) z ;
   358   }
   360   /* convert integer "bit" chunk to floating-point value */
   361   fw = scalbnA(one,q0);
   362   for(i=jz;i>=0;i--) {
   363     q[i] = fw*(double)iq[i]; fw*=twon24;
   364   }
   366   /* compute PIo2[0,...,jp]*q[jz,...,0] */
   367   for(i=jz;i>=0;i--) {
   368     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
   369     fq[jz-i] = fw;
   370   }
   372   /* compress fq[] into y[] */
   373   switch(prec) {
   374   case 0:
   375     fw = 0.0;
   376     for (i=jz;i>=0;i--) fw += fq[i];
   377     y[0] = (ih==0)? fw: -fw;
   378     break;
   379   case 1:
   380   case 2:
   381     fw = 0.0;
   382     for (i=jz;i>=0;i--) fw += fq[i];
   383     y[0] = (ih==0)? fw: -fw;
   384     fw = fq[0]-fw;
   385     for (i=1;i<=jz;i++) fw += fq[i];
   386     y[1] = (ih==0)? fw: -fw;
   387     break;
   388   case 3:       /* painful */
   389     for (i=jz;i>0;i--) {
   390       fw      = fq[i-1]+fq[i];
   391       fq[i]  += fq[i-1]-fw;
   392       fq[i-1] = fw;
   393     }
   394     for (i=jz;i>1;i--) {
   395       fw      = fq[i-1]+fq[i];
   396       fq[i]  += fq[i-1]-fw;
   397       fq[i-1] = fw;
   398     }
   399     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
   400     if(ih==0) {
   401       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
   402     } else {
   403       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
   404     }
   405   }
   406   return n&7;
   407 }
   410 /*
   411  * ====================================================
   412  * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.
   413  *
   414  * Developed at SunPro, a Sun Microsystems, Inc. business.
   415  * Permission to use, copy, modify, and distribute this
   416  * software is freely granted, provided that this notice
   417  * is preserved.
   418  * ====================================================
   419  *
   420  */
   422 /* __ieee754_rem_pio2(x,y)
   423  *
   424  * return the remainder of x rem pi/2 in y[0]+y[1]
   425  * use __kernel_rem_pio2()
   426  */
   428 /*
   429  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
   430  */
   431 static const int two_over_pi[] = {
   432   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
   433   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
   434   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
   435   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
   436   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
   437   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
   438   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
   439   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
   440   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
   441   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
   442   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
   443 };
   445 static const int npio2_hw[] = {
   446   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
   447   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
   448   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
   449   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
   450   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
   451   0x404858EB, 0x404921FB,
   452 };
   454 /*
   455  * invpio2:  53 bits of 2/pi
   456  * pio2_1:   first  33 bit of pi/2
   457  * pio2_1t:  pi/2 - pio2_1
   458  * pio2_2:   second 33 bit of pi/2
   459  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
   460  * pio2_3:   third  33 bit of pi/2
   461  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
   462  */
   464 static const double
   465 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
   466 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
   467 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   468 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
   469 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
   470 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
   471 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
   472 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
   473 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
   474 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
   476 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {
   477   double z,w,t,r,fn;
   478   double tx[3];
   479   int e0,i,j,nx,n,ix,hx,i0;
   481   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
   482   hx = *(i0+(int*)&x);          /* high word of x */
   483   ix = hx&0x7fffffff;
   484   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
   485     {y[0] = x; y[1] = 0; return 0;}
   486   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
   487     if(hx>0) {
   488       z = x - pio2_1;
   489       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   490         y[0] = z - pio2_1t;
   491         y[1] = (z-y[0])-pio2_1t;
   492       } else {                /* near pi/2, use 33+33+53 bit pi */
   493         z -= pio2_2;
   494         y[0] = z - pio2_2t;
   495         y[1] = (z-y[0])-pio2_2t;
   496       }
   497       return 1;
   498     } else {    /* negative x */
   499       z = x + pio2_1;
   500       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   501         y[0] = z + pio2_1t;
   502         y[1] = (z-y[0])+pio2_1t;
   503       } else {                /* near pi/2, use 33+33+53 bit pi */
   504         z += pio2_2;
   505         y[0] = z + pio2_2t;
   506         y[1] = (z-y[0])+pio2_2t;
   507       }
   508       return -1;
   509     }
   510   }
   511   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
   512     t  = fabsd(x);
   513     n  = (int) (t*invpio2+half);
   514     fn = (double)n;
   515     r  = t-fn*pio2_1;
   516     w  = fn*pio2_1t;    /* 1st round good to 85 bit */
   517     if(n<32&&ix!=npio2_hw[n-1]) {
   518       y[0] = r-w;       /* quick check no cancellation */
   519     } else {
   520       j  = ix>>20;
   521       y[0] = r-w;
   522       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   523       if(i>16) {  /* 2nd iteration needed, good to 118 */
   524         t  = r;
   525         w  = fn*pio2_2;
   526         r  = t-w;
   527         w  = fn*pio2_2t-((t-r)-w);
   528         y[0] = r-w;
   529         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   530         if(i>49)  {     /* 3rd iteration need, 151 bits acc */
   531           t  = r;       /* will cover all possible cases */
   532           w  = fn*pio2_3;
   533           r  = t-w;
   534           w  = fn*pio2_3t-((t-r)-w);
   535           y[0] = r-w;
   536         }
   537       }
   538     }
   539     y[1] = (r-y[0])-w;
   540     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   541     else         return n;
   542   }
   543   /*
   544    * all other (large) arguments
   545    */
   546   if(ix>=0x7ff00000) {          /* x is inf or NaN */
   547     y[0]=y[1]=x-x; return 0;
   548   }
   549   /* set z = scalbn(|x|,ilogb(x)-23) */
   550   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
   551   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
   552   *(i0+(int*)&z) = ix - (e0<<20);
   553   for(i=0;i<2;i++) {
   554     tx[i] = (double)((int)(z));
   555     z     = (z-tx[i])*two24A;
   556   }
   557   tx[2] = z;
   558   nx = 3;
   559   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */
   560   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
   561   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   562   return n;
   563 }
   566 /* __kernel_sin( x, y, iy)
   567  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
   568  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   569  * Input y is the tail of x.
   570  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
   571  *
   572  * Algorithm
   573  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
   574  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
   575  *      3. sin(x) is approximated by a polynomial of degree 13 on
   576  *         [0,pi/4]
   577  *                               3            13
   578  *              sin(x) ~ x + S1*x + ... + S6*x
   579  *         where
   580  *
   581  *      |sin(x)         2     4     6     8     10     12  |     -58
   582  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
   583  *      |  x                                               |
   584  *
   585  *      4. sin(x+y) = sin(x) + sin'(x')*y
   586  *                  ~ sin(x) + (1-x*x/2)*y
   587  *         For better accuracy, let
   588  *                   3      2      2      2      2
   589  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
   590  *         then                   3    2
   591  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
   592  */
   594 static const double
   595 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
   596 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
   597 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
   598 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
   599 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
   600 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
   602 static double __kernel_sin(double x, double y, int iy)
   603 {
   604         double z,r,v;
   605         int ix;
   606         ix = __HI(x)&0x7fffffff;        /* high word of x */
   607         if(ix<0x3e400000)                       /* |x| < 2**-27 */
   608            {if((int)x==0) return x;}            /* generate inexact */
   609         z       =  x*x;
   610         v       =  z*x;
   611         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
   612         if(iy==0) return x+v*(S1+z*r);
   613         else      return x-((z*(half*y-v*r)-y)-v*S1);
   614 }
   616 /*
   617  * __kernel_cos( x,  y )
   618  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
   619  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   620  * Input y is the tail of x.
   621  *
   622  * Algorithm
   623  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
   624  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
   625  *      3. cos(x) is approximated by a polynomial of degree 14 on
   626  *         [0,pi/4]
   627  *                                       4            14
   628  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
   629  *         where the remez error is
   630  *
   631  *      |              2     4     6     8     10    12     14 |     -58
   632  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
   633  *      |                                                      |
   634  *
   635  *                     4     6     8     10    12     14
   636  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
   637  *             cos(x) = 1 - x*x/2 + r
   638  *         since cos(x+y) ~ cos(x) - sin(x)*y
   639  *                        ~ cos(x) - x*y,
   640  *         a correction term is necessary in cos(x) and hence
   641  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
   642  *         For better accuracy when x > 0.3, let qx = |x|/4 with
   643  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
   644  *         Then
   645  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
   646  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
   647  *         magnitude of the latter is at least a quarter of x*x/2,
   648  *         thus, reducing the rounding error in the subtraction.
   649  */
   651 static const double
   652 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
   653 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
   654 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
   655 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
   656 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
   657 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
   659 static double __kernel_cos(double x, double y)
   660 {
   661   double a,hz,z,r,qx;
   662   int ix;
   663   ix = __HI(x)&0x7fffffff;      /* ix = |x|'s high word*/
   664   if(ix<0x3e400000) {                   /* if x < 2**27 */
   665     if(((int)x)==0) return one;         /* generate inexact */
   666   }
   667   z  = x*x;
   668   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
   669   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */
   670     return one - (0.5*z - (z*r - x*y));
   671   else {
   672     if(ix > 0x3fe90000) {               /* x > 0.78125 */
   673       qx = 0.28125;
   674     } else {
   675       __HI(qx) = ix-0x00200000; /* x/4 */
   676       __LO(qx) = 0;
   677     }
   678     hz = 0.5*z-qx;
   679     a  = one-qx;
   680     return a - (hz - (z*r-x*y));
   681   }
   682 }
   684 /* __kernel_tan( x, y, k )
   685  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
   686  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   687  * Input y is the tail of x.
   688  * Input k indicates whether tan (if k=1) or
   689  * -1/tan (if k= -1) is returned.
   690  *
   691  * Algorithm
   692  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
   693  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
   694  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
   695  *         [0,0.67434]
   696  *                               3             27
   697  *              tan(x) ~ x + T1*x + ... + T13*x
   698  *         where
   699  *
   700  *              |tan(x)         2     4            26   |     -59.2
   701  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
   702  *              |  x                                    |
   703  *
   704  *         Note: tan(x+y) = tan(x) + tan'(x)*y
   705  *                        ~ tan(x) + (1+x*x)*y
   706  *         Therefore, for better accuracy in computing tan(x+y), let
   707  *                   3      2      2       2       2
   708  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
   709  *         then
   710  *                                  3    2
   711  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
   712  *
   713  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
   714  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
   715  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
   716  */
   718 static const double
   719 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
   720 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
   721 T[] =  {
   722   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
   723   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
   724   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
   725   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
   726   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
   727   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
   728   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
   729   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
   730   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
   731   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
   732   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
   733  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
   734   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
   735 };
   737 static double __kernel_tan(double x, double y, int iy)
   738 {
   739   double z,r,v,w,s;
   740   int ix,hx;
   741   hx = __HI(x);   /* high word of x */
   742   ix = hx&0x7fffffff;     /* high word of |x| */
   743   if(ix<0x3e300000) {                     /* x < 2**-28 */
   744     if((int)x==0) {                       /* generate inexact */
   745       if (((ix | __LO(x)) | (iy + 1)) == 0)
   746         return one / fabsd(x);
   747       else {
   748         if (iy == 1)
   749           return x;
   750         else {    /* compute -1 / (x+y) carefully */
   751           double a, t;
   753           z = w = x + y;
   754           __LO(z) = 0;
   755           v = y - (z - x);
   756           t = a = -one / w;
   757           __LO(t) = 0;
   758           s = one + t * z;
   759           return t + a * (s + t * v);
   760         }
   761       }
   762     }
   763   }
   764   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */
   765     if(hx<0) {x = -x; y = -y;}
   766     z = pio4-x;
   767     w = pio4lo-y;
   768     x = z+w; y = 0.0;
   769   }
   770   z       =  x*x;
   771   w       =  z*z;
   772   /* Break x^5*(T[1]+x^2*T[2]+...) into
   773    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
   774    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
   775    */
   776   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
   777   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
   778   s = z*x;
   779   r = y + z*(s*(r+v)+y);
   780   r += T[0]*s;
   781   w = x+r;
   782   if(ix>=0x3FE59428) {
   783     v = (double)iy;
   784     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
   785   }
   786   if(iy==1) return w;
   787   else {          /* if allow error up to 2 ulp,
   788                      simply return -1.0/(x+r) here */
   789     /*  compute -1.0/(x+r) accurately */
   790     double a,t;
   791     z  = w;
   792     __LO(z) = 0;
   793     v  = r-(z - x);     /* z+v = r+x */
   794     t = a  = -1.0/w;    /* a = -1.0/w */
   795     __LO(t) = 0;
   796     s  = 1.0+t*z;
   797     return t+a*(s+t*v);
   798   }
   799 }
   802 //----------------------------------------------------------------------
   803 //
   804 // Routines for new sin/cos implementation
   805 //
   806 //----------------------------------------------------------------------
   808 /* sin(x)
   809  * Return sine function of x.
   810  *
   811  * kernel function:
   812  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   813  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
   814  *      __ieee754_rem_pio2      ... argument reduction routine
   815  *
   816  * Method.
   817  *      Let S,C and T denote the sin, cos and tan respectively on
   818  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   819  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   820  *      We have
   821  *
   822  *          n        sin(x)      cos(x)        tan(x)
   823  *     ----------------------------------------------------------
   824  *          0          S           C             T
   825  *          1          C          -S            -1/T
   826  *          2         -S          -C             T
   827  *          3         -C           S            -1/T
   828  *     ----------------------------------------------------------
   829  *
   830  * Special cases:
   831  *      Let trig be any of sin, cos, or tan.
   832  *      trig(+-INF)  is NaN, with signals;
   833  *      trig(NaN)    is that NaN;
   834  *
   835  * Accuracy:
   836  *      TRIG(x) returns trig(x) nearly rounded
   837  */
   839 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
   840   double y[2],z=0.0;
   841   int n, ix;
   843   /* High word of x. */
   844   ix = __HI(x);
   846   /* |x| ~< pi/4 */
   847   ix &= 0x7fffffff;
   848   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
   850   /* sin(Inf or NaN) is NaN */
   851   else if (ix>=0x7ff00000) return x-x;
   853   /* argument reduction needed */
   854   else {
   855     n = __ieee754_rem_pio2(x,y);
   856     switch(n&3) {
   857     case 0: return  __kernel_sin(y[0],y[1],1);
   858     case 1: return  __kernel_cos(y[0],y[1]);
   859     case 2: return -__kernel_sin(y[0],y[1],1);
   860     default:
   861       return -__kernel_cos(y[0],y[1]);
   862     }
   863   }
   864 JRT_END
   866 /* cos(x)
   867  * Return cosine function of x.
   868  *
   869  * kernel function:
   870  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   871  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
   872  *      __ieee754_rem_pio2      ... argument reduction routine
   873  *
   874  * Method.
   875  *      Let S,C and T denote the sin, cos and tan respectively on
   876  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   877  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   878  *      We have
   879  *
   880  *          n        sin(x)      cos(x)        tan(x)
   881  *     ----------------------------------------------------------
   882  *          0          S           C             T
   883  *          1          C          -S            -1/T
   884  *          2         -S          -C             T
   885  *          3         -C           S            -1/T
   886  *     ----------------------------------------------------------
   887  *
   888  * Special cases:
   889  *      Let trig be any of sin, cos, or tan.
   890  *      trig(+-INF)  is NaN, with signals;
   891  *      trig(NaN)    is that NaN;
   892  *
   893  * Accuracy:
   894  *      TRIG(x) returns trig(x) nearly rounded
   895  */
   897 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
   898   double y[2],z=0.0;
   899   int n, ix;
   901   /* High word of x. */
   902   ix = __HI(x);
   904   /* |x| ~< pi/4 */
   905   ix &= 0x7fffffff;
   906   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
   908   /* cos(Inf or NaN) is NaN */
   909   else if (ix>=0x7ff00000) return x-x;
   911   /* argument reduction needed */
   912   else {
   913     n = __ieee754_rem_pio2(x,y);
   914     switch(n&3) {
   915     case 0: return  __kernel_cos(y[0],y[1]);
   916     case 1: return -__kernel_sin(y[0],y[1],1);
   917     case 2: return -__kernel_cos(y[0],y[1]);
   918     default:
   919       return  __kernel_sin(y[0],y[1],1);
   920     }
   921   }
   922 JRT_END
   924 /* tan(x)
   925  * Return tangent function of x.
   926  *
   927  * kernel function:
   928  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
   929  *      __ieee754_rem_pio2      ... argument reduction routine
   930  *
   931  * Method.
   932  *      Let S,C and T denote the sin, cos and tan respectively on
   933  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   934  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   935  *      We have
   936  *
   937  *          n        sin(x)      cos(x)        tan(x)
   938  *     ----------------------------------------------------------
   939  *          0          S           C             T
   940  *          1          C          -S            -1/T
   941  *          2         -S          -C             T
   942  *          3         -C           S            -1/T
   943  *     ----------------------------------------------------------
   944  *
   945  * Special cases:
   946  *      Let trig be any of sin, cos, or tan.
   947  *      trig(+-INF)  is NaN, with signals;
   948  *      trig(NaN)    is that NaN;
   949  *
   950  * Accuracy:
   951  *      TRIG(x) returns trig(x) nearly rounded
   952  */
   954 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
   955   double y[2],z=0.0;
   956   int n, ix;
   958   /* High word of x. */
   959   ix = __HI(x);
   961   /* |x| ~< pi/4 */
   962   ix &= 0x7fffffff;
   963   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
   965   /* tan(Inf or NaN) is NaN */
   966   else if (ix>=0x7ff00000) return x-x;            /* NaN */
   968   /* argument reduction needed */
   969   else {
   970     n = __ieee754_rem_pio2(x,y);
   971     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
   972                                                      -1 -- n odd */
   973   }
   974 JRT_END
   977 #ifdef WIN32
   978 # pragma optimize ( "", on )
   979 #endif

mercurial