src/share/vm/runtime/sharedRuntimeTrig.cpp

Fri, 29 Apr 2016 00:06:10 +0800

author
aoqi
date
Fri, 29 Apr 2016 00:06:10 +0800
changeset 1
2d8a650513c2
parent 0
f90c822e73f8
child 6876
710a3c8b516e
permissions
-rw-r--r--

Added MIPS 64-bit port.

     1 /*
     2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 /*
    26  * This file has been modified by Loongson Technology in 2015. These
    27  * modifications are Copyright (c) 2015 Loongson Technology, and are made
    28  * available on the same license terms set forth above.
    29  */
    31 #include "precompiled.hpp"
    32 #include "prims/jni.h"
    33 #include "runtime/interfaceSupport.hpp"
    34 #include "runtime/sharedRuntime.hpp"
    36 // This file contains copies of the fdlibm routines used by
    37 // StrictMath. It turns out that it is almost always required to use
    38 // these runtime routines; the Intel CPU doesn't meet the Java
    39 // specification for sin/cos outside a certain limited argument range,
    40 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
    41 // also turns out that avoiding the indirect call through function
    42 // pointer out to libjava.so in SharedRuntime speeds these routines up
    43 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
    45 // Enabling optimizations in this file causes incorrect code to be
    46 // generated; can not figure out how to turn down optimization for one
    47 // file in the IDE on Windows
    48 #ifdef WIN32
    49 # pragma optimize ( "", off )
    50 #endif
    52 /* The above workaround now causes more problems with the latest MS compiler.
    53  * Visual Studio 2010's /GS option tries to guard against buffer overruns.
    54  * /GS is on by default if you specify optimizations, which we do globally
    55  * via /W3 /O2. However the above selective turning off of optimizations means
    56  * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)
    57  * then the compilation fails. There are several possible solutions
    58  * (1) Remove that pragma above as obsolete with VS2010 - requires testing.
    59  * (2) Stop treating warnings as errors - would be a backward step
    60  * (3) Disable /GS - may help performance but you lose the security checks
    61  * (4) Disable the warning with "#pragma warning( disable : 4748 )"
    62  * (5) Disable planting the code with  __declspec(safebuffers)
    63  * I've opted for (5) although we should investigate the local performance
    64  * benefits of (1) and global performance benefit of (3).
    65  */
    66 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))
    67 #define SAFEBUF __declspec(safebuffers)
    68 #else
    69 #define SAFEBUF
    70 #endif
    72 #include <math.h>
    74 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
    75 // [jk] this is not 100% correct because the float word order may different
    76 // from the byte order (e.g. on ARM)
    77 #ifdef VM_LITTLE_ENDIAN
    78 # define __HI(x) *(1+(int*)&x)
    79 # define __LO(x) *(int*)&x
    80 #else
    81 # define __HI(x) *(int*)&x
    82 # define __LO(x) *(1+(int*)&x)
    83 #endif
    85 static double copysignA(double x, double y) {
    86   __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
    87   return x;
    88 }
    90 /*
    91  * scalbn (double x, int n)
    92  * scalbn(x,n) returns x* 2**n  computed by  exponent
    93  * manipulation rather than by actually performing an
    94  * exponentiation or a multiplication.
    95  */
    97 static const double
    98 two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
    99 twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
   100 hugeX  = 1.0e+300,
   101 tiny   = 1.0e-300;
   103 static double scalbnA (double x, int n) {
   104   int  k,hx,lx;
   105   hx = __HI(x);
   106   lx = __LO(x);
   107   k = (hx&0x7ff00000)>>20;              /* extract exponent */
   108   if (k==0) {                           /* 0 or subnormal x */
   109     if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
   110     x *= two54;
   111     hx = __HI(x);
   112     k = ((hx&0x7ff00000)>>20) - 54;
   113     if (n< -50000) return tiny*x;       /*underflow*/
   114   }
   115   if (k==0x7ff) return x+x;             /* NaN or Inf */
   116   k = k+n;
   117   if (k >  0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */
   118   if (k > 0)                            /* normal result */
   119     {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
   120   if (k <= -54) {
   121     if (n > 50000)      /* in case integer overflow in n+k */
   122       return hugeX*copysignA(hugeX,x);  /*overflow*/
   123     else return tiny*copysignA(tiny,x); /*underflow*/
   124   }
   125   k += 54;                              /* subnormal result */
   126   __HI(x) = (hx&0x800fffff)|(k<<20);
   127   return x*twom54;
   128 }
   130 /*
   131  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
   132  * double x[],y[]; int e0,nx,prec; int ipio2[];
   133  *
   134  * __kernel_rem_pio2 return the last three digits of N with
   135  *              y = x - N*pi/2
   136  * so that |y| < pi/2.
   137  *
   138  * The method is to compute the integer (mod 8) and fraction parts of
   139  * (2/pi)*x without doing the full multiplication. In general we
   140  * skip the part of the product that are known to be a huge integer (
   141  * more accurately, = 0 mod 8 ). Thus the number of operations are
   142  * independent of the exponent of the input.
   143  *
   144  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
   145  *
   146  * Input parameters:
   147  *      x[]     The input value (must be positive) is broken into nx
   148  *              pieces of 24-bit integers in double precision format.
   149  *              x[i] will be the i-th 24 bit of x. The scaled exponent
   150  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
   151  *              match x's up to 24 bits.
   152  *
   153  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
   154  *                      e0 = ilogb(z)-23
   155  *                      z  = scalbn(z,-e0)
   156  *              for i = 0,1,2
   157  *                      x[i] = floor(z)
   158  *                      z    = (z-x[i])*2**24
   159  *
   160  *
   161  *      y[]     ouput result in an array of double precision numbers.
   162  *              The dimension of y[] is:
   163  *                      24-bit  precision       1
   164  *                      53-bit  precision       2
   165  *                      64-bit  precision       2
   166  *                      113-bit precision       3
   167  *              The actual value is the sum of them. Thus for 113-bit
   168  *              precsion, one may have to do something like:
   169  *
   170  *              long double t,w,r_head, r_tail;
   171  *              t = (long double)y[2] + (long double)y[1];
   172  *              w = (long double)y[0];
   173  *              r_head = t+w;
   174  *              r_tail = w - (r_head - t);
   175  *
   176  *      e0      The exponent of x[0]
   177  *
   178  *      nx      dimension of x[]
   179  *
   180  *      prec    an interger indicating the precision:
   181  *                      0       24  bits (single)
   182  *                      1       53  bits (double)
   183  *                      2       64  bits (extended)
   184  *                      3       113 bits (quad)
   185  *
   186  *      ipio2[]
   187  *              integer array, contains the (24*i)-th to (24*i+23)-th
   188  *              bit of 2/pi after binary point. The corresponding
   189  *              floating value is
   190  *
   191  *                      ipio2[i] * 2^(-24(i+1)).
   192  *
   193  * External function:
   194  *      double scalbn(), floor();
   195  *
   196  *
   197  * Here is the description of some local variables:
   198  *
   199  *      jk      jk+1 is the initial number of terms of ipio2[] needed
   200  *              in the computation. The recommended value is 2,3,4,
   201  *              6 for single, double, extended,and quad.
   202  *
   203  *      jz      local integer variable indicating the number of
   204  *              terms of ipio2[] used.
   205  *
   206  *      jx      nx - 1
   207  *
   208  *      jv      index for pointing to the suitable ipio2[] for the
   209  *              computation. In general, we want
   210  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
   211  *              is an integer. Thus
   212  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
   213  *              Hence jv = max(0,(e0-3)/24).
   214  *
   215  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
   216  *
   217  *      q[]     double array with integral value, representing the
   218  *              24-bits chunk of the product of x and 2/pi.
   219  *
   220  *      q0      the corresponding exponent of q[0]. Note that the
   221  *              exponent for q[i] would be q0-24*i.
   222  *
   223  *      PIo2[]  double precision array, obtained by cutting pi/2
   224  *              into 24 bits chunks.
   225  *
   226  *      f[]     ipio2[] in floating point
   227  *
   228  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
   229  *
   230  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
   231  *
   232  *      ih      integer. If >0 it indicats q[] is >= 0.5, hence
   233  *              it also indicates the *sign* of the result.
   234  *
   235  */
   238 /*
   239  * Constants:
   240  * The hexadecimal values are the intended ones for the following
   241  * constants. The decimal values may be used, provided that the
   242  * compiler will convert from decimal to binary accurately enough
   243  * to produce the hexadecimal values shown.
   244  */
   247 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
   249 static const double PIo2[] = {
   250   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
   251   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
   252   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
   253   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
   254   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
   255   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
   256   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
   257   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
   258 };
   260 static const double
   261 zeroB   = 0.0,
   262 one     = 1.0,
   263 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   264 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
   266 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
   267   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
   268   double z,fw,f[20],fq[20],q[20];
   270   /* initialize jk*/
   271   jk = init_jk[prec];
   272   jp = jk;
   274   /* determine jx,jv,q0, note that 3>q0 */
   275   jx =  nx-1;
   276   jv = (e0-3)/24; if(jv<0) jv=0;
   277   q0 =  e0-24*(jv+1);
   279   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
   280   j = jv-jx; m = jx+jk;
   281   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
   283   /* compute q[0],q[1],...q[jk] */
   284   for (i=0;i<=jk;i++) {
   285     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
   286   }
   288   jz = jk;
   289 recompute:
   290   /* distill q[] into iq[] reversingly */
   291   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
   292     fw    =  (double)((int)(twon24* z));
   293     iq[i] =  (int)(z-two24B*fw);
   294     z     =  q[j-1]+fw;
   295   }
   297   /* compute n */
   298   z  = scalbnA(z,q0);           /* actual value of z */
   299   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */
   300   n  = (int) z;
   301   z -= (double)n;
   302   ih = 0;
   303   if(q0>0) {    /* need iq[jz-1] to determine n */
   304     i  = (iq[jz-1]>>(24-q0)); n += i;
   305     iq[jz-1] -= i<<(24-q0);
   306     ih = iq[jz-1]>>(23-q0);
   307   }
   308   else if(q0==0) ih = iq[jz-1]>>23;
   309   else if(z>=0.5) ih=2;
   311   if(ih>0) {    /* q > 0.5 */
   312     n += 1; carry = 0;
   313     for(i=0;i<jz ;i++) {        /* compute 1-q */
   314       j = iq[i];
   315       if(carry==0) {
   316         if(j!=0) {
   317           carry = 1; iq[i] = 0x1000000- j;
   318         }
   319       } else  iq[i] = 0xffffff - j;
   320     }
   321     if(q0>0) {          /* rare case: chance is 1 in 12 */
   322       switch(q0) {
   323       case 1:
   324         iq[jz-1] &= 0x7fffff; break;
   325       case 2:
   326         iq[jz-1] &= 0x3fffff; break;
   327       }
   328     }
   329     if(ih==2) {
   330       z = one - z;
   331       if(carry!=0) z -= scalbnA(one,q0);
   332     }
   333   }
   335   /* check if recomputation is needed */
   336   if(z==zeroB) {
   337     j = 0;
   338     for (i=jz-1;i>=jk;i--) j |= iq[i];
   339     if(j==0) { /* need recomputation */
   340       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
   342       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
   343         f[jx+i] = (double) ipio2[jv+i];
   344         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
   345         q[i] = fw;
   346       }
   347       jz += k;
   348       goto recompute;
   349     }
   350   }
   352   /* chop off zero terms */
   353   if(z==0.0) {
   354     jz -= 1; q0 -= 24;
   355     while(iq[jz]==0) { jz--; q0-=24;}
   356   } else { /* break z into 24-bit if neccessary */
   357     z = scalbnA(z,-q0);
   358     if(z>=two24B) {
   359       fw = (double)((int)(twon24*z));
   360       iq[jz] = (int)(z-two24B*fw);
   361       jz += 1; q0 += 24;
   362       iq[jz] = (int) fw;
   363     } else iq[jz] = (int) z ;
   364   }
   366   /* convert integer "bit" chunk to floating-point value */
   367   fw = scalbnA(one,q0);
   368   for(i=jz;i>=0;i--) {
   369     q[i] = fw*(double)iq[i]; fw*=twon24;
   370   }
   372   /* compute PIo2[0,...,jp]*q[jz,...,0] */
   373   for(i=jz;i>=0;i--) {
   374     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
   375     fq[jz-i] = fw;
   376   }
   378   /* compress fq[] into y[] */
   379   switch(prec) {
   380   case 0:
   381     fw = 0.0;
   382     for (i=jz;i>=0;i--) fw += fq[i];
   383     y[0] = (ih==0)? fw: -fw;
   384     break;
   385   case 1:
   386   case 2:
   387     fw = 0.0;
   388     for (i=jz;i>=0;i--) fw += fq[i];
   389     y[0] = (ih==0)? fw: -fw;
   390     fw = fq[0]-fw;
   391     for (i=1;i<=jz;i++) fw += fq[i];
   392     y[1] = (ih==0)? fw: -fw;
   393     break;
   394   case 3:       /* painful */
   395     for (i=jz;i>0;i--) {
   396       fw      = fq[i-1]+fq[i];
   397       fq[i]  += fq[i-1]-fw;
   398       fq[i-1] = fw;
   399     }
   400     for (i=jz;i>1;i--) {
   401       fw      = fq[i-1]+fq[i];
   402       fq[i]  += fq[i-1]-fw;
   403       fq[i-1] = fw;
   404     }
   405     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
   406     if(ih==0) {
   407       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
   408     } else {
   409       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
   410     }
   411   }
   412   return n&7;
   413 }
   416 /*
   417  * ====================================================
   418  * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.
   419  *
   420  * Developed at SunPro, a Sun Microsystems, Inc. business.
   421  * Permission to use, copy, modify, and distribute this
   422  * software is freely granted, provided that this notice
   423  * is preserved.
   424  * ====================================================
   425  *
   426  */
   428 /* __ieee754_rem_pio2(x,y)
   429  *
   430  * return the remainder of x rem pi/2 in y[0]+y[1]
   431  * use __kernel_rem_pio2()
   432  */
   434 /*
   435  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
   436  */
   437 static const int two_over_pi[] = {
   438   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
   439   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
   440   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
   441   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
   442   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
   443   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
   444   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
   445   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
   446   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
   447   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
   448   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
   449 };
   451 static const int npio2_hw[] = {
   452   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
   453   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
   454   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
   455   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
   456   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
   457   0x404858EB, 0x404921FB,
   458 };
   460 /*
   461  * invpio2:  53 bits of 2/pi
   462  * pio2_1:   first  33 bit of pi/2
   463  * pio2_1t:  pi/2 - pio2_1
   464  * pio2_2:   second 33 bit of pi/2
   465  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
   466  * pio2_3:   third  33 bit of pi/2
   467  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
   468  */
   470 static const double
   471 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
   472 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
   473 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   474 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
   475 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
   476 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
   477 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
   478 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
   479 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
   480 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
   482 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {
   483   double z,w,t,r,fn;
   484   double tx[3];
   485   int e0,i,j,nx,n,ix,hx,i0;
   487   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
   488   hx = *(i0+(int*)&x);          /* high word of x */
   489   ix = hx&0x7fffffff;
   490   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
   491     {y[0] = x; y[1] = 0; return 0;}
   492   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
   493     if(hx>0) {
   494       z = x - pio2_1;
   495       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   496         y[0] = z - pio2_1t;
   497         y[1] = (z-y[0])-pio2_1t;
   498       } else {                /* near pi/2, use 33+33+53 bit pi */
   499         z -= pio2_2;
   500         y[0] = z - pio2_2t;
   501         y[1] = (z-y[0])-pio2_2t;
   502       }
   503       return 1;
   504     } else {    /* negative x */
   505       z = x + pio2_1;
   506       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   507         y[0] = z + pio2_1t;
   508         y[1] = (z-y[0])+pio2_1t;
   509       } else {                /* near pi/2, use 33+33+53 bit pi */
   510         z += pio2_2;
   511         y[0] = z + pio2_2t;
   512         y[1] = (z-y[0])+pio2_2t;
   513       }
   514       return -1;
   515     }
   516   }
   517   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
   518     t  = fabsd(x);
   519     n  = (int) (t*invpio2+half);
   520     fn = (double)n;
   521     r  = t-fn*pio2_1;
   522     w  = fn*pio2_1t;    /* 1st round good to 85 bit */
   523     if(n<32&&ix!=npio2_hw[n-1]) {
   524       y[0] = r-w;       /* quick check no cancellation */
   525     } else {
   526       j  = ix>>20;
   527       y[0] = r-w;
   528       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   529       if(i>16) {  /* 2nd iteration needed, good to 118 */
   530         t  = r;
   531         w  = fn*pio2_2;
   532         r  = t-w;
   533         w  = fn*pio2_2t-((t-r)-w);
   534         y[0] = r-w;
   535         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   536         if(i>49)  {     /* 3rd iteration need, 151 bits acc */
   537           t  = r;       /* will cover all possible cases */
   538           w  = fn*pio2_3;
   539           r  = t-w;
   540           w  = fn*pio2_3t-((t-r)-w);
   541           y[0] = r-w;
   542         }
   543       }
   544     }
   545     y[1] = (r-y[0])-w;
   546     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   547     else         return n;
   548   }
   549   /*
   550    * all other (large) arguments
   551    */
   552   if(ix>=0x7ff00000) {          /* x is inf or NaN */
   553     y[0]=y[1]=x-x; return 0;
   554   }
   555   /* set z = scalbn(|x|,ilogb(x)-23) */
   556   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
   557   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
   558   *(i0+(int*)&z) = ix - (e0<<20);
   559   for(i=0;i<2;i++) {
   560     tx[i] = (double)((int)(z));
   561     z     = (z-tx[i])*two24A;
   562   }
   563   tx[2] = z;
   564   nx = 3;
   565   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */
   566   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
   567   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   568   return n;
   569 }
   572 /* __kernel_sin( x, y, iy)
   573  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
   574  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   575  * Input y is the tail of x.
   576  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
   577  *
   578  * Algorithm
   579  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
   580  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
   581  *      3. sin(x) is approximated by a polynomial of degree 13 on
   582  *         [0,pi/4]
   583  *                               3            13
   584  *              sin(x) ~ x + S1*x + ... + S6*x
   585  *         where
   586  *
   587  *      |sin(x)         2     4     6     8     10     12  |     -58
   588  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
   589  *      |  x                                               |
   590  *
   591  *      4. sin(x+y) = sin(x) + sin'(x')*y
   592  *                  ~ sin(x) + (1-x*x/2)*y
   593  *         For better accuracy, let
   594  *                   3      2      2      2      2
   595  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
   596  *         then                   3    2
   597  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
   598  */
   599 #ifdef MIPS64
   600 #undef S1
   601 #undef S2
   602 #undef S3
   603 #undef S4
   604 #undef S5
   605 #undef S6
   606 #endif
   608 static const double
   609 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
   610 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
   611 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
   612 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
   613 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
   614 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
   616 static double __kernel_sin(double x, double y, int iy)
   617 {
   618         double z,r,v;
   619         int ix;
   620         ix = __HI(x)&0x7fffffff;        /* high word of x */
   621         if(ix<0x3e400000)                       /* |x| < 2**-27 */
   622            {if((int)x==0) return x;}            /* generate inexact */
   623         z       =  x*x;
   624         v       =  z*x;
   625         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
   626         if(iy==0) return x+v*(S1+z*r);
   627         else      return x-((z*(half*y-v*r)-y)-v*S1);
   628 }
   630 /*
   631  * __kernel_cos( x,  y )
   632  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
   633  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   634  * Input y is the tail of x.
   635  *
   636  * Algorithm
   637  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
   638  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
   639  *      3. cos(x) is approximated by a polynomial of degree 14 on
   640  *         [0,pi/4]
   641  *                                       4            14
   642  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
   643  *         where the remez error is
   644  *
   645  *      |              2     4     6     8     10    12     14 |     -58
   646  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
   647  *      |                                                      |
   648  *
   649  *                     4     6     8     10    12     14
   650  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
   651  *             cos(x) = 1 - x*x/2 + r
   652  *         since cos(x+y) ~ cos(x) - sin(x)*y
   653  *                        ~ cos(x) - x*y,
   654  *         a correction term is necessary in cos(x) and hence
   655  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
   656  *         For better accuracy when x > 0.3, let qx = |x|/4 with
   657  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
   658  *         Then
   659  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
   660  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
   661  *         magnitude of the latter is at least a quarter of x*x/2,
   662  *         thus, reducing the rounding error in the subtraction.
   663  */
   665 static const double
   666 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
   667 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
   668 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
   669 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
   670 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
   671 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
   673 static double __kernel_cos(double x, double y)
   674 {
   675   double a,h,z,r,qx;
   676   int ix;
   677   ix = __HI(x)&0x7fffffff;      /* ix = |x|'s high word*/
   678   if(ix<0x3e400000) {                   /* if x < 2**27 */
   679     if(((int)x)==0) return one;         /* generate inexact */
   680   }
   681   z  = x*x;
   682   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
   683   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */
   684     return one - (0.5*z - (z*r - x*y));
   685   else {
   686     if(ix > 0x3fe90000) {               /* x > 0.78125 */
   687       qx = 0.28125;
   688     } else {
   689       __HI(qx) = ix-0x00200000; /* x/4 */
   690       __LO(qx) = 0;
   691     }
   692     h = 0.5*z-qx;
   693     a = one-qx;
   694     return a - (h - (z*r-x*y));
   695   }
   696 }
   698 /* __kernel_tan( x, y, k )
   699  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
   700  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   701  * Input y is the tail of x.
   702  * Input k indicates whether tan (if k=1) or
   703  * -1/tan (if k= -1) is returned.
   704  *
   705  * Algorithm
   706  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
   707  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
   708  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
   709  *         [0,0.67434]
   710  *                               3             27
   711  *              tan(x) ~ x + T1*x + ... + T13*x
   712  *         where
   713  *
   714  *              |tan(x)         2     4            26   |     -59.2
   715  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
   716  *              |  x                                    |
   717  *
   718  *         Note: tan(x+y) = tan(x) + tan'(x)*y
   719  *                        ~ tan(x) + (1+x*x)*y
   720  *         Therefore, for better accuracy in computing tan(x+y), let
   721  *                   3      2      2       2       2
   722  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
   723  *         then
   724  *                                  3    2
   725  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
   726  *
   727  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
   728  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
   729  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
   730  */
   732 static const double
   733 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
   734 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
   735 T[] =  {
   736   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
   737   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
   738   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
   739   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
   740   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
   741   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
   742   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
   743   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
   744   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
   745   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
   746   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
   747  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
   748   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
   749 };
   751 static double __kernel_tan(double x, double y, int iy)
   752 {
   753   double z,r,v,w,s;
   754   int ix,hx;
   755   hx = __HI(x);   /* high word of x */
   756   ix = hx&0x7fffffff;     /* high word of |x| */
   757   if(ix<0x3e300000) {                     /* x < 2**-28 */
   758     if((int)x==0) {                       /* generate inexact */
   759       if (((ix | __LO(x)) | (iy + 1)) == 0)
   760         return one / fabsd(x);
   761       else {
   762         if (iy == 1)
   763           return x;
   764         else {    /* compute -1 / (x+y) carefully */
   765           double a, t;
   767           z = w = x + y;
   768           __LO(z) = 0;
   769           v = y - (z - x);
   770           t = a = -one / w;
   771           __LO(t) = 0;
   772           s = one + t * z;
   773           return t + a * (s + t * v);
   774         }
   775       }
   776     }
   777   }
   778   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */
   779     if(hx<0) {x = -x; y = -y;}
   780     z = pio4-x;
   781     w = pio4lo-y;
   782     x = z+w; y = 0.0;
   783   }
   784   z       =  x*x;
   785   w       =  z*z;
   786   /* Break x^5*(T[1]+x^2*T[2]+...) into
   787    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
   788    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
   789    */
   790   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
   791   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
   792   s = z*x;
   793   r = y + z*(s*(r+v)+y);
   794   r += T[0]*s;
   795   w = x+r;
   796   if(ix>=0x3FE59428) {
   797     v = (double)iy;
   798     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
   799   }
   800   if(iy==1) return w;
   801   else {          /* if allow error up to 2 ulp,
   802                      simply return -1.0/(x+r) here */
   803     /*  compute -1.0/(x+r) accurately */
   804     double a,t;
   805     z  = w;
   806     __LO(z) = 0;
   807     v  = r-(z - x);     /* z+v = r+x */
   808     t = a  = -1.0/w;    /* a = -1.0/w */
   809     __LO(t) = 0;
   810     s  = 1.0+t*z;
   811     return t+a*(s+t*v);
   812   }
   813 }
   816 //----------------------------------------------------------------------
   817 //
   818 // Routines for new sin/cos implementation
   819 //
   820 //----------------------------------------------------------------------
   822 /* sin(x)
   823  * Return sine function of x.
   824  *
   825  * kernel function:
   826  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   827  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
   828  *      __ieee754_rem_pio2      ... argument reduction routine
   829  *
   830  * Method.
   831  *      Let S,C and T denote the sin, cos and tan respectively on
   832  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   833  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   834  *      We have
   835  *
   836  *          n        sin(x)      cos(x)        tan(x)
   837  *     ----------------------------------------------------------
   838  *          0          S           C             T
   839  *          1          C          -S            -1/T
   840  *          2         -S          -C             T
   841  *          3         -C           S            -1/T
   842  *     ----------------------------------------------------------
   843  *
   844  * Special cases:
   845  *      Let trig be any of sin, cos, or tan.
   846  *      trig(+-INF)  is NaN, with signals;
   847  *      trig(NaN)    is that NaN;
   848  *
   849  * Accuracy:
   850  *      TRIG(x) returns trig(x) nearly rounded
   851  */
   853 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
   854   double y[2],z=0.0;
   855   int n, ix;
   857   /* High word of x. */
   858   ix = __HI(x);
   860   /* |x| ~< pi/4 */
   861   ix &= 0x7fffffff;
   862   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
   864   /* sin(Inf or NaN) is NaN */
   865   else if (ix>=0x7ff00000) return x-x;
   867   /* argument reduction needed */
   868   else {
   869     n = __ieee754_rem_pio2(x,y);
   870     switch(n&3) {
   871     case 0: return  __kernel_sin(y[0],y[1],1);
   872     case 1: return  __kernel_cos(y[0],y[1]);
   873     case 2: return -__kernel_sin(y[0],y[1],1);
   874     default:
   875       return -__kernel_cos(y[0],y[1]);
   876     }
   877   }
   878 JRT_END
   880 /* cos(x)
   881  * Return cosine function of x.
   882  *
   883  * kernel function:
   884  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   885  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
   886  *      __ieee754_rem_pio2      ... argument reduction routine
   887  *
   888  * Method.
   889  *      Let S,C and T denote the sin, cos and tan respectively on
   890  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   891  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   892  *      We have
   893  *
   894  *          n        sin(x)      cos(x)        tan(x)
   895  *     ----------------------------------------------------------
   896  *          0          S           C             T
   897  *          1          C          -S            -1/T
   898  *          2         -S          -C             T
   899  *          3         -C           S            -1/T
   900  *     ----------------------------------------------------------
   901  *
   902  * Special cases:
   903  *      Let trig be any of sin, cos, or tan.
   904  *      trig(+-INF)  is NaN, with signals;
   905  *      trig(NaN)    is that NaN;
   906  *
   907  * Accuracy:
   908  *      TRIG(x) returns trig(x) nearly rounded
   909  */
   911 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
   912   double y[2],z=0.0;
   913   int n, ix;
   915   /* High word of x. */
   916   ix = __HI(x);
   918   /* |x| ~< pi/4 */
   919   ix &= 0x7fffffff;
   920   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
   922   /* cos(Inf or NaN) is NaN */
   923   else if (ix>=0x7ff00000) return x-x;
   925   /* argument reduction needed */
   926   else {
   927     n = __ieee754_rem_pio2(x,y);
   928     switch(n&3) {
   929     case 0: return  __kernel_cos(y[0],y[1]);
   930     case 1: return -__kernel_sin(y[0],y[1],1);
   931     case 2: return -__kernel_cos(y[0],y[1]);
   932     default:
   933       return  __kernel_sin(y[0],y[1],1);
   934     }
   935   }
   936 JRT_END
   938 /* tan(x)
   939  * Return tangent function of x.
   940  *
   941  * kernel function:
   942  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
   943  *      __ieee754_rem_pio2      ... argument reduction routine
   944  *
   945  * Method.
   946  *      Let S,C and T denote the sin, cos and tan respectively on
   947  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   948  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   949  *      We have
   950  *
   951  *          n        sin(x)      cos(x)        tan(x)
   952  *     ----------------------------------------------------------
   953  *          0          S           C             T
   954  *          1          C          -S            -1/T
   955  *          2         -S          -C             T
   956  *          3         -C           S            -1/T
   957  *     ----------------------------------------------------------
   958  *
   959  * Special cases:
   960  *      Let trig be any of sin, cos, or tan.
   961  *      trig(+-INF)  is NaN, with signals;
   962  *      trig(NaN)    is that NaN;
   963  *
   964  * Accuracy:
   965  *      TRIG(x) returns trig(x) nearly rounded
   966  */
   968 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
   969   double y[2],z=0.0;
   970   int n, ix;
   972   /* High word of x. */
   973   ix = __HI(x);
   975   /* |x| ~< pi/4 */
   976   ix &= 0x7fffffff;
   977   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
   979   /* tan(Inf or NaN) is NaN */
   980   else if (ix>=0x7ff00000) return x-x;            /* NaN */
   982   /* argument reduction needed */
   983   else {
   984     n = __ieee754_rem_pio2(x,y);
   985     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
   986                                                      -1 -- n odd */
   987   }
   988 JRT_END
   991 #ifdef WIN32
   992 # pragma optimize ( "", on )
   993 #endif

mercurial