src/share/vm/runtime/sharedRuntimeTrig.cpp

Thu, 12 Oct 2017 21:27:07 +0800

author
aoqi
date
Thu, 12 Oct 2017 21:27:07 +0800
changeset 7535
7ae4e26cb1e0
parent 7002
a073be2ce5c2
parent 6876
710a3c8b516e
child 9138
b56ab8e56604
permissions
-rw-r--r--

merge

     1 /*
     2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 /*
    26  * This file has been modified by Loongson Technology in 2015. These
    27  * modifications are Copyright (c) 2015 Loongson Technology, and are made
    28  * available on the same license terms set forth above.
    29  */
    31 #include "precompiled.hpp"
    32 #include "prims/jni.h"
    33 #include "runtime/interfaceSupport.hpp"
    34 #include "runtime/sharedRuntime.hpp"
    36 // This file contains copies of the fdlibm routines used by
    37 // StrictMath. It turns out that it is almost always required to use
    38 // these runtime routines; the Intel CPU doesn't meet the Java
    39 // specification for sin/cos outside a certain limited argument range,
    40 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
    41 // also turns out that avoiding the indirect call through function
    42 // pointer out to libjava.so in SharedRuntime speeds these routines up
    43 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
    45 // Enabling optimizations in this file causes incorrect code to be
    46 // generated; can not figure out how to turn down optimization for one
    47 // file in the IDE on Windows
    48 #ifdef WIN32
    49 # pragma optimize ( "", off )
    50 #endif
    52 /* The above workaround now causes more problems with the latest MS compiler.
    53  * Visual Studio 2010's /GS option tries to guard against buffer overruns.
    54  * /GS is on by default if you specify optimizations, which we do globally
    55  * via /W3 /O2. However the above selective turning off of optimizations means
    56  * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)
    57  * then the compilation fails. There are several possible solutions
    58  * (1) Remove that pragma above as obsolete with VS2010 - requires testing.
    59  * (2) Stop treating warnings as errors - would be a backward step
    60  * (3) Disable /GS - may help performance but you lose the security checks
    61  * (4) Disable the warning with "#pragma warning( disable : 4748 )"
    62  * (5) Disable planting the code with  __declspec(safebuffers)
    63  * I've opted for (5) although we should investigate the local performance
    64  * benefits of (1) and global performance benefit of (3).
    65  */
    66 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))
    67 #define SAFEBUF __declspec(safebuffers)
    68 #else
    69 #define SAFEBUF
    70 #endif
    72 #include "runtime/sharedRuntimeMath.hpp"
    74 /*
    75  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
    76  * double x[],y[]; int e0,nx,prec; int ipio2[];
    77  *
    78  * __kernel_rem_pio2 return the last three digits of N with
    79  *              y = x - N*pi/2
    80  * so that |y| < pi/2.
    81  *
    82  * The method is to compute the integer (mod 8) and fraction parts of
    83  * (2/pi)*x without doing the full multiplication. In general we
    84  * skip the part of the product that are known to be a huge integer (
    85  * more accurately, = 0 mod 8 ). Thus the number of operations are
    86  * independent of the exponent of the input.
    87  *
    88  * (2/pi) is represented by an array of 24-bit integers in ipio2[].
    89  *
    90  * Input parameters:
    91  *      x[]     The input value (must be positive) is broken into nx
    92  *              pieces of 24-bit integers in double precision format.
    93  *              x[i] will be the i-th 24 bit of x. The scaled exponent
    94  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
    95  *              match x's up to 24 bits.
    96  *
    97  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
    98  *                      e0 = ilogb(z)-23
    99  *                      z  = scalbn(z,-e0)
   100  *              for i = 0,1,2
   101  *                      x[i] = floor(z)
   102  *                      z    = (z-x[i])*2**24
   103  *
   104  *
   105  *      y[]     ouput result in an array of double precision numbers.
   106  *              The dimension of y[] is:
   107  *                      24-bit  precision       1
   108  *                      53-bit  precision       2
   109  *                      64-bit  precision       2
   110  *                      113-bit precision       3
   111  *              The actual value is the sum of them. Thus for 113-bit
   112  *              precsion, one may have to do something like:
   113  *
   114  *              long double t,w,r_head, r_tail;
   115  *              t = (long double)y[2] + (long double)y[1];
   116  *              w = (long double)y[0];
   117  *              r_head = t+w;
   118  *              r_tail = w - (r_head - t);
   119  *
   120  *      e0      The exponent of x[0]
   121  *
   122  *      nx      dimension of x[]
   123  *
   124  *      prec    an interger indicating the precision:
   125  *                      0       24  bits (single)
   126  *                      1       53  bits (double)
   127  *                      2       64  bits (extended)
   128  *                      3       113 bits (quad)
   129  *
   130  *      ipio2[]
   131  *              integer array, contains the (24*i)-th to (24*i+23)-th
   132  *              bit of 2/pi after binary point. The corresponding
   133  *              floating value is
   134  *
   135  *                      ipio2[i] * 2^(-24(i+1)).
   136  *
   137  * External function:
   138  *      double scalbn(), floor();
   139  *
   140  *
   141  * Here is the description of some local variables:
   142  *
   143  *      jk      jk+1 is the initial number of terms of ipio2[] needed
   144  *              in the computation. The recommended value is 2,3,4,
   145  *              6 for single, double, extended,and quad.
   146  *
   147  *      jz      local integer variable indicating the number of
   148  *              terms of ipio2[] used.
   149  *
   150  *      jx      nx - 1
   151  *
   152  *      jv      index for pointing to the suitable ipio2[] for the
   153  *              computation. In general, we want
   154  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
   155  *              is an integer. Thus
   156  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
   157  *              Hence jv = max(0,(e0-3)/24).
   158  *
   159  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
   160  *
   161  *      q[]     double array with integral value, representing the
   162  *              24-bits chunk of the product of x and 2/pi.
   163  *
   164  *      q0      the corresponding exponent of q[0]. Note that the
   165  *              exponent for q[i] would be q0-24*i.
   166  *
   167  *      PIo2[]  double precision array, obtained by cutting pi/2
   168  *              into 24 bits chunks.
   169  *
   170  *      f[]     ipio2[] in floating point
   171  *
   172  *      iq[]    integer array by breaking up q[] in 24-bits chunk.
   173  *
   174  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
   175  *
   176  *      ih      integer. If >0 it indicats q[] is >= 0.5, hence
   177  *              it also indicates the *sign* of the result.
   178  *
   179  */
   182 /*
   183  * Constants:
   184  * The hexadecimal values are the intended ones for the following
   185  * constants. The decimal values may be used, provided that the
   186  * compiler will convert from decimal to binary accurately enough
   187  * to produce the hexadecimal values shown.
   188  */
   191 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
   193 static const double PIo2[] = {
   194   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
   195   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
   196   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
   197   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
   198   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
   199   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
   200   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
   201   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
   202 };
   204 static const double
   205 zeroB   = 0.0,
   206 one     = 1.0,
   207 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   208 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
   210 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
   211   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
   212   double z,fw,f[20],fq[20],q[20];
   214   /* initialize jk*/
   215   jk = init_jk[prec];
   216   jp = jk;
   218   /* determine jx,jv,q0, note that 3>q0 */
   219   jx =  nx-1;
   220   jv = (e0-3)/24; if(jv<0) jv=0;
   221   q0 =  e0-24*(jv+1);
   223   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
   224   j = jv-jx; m = jx+jk;
   225   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
   227   /* compute q[0],q[1],...q[jk] */
   228   for (i=0;i<=jk;i++) {
   229     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
   230   }
   232   jz = jk;
   233 recompute:
   234   /* distill q[] into iq[] reversingly */
   235   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
   236     fw    =  (double)((int)(twon24* z));
   237     iq[i] =  (int)(z-two24B*fw);
   238     z     =  q[j-1]+fw;
   239   }
   241   /* compute n */
   242   z  = scalbnA(z,q0);           /* actual value of z */
   243   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */
   244   n  = (int) z;
   245   z -= (double)n;
   246   ih = 0;
   247   if(q0>0) {    /* need iq[jz-1] to determine n */
   248     i  = (iq[jz-1]>>(24-q0)); n += i;
   249     iq[jz-1] -= i<<(24-q0);
   250     ih = iq[jz-1]>>(23-q0);
   251   }
   252   else if(q0==0) ih = iq[jz-1]>>23;
   253   else if(z>=0.5) ih=2;
   255   if(ih>0) {    /* q > 0.5 */
   256     n += 1; carry = 0;
   257     for(i=0;i<jz ;i++) {        /* compute 1-q */
   258       j = iq[i];
   259       if(carry==0) {
   260         if(j!=0) {
   261           carry = 1; iq[i] = 0x1000000- j;
   262         }
   263       } else  iq[i] = 0xffffff - j;
   264     }
   265     if(q0>0) {          /* rare case: chance is 1 in 12 */
   266       switch(q0) {
   267       case 1:
   268         iq[jz-1] &= 0x7fffff; break;
   269       case 2:
   270         iq[jz-1] &= 0x3fffff; break;
   271       }
   272     }
   273     if(ih==2) {
   274       z = one - z;
   275       if(carry!=0) z -= scalbnA(one,q0);
   276     }
   277   }
   279   /* check if recomputation is needed */
   280   if(z==zeroB) {
   281     j = 0;
   282     for (i=jz-1;i>=jk;i--) j |= iq[i];
   283     if(j==0) { /* need recomputation */
   284       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
   286       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
   287         f[jx+i] = (double) ipio2[jv+i];
   288         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
   289         q[i] = fw;
   290       }
   291       jz += k;
   292       goto recompute;
   293     }
   294   }
   296   /* chop off zero terms */
   297   if(z==0.0) {
   298     jz -= 1; q0 -= 24;
   299     while(iq[jz]==0) { jz--; q0-=24;}
   300   } else { /* break z into 24-bit if neccessary */
   301     z = scalbnA(z,-q0);
   302     if(z>=two24B) {
   303       fw = (double)((int)(twon24*z));
   304       iq[jz] = (int)(z-two24B*fw);
   305       jz += 1; q0 += 24;
   306       iq[jz] = (int) fw;
   307     } else iq[jz] = (int) z ;
   308   }
   310   /* convert integer "bit" chunk to floating-point value */
   311   fw = scalbnA(one,q0);
   312   for(i=jz;i>=0;i--) {
   313     q[i] = fw*(double)iq[i]; fw*=twon24;
   314   }
   316   /* compute PIo2[0,...,jp]*q[jz,...,0] */
   317   for(i=jz;i>=0;i--) {
   318     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
   319     fq[jz-i] = fw;
   320   }
   322   /* compress fq[] into y[] */
   323   switch(prec) {
   324   case 0:
   325     fw = 0.0;
   326     for (i=jz;i>=0;i--) fw += fq[i];
   327     y[0] = (ih==0)? fw: -fw;
   328     break;
   329   case 1:
   330   case 2:
   331     fw = 0.0;
   332     for (i=jz;i>=0;i--) fw += fq[i];
   333     y[0] = (ih==0)? fw: -fw;
   334     fw = fq[0]-fw;
   335     for (i=1;i<=jz;i++) fw += fq[i];
   336     y[1] = (ih==0)? fw: -fw;
   337     break;
   338   case 3:       /* painful */
   339     for (i=jz;i>0;i--) {
   340       fw      = fq[i-1]+fq[i];
   341       fq[i]  += fq[i-1]-fw;
   342       fq[i-1] = fw;
   343     }
   344     for (i=jz;i>1;i--) {
   345       fw      = fq[i-1]+fq[i];
   346       fq[i]  += fq[i-1]-fw;
   347       fq[i-1] = fw;
   348     }
   349     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
   350     if(ih==0) {
   351       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
   352     } else {
   353       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
   354     }
   355   }
   356   return n&7;
   357 }
   360 /*
   361  * ====================================================
   362  * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.
   363  *
   364  * Developed at SunPro, a Sun Microsystems, Inc. business.
   365  * Permission to use, copy, modify, and distribute this
   366  * software is freely granted, provided that this notice
   367  * is preserved.
   368  * ====================================================
   369  *
   370  */
   372 /* __ieee754_rem_pio2(x,y)
   373  *
   374  * return the remainder of x rem pi/2 in y[0]+y[1]
   375  * use __kernel_rem_pio2()
   376  */
   378 /*
   379  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
   380  */
   381 static const int two_over_pi[] = {
   382   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
   383   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
   384   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
   385   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
   386   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
   387   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
   388   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
   389   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
   390   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
   391   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
   392   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
   393 };
   395 static const int npio2_hw[] = {
   396   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
   397   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
   398   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
   399   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
   400   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
   401   0x404858EB, 0x404921FB,
   402 };
   404 /*
   405  * invpio2:  53 bits of 2/pi
   406  * pio2_1:   first  33 bit of pi/2
   407  * pio2_1t:  pi/2 - pio2_1
   408  * pio2_2:   second 33 bit of pi/2
   409  * pio2_2t:  pi/2 - (pio2_1+pio2_2)
   410  * pio2_3:   third  33 bit of pi/2
   411  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
   412  */
   414 static const double
   415 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
   416 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
   417 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   418 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
   419 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
   420 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
   421 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
   422 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
   423 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
   424 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
   426 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {
   427   double z,w,t,r,fn;
   428   double tx[3];
   429   int e0,i,j,nx,n,ix,hx,i0;
   431   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
   432   hx = *(i0+(int*)&x);          /* high word of x */
   433   ix = hx&0x7fffffff;
   434   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
   435     {y[0] = x; y[1] = 0; return 0;}
   436   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
   437     if(hx>0) {
   438       z = x - pio2_1;
   439       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   440         y[0] = z - pio2_1t;
   441         y[1] = (z-y[0])-pio2_1t;
   442       } else {                /* near pi/2, use 33+33+53 bit pi */
   443         z -= pio2_2;
   444         y[0] = z - pio2_2t;
   445         y[1] = (z-y[0])-pio2_2t;
   446       }
   447       return 1;
   448     } else {    /* negative x */
   449       z = x + pio2_1;
   450       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   451         y[0] = z + pio2_1t;
   452         y[1] = (z-y[0])+pio2_1t;
   453       } else {                /* near pi/2, use 33+33+53 bit pi */
   454         z += pio2_2;
   455         y[0] = z + pio2_2t;
   456         y[1] = (z-y[0])+pio2_2t;
   457       }
   458       return -1;
   459     }
   460   }
   461   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
   462     t  = fabsd(x);
   463     n  = (int) (t*invpio2+half);
   464     fn = (double)n;
   465     r  = t-fn*pio2_1;
   466     w  = fn*pio2_1t;    /* 1st round good to 85 bit */
   467     if(n<32&&ix!=npio2_hw[n-1]) {
   468       y[0] = r-w;       /* quick check no cancellation */
   469     } else {
   470       j  = ix>>20;
   471       y[0] = r-w;
   472       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   473       if(i>16) {  /* 2nd iteration needed, good to 118 */
   474         t  = r;
   475         w  = fn*pio2_2;
   476         r  = t-w;
   477         w  = fn*pio2_2t-((t-r)-w);
   478         y[0] = r-w;
   479         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   480         if(i>49)  {     /* 3rd iteration need, 151 bits acc */
   481           t  = r;       /* will cover all possible cases */
   482           w  = fn*pio2_3;
   483           r  = t-w;
   484           w  = fn*pio2_3t-((t-r)-w);
   485           y[0] = r-w;
   486         }
   487       }
   488     }
   489     y[1] = (r-y[0])-w;
   490     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   491     else         return n;
   492   }
   493   /*
   494    * all other (large) arguments
   495    */
   496   if(ix>=0x7ff00000) {          /* x is inf or NaN */
   497     y[0]=y[1]=x-x; return 0;
   498   }
   499   /* set z = scalbn(|x|,ilogb(x)-23) */
   500   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
   501   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
   502   *(i0+(int*)&z) = ix - (e0<<20);
   503   for(i=0;i<2;i++) {
   504     tx[i] = (double)((int)(z));
   505     z     = (z-tx[i])*two24A;
   506   }
   507   tx[2] = z;
   508   nx = 3;
   509   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */
   510   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
   511   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   512   return n;
   513 }
   516 /* __kernel_sin( x, y, iy)
   517  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
   518  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   519  * Input y is the tail of x.
   520  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
   521  *
   522  * Algorithm
   523  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
   524  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
   525  *      3. sin(x) is approximated by a polynomial of degree 13 on
   526  *         [0,pi/4]
   527  *                               3            13
   528  *              sin(x) ~ x + S1*x + ... + S6*x
   529  *         where
   530  *
   531  *      |sin(x)         2     4     6     8     10     12  |     -58
   532  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
   533  *      |  x                                               |
   534  *
   535  *      4. sin(x+y) = sin(x) + sin'(x')*y
   536  *                  ~ sin(x) + (1-x*x/2)*y
   537  *         For better accuracy, let
   538  *                   3      2      2      2      2
   539  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
   540  *         then                   3    2
   541  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
   542  */
   543 #ifdef MIPS64
   544 #undef S1
   545 #undef S2
   546 #undef S3
   547 #undef S4
   548 #undef S5
   549 #undef S6
   550 #endif
   552 static const double
   553 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
   554 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
   555 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
   556 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
   557 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
   558 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
   560 static double __kernel_sin(double x, double y, int iy)
   561 {
   562         double z,r,v;
   563         int ix;
   564         ix = high(x)&0x7fffffff;                /* high word of x */
   565         if(ix<0x3e400000)                       /* |x| < 2**-27 */
   566            {if((int)x==0) return x;}            /* generate inexact */
   567         z       =  x*x;
   568         v       =  z*x;
   569         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
   570         if(iy==0) return x+v*(S1+z*r);
   571         else      return x-((z*(half*y-v*r)-y)-v*S1);
   572 }
   574 /*
   575  * __kernel_cos( x,  y )
   576  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
   577  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   578  * Input y is the tail of x.
   579  *
   580  * Algorithm
   581  *      1. Since cos(-x) = cos(x), we need only to consider positive x.
   582  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
   583  *      3. cos(x) is approximated by a polynomial of degree 14 on
   584  *         [0,pi/4]
   585  *                                       4            14
   586  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
   587  *         where the remez error is
   588  *
   589  *      |              2     4     6     8     10    12     14 |     -58
   590  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
   591  *      |                                                      |
   592  *
   593  *                     4     6     8     10    12     14
   594  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
   595  *             cos(x) = 1 - x*x/2 + r
   596  *         since cos(x+y) ~ cos(x) - sin(x)*y
   597  *                        ~ cos(x) - x*y,
   598  *         a correction term is necessary in cos(x) and hence
   599  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
   600  *         For better accuracy when x > 0.3, let qx = |x|/4 with
   601  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
   602  *         Then
   603  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
   604  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
   605  *         magnitude of the latter is at least a quarter of x*x/2,
   606  *         thus, reducing the rounding error in the subtraction.
   607  */
   609 static const double
   610 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
   611 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
   612 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
   613 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
   614 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
   615 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
   617 static double __kernel_cos(double x, double y)
   618 {
   619   double a,h,z,r,qx=0;
   620   int ix;
   621   ix = high(x)&0x7fffffff;              /* ix = |x|'s high word*/
   622   if(ix<0x3e400000) {                   /* if x < 2**27 */
   623     if(((int)x)==0) return one;         /* generate inexact */
   624   }
   625   z  = x*x;
   626   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
   627   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */
   628     return one - (0.5*z - (z*r - x*y));
   629   else {
   630     if(ix > 0x3fe90000) {               /* x > 0.78125 */
   631       qx = 0.28125;
   632     } else {
   633       set_high(&qx, ix-0x00200000); /* x/4 */
   634       set_low(&qx, 0);
   635     }
   636     h = 0.5*z-qx;
   637     a = one-qx;
   638     return a - (h - (z*r-x*y));
   639   }
   640 }
   642 /* __kernel_tan( x, y, k )
   643  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
   644  * Input x is assumed to be bounded by ~pi/4 in magnitude.
   645  * Input y is the tail of x.
   646  * Input k indicates whether tan (if k=1) or
   647  * -1/tan (if k= -1) is returned.
   648  *
   649  * Algorithm
   650  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
   651  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
   652  *      3. tan(x) is approximated by a odd polynomial of degree 27 on
   653  *         [0,0.67434]
   654  *                               3             27
   655  *              tan(x) ~ x + T1*x + ... + T13*x
   656  *         where
   657  *
   658  *              |tan(x)         2     4            26   |     -59.2
   659  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
   660  *              |  x                                    |
   661  *
   662  *         Note: tan(x+y) = tan(x) + tan'(x)*y
   663  *                        ~ tan(x) + (1+x*x)*y
   664  *         Therefore, for better accuracy in computing tan(x+y), let
   665  *                   3      2      2       2       2
   666  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
   667  *         then
   668  *                                  3    2
   669  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
   670  *
   671  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
   672  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
   673  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
   674  */
   676 static const double
   677 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
   678 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
   679 T[] =  {
   680   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
   681   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
   682   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
   683   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
   684   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
   685   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
   686   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
   687   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
   688   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
   689   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
   690   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
   691  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
   692   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
   693 };
   695 static double __kernel_tan(double x, double y, int iy)
   696 {
   697   double z,r,v,w,s;
   698   int ix,hx;
   699   hx = high(x);           /* high word of x */
   700   ix = hx&0x7fffffff;     /* high word of |x| */
   701   if(ix<0x3e300000) {                     /* x < 2**-28 */
   702     if((int)x==0) {                       /* generate inexact */
   703       if (((ix | low(x)) | (iy + 1)) == 0)
   704         return one / fabsd(x);
   705       else {
   706         if (iy == 1)
   707           return x;
   708         else {    /* compute -1 / (x+y) carefully */
   709           double a, t;
   711           z = w = x + y;
   712           set_low(&z, 0);
   713           v = y - (z - x);
   714           t = a = -one / w;
   715           set_low(&t, 0);
   716           s = one + t * z;
   717           return t + a * (s + t * v);
   718         }
   719       }
   720     }
   721   }
   722   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */
   723     if(hx<0) {x = -x; y = -y;}
   724     z = pio4-x;
   725     w = pio4lo-y;
   726     x = z+w; y = 0.0;
   727   }
   728   z       =  x*x;
   729   w       =  z*z;
   730   /* Break x^5*(T[1]+x^2*T[2]+...) into
   731    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
   732    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
   733    */
   734   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
   735   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
   736   s = z*x;
   737   r = y + z*(s*(r+v)+y);
   738   r += T[0]*s;
   739   w = x+r;
   740   if(ix>=0x3FE59428) {
   741     v = (double)iy;
   742     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
   743   }
   744   if(iy==1) return w;
   745   else {          /* if allow error up to 2 ulp,
   746                      simply return -1.0/(x+r) here */
   747     /*  compute -1.0/(x+r) accurately */
   748     double a,t;
   749     z  = w;
   750     set_low(&z, 0);
   751     v  = r-(z - x);     /* z+v = r+x */
   752     t = a  = -1.0/w;    /* a = -1.0/w */
   753     set_low(&t, 0);
   754     s  = 1.0+t*z;
   755     return t+a*(s+t*v);
   756   }
   757 }
   760 //----------------------------------------------------------------------
   761 //
   762 // Routines for new sin/cos implementation
   763 //
   764 //----------------------------------------------------------------------
   766 /* sin(x)
   767  * Return sine function of x.
   768  *
   769  * kernel function:
   770  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   771  *      __kernel_cos            ... cose function on [-pi/4,pi/4]
   772  *      __ieee754_rem_pio2      ... argument reduction routine
   773  *
   774  * Method.
   775  *      Let S,C and T denote the sin, cos and tan respectively on
   776  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   777  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   778  *      We have
   779  *
   780  *          n        sin(x)      cos(x)        tan(x)
   781  *     ----------------------------------------------------------
   782  *          0          S           C             T
   783  *          1          C          -S            -1/T
   784  *          2         -S          -C             T
   785  *          3         -C           S            -1/T
   786  *     ----------------------------------------------------------
   787  *
   788  * Special cases:
   789  *      Let trig be any of sin, cos, or tan.
   790  *      trig(+-INF)  is NaN, with signals;
   791  *      trig(NaN)    is that NaN;
   792  *
   793  * Accuracy:
   794  *      TRIG(x) returns trig(x) nearly rounded
   795  */
   797 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
   798   double y[2],z=0.0;
   799   int n, ix;
   801   /* High word of x. */
   802   ix = high(x);
   804   /* |x| ~< pi/4 */
   805   ix &= 0x7fffffff;
   806   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
   808   /* sin(Inf or NaN) is NaN */
   809   else if (ix>=0x7ff00000) return x-x;
   811   /* argument reduction needed */
   812   else {
   813     n = __ieee754_rem_pio2(x,y);
   814     switch(n&3) {
   815     case 0: return  __kernel_sin(y[0],y[1],1);
   816     case 1: return  __kernel_cos(y[0],y[1]);
   817     case 2: return -__kernel_sin(y[0],y[1],1);
   818     default:
   819       return -__kernel_cos(y[0],y[1]);
   820     }
   821   }
   822 JRT_END
   824 /* cos(x)
   825  * Return cosine function of x.
   826  *
   827  * kernel function:
   828  *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   829  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
   830  *      __ieee754_rem_pio2      ... argument reduction routine
   831  *
   832  * Method.
   833  *      Let S,C and T denote the sin, cos and tan respectively on
   834  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   835  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   836  *      We have
   837  *
   838  *          n        sin(x)      cos(x)        tan(x)
   839  *     ----------------------------------------------------------
   840  *          0          S           C             T
   841  *          1          C          -S            -1/T
   842  *          2         -S          -C             T
   843  *          3         -C           S            -1/T
   844  *     ----------------------------------------------------------
   845  *
   846  * Special cases:
   847  *      Let trig be any of sin, cos, or tan.
   848  *      trig(+-INF)  is NaN, with signals;
   849  *      trig(NaN)    is that NaN;
   850  *
   851  * Accuracy:
   852  *      TRIG(x) returns trig(x) nearly rounded
   853  */
   855 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
   856   double y[2],z=0.0;
   857   int n, ix;
   859   /* High word of x. */
   860   ix = high(x);
   862   /* |x| ~< pi/4 */
   863   ix &= 0x7fffffff;
   864   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
   866   /* cos(Inf or NaN) is NaN */
   867   else if (ix>=0x7ff00000) return x-x;
   869   /* argument reduction needed */
   870   else {
   871     n = __ieee754_rem_pio2(x,y);
   872     switch(n&3) {
   873     case 0: return  __kernel_cos(y[0],y[1]);
   874     case 1: return -__kernel_sin(y[0],y[1],1);
   875     case 2: return -__kernel_cos(y[0],y[1]);
   876     default:
   877       return  __kernel_sin(y[0],y[1],1);
   878     }
   879   }
   880 JRT_END
   882 /* tan(x)
   883  * Return tangent function of x.
   884  *
   885  * kernel function:
   886  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
   887  *      __ieee754_rem_pio2      ... argument reduction routine
   888  *
   889  * Method.
   890  *      Let S,C and T denote the sin, cos and tan respectively on
   891  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   892  *      in [-pi/4 , +pi/4], and let n = k mod 4.
   893  *      We have
   894  *
   895  *          n        sin(x)      cos(x)        tan(x)
   896  *     ----------------------------------------------------------
   897  *          0          S           C             T
   898  *          1          C          -S            -1/T
   899  *          2         -S          -C             T
   900  *          3         -C           S            -1/T
   901  *     ----------------------------------------------------------
   902  *
   903  * Special cases:
   904  *      Let trig be any of sin, cos, or tan.
   905  *      trig(+-INF)  is NaN, with signals;
   906  *      trig(NaN)    is that NaN;
   907  *
   908  * Accuracy:
   909  *      TRIG(x) returns trig(x) nearly rounded
   910  */
   912 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
   913   double y[2],z=0.0;
   914   int n, ix;
   916   /* High word of x. */
   917   ix = high(x);
   919   /* |x| ~< pi/4 */
   920   ix &= 0x7fffffff;
   921   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
   923   /* tan(Inf or NaN) is NaN */
   924   else if (ix>=0x7ff00000) return x-x;            /* NaN */
   926   /* argument reduction needed */
   927   else {
   928     n = __ieee754_rem_pio2(x,y);
   929     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
   930                                                      -1 -- n odd */
   931   }
   932 JRT_END
   935 #ifdef WIN32
   936 # pragma optimize ( "", on )
   937 #endif

mercurial