jdk8-mips64-public/hotspot: src/share/vm/runtime/sharedRuntimeTrig.cpp@f3ceb2e8bd21

8240295: hs_err elapsed time in seconds is not accurate enough
Reviewed-by: dholmes, sspitsyn

     1 /*

     2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.

     8  *

     9  * This code is distributed in the hope that it will be useful, but WITHOUT

    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    12  * version 2 for more details (a copy is included in the LICENSE file that

    13  * accompanied this code).

    14  *

    15  * You should have received a copy of the GNU General Public License version

    16  * 2 along with this work; if not, write to the Free Software Foundation,

    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    18  *

    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    20  * or visit www.oracle.com if you need additional information or have any

    21  * questions.

    22  *

    23  */

    25 #include "precompiled.hpp"

    26 #include "prims/jni.h"

    27 #include "runtime/interfaceSupport.hpp"

    28 #include "runtime/sharedRuntime.hpp"

    30 // This file contains copies of the fdlibm routines used by

    31 // StrictMath. It turns out that it is almost always required to use

    32 // these runtime routines; the Intel CPU doesn't meet the Java

    33 // specification for sin/cos outside a certain limited argument range,

    34 // and the SPARC CPU doesn't appear to have sin/cos instructions. It

    35 // also turns out that avoiding the indirect call through function

    36 // pointer out to libjava.so in SharedRuntime speeds these routines up

    37 // by roughly 15% on both Win32/x86 and Solaris/SPARC.

    39 // Enabling optimizations in this file causes incorrect code to be

    40 // generated; can not figure out how to turn down optimization for one

    41 // file in the IDE on Windows

    42 #ifdef WIN32

    43 # pragma optimize ( "", off )

    44 #endif

    46 /* The above workaround now causes more problems with the latest MS compiler.

    47  * Visual Studio 2010's /GS option tries to guard against buffer overruns.

    48  * /GS is on by default if you specify optimizations, which we do globally

    49  * via /W3 /O2. However the above selective turning off of optimizations means

    50  * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)

    51  * then the compilation fails. There are several possible solutions

    52  * (1) Remove that pragma above as obsolete with VS2010 - requires testing.

    53  * (2) Stop treating warnings as errors - would be a backward step

    54  * (3) Disable /GS - may help performance but you lose the security checks

    55  * (4) Disable the warning with "#pragma warning( disable : 4748 )"

    56  * (5) Disable planting the code with  __declspec(safebuffers)

    57  * I've opted for (5) although we should investigate the local performance

    58  * benefits of (1) and global performance benefit of (3).

    59  */

    60 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))

    61 #define SAFEBUF __declspec(safebuffers)

    62 #else

    63 #define SAFEBUF

    64 #endif

    66 #include "runtime/sharedRuntimeMath.hpp"

    68 /*

    69  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)

    70  * double x[],y[]; int e0,nx,prec; int ipio2[];

    71  *

    72  * __kernel_rem_pio2 return the last three digits of N with

    73  *              y = x - N*pi/2

    74  * so that |y| < pi/2.

    75  *

    76  * The method is to compute the integer (mod 8) and fraction parts of

    77  * (2/pi)*x without doing the full multiplication. In general we

    78  * skip the part of the product that are known to be a huge integer (

    79  * more accurately, = 0 mod 8 ). Thus the number of operations are

    80  * independent of the exponent of the input.

    81  *

    82  * (2/pi) is represented by an array of 24-bit integers in ipio2[].

    83  *

    84  * Input parameters:

    85  *      x[]     The input value (must be positive) is broken into nx

    86  *              pieces of 24-bit integers in double precision format.

    87  *              x[i] will be the i-th 24 bit of x. The scaled exponent

    88  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0

    89  *              match x's up to 24 bits.

    90  *

    91  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:

    92  *                      e0 = ilogb(z)-23

    93  *                      z  = scalbn(z,-e0)

    94  *              for i = 0,1,2

    95  *                      x[i] = floor(z)

    96  *                      z    = (z-x[i])*2**24

    97  *

    98  *

    99  *      y[]     ouput result in an array of double precision numbers.

   100  *              The dimension of y[] is:

   101  *                      24-bit  precision       1

   102  *                      53-bit  precision       2

   103  *                      64-bit  precision       2

   104  *                      113-bit precision       3

   105  *              The actual value is the sum of them. Thus for 113-bit

   106  *              precsion, one may have to do something like:

   107  *

   108  *              long double t,w,r_head, r_tail;

   109  *              t = (long double)y[2] + (long double)y[1];

   110  *              w = (long double)y[0];

   111  *              r_head = t+w;

   112  *              r_tail = w - (r_head - t);

   113  *

   114  *      e0      The exponent of x[0]

   115  *

   116  *      nx      dimension of x[]

   117  *

   118  *      prec    an interger indicating the precision:

   119  *                      0       24  bits (single)

   120  *                      1       53  bits (double)

   121  *                      2       64  bits (extended)

   122  *                      3       113 bits (quad)

   123  *

   124  *      ipio2[]

   125  *              integer array, contains the (24*i)-th to (24*i+23)-th

   126  *              bit of 2/pi after binary point. The corresponding

   127  *              floating value is

   128  *

   129  *                      ipio2[i] * 2^(-24(i+1)).

   130  *

   131  * External function:

   132  *      double scalbn(), floor();

   133  *

   134  *

   135  * Here is the description of some local variables:

   136  *

   137  *      jk      jk+1 is the initial number of terms of ipio2[] needed

   138  *              in the computation. The recommended value is 2,3,4,

   139  *              6 for single, double, extended,and quad.

   140  *

   141  *      jz      local integer variable indicating the number of

   142  *              terms of ipio2[] used.

   143  *

   144  *      jx      nx - 1

   145  *

   146  *      jv      index for pointing to the suitable ipio2[] for the

   147  *              computation. In general, we want

   148  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8

   149  *              is an integer. Thus

   150  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv

   151  *              Hence jv = max(0,(e0-3)/24).

   152  *

   153  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.

   154  *

   155  *      q[]     double array with integral value, representing the

   156  *              24-bits chunk of the product of x and 2/pi.

   157  *

   158  *      q0      the corresponding exponent of q[0]. Note that the

   159  *              exponent for q[i] would be q0-24*i.

   160  *

   161  *      PIo2[]  double precision array, obtained by cutting pi/2

   162  *              into 24 bits chunks.

   163  *

   164  *      f[]     ipio2[] in floating point

   165  *

   166  *      iq[]    integer array by breaking up q[] in 24-bits chunk.

   167  *

   168  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]

   169  *

   170  *      ih      integer. If >0 it indicats q[] is >= 0.5, hence

   171  *              it also indicates the *sign* of the result.

   172  *

   173  */

   176 /*

   177  * Constants:

   178  * The hexadecimal values are the intended ones for the following

   179  * constants. The decimal values may be used, provided that the

   180  * compiler will convert from decimal to binary accurately enough

   181  * to produce the hexadecimal values shown.

   182  */

   185 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */

   187 static const double PIo2[] = {

   188   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */

   189   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */

   190   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */

   191   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */

   192   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */

   193   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */

   194   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */

   195   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */

   196 };

   198 static const double

   199 zeroB   = 0.0,

   200 one     = 1.0,

   201 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

   202 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */

   204 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {

   205   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;

   206   double z,fw,f[20],fq[20],q[20];

   208   /* initialize jk*/

   209   jk = init_jk[prec];

   210   jp = jk;

   212   /* determine jx,jv,q0, note that 3>q0 */

   213   jx =  nx-1;

   214   jv = (e0-3)/24; if(jv<0) jv=0;

   215   q0 =  e0-24*(jv+1);

   217   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */

   218   j = jv-jx; m = jx+jk;

   219   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];

   221   /* compute q[0],q[1],...q[jk] */

   222   for (i=0;i<=jk;i++) {

   223     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;

   224   }

   226   jz = jk;

   227 recompute:

   228   /* distill q[] into iq[] reversingly */

   229   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {

   230     fw    =  (double)((int)(twon24* z));

   231     iq[i] =  (int)(z-two24B*fw);

   232     z     =  q[j-1]+fw;

   233   }

   235   /* compute n */

   236   z  = scalbnA(z,q0);           /* actual value of z */

   237   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */

   238   n  = (int) z;

   239   z -= (double)n;

   240   ih = 0;

   241   if(q0>0) {    /* need iq[jz-1] to determine n */

   242     i  = (iq[jz-1]>>(24-q0)); n += i;

   243     iq[jz-1] -= i<<(24-q0);

   244     ih = iq[jz-1]>>(23-q0);

   245   }

   246   else if(q0==0) ih = iq[jz-1]>>23;

   247   else if(z>=0.5) ih=2;

   249   if(ih>0) {    /* q > 0.5 */

   250     n += 1; carry = 0;

   251     for(i=0;i<jz ;i++) {        /* compute 1-q */

   252       j = iq[i];

   253       if(carry==0) {

   254         if(j!=0) {

   255           carry = 1; iq[i] = 0x1000000- j;

   256         }

   257       } else  iq[i] = 0xffffff - j;

   258     }

   259     if(q0>0) {          /* rare case: chance is 1 in 12 */

   260       switch(q0) {

   261       case 1:

   262         iq[jz-1] &= 0x7fffff; break;

   263       case 2:

   264         iq[jz-1] &= 0x3fffff; break;

   265       }

   266     }

   267     if(ih==2) {

   268       z = one - z;

   269       if(carry!=0) z -= scalbnA(one,q0);

   270     }

   271   }

   273   /* check if recomputation is needed */

   274   if(z==zeroB) {

   275     j = 0;

   276     for (i=jz-1;i>=jk;i--) j |= iq[i];

   277     if(j==0) { /* need recomputation */

   278       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */

   280       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */

   281         f[jx+i] = (double) ipio2[jv+i];

   282         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];

   283         q[i] = fw;

   284       }

   285       jz += k;

   286       goto recompute;

   287     }

   288   }

   290   /* chop off zero terms */

   291   if(z==0.0) {

   292     jz -= 1; q0 -= 24;

   293     while(iq[jz]==0) { jz--; q0-=24;}

   294   } else { /* break z into 24-bit if neccessary */

   295     z = scalbnA(z,-q0);

   296     if(z>=two24B) {

   297       fw = (double)((int)(twon24*z));

   298       iq[jz] = (int)(z-two24B*fw);

   299       jz += 1; q0 += 24;

   300       iq[jz] = (int) fw;

   301     } else iq[jz] = (int) z ;

   302   }

   304   /* convert integer "bit" chunk to floating-point value */

   305   fw = scalbnA(one,q0);

   306   for(i=jz;i>=0;i--) {

   307     q[i] = fw*(double)iq[i]; fw*=twon24;

   308   }

   310   /* compute PIo2[0,...,jp]*q[jz,...,0] */

   311   for(i=jz;i>=0;i--) {

   312     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];

   313     fq[jz-i] = fw;

   314   }

   316   /* compress fq[] into y[] */

   317   switch(prec) {

   318   case 0:

   319     fw = 0.0;

   320     for (i=jz;i>=0;i--) fw += fq[i];

   321     y[0] = (ih==0)? fw: -fw;

   322     break;

   323   case 1:

   324   case 2:

   325     fw = 0.0;

   326     for (i=jz;i>=0;i--) fw += fq[i];

   327     y[0] = (ih==0)? fw: -fw;

   328     fw = fq[0]-fw;

   329     for (i=1;i<=jz;i++) fw += fq[i];

   330     y[1] = (ih==0)? fw: -fw;

   331     break;

   332   case 3:       /* painful */

   333     for (i=jz;i>0;i--) {

   334       fw      = fq[i-1]+fq[i];

   335       fq[i]  += fq[i-1]-fw;

   336       fq[i-1] = fw;

   337     }

   338     for (i=jz;i>1;i--) {

   339       fw      = fq[i-1]+fq[i];

   340       fq[i]  += fq[i-1]-fw;

   341       fq[i-1] = fw;

   342     }

   343     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];

   344     if(ih==0) {

   345       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;

   346     } else {

   347       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;

   348     }

   349   }

   350   return n&7;

   351 }

   354 /*

   355  * ====================================================

   356  * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.

   357  *

   358  * Developed at SunPro, a Sun Microsystems, Inc. business.

   359  * Permission to use, copy, modify, and distribute this

   360  * software is freely granted, provided that this notice

   361  * is preserved.

   362  * ====================================================

   363  *

   364  */

   366 /* __ieee754_rem_pio2(x,y)

   367  *

   368  * return the remainder of x rem pi/2 in y[0]+y[1]

   369  * use __kernel_rem_pio2()

   370  */

   372 /*

   373  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi

   374  */

   375 static const int two_over_pi[] = {

   376   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,

   377   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,

   378   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,

   379   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,

   380   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,

   381   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,

   382   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,

   383   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,

   384   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,

   385   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,

   386   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,

   387 };

   389 static const int npio2_hw[] = {

   390   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,

   391   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,

   392   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,

   393   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,

   394   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,

   395   0x404858EB, 0x404921FB,

   396 };

   398 /*

   399  * invpio2:  53 bits of 2/pi

   400  * pio2_1:   first  33 bit of pi/2

   401  * pio2_1t:  pi/2 - pio2_1

   402  * pio2_2:   second 33 bit of pi/2

   403  * pio2_2t:  pi/2 - (pio2_1+pio2_2)

   404  * pio2_3:   third  33 bit of pi/2

   405  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)

   406  */

   408 static const double

   409 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */

   410 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */

   411 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

   412 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */

   413 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */

   414 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */

   415 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */

   416 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */

   417 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */

   418 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */

   420 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {

   421   double z,w,t,r,fn;

   422   double tx[3];

   423   int e0,i,j,nx,n,ix,hx,i0;

   425   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */

   426   hx = *(i0+(int*)&x);          /* high word of x */

   427   ix = hx&0x7fffffff;

   428   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */

   429     {y[0] = x; y[1] = 0; return 0;}

   430   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */

   431     if(hx>0) {

   432       z = x - pio2_1;

   433       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */

   434         y[0] = z - pio2_1t;

   435         y[1] = (z-y[0])-pio2_1t;

   436       } else {                /* near pi/2, use 33+33+53 bit pi */

   437         z -= pio2_2;

   438         y[0] = z - pio2_2t;

   439         y[1] = (z-y[0])-pio2_2t;

   440       }

   441       return 1;

   442     } else {    /* negative x */

   443       z = x + pio2_1;

   444       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */

   445         y[0] = z + pio2_1t;

   446         y[1] = (z-y[0])+pio2_1t;

   447       } else {                /* near pi/2, use 33+33+53 bit pi */

   448         z += pio2_2;

   449         y[0] = z + pio2_2t;

   450         y[1] = (z-y[0])+pio2_2t;

   451       }

   452       return -1;

   453     }

   454   }

   455   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */

   456     t  = fabsd(x);

   457     n  = (int) (t*invpio2+half);

   458     fn = (double)n;

   459     r  = t-fn*pio2_1;

   460     w  = fn*pio2_1t;    /* 1st round good to 85 bit */

   461     if(n<32&&ix!=npio2_hw[n-1]) {

   462       y[0] = r-w;       /* quick check no cancellation */

   463     } else {

   464       j  = ix>>20;

   465       y[0] = r-w;

   466       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);

   467       if(i>16) {  /* 2nd iteration needed, good to 118 */

   468         t  = r;

   469         w  = fn*pio2_2;

   470         r  = t-w;

   471         w  = fn*pio2_2t-((t-r)-w);

   472         y[0] = r-w;

   473         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);

   474         if(i>49)  {     /* 3rd iteration need, 151 bits acc */

   475           t  = r;       /* will cover all possible cases */

   476           w  = fn*pio2_3;

   477           r  = t-w;

   478           w  = fn*pio2_3t-((t-r)-w);

   479           y[0] = r-w;

   480         }

   481       }

   482     }

   483     y[1] = (r-y[0])-w;

   484     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}

   485     else         return n;

   486   }

   487   /*

   488    * all other (large) arguments

   489    */

   490   if(ix>=0x7ff00000) {          /* x is inf or NaN */

   491     y[0]=y[1]=x-x; return 0;

   492   }

   493   /* set z = scalbn(|x|,ilogb(x)-23) */

   494   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);

   495   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */

   496   *(i0+(int*)&z) = ix - (e0<<20);

   497   for(i=0;i<2;i++) {

   498     tx[i] = (double)((int)(z));

   499     z     = (z-tx[i])*two24A;

   500   }

   501   tx[2] = z;

   502   nx = 3;

   503   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */

   504   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);

   505   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}

   506   return n;

   507 }

   510 /* __kernel_sin( x, y, iy)

   511  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854

   512  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   513  * Input y is the tail of x.

   514  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).

   515  *

   516  * Algorithm

   517  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.

   518  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.

   519  *      3. sin(x) is approximated by a polynomial of degree 13 on

   520  *         [0,pi/4]

   521  *                               3            13

   522  *              sin(x) ~ x + S1*x + ... + S6*x

   523  *         where

   524  *

   525  *      |sin(x)         2     4     6     8     10     12  |     -58

   526  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2

   527  *      |  x                                               |

   528  *

   529  *      4. sin(x+y) = sin(x) + sin'(x')*y

   530  *                  ~ sin(x) + (1-x*x/2)*y

   531  *         For better accuracy, let

   532  *                   3      2      2      2      2

   533  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))

   534  *         then                   3    2

   535  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))

   536  */

   538 static const double

   539 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */

   540 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */

   541 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */

   542 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */

   543 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */

   544 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */

   546 static double __kernel_sin(double x, double y, int iy)

   547 {

   548         double z,r,v;

   549         int ix;

   550         ix = high(x)&0x7fffffff;                /* high word of x */

   551         if(ix<0x3e400000)                       /* |x| < 2**-27 */

   552            {if((int)x==0) return x;}            /* generate inexact */

   553         z       =  x*x;

   554         v       =  z*x;

   555         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));

   556         if(iy==0) return x+v*(S1+z*r);

   557         else      return x-((z*(half*y-v*r)-y)-v*S1);

   558 }

   560 /*

   561  * __kernel_cos( x,  y )

   562  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164

   563  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   564  * Input y is the tail of x.

   565  *

   566  * Algorithm

   567  *      1. Since cos(-x) = cos(x), we need only to consider positive x.

   568  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.

   569  *      3. cos(x) is approximated by a polynomial of degree 14 on

   570  *         [0,pi/4]

   571  *                                       4            14

   572  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x

   573  *         where the remez error is

   574  *

   575  *      |              2     4     6     8     10    12     14 |     -58

   576  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2

   577  *      |                                                      |

   578  *

   579  *                     4     6     8     10    12     14

   580  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then

   581  *             cos(x) = 1 - x*x/2 + r

   582  *         since cos(x+y) ~ cos(x) - sin(x)*y

   583  *                        ~ cos(x) - x*y,

   584  *         a correction term is necessary in cos(x) and hence

   585  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))

   586  *         For better accuracy when x > 0.3, let qx = |x|/4 with

   587  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.

   588  *         Then

   589  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).

   590  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the

   591  *         magnitude of the latter is at least a quarter of x*x/2,

   592  *         thus, reducing the rounding error in the subtraction.

   593  */

   595 static const double

   596 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */

   597 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */

   598 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */

   599 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */

   600 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */

   601 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */

   603 static double __kernel_cos(double x, double y)

   604 {

   605   double a,h,z,r,qx=0;

   606   int ix;

   607   ix = high(x)&0x7fffffff;              /* ix = |x|'s high word*/

   608   if(ix<0x3e400000) {                   /* if x < 2**27 */

   609     if(((int)x)==0) return one;         /* generate inexact */

   610   }

   611   z  = x*x;

   612   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));

   613   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */

   614     return one - (0.5*z - (z*r - x*y));

   615   else {

   616     if(ix > 0x3fe90000) {               /* x > 0.78125 */

   617       qx = 0.28125;

   618     } else {

   619       set_high(&qx, ix-0x00200000); /* x/4 */

   620       set_low(&qx, 0);

   621     }

   622     h = 0.5*z-qx;

   623     a = one-qx;

   624     return a - (h - (z*r-x*y));

   625   }

   626 }

   628 /* __kernel_tan( x, y, k )

   629  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854

   630  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   631  * Input y is the tail of x.

   632  * Input k indicates whether tan (if k=1) or

   633  * -1/tan (if k= -1) is returned.

   634  *

   635  * Algorithm

   636  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.

   637  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.

   638  *      3. tan(x) is approximated by a odd polynomial of degree 27 on

   639  *         [0,0.67434]

   640  *                               3             27

   641  *              tan(x) ~ x + T1*x + ... + T13*x

   642  *         where

   643  *

   644  *              |tan(x)         2     4            26   |     -59.2

   645  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2

   646  *              |  x                                    |

   647  *

   648  *         Note: tan(x+y) = tan(x) + tan'(x)*y

   649  *                        ~ tan(x) + (1+x*x)*y

   650  *         Therefore, for better accuracy in computing tan(x+y), let

   651  *                   3      2      2       2       2

   652  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))

   653  *         then

   654  *                                  3    2

   655  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))

   656  *

   657  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then

   658  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))

   659  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))

   660  */

   662 static const double

   663 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */

   664 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */

   665 T[] =  {

   666   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */

   667   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */

   668   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */

   669   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */

   670   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */

   671   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */

   672   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */

   673   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */

   674   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */

   675   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */

   676   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */

   677  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */

   678   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */

   679 };

   681 static double __kernel_tan(double x, double y, int iy)

   682 {

   683   double z,r,v,w,s;

   684   int ix,hx;

   685   hx = high(x);           /* high word of x */

   686   ix = hx&0x7fffffff;     /* high word of |x| */

   687   if(ix<0x3e300000) {                     /* x < 2**-28 */

   688     if((int)x==0) {                       /* generate inexact */

   689       if (((ix | low(x)) | (iy + 1)) == 0)

   690         return one / fabsd(x);

   691       else {

   692         if (iy == 1)

   693           return x;

   694         else {    /* compute -1 / (x+y) carefully */

   695           double a, t;

   697           z = w = x + y;

   698           set_low(&z, 0);

   699           v = y - (z - x);

   700           t = a = -one / w;

   701           set_low(&t, 0);

   702           s = one + t * z;

   703           return t + a * (s + t * v);

   704         }

   705       }

   706     }

   707   }

   708   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */

   709     if(hx<0) {x = -x; y = -y;}

   710     z = pio4-x;

   711     w = pio4lo-y;

   712     x = z+w; y = 0.0;

   713   }

   714   z       =  x*x;

   715   w       =  z*z;

   716   /* Break x^5*(T[1]+x^2*T[2]+...) into

   717    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +

   718    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))

   719    */

   720   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));

   721   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));

   722   s = z*x;

   723   r = y + z*(s*(r+v)+y);

   724   r += T[0]*s;

   725   w = x+r;

   726   if(ix>=0x3FE59428) {

   727     v = (double)iy;

   728     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));

   729   }

   730   if(iy==1) return w;

   731   else {          /* if allow error up to 2 ulp,

   732                      simply return -1.0/(x+r) here */

   733     /*  compute -1.0/(x+r) accurately */

   734     double a,t;

   735     z  = w;

   736     set_low(&z, 0);

   737     v  = r-(z - x);     /* z+v = r+x */

   738     t = a  = -1.0/w;    /* a = -1.0/w */

   739     set_low(&t, 0);

   740     s  = 1.0+t*z;

   741     return t+a*(s+t*v);

   742   }

   743 }

   746 //----------------------------------------------------------------------

   747 //

   748 // Routines for new sin/cos implementation

   749 //

   750 //----------------------------------------------------------------------

   752 /* sin(x)

   753  * Return sine function of x.

   754  *

   755  * kernel function:

   756  *      __kernel_sin            ... sine function on [-pi/4,pi/4]

   757  *      __kernel_cos            ... cose function on [-pi/4,pi/4]

   758  *      __ieee754_rem_pio2      ... argument reduction routine

   759  *

   760  * Method.

   761  *      Let S,C and T denote the sin, cos and tan respectively on

   762  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   763  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   764  *      We have

   765  *

   766  *          n        sin(x)      cos(x)        tan(x)

   767  *     ----------------------------------------------------------

   768  *          0          S           C             T

   769  *          1          C          -S            -1/T

   770  *          2         -S          -C             T

   771  *          3         -C           S            -1/T

   772  *     ----------------------------------------------------------

   773  *

   774  * Special cases:

   775  *      Let trig be any of sin, cos, or tan.

   776  *      trig(+-INF)  is NaN, with signals;

   777  *      trig(NaN)    is that NaN;

   778  *

   779  * Accuracy:

   780  *      TRIG(x) returns trig(x) nearly rounded

   781  */

   783 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))

   784   double y[2],z=0.0;

   785   int n, ix;

   787   /* High word of x. */

   788   ix = high(x);

   790   /* |x| ~< pi/4 */

   791   ix &= 0x7fffffff;

   792   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);

   794   /* sin(Inf or NaN) is NaN */

   795   else if (ix>=0x7ff00000) return x-x;

   797   /* argument reduction needed */

   798   else {

   799     n = __ieee754_rem_pio2(x,y);

   800     switch(n&3) {

   801     case 0: return  __kernel_sin(y[0],y[1],1);

   802     case 1: return  __kernel_cos(y[0],y[1]);

   803     case 2: return -__kernel_sin(y[0],y[1],1);

   804     default:

   805       return -__kernel_cos(y[0],y[1]);

   806     }

   807   }

   808 JRT_END

   810 /* cos(x)

   811  * Return cosine function of x.

   812  *

   813  * kernel function:

   814  *      __kernel_sin            ... sine function on [-pi/4,pi/4]

   815  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]

   816  *      __ieee754_rem_pio2      ... argument reduction routine

   817  *

   818  * Method.

   819  *      Let S,C and T denote the sin, cos and tan respectively on

   820  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   821  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   822  *      We have

   823  *

   824  *          n        sin(x)      cos(x)        tan(x)

   825  *     ----------------------------------------------------------

   826  *          0          S           C             T

   827  *          1          C          -S            -1/T

   828  *          2         -S          -C             T

   829  *          3         -C           S            -1/T

   830  *     ----------------------------------------------------------

   831  *

   832  * Special cases:

   833  *      Let trig be any of sin, cos, or tan.

   834  *      trig(+-INF)  is NaN, with signals;

   835  *      trig(NaN)    is that NaN;

   836  *

   837  * Accuracy:

   838  *      TRIG(x) returns trig(x) nearly rounded

   839  */

   841 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))

   842   double y[2],z=0.0;

   843   int n, ix;

   845   /* High word of x. */

   846   ix = high(x);

   848   /* |x| ~< pi/4 */

   849   ix &= 0x7fffffff;

   850   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);

   852   /* cos(Inf or NaN) is NaN */

   853   else if (ix>=0x7ff00000) return x-x;

   855   /* argument reduction needed */

   856   else {

   857     n = __ieee754_rem_pio2(x,y);

   858     switch(n&3) {

   859     case 0: return  __kernel_cos(y[0],y[1]);

   860     case 1: return -__kernel_sin(y[0],y[1],1);

   861     case 2: return -__kernel_cos(y[0],y[1]);

   862     default:

   863       return  __kernel_sin(y[0],y[1],1);

   864     }

   865   }

   866 JRT_END

   868 /* tan(x)

   869  * Return tangent function of x.

   870  *

   871  * kernel function:

   872  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]

   873  *      __ieee754_rem_pio2      ... argument reduction routine

   874  *

   875  * Method.

   876  *      Let S,C and T denote the sin, cos and tan respectively on

   877  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   878  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   879  *      We have

   880  *

   881  *          n        sin(x)      cos(x)        tan(x)

   882  *     ----------------------------------------------------------

   883  *          0          S           C             T

   884  *          1          C          -S            -1/T

   885  *          2         -S          -C             T

   886  *          3         -C           S            -1/T

   887  *     ----------------------------------------------------------

   888  *

   889  * Special cases:

   890  *      Let trig be any of sin, cos, or tan.

   891  *      trig(+-INF)  is NaN, with signals;

   892  *      trig(NaN)    is that NaN;

   893  *

   894  * Accuracy:

   895  *      TRIG(x) returns trig(x) nearly rounded

   896  */

   898 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))

   899   double y[2],z=0.0;

   900   int n, ix;

   902   /* High word of x. */

   903   ix = high(x);

   905   /* |x| ~< pi/4 */

   906   ix &= 0x7fffffff;

   907   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);

   909   /* tan(Inf or NaN) is NaN */

   910   else if (ix>=0x7ff00000) return x-x;            /* NaN */

   912   /* argument reduction needed */

   913   else {

   914     n = __ieee754_rem_pio2(x,y);

   915     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even

   916                                                      -1 -- n odd */

   917   }

   918 JRT_END

   921 #ifdef WIN32

   922 # pragma optimize ( "", on )

   923 #endif

Mercurial > jdk8-mips64-public > hotspot / file revision

src/share/vm/runtime/sharedRuntimeTrig.cpp@f3ceb2e8bd21

src/share/vm/runtime/sharedRuntimeTrig.cpp