jdk8-mips64-public/hotspot: src/share/vm/runtime/sharedRuntimeTrig.cpp@f95d63e2154a

6989984: Use standard include model for Hospot
Summary: Replaced MakeDeps and the includeDB files with more standardized solutions.
Reviewed-by: coleenp, kvn, kamg

     1 /*

     2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.

     8  *

     9  * This code is distributed in the hope that it will be useful, but WITHOUT

    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    12  * version 2 for more details (a copy is included in the LICENSE file that

    13  * accompanied this code).

    14  *

    15  * You should have received a copy of the GNU General Public License version

    16  * 2 along with this work; if not, write to the Free Software Foundation,

    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    18  *

    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    20  * or visit www.oracle.com if you need additional information or have any

    21  * questions.

    22  *

    23  */

    25 #include "precompiled.hpp"

    26 #include "prims/jni.h"

    27 #include "runtime/interfaceSupport.hpp"

    28 #include "runtime/sharedRuntime.hpp"

    30 // This file contains copies of the fdlibm routines used by

    31 // StrictMath. It turns out that it is almost always required to use

    32 // these runtime routines; the Intel CPU doesn't meet the Java

    33 // specification for sin/cos outside a certain limited argument range,

    34 // and the SPARC CPU doesn't appear to have sin/cos instructions. It

    35 // also turns out that avoiding the indirect call through function

    36 // pointer out to libjava.so in SharedRuntime speeds these routines up

    37 // by roughly 15% on both Win32/x86 and Solaris/SPARC.

    39 // Enabling optimizations in this file causes incorrect code to be

    40 // generated; can not figure out how to turn down optimization for one

    41 // file in the IDE on Windows

    42 #ifdef WIN32

    43 # pragma optimize ( "", off )

    44 #endif

    46 /* The above workaround now causes more problems with the latest MS compiler.

    47  * Visual Studio 2010's /GS option tries to guard against buffer overruns.

    48  * /GS is on by default if you specify optimizations, which we do globally

    49  * via /W3 /O2. However the above selective turning off of optimizations means

    50  * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)

    51  * then the compilation fails. There are several possible solutions

    52  * (1) Remove that pragma above as obsolete with VS2010 - requires testing.

    53  * (2) Stop treating warnings as errors - would be a backward step

    54  * (3) Disable /GS - may help performance but you lose the security checks

    55  * (4) Disable the warning with "#pragma warning( disable : 4748 )"

    56  * (5) Disable planting the code with  __declspec(safebuffers)

    57  * I've opted for (5) although we should investigate the local performance

    58  * benefits of (1) and global performance benefit of (3).

    59  */

    60 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))

    61 #define SAFEBUF __declspec(safebuffers)

    62 #else

    63 #define SAFEBUF

    64 #endif

    66 #include <math.h>

    68 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles

    69 // [jk] this is not 100% correct because the float word order may different

    70 // from the byte order (e.g. on ARM)

    71 #ifdef VM_LITTLE_ENDIAN

    72 # define __HI(x) *(1+(int*)&x)

    73 # define __LO(x) *(int*)&x

    74 #else

    75 # define __HI(x) *(int*)&x

    76 # define __LO(x) *(1+(int*)&x)

    77 #endif

    79 static double copysignA(double x, double y) {

    80   __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);

    81   return x;

    82 }

    84 /*

    85  * scalbn (double x, int n)

    86  * scalbn(x,n) returns x* 2**n  computed by  exponent

    87  * manipulation rather than by actually performing an

    88  * exponentiation or a multiplication.

    89  */

    91 static const double

    92 two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

    93 twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */

    94 hugeX  = 1.0e+300,

    95 tiny   = 1.0e-300;

    97 static double scalbnA (double x, int n) {

    98   int  k,hx,lx;

    99   hx = __HI(x);

   100   lx = __LO(x);

   101   k = (hx&0x7ff00000)>>20;              /* extract exponent */

   102   if (k==0) {                           /* 0 or subnormal x */

   103     if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */

   104     x *= two54;

   105     hx = __HI(x);

   106     k = ((hx&0x7ff00000)>>20) - 54;

   107     if (n< -50000) return tiny*x;       /*underflow*/

   108   }

   109   if (k==0x7ff) return x+x;             /* NaN or Inf */

   110   k = k+n;

   111   if (k >  0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */

   112   if (k > 0)                            /* normal result */

   113     {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}

   114   if (k <= -54) {

   115     if (n > 50000)      /* in case integer overflow in n+k */

   116       return hugeX*copysignA(hugeX,x);  /*overflow*/

   117     else return tiny*copysignA(tiny,x); /*underflow*/

   118   }

   119   k += 54;                              /* subnormal result */

   120   __HI(x) = (hx&0x800fffff)|(k<<20);

   121   return x*twom54;

   122 }

   124 /*

   125  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)

   126  * double x[],y[]; int e0,nx,prec; int ipio2[];

   127  *

   128  * __kernel_rem_pio2 return the last three digits of N with

   129  *              y = x - N*pi/2

   130  * so that |y| < pi/2.

   131  *

   132  * The method is to compute the integer (mod 8) and fraction parts of

   133  * (2/pi)*x without doing the full multiplication. In general we

   134  * skip the part of the product that are known to be a huge integer (

   135  * more accurately, = 0 mod 8 ). Thus the number of operations are

   136  * independent of the exponent of the input.

   137  *

   138  * (2/pi) is represented by an array of 24-bit integers in ipio2[].

   139  *

   140  * Input parameters:

   141  *      x[]     The input value (must be positive) is broken into nx

   142  *              pieces of 24-bit integers in double precision format.

   143  *              x[i] will be the i-th 24 bit of x. The scaled exponent

   144  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0

   145  *              match x's up to 24 bits.

   146  *

   147  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:

   148  *                      e0 = ilogb(z)-23

   149  *                      z  = scalbn(z,-e0)

   150  *              for i = 0,1,2

   151  *                      x[i] = floor(z)

   152  *                      z    = (z-x[i])*2**24

   153  *

   154  *

   155  *      y[]     ouput result in an array of double precision numbers.

   156  *              The dimension of y[] is:

   157  *                      24-bit  precision       1

   158  *                      53-bit  precision       2

   159  *                      64-bit  precision       2

   160  *                      113-bit precision       3

   161  *              The actual value is the sum of them. Thus for 113-bit

   162  *              precsion, one may have to do something like:

   163  *

   164  *              long double t,w,r_head, r_tail;

   165  *              t = (long double)y[2] + (long double)y[1];

   166  *              w = (long double)y[0];

   167  *              r_head = t+w;

   168  *              r_tail = w - (r_head - t);

   169  *

   170  *      e0      The exponent of x[0]

   171  *

   172  *      nx      dimension of x[]

   173  *

   174  *      prec    an interger indicating the precision:

   175  *                      0       24  bits (single)

   176  *                      1       53  bits (double)

   177  *                      2       64  bits (extended)

   178  *                      3       113 bits (quad)

   179  *

   180  *      ipio2[]

   181  *              integer array, contains the (24*i)-th to (24*i+23)-th

   182  *              bit of 2/pi after binary point. The corresponding

   183  *              floating value is

   184  *

   185  *                      ipio2[i] * 2^(-24(i+1)).

   186  *

   187  * External function:

   188  *      double scalbn(), floor();

   189  *

   190  *

   191  * Here is the description of some local variables:

   192  *

   193  *      jk      jk+1 is the initial number of terms of ipio2[] needed

   194  *              in the computation. The recommended value is 2,3,4,

   195  *              6 for single, double, extended,and quad.

   196  *

   197  *      jz      local integer variable indicating the number of

   198  *              terms of ipio2[] used.

   199  *

   200  *      jx      nx - 1

   201  *

   202  *      jv      index for pointing to the suitable ipio2[] for the

   203  *              computation. In general, we want

   204  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8

   205  *              is an integer. Thus

   206  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv

   207  *              Hence jv = max(0,(e0-3)/24).

   208  *

   209  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.

   210  *

   211  *      q[]     double array with integral value, representing the

   212  *              24-bits chunk of the product of x and 2/pi.

   213  *

   214  *      q0      the corresponding exponent of q[0]. Note that the

   215  *              exponent for q[i] would be q0-24*i.

   216  *

   217  *      PIo2[]  double precision array, obtained by cutting pi/2

   218  *              into 24 bits chunks.

   219  *

   220  *      f[]     ipio2[] in floating point

   221  *

   222  *      iq[]    integer array by breaking up q[] in 24-bits chunk.

   223  *

   224  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]

   225  *

   226  *      ih      integer. If >0 it indicats q[] is >= 0.5, hence

   227  *              it also indicates the *sign* of the result.

   228  *

   229  */

   232 /*

   233  * Constants:

   234  * The hexadecimal values are the intended ones for the following

   235  * constants. The decimal values may be used, provided that the

   236  * compiler will convert from decimal to binary accurately enough

   237  * to produce the hexadecimal values shown.

   238  */

   241 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */

   243 static const double PIo2[] = {

   244   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */

   245   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */

   246   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */

   247   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */

   248   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */

   249   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */

   250   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */

   251   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */

   252 };

   254 static const double

   255 zeroB   = 0.0,

   256 one     = 1.0,

   257 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

   258 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */

   260 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {

   261   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;

   262   double z,fw,f[20],fq[20],q[20];

   264   /* initialize jk*/

   265   jk = init_jk[prec];

   266   jp = jk;

   268   /* determine jx,jv,q0, note that 3>q0 */

   269   jx =  nx-1;

   270   jv = (e0-3)/24; if(jv<0) jv=0;

   271   q0 =  e0-24*(jv+1);

   273   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */

   274   j = jv-jx; m = jx+jk;

   275   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];

   277   /* compute q[0],q[1],...q[jk] */

   278   for (i=0;i<=jk;i++) {

   279     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;

   280   }

   282   jz = jk;

   283 recompute:

   284   /* distill q[] into iq[] reversingly */

   285   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {

   286     fw    =  (double)((int)(twon24* z));

   287     iq[i] =  (int)(z-two24B*fw);

   288     z     =  q[j-1]+fw;

   289   }

   291   /* compute n */

   292   z  = scalbnA(z,q0);           /* actual value of z */

   293   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */

   294   n  = (int) z;

   295   z -= (double)n;

   296   ih = 0;

   297   if(q0>0) {    /* need iq[jz-1] to determine n */

   298     i  = (iq[jz-1]>>(24-q0)); n += i;

   299     iq[jz-1] -= i<<(24-q0);

   300     ih = iq[jz-1]>>(23-q0);

   301   }

   302   else if(q0==0) ih = iq[jz-1]>>23;

   303   else if(z>=0.5) ih=2;

   305   if(ih>0) {    /* q > 0.5 */

   306     n += 1; carry = 0;

   307     for(i=0;i<jz ;i++) {        /* compute 1-q */

   308       j = iq[i];

   309       if(carry==0) {

   310         if(j!=0) {

   311           carry = 1; iq[i] = 0x1000000- j;

   312         }

   313       } else  iq[i] = 0xffffff - j;

   314     }

   315     if(q0>0) {          /* rare case: chance is 1 in 12 */

   316       switch(q0) {

   317       case 1:

   318         iq[jz-1] &= 0x7fffff; break;

   319       case 2:

   320         iq[jz-1] &= 0x3fffff; break;

   321       }

   322     }

   323     if(ih==2) {

   324       z = one - z;

   325       if(carry!=0) z -= scalbnA(one,q0);

   326     }

   327   }

   329   /* check if recomputation is needed */

   330   if(z==zeroB) {

   331     j = 0;

   332     for (i=jz-1;i>=jk;i--) j |= iq[i];

   333     if(j==0) { /* need recomputation */

   334       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */

   336       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */

   337         f[jx+i] = (double) ipio2[jv+i];

   338         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];

   339         q[i] = fw;

   340       }

   341       jz += k;

   342       goto recompute;

   343     }

   344   }

   346   /* chop off zero terms */

   347   if(z==0.0) {

   348     jz -= 1; q0 -= 24;

   349     while(iq[jz]==0) { jz--; q0-=24;}

   350   } else { /* break z into 24-bit if neccessary */

   351     z = scalbnA(z,-q0);

   352     if(z>=two24B) {

   353       fw = (double)((int)(twon24*z));

   354       iq[jz] = (int)(z-two24B*fw);

   355       jz += 1; q0 += 24;

   356       iq[jz] = (int) fw;

   357     } else iq[jz] = (int) z ;

   358   }

   360   /* convert integer "bit" chunk to floating-point value */

   361   fw = scalbnA(one,q0);

   362   for(i=jz;i>=0;i--) {

   363     q[i] = fw*(double)iq[i]; fw*=twon24;

   364   }

   366   /* compute PIo2[0,...,jp]*q[jz,...,0] */

   367   for(i=jz;i>=0;i--) {

   368     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];

   369     fq[jz-i] = fw;

   370   }

   372   /* compress fq[] into y[] */

   373   switch(prec) {

   374   case 0:

   375     fw = 0.0;

   376     for (i=jz;i>=0;i--) fw += fq[i];

   377     y[0] = (ih==0)? fw: -fw;

   378     break;

   379   case 1:

   380   case 2:

   381     fw = 0.0;

   382     for (i=jz;i>=0;i--) fw += fq[i];

   383     y[0] = (ih==0)? fw: -fw;

   384     fw = fq[0]-fw;

   385     for (i=1;i<=jz;i++) fw += fq[i];

   386     y[1] = (ih==0)? fw: -fw;

   387     break;

   388   case 3:       /* painful */

   389     for (i=jz;i>0;i--) {

   390       fw      = fq[i-1]+fq[i];

   391       fq[i]  += fq[i-1]-fw;

   392       fq[i-1] = fw;

   393     }

   394     for (i=jz;i>1;i--) {

   395       fw      = fq[i-1]+fq[i];

   396       fq[i]  += fq[i-1]-fw;

   397       fq[i-1] = fw;

   398     }

   399     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];

   400     if(ih==0) {

   401       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;

   402     } else {

   403       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;

   404     }

   405   }

   406   return n&7;

   407 }

   410 /*

   411  * ====================================================

   412  * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.

   413  *

   414  * Developed at SunPro, a Sun Microsystems, Inc. business.

   415  * Permission to use, copy, modify, and distribute this

   416  * software is freely granted, provided that this notice

   417  * is preserved.

   418  * ====================================================

   419  *

   420  */

   422 /* __ieee754_rem_pio2(x,y)

   423  *

   424  * return the remainder of x rem pi/2 in y[0]+y[1]

   425  * use __kernel_rem_pio2()

   426  */

   428 /*

   429  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi

   430  */

   431 static const int two_over_pi[] = {

   432   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,

   433   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,

   434   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,

   435   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,

   436   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,

   437   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,

   438   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,

   439   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,

   440   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,

   441   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,

   442   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,

   443 };

   445 static const int npio2_hw[] = {

   446   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,

   447   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,

   448   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,

   449   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,

   450   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,

   451   0x404858EB, 0x404921FB,

   452 };

   454 /*

   455  * invpio2:  53 bits of 2/pi

   456  * pio2_1:   first  33 bit of pi/2

   457  * pio2_1t:  pi/2 - pio2_1

   458  * pio2_2:   second 33 bit of pi/2

   459  * pio2_2t:  pi/2 - (pio2_1+pio2_2)

   460  * pio2_3:   third  33 bit of pi/2

   461  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)

   462  */

   464 static const double

   465 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */

   466 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */

   467 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

   468 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */

   469 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */

   470 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */

   471 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */

   472 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */

   473 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */

   474 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */

   476 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {

   477   double z,w,t,r,fn;

   478   double tx[3];

   479   int e0,i,j,nx,n,ix,hx,i0;

   481   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */

   482   hx = *(i0+(int*)&x);          /* high word of x */

   483   ix = hx&0x7fffffff;

   484   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */

   485     {y[0] = x; y[1] = 0; return 0;}

   486   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */

   487     if(hx>0) {

   488       z = x - pio2_1;

   489       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */

   490         y[0] = z - pio2_1t;

   491         y[1] = (z-y[0])-pio2_1t;

   492       } else {                /* near pi/2, use 33+33+53 bit pi */

   493         z -= pio2_2;

   494         y[0] = z - pio2_2t;

   495         y[1] = (z-y[0])-pio2_2t;

   496       }

   497       return 1;

   498     } else {    /* negative x */

   499       z = x + pio2_1;

   500       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */

   501         y[0] = z + pio2_1t;

   502         y[1] = (z-y[0])+pio2_1t;

   503       } else {                /* near pi/2, use 33+33+53 bit pi */

   504         z += pio2_2;

   505         y[0] = z + pio2_2t;

   506         y[1] = (z-y[0])+pio2_2t;

   507       }

   508       return -1;

   509     }

   510   }

   511   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */

   512     t  = fabsd(x);

   513     n  = (int) (t*invpio2+half);

   514     fn = (double)n;

   515     r  = t-fn*pio2_1;

   516     w  = fn*pio2_1t;    /* 1st round good to 85 bit */

   517     if(n<32&&ix!=npio2_hw[n-1]) {

   518       y[0] = r-w;       /* quick check no cancellation */

   519     } else {

   520       j  = ix>>20;

   521       y[0] = r-w;

   522       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);

   523       if(i>16) {  /* 2nd iteration needed, good to 118 */

   524         t  = r;

   525         w  = fn*pio2_2;

   526         r  = t-w;

   527         w  = fn*pio2_2t-((t-r)-w);

   528         y[0] = r-w;

   529         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);

   530         if(i>49)  {     /* 3rd iteration need, 151 bits acc */

   531           t  = r;       /* will cover all possible cases */

   532           w  = fn*pio2_3;

   533           r  = t-w;

   534           w  = fn*pio2_3t-((t-r)-w);

   535           y[0] = r-w;

   536         }

   537       }

   538     }

   539     y[1] = (r-y[0])-w;

   540     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}

   541     else         return n;

   542   }

   543   /*

   544    * all other (large) arguments

   545    */

   546   if(ix>=0x7ff00000) {          /* x is inf or NaN */

   547     y[0]=y[1]=x-x; return 0;

   548   }

   549   /* set z = scalbn(|x|,ilogb(x)-23) */

   550   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);

   551   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */

   552   *(i0+(int*)&z) = ix - (e0<<20);

   553   for(i=0;i<2;i++) {

   554     tx[i] = (double)((int)(z));

   555     z     = (z-tx[i])*two24A;

   556   }

   557   tx[2] = z;

   558   nx = 3;

   559   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */

   560   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);

   561   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}

   562   return n;

   563 }

   566 /* __kernel_sin( x, y, iy)

   567  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854

   568  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   569  * Input y is the tail of x.

   570  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).

   571  *

   572  * Algorithm

   573  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.

   574  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.

   575  *      3. sin(x) is approximated by a polynomial of degree 13 on

   576  *         [0,pi/4]

   577  *                               3            13

   578  *              sin(x) ~ x + S1*x + ... + S6*x

   579  *         where

   580  *

   581  *      |sin(x)         2     4     6     8     10     12  |     -58

   582  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2

   583  *      |  x                                               |

   584  *

   585  *      4. sin(x+y) = sin(x) + sin'(x')*y

   586  *                  ~ sin(x) + (1-x*x/2)*y

   587  *         For better accuracy, let

   588  *                   3      2      2      2      2

   589  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))

   590  *         then                   3    2

   591  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))

   592  */

   594 static const double

   595 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */

   596 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */

   597 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */

   598 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */

   599 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */

   600 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */

   602 static double __kernel_sin(double x, double y, int iy)

   603 {

   604         double z,r,v;

   605         int ix;

   606         ix = __HI(x)&0x7fffffff;        /* high word of x */

   607         if(ix<0x3e400000)                       /* |x| < 2**-27 */

   608            {if((int)x==0) return x;}            /* generate inexact */

   609         z       =  x*x;

   610         v       =  z*x;

   611         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));

   612         if(iy==0) return x+v*(S1+z*r);

   613         else      return x-((z*(half*y-v*r)-y)-v*S1);

   614 }

   616 /*

   617  * __kernel_cos( x,  y )

   618  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164

   619  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   620  * Input y is the tail of x.

   621  *

   622  * Algorithm

   623  *      1. Since cos(-x) = cos(x), we need only to consider positive x.

   624  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.

   625  *      3. cos(x) is approximated by a polynomial of degree 14 on

   626  *         [0,pi/4]

   627  *                                       4            14

   628  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x

   629  *         where the remez error is

   630  *

   631  *      |              2     4     6     8     10    12     14 |     -58

   632  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2

   633  *      |                                                      |

   634  *

   635  *                     4     6     8     10    12     14

   636  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then

   637  *             cos(x) = 1 - x*x/2 + r

   638  *         since cos(x+y) ~ cos(x) - sin(x)*y

   639  *                        ~ cos(x) - x*y,

   640  *         a correction term is necessary in cos(x) and hence

   641  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))

   642  *         For better accuracy when x > 0.3, let qx = |x|/4 with

   643  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.

   644  *         Then

   645  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).

   646  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the

   647  *         magnitude of the latter is at least a quarter of x*x/2,

   648  *         thus, reducing the rounding error in the subtraction.

   649  */

   651 static const double

   652 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */

   653 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */

   654 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */

   655 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */

   656 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */

   657 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */

   659 static double __kernel_cos(double x, double y)

   660 {

   661   double a,hz,z,r,qx;

   662   int ix;

   663   ix = __HI(x)&0x7fffffff;      /* ix = |x|'s high word*/

   664   if(ix<0x3e400000) {                   /* if x < 2**27 */

   665     if(((int)x)==0) return one;         /* generate inexact */

   666   }

   667   z  = x*x;

   668   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));

   669   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */

   670     return one - (0.5*z - (z*r - x*y));

   671   else {

   672     if(ix > 0x3fe90000) {               /* x > 0.78125 */

   673       qx = 0.28125;

   674     } else {

   675       __HI(qx) = ix-0x00200000; /* x/4 */

   676       __LO(qx) = 0;

   677     }

   678     hz = 0.5*z-qx;

   679     a  = one-qx;

   680     return a - (hz - (z*r-x*y));

   681   }

   682 }

   684 /* __kernel_tan( x, y, k )

   685  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854

   686  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   687  * Input y is the tail of x.

   688  * Input k indicates whether tan (if k=1) or

   689  * -1/tan (if k= -1) is returned.

   690  *

   691  * Algorithm

   692  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.

   693  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.

   694  *      3. tan(x) is approximated by a odd polynomial of degree 27 on

   695  *         [0,0.67434]

   696  *                               3             27

   697  *              tan(x) ~ x + T1*x + ... + T13*x

   698  *         where

   699  *

   700  *              |tan(x)         2     4            26   |     -59.2

   701  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2

   702  *              |  x                                    |

   703  *

   704  *         Note: tan(x+y) = tan(x) + tan'(x)*y

   705  *                        ~ tan(x) + (1+x*x)*y

   706  *         Therefore, for better accuracy in computing tan(x+y), let

   707  *                   3      2      2       2       2

   708  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))

   709  *         then

   710  *                                  3    2

   711  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))

   712  *

   713  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then

   714  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))

   715  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))

   716  */

   718 static const double

   719 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */

   720 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */

   721 T[] =  {

   722   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */

   723   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */

   724   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */

   725   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */

   726   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */

   727   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */

   728   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */

   729   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */

   730   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */

   731   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */

   732   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */

   733  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */

   734   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */

   735 };

   737 static double __kernel_tan(double x, double y, int iy)

   738 {

   739   double z,r,v,w,s;

   740   int ix,hx;

   741   hx = __HI(x);   /* high word of x */

   742   ix = hx&0x7fffffff;     /* high word of |x| */

   743   if(ix<0x3e300000) {                     /* x < 2**-28 */

   744     if((int)x==0) {                       /* generate inexact */

   745       if (((ix | __LO(x)) | (iy + 1)) == 0)

   746         return one / fabsd(x);

   747       else {

   748         if (iy == 1)

   749           return x;

   750         else {    /* compute -1 / (x+y) carefully */

   751           double a, t;

   753           z = w = x + y;

   754           __LO(z) = 0;

   755           v = y - (z - x);

   756           t = a = -one / w;

   757           __LO(t) = 0;

   758           s = one + t * z;

   759           return t + a * (s + t * v);

   760         }

   761       }

   762     }

   763   }

   764   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */

   765     if(hx<0) {x = -x; y = -y;}

   766     z = pio4-x;

   767     w = pio4lo-y;

   768     x = z+w; y = 0.0;

   769   }

   770   z       =  x*x;

   771   w       =  z*z;

   772   /* Break x^5*(T[1]+x^2*T[2]+...) into

   773    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +

   774    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))

   775    */

   776   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));

   777   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));

   778   s = z*x;

   779   r = y + z*(s*(r+v)+y);

   780   r += T[0]*s;

   781   w = x+r;

   782   if(ix>=0x3FE59428) {

   783     v = (double)iy;

   784     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));

   785   }

   786   if(iy==1) return w;

   787   else {          /* if allow error up to 2 ulp,

   788                      simply return -1.0/(x+r) here */

   789     /*  compute -1.0/(x+r) accurately */

   790     double a,t;

   791     z  = w;

   792     __LO(z) = 0;

   793     v  = r-(z - x);     /* z+v = r+x */

   794     t = a  = -1.0/w;    /* a = -1.0/w */

   795     __LO(t) = 0;

   796     s  = 1.0+t*z;

   797     return t+a*(s+t*v);

   798   }

   799 }

   802 //----------------------------------------------------------------------

   803 //

   804 // Routines for new sin/cos implementation

   805 //

   806 //----------------------------------------------------------------------

   808 /* sin(x)

   809  * Return sine function of x.

   810  *

   811  * kernel function:

   812  *      __kernel_sin            ... sine function on [-pi/4,pi/4]

   813  *      __kernel_cos            ... cose function on [-pi/4,pi/4]

   814  *      __ieee754_rem_pio2      ... argument reduction routine

   815  *

   816  * Method.

   817  *      Let S,C and T denote the sin, cos and tan respectively on

   818  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   819  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   820  *      We have

   821  *

   822  *          n        sin(x)      cos(x)        tan(x)

   823  *     ----------------------------------------------------------

   824  *          0          S           C             T

   825  *          1          C          -S            -1/T

   826  *          2         -S          -C             T

   827  *          3         -C           S            -1/T

   828  *     ----------------------------------------------------------

   829  *

   830  * Special cases:

   831  *      Let trig be any of sin, cos, or tan.

   832  *      trig(+-INF)  is NaN, with signals;

   833  *      trig(NaN)    is that NaN;

   834  *

   835  * Accuracy:

   836  *      TRIG(x) returns trig(x) nearly rounded

   837  */

   839 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))

   840   double y[2],z=0.0;

   841   int n, ix;

   843   /* High word of x. */

   844   ix = __HI(x);

   846   /* |x| ~< pi/4 */

   847   ix &= 0x7fffffff;

   848   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);

   850   /* sin(Inf or NaN) is NaN */

   851   else if (ix>=0x7ff00000) return x-x;

   853   /* argument reduction needed */

   854   else {

   855     n = __ieee754_rem_pio2(x,y);

   856     switch(n&3) {

   857     case 0: return  __kernel_sin(y[0],y[1],1);

   858     case 1: return  __kernel_cos(y[0],y[1]);

   859     case 2: return -__kernel_sin(y[0],y[1],1);

   860     default:

   861       return -__kernel_cos(y[0],y[1]);

   862     }

   863   }

   864 JRT_END

   866 /* cos(x)

   867  * Return cosine function of x.

   868  *

   869  * kernel function:

   870  *      __kernel_sin            ... sine function on [-pi/4,pi/4]

   871  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]

   872  *      __ieee754_rem_pio2      ... argument reduction routine

   873  *

   874  * Method.

   875  *      Let S,C and T denote the sin, cos and tan respectively on

   876  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   877  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   878  *      We have

   879  *

   880  *          n        sin(x)      cos(x)        tan(x)

   881  *     ----------------------------------------------------------

   882  *          0          S           C             T

   883  *          1          C          -S            -1/T

   884  *          2         -S          -C             T

   885  *          3         -C           S            -1/T

   886  *     ----------------------------------------------------------

   887  *

   888  * Special cases:

   889  *      Let trig be any of sin, cos, or tan.

   890  *      trig(+-INF)  is NaN, with signals;

   891  *      trig(NaN)    is that NaN;

   892  *

   893  * Accuracy:

   894  *      TRIG(x) returns trig(x) nearly rounded

   895  */

   897 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))

   898   double y[2],z=0.0;

   899   int n, ix;

   901   /* High word of x. */

   902   ix = __HI(x);

   904   /* |x| ~< pi/4 */

   905   ix &= 0x7fffffff;

   906   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);

   908   /* cos(Inf or NaN) is NaN */

   909   else if (ix>=0x7ff00000) return x-x;

   911   /* argument reduction needed */

   912   else {

   913     n = __ieee754_rem_pio2(x,y);

   914     switch(n&3) {

   915     case 0: return  __kernel_cos(y[0],y[1]);

   916     case 1: return -__kernel_sin(y[0],y[1],1);

   917     case 2: return -__kernel_cos(y[0],y[1]);

   918     default:

   919       return  __kernel_sin(y[0],y[1],1);

   920     }

   921   }

   922 JRT_END

   924 /* tan(x)

   925  * Return tangent function of x.

   926  *

   927  * kernel function:

   928  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]

   929  *      __ieee754_rem_pio2      ... argument reduction routine

   930  *

   931  * Method.

   932  *      Let S,C and T denote the sin, cos and tan respectively on

   933  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   934  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   935  *      We have

   936  *

   937  *          n        sin(x)      cos(x)        tan(x)

   938  *     ----------------------------------------------------------

   939  *          0          S           C             T

   940  *          1          C          -S            -1/T

   941  *          2         -S          -C             T

   942  *          3         -C           S            -1/T

   943  *     ----------------------------------------------------------

   944  *

   945  * Special cases:

   946  *      Let trig be any of sin, cos, or tan.

   947  *      trig(+-INF)  is NaN, with signals;

   948  *      trig(NaN)    is that NaN;

   949  *

   950  * Accuracy:

   951  *      TRIG(x) returns trig(x) nearly rounded

   952  */

   954 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))

   955   double y[2],z=0.0;

   956   int n, ix;

   958   /* High word of x. */

   959   ix = __HI(x);

   961   /* |x| ~< pi/4 */

   962   ix &= 0x7fffffff;

   963   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);

   965   /* tan(Inf or NaN) is NaN */

   966   else if (ix>=0x7ff00000) return x-x;            /* NaN */

   968   /* argument reduction needed */

   969   else {

   970     n = __ieee754_rem_pio2(x,y);

   971     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even

   972                                                      -1 -- n odd */

   973   }

   974 JRT_END

   977 #ifdef WIN32

   978 # pragma optimize ( "", on )

   979 #endif

Mercurial > jdk8-mips64-public > hotspot / file revision

src/share/vm/runtime/sharedRuntimeTrig.cpp@f95d63e2154a

src/share/vm/runtime/sharedRuntimeTrig.cpp