jdk8-mips64-public/hotspot: src/share/vm/runtime/sharedRuntimeTrig.cpp@7ae4e26cb1e0

     1 /*

     2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.

     8  *

     9  * This code is distributed in the hope that it will be useful, but WITHOUT

    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    12  * version 2 for more details (a copy is included in the LICENSE file that

    13  * accompanied this code).

    14  *

    15  * You should have received a copy of the GNU General Public License version

    16  * 2 along with this work; if not, write to the Free Software Foundation,

    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    18  *

    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    20  * or visit www.oracle.com if you need additional information or have any

    21  * questions.

    22  *

    23  */

    25 /*

    26  * This file has been modified by Loongson Technology in 2015. These

    27  * modifications are Copyright (c) 2015 Loongson Technology, and are made

    28  * available on the same license terms set forth above.

    29  */

    31 #include "precompiled.hpp"

    32 #include "prims/jni.h"

    33 #include "runtime/interfaceSupport.hpp"

    34 #include "runtime/sharedRuntime.hpp"

    36 // This file contains copies of the fdlibm routines used by

    37 // StrictMath. It turns out that it is almost always required to use

    38 // these runtime routines; the Intel CPU doesn't meet the Java

    39 // specification for sin/cos outside a certain limited argument range,

    40 // and the SPARC CPU doesn't appear to have sin/cos instructions. It

    41 // also turns out that avoiding the indirect call through function

    42 // pointer out to libjava.so in SharedRuntime speeds these routines up

    43 // by roughly 15% on both Win32/x86 and Solaris/SPARC.

    45 // Enabling optimizations in this file causes incorrect code to be

    46 // generated; can not figure out how to turn down optimization for one

    47 // file in the IDE on Windows

    48 #ifdef WIN32

    49 # pragma optimize ( "", off )

    50 #endif

    52 /* The above workaround now causes more problems with the latest MS compiler.

    53  * Visual Studio 2010's /GS option tries to guard against buffer overruns.

    54  * /GS is on by default if you specify optimizations, which we do globally

    55  * via /W3 /O2. However the above selective turning off of optimizations means

    56  * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)

    57  * then the compilation fails. There are several possible solutions

    58  * (1) Remove that pragma above as obsolete with VS2010 - requires testing.

    59  * (2) Stop treating warnings as errors - would be a backward step

    60  * (3) Disable /GS - may help performance but you lose the security checks

    61  * (4) Disable the warning with "#pragma warning( disable : 4748 )"

    62  * (5) Disable planting the code with  __declspec(safebuffers)

    63  * I've opted for (5) although we should investigate the local performance

    64  * benefits of (1) and global performance benefit of (3).

    65  */

    66 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))

    67 #define SAFEBUF __declspec(safebuffers)

    68 #else

    69 #define SAFEBUF

    70 #endif

    72 #include "runtime/sharedRuntimeMath.hpp"

    74 /*

    75  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)

    76  * double x[],y[]; int e0,nx,prec; int ipio2[];

    77  *

    78  * __kernel_rem_pio2 return the last three digits of N with

    79  *              y = x - N*pi/2

    80  * so that |y| < pi/2.

    81  *

    82  * The method is to compute the integer (mod 8) and fraction parts of

    83  * (2/pi)*x without doing the full multiplication. In general we

    84  * skip the part of the product that are known to be a huge integer (

    85  * more accurately, = 0 mod 8 ). Thus the number of operations are

    86  * independent of the exponent of the input.

    87  *

    88  * (2/pi) is represented by an array of 24-bit integers in ipio2[].

    89  *

    90  * Input parameters:

    91  *      x[]     The input value (must be positive) is broken into nx

    92  *              pieces of 24-bit integers in double precision format.

    93  *              x[i] will be the i-th 24 bit of x. The scaled exponent

    94  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0

    95  *              match x's up to 24 bits.

    96  *

    97  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:

    98  *                      e0 = ilogb(z)-23

    99  *                      z  = scalbn(z,-e0)

   100  *              for i = 0,1,2

   101  *                      x[i] = floor(z)

   102  *                      z    = (z-x[i])*2**24

   103  *

   104  *

   105  *      y[]     ouput result in an array of double precision numbers.

   106  *              The dimension of y[] is:

   107  *                      24-bit  precision       1

   108  *                      53-bit  precision       2

   109  *                      64-bit  precision       2

   110  *                      113-bit precision       3

   111  *              The actual value is the sum of them. Thus for 113-bit

   112  *              precsion, one may have to do something like:

   113  *

   114  *              long double t,w,r_head, r_tail;

   115  *              t = (long double)y[2] + (long double)y[1];

   116  *              w = (long double)y[0];

   117  *              r_head = t+w;

   118  *              r_tail = w - (r_head - t);

   119  *

   120  *      e0      The exponent of x[0]

   121  *

   122  *      nx      dimension of x[]

   123  *

   124  *      prec    an interger indicating the precision:

   125  *                      0       24  bits (single)

   126  *                      1       53  bits (double)

   127  *                      2       64  bits (extended)

   128  *                      3       113 bits (quad)

   129  *

   130  *      ipio2[]

   131  *              integer array, contains the (24*i)-th to (24*i+23)-th

   132  *              bit of 2/pi after binary point. The corresponding

   133  *              floating value is

   134  *

   135  *                      ipio2[i] * 2^(-24(i+1)).

   136  *

   137  * External function:

   138  *      double scalbn(), floor();

   139  *

   140  *

   141  * Here is the description of some local variables:

   142  *

   143  *      jk      jk+1 is the initial number of terms of ipio2[] needed

   144  *              in the computation. The recommended value is 2,3,4,

   145  *              6 for single, double, extended,and quad.

   146  *

   147  *      jz      local integer variable indicating the number of

   148  *              terms of ipio2[] used.

   149  *

   150  *      jx      nx - 1

   151  *

   152  *      jv      index for pointing to the suitable ipio2[] for the

   153  *              computation. In general, we want

   154  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8

   155  *              is an integer. Thus

   156  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv

   157  *              Hence jv = max(0,(e0-3)/24).

   158  *

   159  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.

   160  *

   161  *      q[]     double array with integral value, representing the

   162  *              24-bits chunk of the product of x and 2/pi.

   163  *

   164  *      q0      the corresponding exponent of q[0]. Note that the

   165  *              exponent for q[i] would be q0-24*i.

   166  *

   167  *      PIo2[]  double precision array, obtained by cutting pi/2

   168  *              into 24 bits chunks.

   169  *

   170  *      f[]     ipio2[] in floating point

   171  *

   172  *      iq[]    integer array by breaking up q[] in 24-bits chunk.

   173  *

   174  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]

   175  *

   176  *      ih      integer. If >0 it indicats q[] is >= 0.5, hence

   177  *              it also indicates the *sign* of the result.

   178  *

   179  */

   182 /*

   183  * Constants:

   184  * The hexadecimal values are the intended ones for the following

   185  * constants. The decimal values may be used, provided that the

   186  * compiler will convert from decimal to binary accurately enough

   187  * to produce the hexadecimal values shown.

   188  */

   191 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */

   193 static const double PIo2[] = {

   194   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */

   195   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */

   196   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */

   197   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */

   198   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */

   199   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */

   200   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */

   201   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */

   202 };

   204 static const double

   205 zeroB   = 0.0,

   206 one     = 1.0,

   207 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

   208 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */

   210 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {

   211   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;

   212   double z,fw,f[20],fq[20],q[20];

   214   /* initialize jk*/

   215   jk = init_jk[prec];

   216   jp = jk;

   218   /* determine jx,jv,q0, note that 3>q0 */

   219   jx =  nx-1;

   220   jv = (e0-3)/24; if(jv<0) jv=0;

   221   q0 =  e0-24*(jv+1);

   223   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */

   224   j = jv-jx; m = jx+jk;

   225   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];

   227   /* compute q[0],q[1],...q[jk] */

   228   for (i=0;i<=jk;i++) {

   229     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;

   230   }

   232   jz = jk;

   233 recompute:

   234   /* distill q[] into iq[] reversingly */

   235   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {

   236     fw    =  (double)((int)(twon24* z));

   237     iq[i] =  (int)(z-two24B*fw);

   238     z     =  q[j-1]+fw;

   239   }

   241   /* compute n */

   242   z  = scalbnA(z,q0);           /* actual value of z */

   243   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */

   244   n  = (int) z;

   245   z -= (double)n;

   246   ih = 0;

   247   if(q0>0) {    /* need iq[jz-1] to determine n */

   248     i  = (iq[jz-1]>>(24-q0)); n += i;

   249     iq[jz-1] -= i<<(24-q0);

   250     ih = iq[jz-1]>>(23-q0);

   251   }

   252   else if(q0==0) ih = iq[jz-1]>>23;

   253   else if(z>=0.5) ih=2;

   255   if(ih>0) {    /* q > 0.5 */

   256     n += 1; carry = 0;

   257     for(i=0;i<jz ;i++) {        /* compute 1-q */

   258       j = iq[i];

   259       if(carry==0) {

   260         if(j!=0) {

   261           carry = 1; iq[i] = 0x1000000- j;

   262         }

   263       } else  iq[i] = 0xffffff - j;

   264     }

   265     if(q0>0) {          /* rare case: chance is 1 in 12 */

   266       switch(q0) {

   267       case 1:

   268         iq[jz-1] &= 0x7fffff; break;

   269       case 2:

   270         iq[jz-1] &= 0x3fffff; break;

   271       }

   272     }

   273     if(ih==2) {

   274       z = one - z;

   275       if(carry!=0) z -= scalbnA(one,q0);

   276     }

   277   }

   279   /* check if recomputation is needed */

   280   if(z==zeroB) {

   281     j = 0;

   282     for (i=jz-1;i>=jk;i--) j |= iq[i];

   283     if(j==0) { /* need recomputation */

   284       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */

   286       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */

   287         f[jx+i] = (double) ipio2[jv+i];

   288         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];

   289         q[i] = fw;

   290       }

   291       jz += k;

   292       goto recompute;

   293     }

   294   }

   296   /* chop off zero terms */

   297   if(z==0.0) {

   298     jz -= 1; q0 -= 24;

   299     while(iq[jz]==0) { jz--; q0-=24;}

   300   } else { /* break z into 24-bit if neccessary */

   301     z = scalbnA(z,-q0);

   302     if(z>=two24B) {

   303       fw = (double)((int)(twon24*z));

   304       iq[jz] = (int)(z-two24B*fw);

   305       jz += 1; q0 += 24;

   306       iq[jz] = (int) fw;

   307     } else iq[jz] = (int) z ;

   308   }

   310   /* convert integer "bit" chunk to floating-point value */

   311   fw = scalbnA(one,q0);

   312   for(i=jz;i>=0;i--) {

   313     q[i] = fw*(double)iq[i]; fw*=twon24;

   314   }

   316   /* compute PIo2[0,...,jp]*q[jz,...,0] */

   317   for(i=jz;i>=0;i--) {

   318     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];

   319     fq[jz-i] = fw;

   320   }

   322   /* compress fq[] into y[] */

   323   switch(prec) {

   324   case 0:

   325     fw = 0.0;

   326     for (i=jz;i>=0;i--) fw += fq[i];

   327     y[0] = (ih==0)? fw: -fw;

   328     break;

   329   case 1:

   330   case 2:

   331     fw = 0.0;

   332     for (i=jz;i>=0;i--) fw += fq[i];

   333     y[0] = (ih==0)? fw: -fw;

   334     fw = fq[0]-fw;

   335     for (i=1;i<=jz;i++) fw += fq[i];

   336     y[1] = (ih==0)? fw: -fw;

   337     break;

   338   case 3:       /* painful */

   339     for (i=jz;i>0;i--) {

   340       fw      = fq[i-1]+fq[i];

   341       fq[i]  += fq[i-1]-fw;

   342       fq[i-1] = fw;

   343     }

   344     for (i=jz;i>1;i--) {

   345       fw      = fq[i-1]+fq[i];

   346       fq[i]  += fq[i-1]-fw;

   347       fq[i-1] = fw;

   348     }

   349     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];

   350     if(ih==0) {

   351       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;

   352     } else {

   353       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;

   354     }

   355   }

   356   return n&7;

   357 }

   360 /*

   361  * ====================================================

   362  * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.

   363  *

   364  * Developed at SunPro, a Sun Microsystems, Inc. business.

   365  * Permission to use, copy, modify, and distribute this

   366  * software is freely granted, provided that this notice

   367  * is preserved.

   368  * ====================================================

   369  *

   370  */

   372 /* __ieee754_rem_pio2(x,y)

   373  *

   374  * return the remainder of x rem pi/2 in y[0]+y[1]

   375  * use __kernel_rem_pio2()

   376  */

   378 /*

   379  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi

   380  */

   381 static const int two_over_pi[] = {

   382   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,

   383   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,

   384   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,

   385   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,

   386   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,

   387   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,

   388   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,

   389   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,

   390   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,

   391   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,

   392   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,

   393 };

   395 static const int npio2_hw[] = {

   396   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,

   397   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,

   398   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,

   399   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,

   400   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,

   401   0x404858EB, 0x404921FB,

   402 };

   404 /*

   405  * invpio2:  53 bits of 2/pi

   406  * pio2_1:   first  33 bit of pi/2

   407  * pio2_1t:  pi/2 - pio2_1

   408  * pio2_2:   second 33 bit of pi/2

   409  * pio2_2t:  pi/2 - (pio2_1+pio2_2)

   410  * pio2_3:   third  33 bit of pi/2

   411  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)

   412  */

   414 static const double

   415 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */

   416 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */

   417 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

   418 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */

   419 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */

   420 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */

   421 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */

   422 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */

   423 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */

   424 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */

   426 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {

   427   double z,w,t,r,fn;

   428   double tx[3];

   429   int e0,i,j,nx,n,ix,hx,i0;

   431   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */

   432   hx = *(i0+(int*)&x);          /* high word of x */

   433   ix = hx&0x7fffffff;

   434   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */

   435     {y[0] = x; y[1] = 0; return 0;}

   436   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */

   437     if(hx>0) {

   438       z = x - pio2_1;

   439       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */

   440         y[0] = z - pio2_1t;

   441         y[1] = (z-y[0])-pio2_1t;

   442       } else {                /* near pi/2, use 33+33+53 bit pi */

   443         z -= pio2_2;

   444         y[0] = z - pio2_2t;

   445         y[1] = (z-y[0])-pio2_2t;

   446       }

   447       return 1;

   448     } else {    /* negative x */

   449       z = x + pio2_1;

   450       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */

   451         y[0] = z + pio2_1t;

   452         y[1] = (z-y[0])+pio2_1t;

   453       } else {                /* near pi/2, use 33+33+53 bit pi */

   454         z += pio2_2;

   455         y[0] = z + pio2_2t;

   456         y[1] = (z-y[0])+pio2_2t;

   457       }

   458       return -1;

   459     }

   460   }

   461   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */

   462     t  = fabsd(x);

   463     n  = (int) (t*invpio2+half);

   464     fn = (double)n;

   465     r  = t-fn*pio2_1;

   466     w  = fn*pio2_1t;    /* 1st round good to 85 bit */

   467     if(n<32&&ix!=npio2_hw[n-1]) {

   468       y[0] = r-w;       /* quick check no cancellation */

   469     } else {

   470       j  = ix>>20;

   471       y[0] = r-w;

   472       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);

   473       if(i>16) {  /* 2nd iteration needed, good to 118 */

   474         t  = r;

   475         w  = fn*pio2_2;

   476         r  = t-w;

   477         w  = fn*pio2_2t-((t-r)-w);

   478         y[0] = r-w;

   479         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);

   480         if(i>49)  {     /* 3rd iteration need, 151 bits acc */

   481           t  = r;       /* will cover all possible cases */

   482           w  = fn*pio2_3;

   483           r  = t-w;

   484           w  = fn*pio2_3t-((t-r)-w);

   485           y[0] = r-w;

   486         }

   487       }

   488     }

   489     y[1] = (r-y[0])-w;

   490     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}

   491     else         return n;

   492   }

   493   /*

   494    * all other (large) arguments

   495    */

   496   if(ix>=0x7ff00000) {          /* x is inf or NaN */

   497     y[0]=y[1]=x-x; return 0;

   498   }

   499   /* set z = scalbn(|x|,ilogb(x)-23) */

   500   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);

   501   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */

   502   *(i0+(int*)&z) = ix - (e0<<20);

   503   for(i=0;i<2;i++) {

   504     tx[i] = (double)((int)(z));

   505     z     = (z-tx[i])*two24A;

   506   }

   507   tx[2] = z;

   508   nx = 3;

   509   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */

   510   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);

   511   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}

   512   return n;

   513 }

   516 /* __kernel_sin( x, y, iy)

   517  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854

   518  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   519  * Input y is the tail of x.

   520  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).

   521  *

   522  * Algorithm

   523  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.

   524  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.

   525  *      3. sin(x) is approximated by a polynomial of degree 13 on

   526  *         [0,pi/4]

   527  *                               3            13

   528  *              sin(x) ~ x + S1*x + ... + S6*x

   529  *         where

   530  *

   531  *      |sin(x)         2     4     6     8     10     12  |     -58

   532  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2

   533  *      |  x                                               |

   534  *

   535  *      4. sin(x+y) = sin(x) + sin'(x')*y

   536  *                  ~ sin(x) + (1-x*x/2)*y

   537  *         For better accuracy, let

   538  *                   3      2      2      2      2

   539  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))

   540  *         then                   3    2

   541  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))

   542  */

   543 #ifdef MIPS64

   544 #undef S1

   545 #undef S2

   546 #undef S3

   547 #undef S4

   548 #undef S5

   549 #undef S6

   550 #endif

   552 static const double

   553 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */

   554 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */

   555 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */

   556 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */

   557 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */

   558 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */

   560 static double __kernel_sin(double x, double y, int iy)

   561 {

   562         double z,r,v;

   563         int ix;

   564         ix = high(x)&0x7fffffff;                /* high word of x */

   565         if(ix<0x3e400000)                       /* |x| < 2**-27 */

   566            {if((int)x==0) return x;}            /* generate inexact */

   567         z       =  x*x;

   568         v       =  z*x;

   569         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));

   570         if(iy==0) return x+v*(S1+z*r);

   571         else      return x-((z*(half*y-v*r)-y)-v*S1);

   572 }

   574 /*

   575  * __kernel_cos( x,  y )

   576  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164

   577  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   578  * Input y is the tail of x.

   579  *

   580  * Algorithm

   581  *      1. Since cos(-x) = cos(x), we need only to consider positive x.

   582  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.

   583  *      3. cos(x) is approximated by a polynomial of degree 14 on

   584  *         [0,pi/4]

   585  *                                       4            14

   586  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x

   587  *         where the remez error is

   588  *

   589  *      |              2     4     6     8     10    12     14 |     -58

   590  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2

   591  *      |                                                      |

   592  *

   593  *                     4     6     8     10    12     14

   594  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then

   595  *             cos(x) = 1 - x*x/2 + r

   596  *         since cos(x+y) ~ cos(x) - sin(x)*y

   597  *                        ~ cos(x) - x*y,

   598  *         a correction term is necessary in cos(x) and hence

   599  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))

   600  *         For better accuracy when x > 0.3, let qx = |x|/4 with

   601  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.

   602  *         Then

   603  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).

   604  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the

   605  *         magnitude of the latter is at least a quarter of x*x/2,

   606  *         thus, reducing the rounding error in the subtraction.

   607  */

   609 static const double

   610 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */

   611 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */

   612 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */

   613 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */

   614 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */

   615 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */

   617 static double __kernel_cos(double x, double y)

   618 {

   619   double a,h,z,r,qx=0;

   620   int ix;

   621   ix = high(x)&0x7fffffff;              /* ix = |x|'s high word*/

   622   if(ix<0x3e400000) {                   /* if x < 2**27 */

   623     if(((int)x)==0) return one;         /* generate inexact */

   624   }

   625   z  = x*x;

   626   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));

   627   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */

   628     return one - (0.5*z - (z*r - x*y));

   629   else {

   630     if(ix > 0x3fe90000) {               /* x > 0.78125 */

   631       qx = 0.28125;

   632     } else {

   633       set_high(&qx, ix-0x00200000); /* x/4 */

   634       set_low(&qx, 0);

   635     }

   636     h = 0.5*z-qx;

   637     a = one-qx;

   638     return a - (h - (z*r-x*y));

   639   }

   640 }

   642 /* __kernel_tan( x, y, k )

   643  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854

   644  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   645  * Input y is the tail of x.

   646  * Input k indicates whether tan (if k=1) or

   647  * -1/tan (if k= -1) is returned.

   648  *

   649  * Algorithm

   650  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.

   651  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.

   652  *      3. tan(x) is approximated by a odd polynomial of degree 27 on

   653  *         [0,0.67434]

   654  *                               3             27

   655  *              tan(x) ~ x + T1*x + ... + T13*x

   656  *         where

   657  *

   658  *              |tan(x)         2     4            26   |     -59.2

   659  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2

   660  *              |  x                                    |

   661  *

   662  *         Note: tan(x+y) = tan(x) + tan'(x)*y

   663  *                        ~ tan(x) + (1+x*x)*y

   664  *         Therefore, for better accuracy in computing tan(x+y), let

   665  *                   3      2      2       2       2

   666  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))

   667  *         then

   668  *                                  3    2

   669  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))

   670  *

   671  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then

   672  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))

   673  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))

   674  */

   676 static const double

   677 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */

   678 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */

   679 T[] =  {

   680   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */

   681   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */

   682   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */

   683   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */

   684   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */

   685   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */

   686   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */

   687   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */

   688   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */

   689   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */

   690   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */

   691  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */

   692   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */

   693 };

   695 static double __kernel_tan(double x, double y, int iy)

   696 {

   697   double z,r,v,w,s;

   698   int ix,hx;

   699   hx = high(x);           /* high word of x */

   700   ix = hx&0x7fffffff;     /* high word of |x| */

   701   if(ix<0x3e300000) {                     /* x < 2**-28 */

   702     if((int)x==0) {                       /* generate inexact */

   703       if (((ix | low(x)) | (iy + 1)) == 0)

   704         return one / fabsd(x);

   705       else {

   706         if (iy == 1)

   707           return x;

   708         else {    /* compute -1 / (x+y) carefully */

   709           double a, t;

   711           z = w = x + y;

   712           set_low(&z, 0);

   713           v = y - (z - x);

   714           t = a = -one / w;

   715           set_low(&t, 0);

   716           s = one + t * z;

   717           return t + a * (s + t * v);

   718         }

   719       }

   720     }

   721   }

   722   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */

   723     if(hx<0) {x = -x; y = -y;}

   724     z = pio4-x;

   725     w = pio4lo-y;

   726     x = z+w; y = 0.0;

   727   }

   728   z       =  x*x;

   729   w       =  z*z;

   730   /* Break x^5*(T[1]+x^2*T[2]+...) into

   731    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +

   732    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))

   733    */

   734   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));

   735   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));

   736   s = z*x;

   737   r = y + z*(s*(r+v)+y);

   738   r += T[0]*s;

   739   w = x+r;

   740   if(ix>=0x3FE59428) {

   741     v = (double)iy;

   742     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));

   743   }

   744   if(iy==1) return w;

   745   else {          /* if allow error up to 2 ulp,

   746                      simply return -1.0/(x+r) here */

   747     /*  compute -1.0/(x+r) accurately */

   748     double a,t;

   749     z  = w;

   750     set_low(&z, 0);

   751     v  = r-(z - x);     /* z+v = r+x */

   752     t = a  = -1.0/w;    /* a = -1.0/w */

   753     set_low(&t, 0);

   754     s  = 1.0+t*z;

   755     return t+a*(s+t*v);

   756   }

   757 }

   760 //----------------------------------------------------------------------

   761 //

   762 // Routines for new sin/cos implementation

   763 //

   764 //----------------------------------------------------------------------

   766 /* sin(x)

   767  * Return sine function of x.

   768  *

   769  * kernel function:

   770  *      __kernel_sin            ... sine function on [-pi/4,pi/4]

   771  *      __kernel_cos            ... cose function on [-pi/4,pi/4]

   772  *      __ieee754_rem_pio2      ... argument reduction routine

   773  *

   774  * Method.

   775  *      Let S,C and T denote the sin, cos and tan respectively on

   776  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   777  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   778  *      We have

   779  *

   780  *          n        sin(x)      cos(x)        tan(x)

   781  *     ----------------------------------------------------------

   782  *          0          S           C             T

   783  *          1          C          -S            -1/T

   784  *          2         -S          -C             T

   785  *          3         -C           S            -1/T

   786  *     ----------------------------------------------------------

   787  *

   788  * Special cases:

   789  *      Let trig be any of sin, cos, or tan.

   790  *      trig(+-INF)  is NaN, with signals;

   791  *      trig(NaN)    is that NaN;

   792  *

   793  * Accuracy:

   794  *      TRIG(x) returns trig(x) nearly rounded

   795  */

   797 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))

   798   double y[2],z=0.0;

   799   int n, ix;

   801   /* High word of x. */

   802   ix = high(x);

   804   /* |x| ~< pi/4 */

   805   ix &= 0x7fffffff;

   806   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);

   808   /* sin(Inf or NaN) is NaN */

   809   else if (ix>=0x7ff00000) return x-x;

   811   /* argument reduction needed */

   812   else {

   813     n = __ieee754_rem_pio2(x,y);

   814     switch(n&3) {

   815     case 0: return  __kernel_sin(y[0],y[1],1);

   816     case 1: return  __kernel_cos(y[0],y[1]);

   817     case 2: return -__kernel_sin(y[0],y[1],1);

   818     default:

   819       return -__kernel_cos(y[0],y[1]);

   820     }

   821   }

   822 JRT_END

   824 /* cos(x)

   825  * Return cosine function of x.

   826  *

   827  * kernel function:

   828  *      __kernel_sin            ... sine function on [-pi/4,pi/4]

   829  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]

   830  *      __ieee754_rem_pio2      ... argument reduction routine

   831  *

   832  * Method.

   833  *      Let S,C and T denote the sin, cos and tan respectively on

   834  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   835  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   836  *      We have

   837  *

   838  *          n        sin(x)      cos(x)        tan(x)

   839  *     ----------------------------------------------------------

   840  *          0          S           C             T

   841  *          1          C          -S            -1/T

   842  *          2         -S          -C             T

   843  *          3         -C           S            -1/T

   844  *     ----------------------------------------------------------

   845  *

   846  * Special cases:

   847  *      Let trig be any of sin, cos, or tan.

   848  *      trig(+-INF)  is NaN, with signals;

   849  *      trig(NaN)    is that NaN;

   850  *

   851  * Accuracy:

   852  *      TRIG(x) returns trig(x) nearly rounded

   853  */

   855 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))

   856   double y[2],z=0.0;

   857   int n, ix;

   859   /* High word of x. */

   860   ix = high(x);

   862   /* |x| ~< pi/4 */

   863   ix &= 0x7fffffff;

   864   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);

   866   /* cos(Inf or NaN) is NaN */

   867   else if (ix>=0x7ff00000) return x-x;

   869   /* argument reduction needed */

   870   else {

   871     n = __ieee754_rem_pio2(x,y);

   872     switch(n&3) {

   873     case 0: return  __kernel_cos(y[0],y[1]);

   874     case 1: return -__kernel_sin(y[0],y[1],1);

   875     case 2: return -__kernel_cos(y[0],y[1]);

   876     default:

   877       return  __kernel_sin(y[0],y[1],1);

   878     }

   879   }

   880 JRT_END

   882 /* tan(x)

   883  * Return tangent function of x.

   884  *

   885  * kernel function:

   886  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]

   887  *      __ieee754_rem_pio2      ... argument reduction routine

   888  *

   889  * Method.

   890  *      Let S,C and T denote the sin, cos and tan respectively on

   891  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   892  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   893  *      We have

   894  *

   895  *          n        sin(x)      cos(x)        tan(x)

   896  *     ----------------------------------------------------------

   897  *          0          S           C             T

   898  *          1          C          -S            -1/T

   899  *          2         -S          -C             T

   900  *          3         -C           S            -1/T

   901  *     ----------------------------------------------------------

   902  *

   903  * Special cases:

   904  *      Let trig be any of sin, cos, or tan.

   905  *      trig(+-INF)  is NaN, with signals;

   906  *      trig(NaN)    is that NaN;

   907  *

   908  * Accuracy:

   909  *      TRIG(x) returns trig(x) nearly rounded

   910  */

   912 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))

   913   double y[2],z=0.0;

   914   int n, ix;

   916   /* High word of x. */

   917   ix = high(x);

   919   /* |x| ~< pi/4 */

   920   ix &= 0x7fffffff;

   921   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);

   923   /* tan(Inf or NaN) is NaN */

   924   else if (ix>=0x7ff00000) return x-x;            /* NaN */

   926   /* argument reduction needed */

   927   else {

   928     n = __ieee754_rem_pio2(x,y);

   929     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even

   930                                                      -1 -- n odd */

   931   }

   932 JRT_END

   935 #ifdef WIN32

   936 # pragma optimize ( "", on )

   937 #endif

Mercurial > jdk8-mips64-public > hotspot / file revision

src/share/vm/runtime/sharedRuntimeTrig.cpp@7ae4e26cb1e0

src/share/vm/runtime/sharedRuntimeTrig.cpp