jdk8-mips64-public/hotspot: src/share/vm/runtime/sharedRuntimeTrig.cpp@2d8a650513c2

     1 /*

     2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.

     8  *

     9  * This code is distributed in the hope that it will be useful, but WITHOUT

    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    12  * version 2 for more details (a copy is included in the LICENSE file that

    13  * accompanied this code).

    14  *

    15  * You should have received a copy of the GNU General Public License version

    16  * 2 along with this work; if not, write to the Free Software Foundation,

    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    18  *

    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    20  * or visit www.oracle.com if you need additional information or have any

    21  * questions.

    22  *

    23  */

    25 /*

    26  * This file has been modified by Loongson Technology in 2015. These

    27  * modifications are Copyright (c) 2015 Loongson Technology, and are made

    28  * available on the same license terms set forth above.

    29  */

    31 #include "precompiled.hpp"

    32 #include "prims/jni.h"

    33 #include "runtime/interfaceSupport.hpp"

    34 #include "runtime/sharedRuntime.hpp"

    36 // This file contains copies of the fdlibm routines used by

    37 // StrictMath. It turns out that it is almost always required to use

    38 // these runtime routines; the Intel CPU doesn't meet the Java

    39 // specification for sin/cos outside a certain limited argument range,

    40 // and the SPARC CPU doesn't appear to have sin/cos instructions. It

    41 // also turns out that avoiding the indirect call through function

    42 // pointer out to libjava.so in SharedRuntime speeds these routines up

    43 // by roughly 15% on both Win32/x86 and Solaris/SPARC.

    45 // Enabling optimizations in this file causes incorrect code to be

    46 // generated; can not figure out how to turn down optimization for one

    47 // file in the IDE on Windows

    48 #ifdef WIN32

    49 # pragma optimize ( "", off )

    50 #endif

    52 /* The above workaround now causes more problems with the latest MS compiler.

    53  * Visual Studio 2010's /GS option tries to guard against buffer overruns.

    54  * /GS is on by default if you specify optimizations, which we do globally

    55  * via /W3 /O2. However the above selective turning off of optimizations means

    56  * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)

    57  * then the compilation fails. There are several possible solutions

    58  * (1) Remove that pragma above as obsolete with VS2010 - requires testing.

    59  * (2) Stop treating warnings as errors - would be a backward step

    60  * (3) Disable /GS - may help performance but you lose the security checks

    61  * (4) Disable the warning with "#pragma warning( disable : 4748 )"

    62  * (5) Disable planting the code with  __declspec(safebuffers)

    63  * I've opted for (5) although we should investigate the local performance

    64  * benefits of (1) and global performance benefit of (3).

    65  */

    66 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))

    67 #define SAFEBUF __declspec(safebuffers)

    68 #else

    69 #define SAFEBUF

    70 #endif

    72 #include <math.h>

    74 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles

    75 // [jk] this is not 100% correct because the float word order may different

    76 // from the byte order (e.g. on ARM)

    77 #ifdef VM_LITTLE_ENDIAN

    78 # define __HI(x) *(1+(int*)&x)

    79 # define __LO(x) *(int*)&x

    80 #else

    81 # define __HI(x) *(int*)&x

    82 # define __LO(x) *(1+(int*)&x)

    83 #endif

    85 static double copysignA(double x, double y) {

    86   __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);

    87   return x;

    88 }

    90 /*

    91  * scalbn (double x, int n)

    92  * scalbn(x,n) returns x* 2**n  computed by  exponent

    93  * manipulation rather than by actually performing an

    94  * exponentiation or a multiplication.

    95  */

    97 static const double

    98 two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */

    99 twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */

   100 hugeX  = 1.0e+300,

   101 tiny   = 1.0e-300;

   103 static double scalbnA (double x, int n) {

   104   int  k,hx,lx;

   105   hx = __HI(x);

   106   lx = __LO(x);

   107   k = (hx&0x7ff00000)>>20;              /* extract exponent */

   108   if (k==0) {                           /* 0 or subnormal x */

   109     if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */

   110     x *= two54;

   111     hx = __HI(x);

   112     k = ((hx&0x7ff00000)>>20) - 54;

   113     if (n< -50000) return tiny*x;       /*underflow*/

   114   }

   115   if (k==0x7ff) return x+x;             /* NaN or Inf */

   116   k = k+n;

   117   if (k >  0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */

   118   if (k > 0)                            /* normal result */

   119     {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}

   120   if (k <= -54) {

   121     if (n > 50000)      /* in case integer overflow in n+k */

   122       return hugeX*copysignA(hugeX,x);  /*overflow*/

   123     else return tiny*copysignA(tiny,x); /*underflow*/

   124   }

   125   k += 54;                              /* subnormal result */

   126   __HI(x) = (hx&0x800fffff)|(k<<20);

   127   return x*twom54;

   128 }

   130 /*

   131  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)

   132  * double x[],y[]; int e0,nx,prec; int ipio2[];

   133  *

   134  * __kernel_rem_pio2 return the last three digits of N with

   135  *              y = x - N*pi/2

   136  * so that |y| < pi/2.

   137  *

   138  * The method is to compute the integer (mod 8) and fraction parts of

   139  * (2/pi)*x without doing the full multiplication. In general we

   140  * skip the part of the product that are known to be a huge integer (

   141  * more accurately, = 0 mod 8 ). Thus the number of operations are

   142  * independent of the exponent of the input.

   143  *

   144  * (2/pi) is represented by an array of 24-bit integers in ipio2[].

   145  *

   146  * Input parameters:

   147  *      x[]     The input value (must be positive) is broken into nx

   148  *              pieces of 24-bit integers in double precision format.

   149  *              x[i] will be the i-th 24 bit of x. The scaled exponent

   150  *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0

   151  *              match x's up to 24 bits.

   152  *

   153  *              Example of breaking a double positive z into x[0]+x[1]+x[2]:

   154  *                      e0 = ilogb(z)-23

   155  *                      z  = scalbn(z,-e0)

   156  *              for i = 0,1,2

   157  *                      x[i] = floor(z)

   158  *                      z    = (z-x[i])*2**24

   159  *

   160  *

   161  *      y[]     ouput result in an array of double precision numbers.

   162  *              The dimension of y[] is:

   163  *                      24-bit  precision       1

   164  *                      53-bit  precision       2

   165  *                      64-bit  precision       2

   166  *                      113-bit precision       3

   167  *              The actual value is the sum of them. Thus for 113-bit

   168  *              precsion, one may have to do something like:

   169  *

   170  *              long double t,w,r_head, r_tail;

   171  *              t = (long double)y[2] + (long double)y[1];

   172  *              w = (long double)y[0];

   173  *              r_head = t+w;

   174  *              r_tail = w - (r_head - t);

   175  *

   176  *      e0      The exponent of x[0]

   177  *

   178  *      nx      dimension of x[]

   179  *

   180  *      prec    an interger indicating the precision:

   181  *                      0       24  bits (single)

   182  *                      1       53  bits (double)

   183  *                      2       64  bits (extended)

   184  *                      3       113 bits (quad)

   185  *

   186  *      ipio2[]

   187  *              integer array, contains the (24*i)-th to (24*i+23)-th

   188  *              bit of 2/pi after binary point. The corresponding

   189  *              floating value is

   190  *

   191  *                      ipio2[i] * 2^(-24(i+1)).

   192  *

   193  * External function:

   194  *      double scalbn(), floor();

   195  *

   196  *

   197  * Here is the description of some local variables:

   198  *

   199  *      jk      jk+1 is the initial number of terms of ipio2[] needed

   200  *              in the computation. The recommended value is 2,3,4,

   201  *              6 for single, double, extended,and quad.

   202  *

   203  *      jz      local integer variable indicating the number of

   204  *              terms of ipio2[] used.

   205  *

   206  *      jx      nx - 1

   207  *

   208  *      jv      index for pointing to the suitable ipio2[] for the

   209  *              computation. In general, we want

   210  *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8

   211  *              is an integer. Thus

   212  *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv

   213  *              Hence jv = max(0,(e0-3)/24).

   214  *

   215  *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.

   216  *

   217  *      q[]     double array with integral value, representing the

   218  *              24-bits chunk of the product of x and 2/pi.

   219  *

   220  *      q0      the corresponding exponent of q[0]. Note that the

   221  *              exponent for q[i] would be q0-24*i.

   222  *

   223  *      PIo2[]  double precision array, obtained by cutting pi/2

   224  *              into 24 bits chunks.

   225  *

   226  *      f[]     ipio2[] in floating point

   227  *

   228  *      iq[]    integer array by breaking up q[] in 24-bits chunk.

   229  *

   230  *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]

   231  *

   232  *      ih      integer. If >0 it indicats q[] is >= 0.5, hence

   233  *              it also indicates the *sign* of the result.

   234  *

   235  */

   238 /*

   239  * Constants:

   240  * The hexadecimal values are the intended ones for the following

   241  * constants. The decimal values may be used, provided that the

   242  * compiler will convert from decimal to binary accurately enough

   243  * to produce the hexadecimal values shown.

   244  */

   247 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */

   249 static const double PIo2[] = {

   250   1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */

   251   7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */

   252   5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */

   253   3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */

   254   1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */

   255   1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */

   256   2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */

   257   2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */

   258 };

   260 static const double

   261 zeroB   = 0.0,

   262 one     = 1.0,

   263 two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

   264 twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */

   266 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {

   267   int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;

   268   double z,fw,f[20],fq[20],q[20];

   270   /* initialize jk*/

   271   jk = init_jk[prec];

   272   jp = jk;

   274   /* determine jx,jv,q0, note that 3>q0 */

   275   jx =  nx-1;

   276   jv = (e0-3)/24; if(jv<0) jv=0;

   277   q0 =  e0-24*(jv+1);

   279   /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */

   280   j = jv-jx; m = jx+jk;

   281   for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];

   283   /* compute q[0],q[1],...q[jk] */

   284   for (i=0;i<=jk;i++) {

   285     for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;

   286   }

   288   jz = jk;

   289 recompute:

   290   /* distill q[] into iq[] reversingly */

   291   for(i=0,j=jz,z=q[jz];j>0;i++,j--) {

   292     fw    =  (double)((int)(twon24* z));

   293     iq[i] =  (int)(z-two24B*fw);

   294     z     =  q[j-1]+fw;

   295   }

   297   /* compute n */

   298   z  = scalbnA(z,q0);           /* actual value of z */

   299   z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */

   300   n  = (int) z;

   301   z -= (double)n;

   302   ih = 0;

   303   if(q0>0) {    /* need iq[jz-1] to determine n */

   304     i  = (iq[jz-1]>>(24-q0)); n += i;

   305     iq[jz-1] -= i<<(24-q0);

   306     ih = iq[jz-1]>>(23-q0);

   307   }

   308   else if(q0==0) ih = iq[jz-1]>>23;

   309   else if(z>=0.5) ih=2;

   311   if(ih>0) {    /* q > 0.5 */

   312     n += 1; carry = 0;

   313     for(i=0;i<jz ;i++) {        /* compute 1-q */

   314       j = iq[i];

   315       if(carry==0) {

   316         if(j!=0) {

   317           carry = 1; iq[i] = 0x1000000- j;

   318         }

   319       } else  iq[i] = 0xffffff - j;

   320     }

   321     if(q0>0) {          /* rare case: chance is 1 in 12 */

   322       switch(q0) {

   323       case 1:

   324         iq[jz-1] &= 0x7fffff; break;

   325       case 2:

   326         iq[jz-1] &= 0x3fffff; break;

   327       }

   328     }

   329     if(ih==2) {

   330       z = one - z;

   331       if(carry!=0) z -= scalbnA(one,q0);

   332     }

   333   }

   335   /* check if recomputation is needed */

   336   if(z==zeroB) {

   337     j = 0;

   338     for (i=jz-1;i>=jk;i--) j |= iq[i];

   339     if(j==0) { /* need recomputation */

   340       for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */

   342       for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */

   343         f[jx+i] = (double) ipio2[jv+i];

   344         for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];

   345         q[i] = fw;

   346       }

   347       jz += k;

   348       goto recompute;

   349     }

   350   }

   352   /* chop off zero terms */

   353   if(z==0.0) {

   354     jz -= 1; q0 -= 24;

   355     while(iq[jz]==0) { jz--; q0-=24;}

   356   } else { /* break z into 24-bit if neccessary */

   357     z = scalbnA(z,-q0);

   358     if(z>=two24B) {

   359       fw = (double)((int)(twon24*z));

   360       iq[jz] = (int)(z-two24B*fw);

   361       jz += 1; q0 += 24;

   362       iq[jz] = (int) fw;

   363     } else iq[jz] = (int) z ;

   364   }

   366   /* convert integer "bit" chunk to floating-point value */

   367   fw = scalbnA(one,q0);

   368   for(i=jz;i>=0;i--) {

   369     q[i] = fw*(double)iq[i]; fw*=twon24;

   370   }

   372   /* compute PIo2[0,...,jp]*q[jz,...,0] */

   373   for(i=jz;i>=0;i--) {

   374     for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];

   375     fq[jz-i] = fw;

   376   }

   378   /* compress fq[] into y[] */

   379   switch(prec) {

   380   case 0:

   381     fw = 0.0;

   382     for (i=jz;i>=0;i--) fw += fq[i];

   383     y[0] = (ih==0)? fw: -fw;

   384     break;

   385   case 1:

   386   case 2:

   387     fw = 0.0;

   388     for (i=jz;i>=0;i--) fw += fq[i];

   389     y[0] = (ih==0)? fw: -fw;

   390     fw = fq[0]-fw;

   391     for (i=1;i<=jz;i++) fw += fq[i];

   392     y[1] = (ih==0)? fw: -fw;

   393     break;

   394   case 3:       /* painful */

   395     for (i=jz;i>0;i--) {

   396       fw      = fq[i-1]+fq[i];

   397       fq[i]  += fq[i-1]-fw;

   398       fq[i-1] = fw;

   399     }

   400     for (i=jz;i>1;i--) {

   401       fw      = fq[i-1]+fq[i];

   402       fq[i]  += fq[i-1]-fw;

   403       fq[i-1] = fw;

   404     }

   405     for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];

   406     if(ih==0) {

   407       y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;

   408     } else {

   409       y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;

   410     }

   411   }

   412   return n&7;

   413 }

   416 /*

   417  * ====================================================

   418  * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.

   419  *

   420  * Developed at SunPro, a Sun Microsystems, Inc. business.

   421  * Permission to use, copy, modify, and distribute this

   422  * software is freely granted, provided that this notice

   423  * is preserved.

   424  * ====================================================

   425  *

   426  */

   428 /* __ieee754_rem_pio2(x,y)

   429  *

   430  * return the remainder of x rem pi/2 in y[0]+y[1]

   431  * use __kernel_rem_pio2()

   432  */

   434 /*

   435  * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi

   436  */

   437 static const int two_over_pi[] = {

   438   0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,

   439   0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,

   440   0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,

   441   0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,

   442   0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,

   443   0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,

   444   0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,

   445   0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,

   446   0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,

   447   0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,

   448   0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,

   449 };

   451 static const int npio2_hw[] = {

   452   0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,

   453   0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,

   454   0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,

   455   0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,

   456   0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,

   457   0x404858EB, 0x404921FB,

   458 };

   460 /*

   461  * invpio2:  53 bits of 2/pi

   462  * pio2_1:   first  33 bit of pi/2

   463  * pio2_1t:  pi/2 - pio2_1

   464  * pio2_2:   second 33 bit of pi/2

   465  * pio2_2t:  pi/2 - (pio2_1+pio2_2)

   466  * pio2_3:   third  33 bit of pi/2

   467  * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)

   468  */

   470 static const double

   471 zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */

   472 half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */

   473 two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */

   474 invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */

   475 pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */

   476 pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */

   477 pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */

   478 pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */

   479 pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */

   480 pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */

   482 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {

   483   double z,w,t,r,fn;

   484   double tx[3];

   485   int e0,i,j,nx,n,ix,hx,i0;

   487   i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */

   488   hx = *(i0+(int*)&x);          /* high word of x */

   489   ix = hx&0x7fffffff;

   490   if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */

   491     {y[0] = x; y[1] = 0; return 0;}

   492   if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */

   493     if(hx>0) {

   494       z = x - pio2_1;

   495       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */

   496         y[0] = z - pio2_1t;

   497         y[1] = (z-y[0])-pio2_1t;

   498       } else {                /* near pi/2, use 33+33+53 bit pi */

   499         z -= pio2_2;

   500         y[0] = z - pio2_2t;

   501         y[1] = (z-y[0])-pio2_2t;

   502       }

   503       return 1;

   504     } else {    /* negative x */

   505       z = x + pio2_1;

   506       if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */

   507         y[0] = z + pio2_1t;

   508         y[1] = (z-y[0])+pio2_1t;

   509       } else {                /* near pi/2, use 33+33+53 bit pi */

   510         z += pio2_2;

   511         y[0] = z + pio2_2t;

   512         y[1] = (z-y[0])+pio2_2t;

   513       }

   514       return -1;

   515     }

   516   }

   517   if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */

   518     t  = fabsd(x);

   519     n  = (int) (t*invpio2+half);

   520     fn = (double)n;

   521     r  = t-fn*pio2_1;

   522     w  = fn*pio2_1t;    /* 1st round good to 85 bit */

   523     if(n<32&&ix!=npio2_hw[n-1]) {

   524       y[0] = r-w;       /* quick check no cancellation */

   525     } else {

   526       j  = ix>>20;

   527       y[0] = r-w;

   528       i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);

   529       if(i>16) {  /* 2nd iteration needed, good to 118 */

   530         t  = r;

   531         w  = fn*pio2_2;

   532         r  = t-w;

   533         w  = fn*pio2_2t-((t-r)-w);

   534         y[0] = r-w;

   535         i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);

   536         if(i>49)  {     /* 3rd iteration need, 151 bits acc */

   537           t  = r;       /* will cover all possible cases */

   538           w  = fn*pio2_3;

   539           r  = t-w;

   540           w  = fn*pio2_3t-((t-r)-w);

   541           y[0] = r-w;

   542         }

   543       }

   544     }

   545     y[1] = (r-y[0])-w;

   546     if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}

   547     else         return n;

   548   }

   549   /*

   550    * all other (large) arguments

   551    */

   552   if(ix>=0x7ff00000) {          /* x is inf or NaN */

   553     y[0]=y[1]=x-x; return 0;

   554   }

   555   /* set z = scalbn(|x|,ilogb(x)-23) */

   556   *(1-i0+(int*)&z) = *(1-i0+(int*)&x);

   557   e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */

   558   *(i0+(int*)&z) = ix - (e0<<20);

   559   for(i=0;i<2;i++) {

   560     tx[i] = (double)((int)(z));

   561     z     = (z-tx[i])*two24A;

   562   }

   563   tx[2] = z;

   564   nx = 3;

   565   while(tx[nx-1]==zeroA) nx--;  /* skip zero term */

   566   n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);

   567   if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}

   568   return n;

   569 }

   572 /* __kernel_sin( x, y, iy)

   573  * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854

   574  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   575  * Input y is the tail of x.

   576  * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).

   577  *

   578  * Algorithm

   579  *      1. Since sin(-x) = -sin(x), we need only to consider positive x.

   580  *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.

   581  *      3. sin(x) is approximated by a polynomial of degree 13 on

   582  *         [0,pi/4]

   583  *                               3            13

   584  *              sin(x) ~ x + S1*x + ... + S6*x

   585  *         where

   586  *

   587  *      |sin(x)         2     4     6     8     10     12  |     -58

   588  *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2

   589  *      |  x                                               |

   590  *

   591  *      4. sin(x+y) = sin(x) + sin'(x')*y

   592  *                  ~ sin(x) + (1-x*x/2)*y

   593  *         For better accuracy, let

   594  *                   3      2      2      2      2

   595  *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))

   596  *         then                   3    2

   597  *              sin(x) = x + (S1*x + (x *(r-y/2)+y))

   598  */

   599 #ifdef MIPS64

   600 #undef S1

   601 #undef S2

   602 #undef S3

   603 #undef S4

   604 #undef S5

   605 #undef S6

   606 #endif

   608 static const double

   609 S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */

   610 S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */

   611 S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */

   612 S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */

   613 S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */

   614 S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */

   616 static double __kernel_sin(double x, double y, int iy)

   617 {

   618         double z,r,v;

   619         int ix;

   620         ix = __HI(x)&0x7fffffff;        /* high word of x */

   621         if(ix<0x3e400000)                       /* |x| < 2**-27 */

   622            {if((int)x==0) return x;}            /* generate inexact */

   623         z       =  x*x;

   624         v       =  z*x;

   625         r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));

   626         if(iy==0) return x+v*(S1+z*r);

   627         else      return x-((z*(half*y-v*r)-y)-v*S1);

   628 }

   630 /*

   631  * __kernel_cos( x,  y )

   632  * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164

   633  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   634  * Input y is the tail of x.

   635  *

   636  * Algorithm

   637  *      1. Since cos(-x) = cos(x), we need only to consider positive x.

   638  *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.

   639  *      3. cos(x) is approximated by a polynomial of degree 14 on

   640  *         [0,pi/4]

   641  *                                       4            14

   642  *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x

   643  *         where the remez error is

   644  *

   645  *      |              2     4     6     8     10    12     14 |     -58

   646  *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2

   647  *      |                                                      |

   648  *

   649  *                     4     6     8     10    12     14

   650  *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then

   651  *             cos(x) = 1 - x*x/2 + r

   652  *         since cos(x+y) ~ cos(x) - sin(x)*y

   653  *                        ~ cos(x) - x*y,

   654  *         a correction term is necessary in cos(x) and hence

   655  *              cos(x+y) = 1 - (x*x/2 - (r - x*y))

   656  *         For better accuracy when x > 0.3, let qx = |x|/4 with

   657  *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.

   658  *         Then

   659  *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).

   660  *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the

   661  *         magnitude of the latter is at least a quarter of x*x/2,

   662  *         thus, reducing the rounding error in the subtraction.

   663  */

   665 static const double

   666 C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */

   667 C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */

   668 C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */

   669 C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */

   670 C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */

   671 C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */

   673 static double __kernel_cos(double x, double y)

   674 {

   675   double a,h,z,r,qx;

   676   int ix;

   677   ix = __HI(x)&0x7fffffff;      /* ix = |x|'s high word*/

   678   if(ix<0x3e400000) {                   /* if x < 2**27 */

   679     if(((int)x)==0) return one;         /* generate inexact */

   680   }

   681   z  = x*x;

   682   r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));

   683   if(ix < 0x3FD33333)                   /* if |x| < 0.3 */

   684     return one - (0.5*z - (z*r - x*y));

   685   else {

   686     if(ix > 0x3fe90000) {               /* x > 0.78125 */

   687       qx = 0.28125;

   688     } else {

   689       __HI(qx) = ix-0x00200000; /* x/4 */

   690       __LO(qx) = 0;

   691     }

   692     h = 0.5*z-qx;

   693     a = one-qx;

   694     return a - (h - (z*r-x*y));

   695   }

   696 }

   698 /* __kernel_tan( x, y, k )

   699  * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854

   700  * Input x is assumed to be bounded by ~pi/4 in magnitude.

   701  * Input y is the tail of x.

   702  * Input k indicates whether tan (if k=1) or

   703  * -1/tan (if k= -1) is returned.

   704  *

   705  * Algorithm

   706  *      1. Since tan(-x) = -tan(x), we need only to consider positive x.

   707  *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.

   708  *      3. tan(x) is approximated by a odd polynomial of degree 27 on

   709  *         [0,0.67434]

   710  *                               3             27

   711  *              tan(x) ~ x + T1*x + ... + T13*x

   712  *         where

   713  *

   714  *              |tan(x)         2     4            26   |     -59.2

   715  *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2

   716  *              |  x                                    |

   717  *

   718  *         Note: tan(x+y) = tan(x) + tan'(x)*y

   719  *                        ~ tan(x) + (1+x*x)*y

   720  *         Therefore, for better accuracy in computing tan(x+y), let

   721  *                   3      2      2       2       2

   722  *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))

   723  *         then

   724  *                                  3    2

   725  *              tan(x+y) = x + (T1*x + (x *(r+y)+y))

   726  *

   727  *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then

   728  *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))

   729  *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))

   730  */

   732 static const double

   733 pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */

   734 pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */

   735 T[] =  {

   736   3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */

   737   1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */

   738   5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */

   739   2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */

   740   8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */

   741   3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */

   742   1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */

   743   5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */

   744   2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */

   745   7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */

   746   7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */

   747  -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */

   748   2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */

   749 };

   751 static double __kernel_tan(double x, double y, int iy)

   752 {

   753   double z,r,v,w,s;

   754   int ix,hx;

   755   hx = __HI(x);   /* high word of x */

   756   ix = hx&0x7fffffff;     /* high word of |x| */

   757   if(ix<0x3e300000) {                     /* x < 2**-28 */

   758     if((int)x==0) {                       /* generate inexact */

   759       if (((ix | __LO(x)) | (iy + 1)) == 0)

   760         return one / fabsd(x);

   761       else {

   762         if (iy == 1)

   763           return x;

   764         else {    /* compute -1 / (x+y) carefully */

   765           double a, t;

   767           z = w = x + y;

   768           __LO(z) = 0;

   769           v = y - (z - x);

   770           t = a = -one / w;

   771           __LO(t) = 0;

   772           s = one + t * z;

   773           return t + a * (s + t * v);

   774         }

   775       }

   776     }

   777   }

   778   if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */

   779     if(hx<0) {x = -x; y = -y;}

   780     z = pio4-x;

   781     w = pio4lo-y;

   782     x = z+w; y = 0.0;

   783   }

   784   z       =  x*x;

   785   w       =  z*z;

   786   /* Break x^5*(T[1]+x^2*T[2]+...) into

   787    *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +

   788    *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))

   789    */

   790   r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));

   791   v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));

   792   s = z*x;

   793   r = y + z*(s*(r+v)+y);

   794   r += T[0]*s;

   795   w = x+r;

   796   if(ix>=0x3FE59428) {

   797     v = (double)iy;

   798     return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));

   799   }

   800   if(iy==1) return w;

   801   else {          /* if allow error up to 2 ulp,

   802                      simply return -1.0/(x+r) here */

   803     /*  compute -1.0/(x+r) accurately */

   804     double a,t;

   805     z  = w;

   806     __LO(z) = 0;

   807     v  = r-(z - x);     /* z+v = r+x */

   808     t = a  = -1.0/w;    /* a = -1.0/w */

   809     __LO(t) = 0;

   810     s  = 1.0+t*z;

   811     return t+a*(s+t*v);

   812   }

   813 }

   816 //----------------------------------------------------------------------

   817 //

   818 // Routines for new sin/cos implementation

   819 //

   820 //----------------------------------------------------------------------

   822 /* sin(x)

   823  * Return sine function of x.

   824  *

   825  * kernel function:

   826  *      __kernel_sin            ... sine function on [-pi/4,pi/4]

   827  *      __kernel_cos            ... cose function on [-pi/4,pi/4]

   828  *      __ieee754_rem_pio2      ... argument reduction routine

   829  *

   830  * Method.

   831  *      Let S,C and T denote the sin, cos and tan respectively on

   832  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   833  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   834  *      We have

   835  *

   836  *          n        sin(x)      cos(x)        tan(x)

   837  *     ----------------------------------------------------------

   838  *          0          S           C             T

   839  *          1          C          -S            -1/T

   840  *          2         -S          -C             T

   841  *          3         -C           S            -1/T

   842  *     ----------------------------------------------------------

   843  *

   844  * Special cases:

   845  *      Let trig be any of sin, cos, or tan.

   846  *      trig(+-INF)  is NaN, with signals;

   847  *      trig(NaN)    is that NaN;

   848  *

   849  * Accuracy:

   850  *      TRIG(x) returns trig(x) nearly rounded

   851  */

   853 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))

   854   double y[2],z=0.0;

   855   int n, ix;

   857   /* High word of x. */

   858   ix = __HI(x);

   860   /* |x| ~< pi/4 */

   861   ix &= 0x7fffffff;

   862   if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);

   864   /* sin(Inf or NaN) is NaN */

   865   else if (ix>=0x7ff00000) return x-x;

   867   /* argument reduction needed */

   868   else {

   869     n = __ieee754_rem_pio2(x,y);

   870     switch(n&3) {

   871     case 0: return  __kernel_sin(y[0],y[1],1);

   872     case 1: return  __kernel_cos(y[0],y[1]);

   873     case 2: return -__kernel_sin(y[0],y[1],1);

   874     default:

   875       return -__kernel_cos(y[0],y[1]);

   876     }

   877   }

   878 JRT_END

   880 /* cos(x)

   881  * Return cosine function of x.

   882  *

   883  * kernel function:

   884  *      __kernel_sin            ... sine function on [-pi/4,pi/4]

   885  *      __kernel_cos            ... cosine function on [-pi/4,pi/4]

   886  *      __ieee754_rem_pio2      ... argument reduction routine

   887  *

   888  * Method.

   889  *      Let S,C and T denote the sin, cos and tan respectively on

   890  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   891  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   892  *      We have

   893  *

   894  *          n        sin(x)      cos(x)        tan(x)

   895  *     ----------------------------------------------------------

   896  *          0          S           C             T

   897  *          1          C          -S            -1/T

   898  *          2         -S          -C             T

   899  *          3         -C           S            -1/T

   900  *     ----------------------------------------------------------

   901  *

   902  * Special cases:

   903  *      Let trig be any of sin, cos, or tan.

   904  *      trig(+-INF)  is NaN, with signals;

   905  *      trig(NaN)    is that NaN;

   906  *

   907  * Accuracy:

   908  *      TRIG(x) returns trig(x) nearly rounded

   909  */

   911 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))

   912   double y[2],z=0.0;

   913   int n, ix;

   915   /* High word of x. */

   916   ix = __HI(x);

   918   /* |x| ~< pi/4 */

   919   ix &= 0x7fffffff;

   920   if(ix <= 0x3fe921fb) return __kernel_cos(x,z);

   922   /* cos(Inf or NaN) is NaN */

   923   else if (ix>=0x7ff00000) return x-x;

   925   /* argument reduction needed */

   926   else {

   927     n = __ieee754_rem_pio2(x,y);

   928     switch(n&3) {

   929     case 0: return  __kernel_cos(y[0],y[1]);

   930     case 1: return -__kernel_sin(y[0],y[1],1);

   931     case 2: return -__kernel_cos(y[0],y[1]);

   932     default:

   933       return  __kernel_sin(y[0],y[1],1);

   934     }

   935   }

   936 JRT_END

   938 /* tan(x)

   939  * Return tangent function of x.

   940  *

   941  * kernel function:

   942  *      __kernel_tan            ... tangent function on [-pi/4,pi/4]

   943  *      __ieee754_rem_pio2      ... argument reduction routine

   944  *

   945  * Method.

   946  *      Let S,C and T denote the sin, cos and tan respectively on

   947  *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2

   948  *      in [-pi/4 , +pi/4], and let n = k mod 4.

   949  *      We have

   950  *

   951  *          n        sin(x)      cos(x)        tan(x)

   952  *     ----------------------------------------------------------

   953  *          0          S           C             T

   954  *          1          C          -S            -1/T

   955  *          2         -S          -C             T

   956  *          3         -C           S            -1/T

   957  *     ----------------------------------------------------------

   958  *

   959  * Special cases:

   960  *      Let trig be any of sin, cos, or tan.

   961  *      trig(+-INF)  is NaN, with signals;

   962  *      trig(NaN)    is that NaN;

   963  *

   964  * Accuracy:

   965  *      TRIG(x) returns trig(x) nearly rounded

   966  */

   968 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))

   969   double y[2],z=0.0;

   970   int n, ix;

   972   /* High word of x. */

   973   ix = __HI(x);

   975   /* |x| ~< pi/4 */

   976   ix &= 0x7fffffff;

   977   if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);

   979   /* tan(Inf or NaN) is NaN */

   980   else if (ix>=0x7ff00000) return x-x;            /* NaN */

   982   /* argument reduction needed */

   983   else {

   984     n = __ieee754_rem_pio2(x,y);

   985     return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even

   986                                                      -1 -- n odd */

   987   }

   988 JRT_END

   991 #ifdef WIN32

   992 # pragma optimize ( "", on )

   993 #endif

Mercurial > jdk8-mips64-public > hotspot / file revision

src/share/vm/runtime/sharedRuntimeTrig.cpp@2d8a650513c2

src/share/vm/runtime/sharedRuntimeTrig.cpp