src/share/vm/runtime/sharedRuntimeTrig.cpp

changeset 435
a61af66fc99e
child 1840
fb57d4cf76c2
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/vm/runtime/sharedRuntimeTrig.cpp	Sat Dec 01 00:00:00 2007 +0000
     1.3 @@ -0,0 +1,957 @@
     1.4 +/*
     1.5 + * Copyright 2001-2005 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.24 + * have any questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#include "incls/_precompiled.incl"
    1.29 +#include "incls/_sharedRuntimeTrig.cpp.incl"
    1.30 +
    1.31 +// This file contains copies of the fdlibm routines used by
    1.32 +// StrictMath. It turns out that it is almost always required to use
    1.33 +// these runtime routines; the Intel CPU doesn't meet the Java
    1.34 +// specification for sin/cos outside a certain limited argument range,
    1.35 +// and the SPARC CPU doesn't appear to have sin/cos instructions. It
    1.36 +// also turns out that avoiding the indirect call through function
    1.37 +// pointer out to libjava.so in SharedRuntime speeds these routines up
    1.38 +// by roughly 15% on both Win32/x86 and Solaris/SPARC.
    1.39 +
    1.40 +// Enabling optimizations in this file causes incorrect code to be
    1.41 +// generated; can not figure out how to turn down optimization for one
    1.42 +// file in the IDE on Windows
    1.43 +#ifdef WIN32
    1.44 +# pragma optimize ( "", off )
    1.45 +#endif
    1.46 +
    1.47 +#include <math.h>
    1.48 +
    1.49 +// VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
    1.50 +// [jk] this is not 100% correct because the float word order may different
    1.51 +// from the byte order (e.g. on ARM)
    1.52 +#ifdef VM_LITTLE_ENDIAN
    1.53 +# define __HI(x) *(1+(int*)&x)
    1.54 +# define __LO(x) *(int*)&x
    1.55 +#else
    1.56 +# define __HI(x) *(int*)&x
    1.57 +# define __LO(x) *(1+(int*)&x)
    1.58 +#endif
    1.59 +
    1.60 +static double copysignA(double x, double y) {
    1.61 +  __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
    1.62 +  return x;
    1.63 +}
    1.64 +
    1.65 +/*
    1.66 + * scalbn (double x, int n)
    1.67 + * scalbn(x,n) returns x* 2**n  computed by  exponent
    1.68 + * manipulation rather than by actually performing an
    1.69 + * exponentiation or a multiplication.
    1.70 + */
    1.71 +
    1.72 +static const double
    1.73 +two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
    1.74 +twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
    1.75 +hugeX  = 1.0e+300,
    1.76 +tiny   = 1.0e-300;
    1.77 +
    1.78 +static double scalbnA (double x, int n) {
    1.79 +  int  k,hx,lx;
    1.80 +  hx = __HI(x);
    1.81 +  lx = __LO(x);
    1.82 +  k = (hx&0x7ff00000)>>20;              /* extract exponent */
    1.83 +  if (k==0) {                           /* 0 or subnormal x */
    1.84 +    if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
    1.85 +    x *= two54;
    1.86 +    hx = __HI(x);
    1.87 +    k = ((hx&0x7ff00000)>>20) - 54;
    1.88 +    if (n< -50000) return tiny*x;       /*underflow*/
    1.89 +  }
    1.90 +  if (k==0x7ff) return x+x;             /* NaN or Inf */
    1.91 +  k = k+n;
    1.92 +  if (k >  0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */
    1.93 +  if (k > 0)                            /* normal result */
    1.94 +    {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
    1.95 +  if (k <= -54) {
    1.96 +    if (n > 50000)      /* in case integer overflow in n+k */
    1.97 +      return hugeX*copysignA(hugeX,x);  /*overflow*/
    1.98 +    else return tiny*copysignA(tiny,x); /*underflow*/
    1.99 +  }
   1.100 +  k += 54;                              /* subnormal result */
   1.101 +  __HI(x) = (hx&0x800fffff)|(k<<20);
   1.102 +  return x*twom54;
   1.103 +}
   1.104 +
   1.105 +/*
   1.106 + * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
   1.107 + * double x[],y[]; int e0,nx,prec; int ipio2[];
   1.108 + *
   1.109 + * __kernel_rem_pio2 return the last three digits of N with
   1.110 + *              y = x - N*pi/2
   1.111 + * so that |y| < pi/2.
   1.112 + *
   1.113 + * The method is to compute the integer (mod 8) and fraction parts of
   1.114 + * (2/pi)*x without doing the full multiplication. In general we
   1.115 + * skip the part of the product that are known to be a huge integer (
   1.116 + * more accurately, = 0 mod 8 ). Thus the number of operations are
   1.117 + * independent of the exponent of the input.
   1.118 + *
   1.119 + * (2/pi) is represented by an array of 24-bit integers in ipio2[].
   1.120 + *
   1.121 + * Input parameters:
   1.122 + *      x[]     The input value (must be positive) is broken into nx
   1.123 + *              pieces of 24-bit integers in double precision format.
   1.124 + *              x[i] will be the i-th 24 bit of x. The scaled exponent
   1.125 + *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
   1.126 + *              match x's up to 24 bits.
   1.127 + *
   1.128 + *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
   1.129 + *                      e0 = ilogb(z)-23
   1.130 + *                      z  = scalbn(z,-e0)
   1.131 + *              for i = 0,1,2
   1.132 + *                      x[i] = floor(z)
   1.133 + *                      z    = (z-x[i])*2**24
   1.134 + *
   1.135 + *
   1.136 + *      y[]     ouput result in an array of double precision numbers.
   1.137 + *              The dimension of y[] is:
   1.138 + *                      24-bit  precision       1
   1.139 + *                      53-bit  precision       2
   1.140 + *                      64-bit  precision       2
   1.141 + *                      113-bit precision       3
   1.142 + *              The actual value is the sum of them. Thus for 113-bit
   1.143 + *              precsion, one may have to do something like:
   1.144 + *
   1.145 + *              long double t,w,r_head, r_tail;
   1.146 + *              t = (long double)y[2] + (long double)y[1];
   1.147 + *              w = (long double)y[0];
   1.148 + *              r_head = t+w;
   1.149 + *              r_tail = w - (r_head - t);
   1.150 + *
   1.151 + *      e0      The exponent of x[0]
   1.152 + *
   1.153 + *      nx      dimension of x[]
   1.154 + *
   1.155 + *      prec    an interger indicating the precision:
   1.156 + *                      0       24  bits (single)
   1.157 + *                      1       53  bits (double)
   1.158 + *                      2       64  bits (extended)
   1.159 + *                      3       113 bits (quad)
   1.160 + *
   1.161 + *      ipio2[]
   1.162 + *              integer array, contains the (24*i)-th to (24*i+23)-th
   1.163 + *              bit of 2/pi after binary point. The corresponding
   1.164 + *              floating value is
   1.165 + *
   1.166 + *                      ipio2[i] * 2^(-24(i+1)).
   1.167 + *
   1.168 + * External function:
   1.169 + *      double scalbn(), floor();
   1.170 + *
   1.171 + *
   1.172 + * Here is the description of some local variables:
   1.173 + *
   1.174 + *      jk      jk+1 is the initial number of terms of ipio2[] needed
   1.175 + *              in the computation. The recommended value is 2,3,4,
   1.176 + *              6 for single, double, extended,and quad.
   1.177 + *
   1.178 + *      jz      local integer variable indicating the number of
   1.179 + *              terms of ipio2[] used.
   1.180 + *
   1.181 + *      jx      nx - 1
   1.182 + *
   1.183 + *      jv      index for pointing to the suitable ipio2[] for the
   1.184 + *              computation. In general, we want
   1.185 + *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
   1.186 + *              is an integer. Thus
   1.187 + *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
   1.188 + *              Hence jv = max(0,(e0-3)/24).
   1.189 + *
   1.190 + *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
   1.191 + *
   1.192 + *      q[]     double array with integral value, representing the
   1.193 + *              24-bits chunk of the product of x and 2/pi.
   1.194 + *
   1.195 + *      q0      the corresponding exponent of q[0]. Note that the
   1.196 + *              exponent for q[i] would be q0-24*i.
   1.197 + *
   1.198 + *      PIo2[]  double precision array, obtained by cutting pi/2
   1.199 + *              into 24 bits chunks.
   1.200 + *
   1.201 + *      f[]     ipio2[] in floating point
   1.202 + *
   1.203 + *      iq[]    integer array by breaking up q[] in 24-bits chunk.
   1.204 + *
   1.205 + *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
   1.206 + *
   1.207 + *      ih      integer. If >0 it indicats q[] is >= 0.5, hence
   1.208 + *              it also indicates the *sign* of the result.
   1.209 + *
   1.210 + */
   1.211 +
   1.212 +
   1.213 +/*
   1.214 + * Constants:
   1.215 + * The hexadecimal values are the intended ones for the following
   1.216 + * constants. The decimal values may be used, provided that the
   1.217 + * compiler will convert from decimal to binary accurately enough
   1.218 + * to produce the hexadecimal values shown.
   1.219 + */
   1.220 +
   1.221 +
   1.222 +static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
   1.223 +
   1.224 +static const double PIo2[] = {
   1.225 +  1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
   1.226 +  7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
   1.227 +  5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
   1.228 +  3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
   1.229 +  1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
   1.230 +  1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
   1.231 +  2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
   1.232 +  2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
   1.233 +};
   1.234 +
   1.235 +static const double
   1.236 +zeroB   = 0.0,
   1.237 +one     = 1.0,
   1.238 +two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   1.239 +twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
   1.240 +
   1.241 +static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
   1.242 +  int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
   1.243 +  double z,fw,f[20],fq[20],q[20];
   1.244 +
   1.245 +  /* initialize jk*/
   1.246 +  jk = init_jk[prec];
   1.247 +  jp = jk;
   1.248 +
   1.249 +  /* determine jx,jv,q0, note that 3>q0 */
   1.250 +  jx =  nx-1;
   1.251 +  jv = (e0-3)/24; if(jv<0) jv=0;
   1.252 +  q0 =  e0-24*(jv+1);
   1.253 +
   1.254 +  /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
   1.255 +  j = jv-jx; m = jx+jk;
   1.256 +  for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
   1.257 +
   1.258 +  /* compute q[0],q[1],...q[jk] */
   1.259 +  for (i=0;i<=jk;i++) {
   1.260 +    for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
   1.261 +  }
   1.262 +
   1.263 +  jz = jk;
   1.264 +recompute:
   1.265 +  /* distill q[] into iq[] reversingly */
   1.266 +  for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
   1.267 +    fw    =  (double)((int)(twon24* z));
   1.268 +    iq[i] =  (int)(z-two24B*fw);
   1.269 +    z     =  q[j-1]+fw;
   1.270 +  }
   1.271 +
   1.272 +  /* compute n */
   1.273 +  z  = scalbnA(z,q0);           /* actual value of z */
   1.274 +  z -= 8.0*floor(z*0.125);              /* trim off integer >= 8 */
   1.275 +  n  = (int) z;
   1.276 +  z -= (double)n;
   1.277 +  ih = 0;
   1.278 +  if(q0>0) {    /* need iq[jz-1] to determine n */
   1.279 +    i  = (iq[jz-1]>>(24-q0)); n += i;
   1.280 +    iq[jz-1] -= i<<(24-q0);
   1.281 +    ih = iq[jz-1]>>(23-q0);
   1.282 +  }
   1.283 +  else if(q0==0) ih = iq[jz-1]>>23;
   1.284 +  else if(z>=0.5) ih=2;
   1.285 +
   1.286 +  if(ih>0) {    /* q > 0.5 */
   1.287 +    n += 1; carry = 0;
   1.288 +    for(i=0;i<jz ;i++) {        /* compute 1-q */
   1.289 +      j = iq[i];
   1.290 +      if(carry==0) {
   1.291 +        if(j!=0) {
   1.292 +          carry = 1; iq[i] = 0x1000000- j;
   1.293 +        }
   1.294 +      } else  iq[i] = 0xffffff - j;
   1.295 +    }
   1.296 +    if(q0>0) {          /* rare case: chance is 1 in 12 */
   1.297 +      switch(q0) {
   1.298 +      case 1:
   1.299 +        iq[jz-1] &= 0x7fffff; break;
   1.300 +      case 2:
   1.301 +        iq[jz-1] &= 0x3fffff; break;
   1.302 +      }
   1.303 +    }
   1.304 +    if(ih==2) {
   1.305 +      z = one - z;
   1.306 +      if(carry!=0) z -= scalbnA(one,q0);
   1.307 +    }
   1.308 +  }
   1.309 +
   1.310 +  /* check if recomputation is needed */
   1.311 +  if(z==zeroB) {
   1.312 +    j = 0;
   1.313 +    for (i=jz-1;i>=jk;i--) j |= iq[i];
   1.314 +    if(j==0) { /* need recomputation */
   1.315 +      for(k=1;iq[jk-k]==0;k++);   /* k = no. of terms needed */
   1.316 +
   1.317 +      for(i=jz+1;i<=jz+k;i++) {   /* add q[jz+1] to q[jz+k] */
   1.318 +        f[jx+i] = (double) ipio2[jv+i];
   1.319 +        for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
   1.320 +        q[i] = fw;
   1.321 +      }
   1.322 +      jz += k;
   1.323 +      goto recompute;
   1.324 +    }
   1.325 +  }
   1.326 +
   1.327 +  /* chop off zero terms */
   1.328 +  if(z==0.0) {
   1.329 +    jz -= 1; q0 -= 24;
   1.330 +    while(iq[jz]==0) { jz--; q0-=24;}
   1.331 +  } else { /* break z into 24-bit if neccessary */
   1.332 +    z = scalbnA(z,-q0);
   1.333 +    if(z>=two24B) {
   1.334 +      fw = (double)((int)(twon24*z));
   1.335 +      iq[jz] = (int)(z-two24B*fw);
   1.336 +      jz += 1; q0 += 24;
   1.337 +      iq[jz] = (int) fw;
   1.338 +    } else iq[jz] = (int) z ;
   1.339 +  }
   1.340 +
   1.341 +  /* convert integer "bit" chunk to floating-point value */
   1.342 +  fw = scalbnA(one,q0);
   1.343 +  for(i=jz;i>=0;i--) {
   1.344 +    q[i] = fw*(double)iq[i]; fw*=twon24;
   1.345 +  }
   1.346 +
   1.347 +  /* compute PIo2[0,...,jp]*q[jz,...,0] */
   1.348 +  for(i=jz;i>=0;i--) {
   1.349 +    for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
   1.350 +    fq[jz-i] = fw;
   1.351 +  }
   1.352 +
   1.353 +  /* compress fq[] into y[] */
   1.354 +  switch(prec) {
   1.355 +  case 0:
   1.356 +    fw = 0.0;
   1.357 +    for (i=jz;i>=0;i--) fw += fq[i];
   1.358 +    y[0] = (ih==0)? fw: -fw;
   1.359 +    break;
   1.360 +  case 1:
   1.361 +  case 2:
   1.362 +    fw = 0.0;
   1.363 +    for (i=jz;i>=0;i--) fw += fq[i];
   1.364 +    y[0] = (ih==0)? fw: -fw;
   1.365 +    fw = fq[0]-fw;
   1.366 +    for (i=1;i<=jz;i++) fw += fq[i];
   1.367 +    y[1] = (ih==0)? fw: -fw;
   1.368 +    break;
   1.369 +  case 3:       /* painful */
   1.370 +    for (i=jz;i>0;i--) {
   1.371 +      fw      = fq[i-1]+fq[i];
   1.372 +      fq[i]  += fq[i-1]-fw;
   1.373 +      fq[i-1] = fw;
   1.374 +    }
   1.375 +    for (i=jz;i>1;i--) {
   1.376 +      fw      = fq[i-1]+fq[i];
   1.377 +      fq[i]  += fq[i-1]-fw;
   1.378 +      fq[i-1] = fw;
   1.379 +    }
   1.380 +    for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
   1.381 +    if(ih==0) {
   1.382 +      y[0] =  fq[0]; y[1] =  fq[1]; y[2] =  fw;
   1.383 +    } else {
   1.384 +      y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
   1.385 +    }
   1.386 +  }
   1.387 +  return n&7;
   1.388 +}
   1.389 +
   1.390 +
   1.391 +/*
   1.392 + * ====================================================
   1.393 + * Copyright 13 Dec 1993 Sun Microsystems, Inc.  All Rights Reserved.
   1.394 + *
   1.395 + * Developed at SunPro, a Sun Microsystems, Inc. business.
   1.396 + * Permission to use, copy, modify, and distribute this
   1.397 + * software is freely granted, provided that this notice
   1.398 + * is preserved.
   1.399 + * ====================================================
   1.400 + *
   1.401 + */
   1.402 +
   1.403 +/* __ieee754_rem_pio2(x,y)
   1.404 + *
   1.405 + * return the remainder of x rem pi/2 in y[0]+y[1]
   1.406 + * use __kernel_rem_pio2()
   1.407 + */
   1.408 +
   1.409 +/*
   1.410 + * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
   1.411 + */
   1.412 +static const int two_over_pi[] = {
   1.413 +  0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
   1.414 +  0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
   1.415 +  0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
   1.416 +  0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
   1.417 +  0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
   1.418 +  0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
   1.419 +  0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
   1.420 +  0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
   1.421 +  0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
   1.422 +  0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
   1.423 +  0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
   1.424 +};
   1.425 +
   1.426 +static const int npio2_hw[] = {
   1.427 +  0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
   1.428 +  0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
   1.429 +  0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
   1.430 +  0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
   1.431 +  0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
   1.432 +  0x404858EB, 0x404921FB,
   1.433 +};
   1.434 +
   1.435 +/*
   1.436 + * invpio2:  53 bits of 2/pi
   1.437 + * pio2_1:   first  33 bit of pi/2
   1.438 + * pio2_1t:  pi/2 - pio2_1
   1.439 + * pio2_2:   second 33 bit of pi/2
   1.440 + * pio2_2t:  pi/2 - (pio2_1+pio2_2)
   1.441 + * pio2_3:   third  33 bit of pi/2
   1.442 + * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
   1.443 + */
   1.444 +
   1.445 +static const double
   1.446 +zeroA =  0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
   1.447 +half =  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
   1.448 +two24A =  1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
   1.449 +invpio2 =  6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
   1.450 +pio2_1  =  1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
   1.451 +pio2_1t =  6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
   1.452 +pio2_2  =  6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
   1.453 +pio2_2t =  2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
   1.454 +pio2_3  =  2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
   1.455 +pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
   1.456 +
   1.457 +static int __ieee754_rem_pio2(double x, double *y) {
   1.458 +  double z,w,t,r,fn;
   1.459 +  double tx[3];
   1.460 +  int e0,i,j,nx,n,ix,hx,i0;
   1.461 +
   1.462 +  i0 = ((*(int*)&two24A)>>30)^1;        /* high word index */
   1.463 +  hx = *(i0+(int*)&x);          /* high word of x */
   1.464 +  ix = hx&0x7fffffff;
   1.465 +  if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
   1.466 +    {y[0] = x; y[1] = 0; return 0;}
   1.467 +  if(ix<0x4002d97c) {  /* |x| < 3pi/4, special case with n=+-1 */
   1.468 +    if(hx>0) {
   1.469 +      z = x - pio2_1;
   1.470 +      if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   1.471 +        y[0] = z - pio2_1t;
   1.472 +        y[1] = (z-y[0])-pio2_1t;
   1.473 +      } else {                /* near pi/2, use 33+33+53 bit pi */
   1.474 +        z -= pio2_2;
   1.475 +        y[0] = z - pio2_2t;
   1.476 +        y[1] = (z-y[0])-pio2_2t;
   1.477 +      }
   1.478 +      return 1;
   1.479 +    } else {    /* negative x */
   1.480 +      z = x + pio2_1;
   1.481 +      if(ix!=0x3ff921fb) {    /* 33+53 bit pi is good enough */
   1.482 +        y[0] = z + pio2_1t;
   1.483 +        y[1] = (z-y[0])+pio2_1t;
   1.484 +      } else {                /* near pi/2, use 33+33+53 bit pi */
   1.485 +        z += pio2_2;
   1.486 +        y[0] = z + pio2_2t;
   1.487 +        y[1] = (z-y[0])+pio2_2t;
   1.488 +      }
   1.489 +      return -1;
   1.490 +    }
   1.491 +  }
   1.492 +  if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
   1.493 +    t  = fabsd(x);
   1.494 +    n  = (int) (t*invpio2+half);
   1.495 +    fn = (double)n;
   1.496 +    r  = t-fn*pio2_1;
   1.497 +    w  = fn*pio2_1t;    /* 1st round good to 85 bit */
   1.498 +    if(n<32&&ix!=npio2_hw[n-1]) {
   1.499 +      y[0] = r-w;       /* quick check no cancellation */
   1.500 +    } else {
   1.501 +      j  = ix>>20;
   1.502 +      y[0] = r-w;
   1.503 +      i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   1.504 +      if(i>16) {  /* 2nd iteration needed, good to 118 */
   1.505 +        t  = r;
   1.506 +        w  = fn*pio2_2;
   1.507 +        r  = t-w;
   1.508 +        w  = fn*pio2_2t-((t-r)-w);
   1.509 +        y[0] = r-w;
   1.510 +        i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
   1.511 +        if(i>49)  {     /* 3rd iteration need, 151 bits acc */
   1.512 +          t  = r;       /* will cover all possible cases */
   1.513 +          w  = fn*pio2_3;
   1.514 +          r  = t-w;
   1.515 +          w  = fn*pio2_3t-((t-r)-w);
   1.516 +          y[0] = r-w;
   1.517 +        }
   1.518 +      }
   1.519 +    }
   1.520 +    y[1] = (r-y[0])-w;
   1.521 +    if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   1.522 +    else         return n;
   1.523 +  }
   1.524 +  /*
   1.525 +   * all other (large) arguments
   1.526 +   */
   1.527 +  if(ix>=0x7ff00000) {          /* x is inf or NaN */
   1.528 +    y[0]=y[1]=x-x; return 0;
   1.529 +  }
   1.530 +  /* set z = scalbn(|x|,ilogb(x)-23) */
   1.531 +  *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
   1.532 +  e0    = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
   1.533 +  *(i0+(int*)&z) = ix - (e0<<20);
   1.534 +  for(i=0;i<2;i++) {
   1.535 +    tx[i] = (double)((int)(z));
   1.536 +    z     = (z-tx[i])*two24A;
   1.537 +  }
   1.538 +  tx[2] = z;
   1.539 +  nx = 3;
   1.540 +  while(tx[nx-1]==zeroA) nx--;  /* skip zero term */
   1.541 +  n  =  __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
   1.542 +  if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
   1.543 +  return n;
   1.544 +}
   1.545 +
   1.546 +
   1.547 +/* __kernel_sin( x, y, iy)
   1.548 + * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
   1.549 + * Input x is assumed to be bounded by ~pi/4 in magnitude.
   1.550 + * Input y is the tail of x.
   1.551 + * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
   1.552 + *
   1.553 + * Algorithm
   1.554 + *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
   1.555 + *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
   1.556 + *      3. sin(x) is approximated by a polynomial of degree 13 on
   1.557 + *         [0,pi/4]
   1.558 + *                               3            13
   1.559 + *              sin(x) ~ x + S1*x + ... + S6*x
   1.560 + *         where
   1.561 + *
   1.562 + *      |sin(x)         2     4     6     8     10     12  |     -58
   1.563 + *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
   1.564 + *      |  x                                               |
   1.565 + *
   1.566 + *      4. sin(x+y) = sin(x) + sin'(x')*y
   1.567 + *                  ~ sin(x) + (1-x*x/2)*y
   1.568 + *         For better accuracy, let
   1.569 + *                   3      2      2      2      2
   1.570 + *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
   1.571 + *         then                   3    2
   1.572 + *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
   1.573 + */
   1.574 +
   1.575 +static const double
   1.576 +S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
   1.577 +S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
   1.578 +S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
   1.579 +S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
   1.580 +S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
   1.581 +S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
   1.582 +
   1.583 +static double __kernel_sin(double x, double y, int iy)
   1.584 +{
   1.585 +        double z,r,v;
   1.586 +        int ix;
   1.587 +        ix = __HI(x)&0x7fffffff;        /* high word of x */
   1.588 +        if(ix<0x3e400000)                       /* |x| < 2**-27 */
   1.589 +           {if((int)x==0) return x;}            /* generate inexact */
   1.590 +        z       =  x*x;
   1.591 +        v       =  z*x;
   1.592 +        r       =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
   1.593 +        if(iy==0) return x+v*(S1+z*r);
   1.594 +        else      return x-((z*(half*y-v*r)-y)-v*S1);
   1.595 +}
   1.596 +
   1.597 +/*
   1.598 + * __kernel_cos( x,  y )
   1.599 + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
   1.600 + * Input x is assumed to be bounded by ~pi/4 in magnitude.
   1.601 + * Input y is the tail of x.
   1.602 + *
   1.603 + * Algorithm
   1.604 + *      1. Since cos(-x) = cos(x), we need only to consider positive x.
   1.605 + *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
   1.606 + *      3. cos(x) is approximated by a polynomial of degree 14 on
   1.607 + *         [0,pi/4]
   1.608 + *                                       4            14
   1.609 + *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
   1.610 + *         where the remez error is
   1.611 + *
   1.612 + *      |              2     4     6     8     10    12     14 |     -58
   1.613 + *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
   1.614 + *      |                                                      |
   1.615 + *
   1.616 + *                     4     6     8     10    12     14
   1.617 + *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
   1.618 + *             cos(x) = 1 - x*x/2 + r
   1.619 + *         since cos(x+y) ~ cos(x) - sin(x)*y
   1.620 + *                        ~ cos(x) - x*y,
   1.621 + *         a correction term is necessary in cos(x) and hence
   1.622 + *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
   1.623 + *         For better accuracy when x > 0.3, let qx = |x|/4 with
   1.624 + *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
   1.625 + *         Then
   1.626 + *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
   1.627 + *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
   1.628 + *         magnitude of the latter is at least a quarter of x*x/2,
   1.629 + *         thus, reducing the rounding error in the subtraction.
   1.630 + */
   1.631 +
   1.632 +static const double
   1.633 +C1  =  4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
   1.634 +C2  = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
   1.635 +C3  =  2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
   1.636 +C4  = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
   1.637 +C5  =  2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
   1.638 +C6  = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
   1.639 +
   1.640 +static double __kernel_cos(double x, double y)
   1.641 +{
   1.642 +  double a,hz,z,r,qx;
   1.643 +  int ix;
   1.644 +  ix = __HI(x)&0x7fffffff;      /* ix = |x|'s high word*/
   1.645 +  if(ix<0x3e400000) {                   /* if x < 2**27 */
   1.646 +    if(((int)x)==0) return one;         /* generate inexact */
   1.647 +  }
   1.648 +  z  = x*x;
   1.649 +  r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
   1.650 +  if(ix < 0x3FD33333)                   /* if |x| < 0.3 */
   1.651 +    return one - (0.5*z - (z*r - x*y));
   1.652 +  else {
   1.653 +    if(ix > 0x3fe90000) {               /* x > 0.78125 */
   1.654 +      qx = 0.28125;
   1.655 +    } else {
   1.656 +      __HI(qx) = ix-0x00200000; /* x/4 */
   1.657 +      __LO(qx) = 0;
   1.658 +    }
   1.659 +    hz = 0.5*z-qx;
   1.660 +    a  = one-qx;
   1.661 +    return a - (hz - (z*r-x*y));
   1.662 +  }
   1.663 +}
   1.664 +
   1.665 +/* __kernel_tan( x, y, k )
   1.666 + * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
   1.667 + * Input x is assumed to be bounded by ~pi/4 in magnitude.
   1.668 + * Input y is the tail of x.
   1.669 + * Input k indicates whether tan (if k=1) or
   1.670 + * -1/tan (if k= -1) is returned.
   1.671 + *
   1.672 + * Algorithm
   1.673 + *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
   1.674 + *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
   1.675 + *      3. tan(x) is approximated by a odd polynomial of degree 27 on
   1.676 + *         [0,0.67434]
   1.677 + *                               3             27
   1.678 + *              tan(x) ~ x + T1*x + ... + T13*x
   1.679 + *         where
   1.680 + *
   1.681 + *              |tan(x)         2     4            26   |     -59.2
   1.682 + *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
   1.683 + *              |  x                                    |
   1.684 + *
   1.685 + *         Note: tan(x+y) = tan(x) + tan'(x)*y
   1.686 + *                        ~ tan(x) + (1+x*x)*y
   1.687 + *         Therefore, for better accuracy in computing tan(x+y), let
   1.688 + *                   3      2      2       2       2
   1.689 + *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
   1.690 + *         then
   1.691 + *                                  3    2
   1.692 + *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
   1.693 + *
   1.694 + *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
   1.695 + *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
   1.696 + *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
   1.697 + */
   1.698 +
   1.699 +static const double
   1.700 +pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
   1.701 +pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
   1.702 +T[] =  {
   1.703 +  3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
   1.704 +  1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
   1.705 +  5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
   1.706 +  2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
   1.707 +  8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
   1.708 +  3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
   1.709 +  1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
   1.710 +  5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
   1.711 +  2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
   1.712 +  7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
   1.713 +  7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
   1.714 + -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
   1.715 +  2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
   1.716 +};
   1.717 +
   1.718 +static double __kernel_tan(double x, double y, int iy)
   1.719 +{
   1.720 +  double z,r,v,w,s;
   1.721 +  int ix,hx;
   1.722 +  hx = __HI(x);   /* high word of x */
   1.723 +  ix = hx&0x7fffffff;     /* high word of |x| */
   1.724 +  if(ix<0x3e300000) {                     /* x < 2**-28 */
   1.725 +    if((int)x==0) {                       /* generate inexact */
   1.726 +      if (((ix | __LO(x)) | (iy + 1)) == 0)
   1.727 +        return one / fabsd(x);
   1.728 +      else {
   1.729 +        if (iy == 1)
   1.730 +          return x;
   1.731 +        else {    /* compute -1 / (x+y) carefully */
   1.732 +          double a, t;
   1.733 +
   1.734 +          z = w = x + y;
   1.735 +          __LO(z) = 0;
   1.736 +          v = y - (z - x);
   1.737 +          t = a = -one / w;
   1.738 +          __LO(t) = 0;
   1.739 +          s = one + t * z;
   1.740 +          return t + a * (s + t * v);
   1.741 +        }
   1.742 +      }
   1.743 +    }
   1.744 +  }
   1.745 +  if(ix>=0x3FE59428) {                    /* |x|>=0.6744 */
   1.746 +    if(hx<0) {x = -x; y = -y;}
   1.747 +    z = pio4-x;
   1.748 +    w = pio4lo-y;
   1.749 +    x = z+w; y = 0.0;
   1.750 +  }
   1.751 +  z       =  x*x;
   1.752 +  w       =  z*z;
   1.753 +  /* Break x^5*(T[1]+x^2*T[2]+...) into
   1.754 +   *    x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
   1.755 +   *    x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
   1.756 +   */
   1.757 +  r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
   1.758 +  v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
   1.759 +  s = z*x;
   1.760 +  r = y + z*(s*(r+v)+y);
   1.761 +  r += T[0]*s;
   1.762 +  w = x+r;
   1.763 +  if(ix>=0x3FE59428) {
   1.764 +    v = (double)iy;
   1.765 +    return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
   1.766 +  }
   1.767 +  if(iy==1) return w;
   1.768 +  else {          /* if allow error up to 2 ulp,
   1.769 +                     simply return -1.0/(x+r) here */
   1.770 +    /*  compute -1.0/(x+r) accurately */
   1.771 +    double a,t;
   1.772 +    z  = w;
   1.773 +    __LO(z) = 0;
   1.774 +    v  = r-(z - x);     /* z+v = r+x */
   1.775 +    t = a  = -1.0/w;    /* a = -1.0/w */
   1.776 +    __LO(t) = 0;
   1.777 +    s  = 1.0+t*z;
   1.778 +    return t+a*(s+t*v);
   1.779 +  }
   1.780 +}
   1.781 +
   1.782 +
   1.783 +//----------------------------------------------------------------------
   1.784 +//
   1.785 +// Routines for new sin/cos implementation
   1.786 +//
   1.787 +//----------------------------------------------------------------------
   1.788 +
   1.789 +/* sin(x)
   1.790 + * Return sine function of x.
   1.791 + *
   1.792 + * kernel function:
   1.793 + *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   1.794 + *      __kernel_cos            ... cose function on [-pi/4,pi/4]
   1.795 + *      __ieee754_rem_pio2      ... argument reduction routine
   1.796 + *
   1.797 + * Method.
   1.798 + *      Let S,C and T denote the sin, cos and tan respectively on
   1.799 + *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   1.800 + *      in [-pi/4 , +pi/4], and let n = k mod 4.
   1.801 + *      We have
   1.802 + *
   1.803 + *          n        sin(x)      cos(x)        tan(x)
   1.804 + *     ----------------------------------------------------------
   1.805 + *          0          S           C             T
   1.806 + *          1          C          -S            -1/T
   1.807 + *          2         -S          -C             T
   1.808 + *          3         -C           S            -1/T
   1.809 + *     ----------------------------------------------------------
   1.810 + *
   1.811 + * Special cases:
   1.812 + *      Let trig be any of sin, cos, or tan.
   1.813 + *      trig(+-INF)  is NaN, with signals;
   1.814 + *      trig(NaN)    is that NaN;
   1.815 + *
   1.816 + * Accuracy:
   1.817 + *      TRIG(x) returns trig(x) nearly rounded
   1.818 + */
   1.819 +
   1.820 +JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
   1.821 +  double y[2],z=0.0;
   1.822 +  int n, ix;
   1.823 +
   1.824 +  /* High word of x. */
   1.825 +  ix = __HI(x);
   1.826 +
   1.827 +  /* |x| ~< pi/4 */
   1.828 +  ix &= 0x7fffffff;
   1.829 +  if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
   1.830 +
   1.831 +  /* sin(Inf or NaN) is NaN */
   1.832 +  else if (ix>=0x7ff00000) return x-x;
   1.833 +
   1.834 +  /* argument reduction needed */
   1.835 +  else {
   1.836 +    n = __ieee754_rem_pio2(x,y);
   1.837 +    switch(n&3) {
   1.838 +    case 0: return  __kernel_sin(y[0],y[1],1);
   1.839 +    case 1: return  __kernel_cos(y[0],y[1]);
   1.840 +    case 2: return -__kernel_sin(y[0],y[1],1);
   1.841 +    default:
   1.842 +      return -__kernel_cos(y[0],y[1]);
   1.843 +    }
   1.844 +  }
   1.845 +JRT_END
   1.846 +
   1.847 +/* cos(x)
   1.848 + * Return cosine function of x.
   1.849 + *
   1.850 + * kernel function:
   1.851 + *      __kernel_sin            ... sine function on [-pi/4,pi/4]
   1.852 + *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
   1.853 + *      __ieee754_rem_pio2      ... argument reduction routine
   1.854 + *
   1.855 + * Method.
   1.856 + *      Let S,C and T denote the sin, cos and tan respectively on
   1.857 + *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   1.858 + *      in [-pi/4 , +pi/4], and let n = k mod 4.
   1.859 + *      We have
   1.860 + *
   1.861 + *          n        sin(x)      cos(x)        tan(x)
   1.862 + *     ----------------------------------------------------------
   1.863 + *          0          S           C             T
   1.864 + *          1          C          -S            -1/T
   1.865 + *          2         -S          -C             T
   1.866 + *          3         -C           S            -1/T
   1.867 + *     ----------------------------------------------------------
   1.868 + *
   1.869 + * Special cases:
   1.870 + *      Let trig be any of sin, cos, or tan.
   1.871 + *      trig(+-INF)  is NaN, with signals;
   1.872 + *      trig(NaN)    is that NaN;
   1.873 + *
   1.874 + * Accuracy:
   1.875 + *      TRIG(x) returns trig(x) nearly rounded
   1.876 + */
   1.877 +
   1.878 +JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
   1.879 +  double y[2],z=0.0;
   1.880 +  int n, ix;
   1.881 +
   1.882 +  /* High word of x. */
   1.883 +  ix = __HI(x);
   1.884 +
   1.885 +  /* |x| ~< pi/4 */
   1.886 +  ix &= 0x7fffffff;
   1.887 +  if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
   1.888 +
   1.889 +  /* cos(Inf or NaN) is NaN */
   1.890 +  else if (ix>=0x7ff00000) return x-x;
   1.891 +
   1.892 +  /* argument reduction needed */
   1.893 +  else {
   1.894 +    n = __ieee754_rem_pio2(x,y);
   1.895 +    switch(n&3) {
   1.896 +    case 0: return  __kernel_cos(y[0],y[1]);
   1.897 +    case 1: return -__kernel_sin(y[0],y[1],1);
   1.898 +    case 2: return -__kernel_cos(y[0],y[1]);
   1.899 +    default:
   1.900 +      return  __kernel_sin(y[0],y[1],1);
   1.901 +    }
   1.902 +  }
   1.903 +JRT_END
   1.904 +
   1.905 +/* tan(x)
   1.906 + * Return tangent function of x.
   1.907 + *
   1.908 + * kernel function:
   1.909 + *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
   1.910 + *      __ieee754_rem_pio2      ... argument reduction routine
   1.911 + *
   1.912 + * Method.
   1.913 + *      Let S,C and T denote the sin, cos and tan respectively on
   1.914 + *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
   1.915 + *      in [-pi/4 , +pi/4], and let n = k mod 4.
   1.916 + *      We have
   1.917 + *
   1.918 + *          n        sin(x)      cos(x)        tan(x)
   1.919 + *     ----------------------------------------------------------
   1.920 + *          0          S           C             T
   1.921 + *          1          C          -S            -1/T
   1.922 + *          2         -S          -C             T
   1.923 + *          3         -C           S            -1/T
   1.924 + *     ----------------------------------------------------------
   1.925 + *
   1.926 + * Special cases:
   1.927 + *      Let trig be any of sin, cos, or tan.
   1.928 + *      trig(+-INF)  is NaN, with signals;
   1.929 + *      trig(NaN)    is that NaN;
   1.930 + *
   1.931 + * Accuracy:
   1.932 + *      TRIG(x) returns trig(x) nearly rounded
   1.933 + */
   1.934 +
   1.935 +JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
   1.936 +  double y[2],z=0.0;
   1.937 +  int n, ix;
   1.938 +
   1.939 +  /* High word of x. */
   1.940 +  ix = __HI(x);
   1.941 +
   1.942 +  /* |x| ~< pi/4 */
   1.943 +  ix &= 0x7fffffff;
   1.944 +  if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
   1.945 +
   1.946 +  /* tan(Inf or NaN) is NaN */
   1.947 +  else if (ix>=0x7ff00000) return x-x;            /* NaN */
   1.948 +
   1.949 +  /* argument reduction needed */
   1.950 +  else {
   1.951 +    n = __ieee754_rem_pio2(x,y);
   1.952 +    return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /*   1 -- n even
   1.953 +                                                     -1 -- n odd */
   1.954 +  }
   1.955 +JRT_END
   1.956 +
   1.957 +
   1.958 +#ifdef WIN32
   1.959 +# pragma optimize ( "", on )
   1.960 +#endif

mercurial