1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/runtime/sharedRuntimeTrig.cpp Sat Dec 01 00:00:00 2007 +0000 1.3 @@ -0,0 +1,957 @@ 1.4 +/* 1.5 + * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#include "incls/_precompiled.incl" 1.29 +#include "incls/_sharedRuntimeTrig.cpp.incl" 1.30 + 1.31 +// This file contains copies of the fdlibm routines used by 1.32 +// StrictMath. It turns out that it is almost always required to use 1.33 +// these runtime routines; the Intel CPU doesn't meet the Java 1.34 +// specification for sin/cos outside a certain limited argument range, 1.35 +// and the SPARC CPU doesn't appear to have sin/cos instructions. It 1.36 +// also turns out that avoiding the indirect call through function 1.37 +// pointer out to libjava.so in SharedRuntime speeds these routines up 1.38 +// by roughly 15% on both Win32/x86 and Solaris/SPARC. 1.39 + 1.40 +// Enabling optimizations in this file causes incorrect code to be 1.41 +// generated; can not figure out how to turn down optimization for one 1.42 +// file in the IDE on Windows 1.43 +#ifdef WIN32 1.44 +# pragma optimize ( "", off ) 1.45 +#endif 1.46 + 1.47 +#include <math.h> 1.48 + 1.49 +// VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles 1.50 +// [jk] this is not 100% correct because the float word order may different 1.51 +// from the byte order (e.g. on ARM) 1.52 +#ifdef VM_LITTLE_ENDIAN 1.53 +# define __HI(x) *(1+(int*)&x) 1.54 +# define __LO(x) *(int*)&x 1.55 +#else 1.56 +# define __HI(x) *(int*)&x 1.57 +# define __LO(x) *(1+(int*)&x) 1.58 +#endif 1.59 + 1.60 +static double copysignA(double x, double y) { 1.61 + __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000); 1.62 + return x; 1.63 +} 1.64 + 1.65 +/* 1.66 + * scalbn (double x, int n) 1.67 + * scalbn(x,n) returns x* 2**n computed by exponent 1.68 + * manipulation rather than by actually performing an 1.69 + * exponentiation or a multiplication. 1.70 + */ 1.71 + 1.72 +static const double 1.73 +two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ 1.74 +twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */ 1.75 +hugeX = 1.0e+300, 1.76 +tiny = 1.0e-300; 1.77 + 1.78 +static double scalbnA (double x, int n) { 1.79 + int k,hx,lx; 1.80 + hx = __HI(x); 1.81 + lx = __LO(x); 1.82 + k = (hx&0x7ff00000)>>20; /* extract exponent */ 1.83 + if (k==0) { /* 0 or subnormal x */ 1.84 + if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */ 1.85 + x *= two54; 1.86 + hx = __HI(x); 1.87 + k = ((hx&0x7ff00000)>>20) - 54; 1.88 + if (n< -50000) return tiny*x; /*underflow*/ 1.89 + } 1.90 + if (k==0x7ff) return x+x; /* NaN or Inf */ 1.91 + k = k+n; 1.92 + if (k > 0x7fe) return hugeX*copysignA(hugeX,x); /* overflow */ 1.93 + if (k > 0) /* normal result */ 1.94 + {__HI(x) = (hx&0x800fffff)|(k<<20); return x;} 1.95 + if (k <= -54) { 1.96 + if (n > 50000) /* in case integer overflow in n+k */ 1.97 + return hugeX*copysignA(hugeX,x); /*overflow*/ 1.98 + else return tiny*copysignA(tiny,x); /*underflow*/ 1.99 + } 1.100 + k += 54; /* subnormal result */ 1.101 + __HI(x) = (hx&0x800fffff)|(k<<20); 1.102 + return x*twom54; 1.103 +} 1.104 + 1.105 +/* 1.106 + * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) 1.107 + * double x[],y[]; int e0,nx,prec; int ipio2[]; 1.108 + * 1.109 + * __kernel_rem_pio2 return the last three digits of N with 1.110 + * y = x - N*pi/2 1.111 + * so that |y| < pi/2. 1.112 + * 1.113 + * The method is to compute the integer (mod 8) and fraction parts of 1.114 + * (2/pi)*x without doing the full multiplication. In general we 1.115 + * skip the part of the product that are known to be a huge integer ( 1.116 + * more accurately, = 0 mod 8 ). Thus the number of operations are 1.117 + * independent of the exponent of the input. 1.118 + * 1.119 + * (2/pi) is represented by an array of 24-bit integers in ipio2[]. 1.120 + * 1.121 + * Input parameters: 1.122 + * x[] The input value (must be positive) is broken into nx 1.123 + * pieces of 24-bit integers in double precision format. 1.124 + * x[i] will be the i-th 24 bit of x. The scaled exponent 1.125 + * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 1.126 + * match x's up to 24 bits. 1.127 + * 1.128 + * Example of breaking a double positive z into x[0]+x[1]+x[2]: 1.129 + * e0 = ilogb(z)-23 1.130 + * z = scalbn(z,-e0) 1.131 + * for i = 0,1,2 1.132 + * x[i] = floor(z) 1.133 + * z = (z-x[i])*2**24 1.134 + * 1.135 + * 1.136 + * y[] ouput result in an array of double precision numbers. 1.137 + * The dimension of y[] is: 1.138 + * 24-bit precision 1 1.139 + * 53-bit precision 2 1.140 + * 64-bit precision 2 1.141 + * 113-bit precision 3 1.142 + * The actual value is the sum of them. Thus for 113-bit 1.143 + * precsion, one may have to do something like: 1.144 + * 1.145 + * long double t,w,r_head, r_tail; 1.146 + * t = (long double)y[2] + (long double)y[1]; 1.147 + * w = (long double)y[0]; 1.148 + * r_head = t+w; 1.149 + * r_tail = w - (r_head - t); 1.150 + * 1.151 + * e0 The exponent of x[0] 1.152 + * 1.153 + * nx dimension of x[] 1.154 + * 1.155 + * prec an interger indicating the precision: 1.156 + * 0 24 bits (single) 1.157 + * 1 53 bits (double) 1.158 + * 2 64 bits (extended) 1.159 + * 3 113 bits (quad) 1.160 + * 1.161 + * ipio2[] 1.162 + * integer array, contains the (24*i)-th to (24*i+23)-th 1.163 + * bit of 2/pi after binary point. The corresponding 1.164 + * floating value is 1.165 + * 1.166 + * ipio2[i] * 2^(-24(i+1)). 1.167 + * 1.168 + * External function: 1.169 + * double scalbn(), floor(); 1.170 + * 1.171 + * 1.172 + * Here is the description of some local variables: 1.173 + * 1.174 + * jk jk+1 is the initial number of terms of ipio2[] needed 1.175 + * in the computation. The recommended value is 2,3,4, 1.176 + * 6 for single, double, extended,and quad. 1.177 + * 1.178 + * jz local integer variable indicating the number of 1.179 + * terms of ipio2[] used. 1.180 + * 1.181 + * jx nx - 1 1.182 + * 1.183 + * jv index for pointing to the suitable ipio2[] for the 1.184 + * computation. In general, we want 1.185 + * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 1.186 + * is an integer. Thus 1.187 + * e0-3-24*jv >= 0 or (e0-3)/24 >= jv 1.188 + * Hence jv = max(0,(e0-3)/24). 1.189 + * 1.190 + * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. 1.191 + * 1.192 + * q[] double array with integral value, representing the 1.193 + * 24-bits chunk of the product of x and 2/pi. 1.194 + * 1.195 + * q0 the corresponding exponent of q[0]. Note that the 1.196 + * exponent for q[i] would be q0-24*i. 1.197 + * 1.198 + * PIo2[] double precision array, obtained by cutting pi/2 1.199 + * into 24 bits chunks. 1.200 + * 1.201 + * f[] ipio2[] in floating point 1.202 + * 1.203 + * iq[] integer array by breaking up q[] in 24-bits chunk. 1.204 + * 1.205 + * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] 1.206 + * 1.207 + * ih integer. If >0 it indicats q[] is >= 0.5, hence 1.208 + * it also indicates the *sign* of the result. 1.209 + * 1.210 + */ 1.211 + 1.212 + 1.213 +/* 1.214 + * Constants: 1.215 + * The hexadecimal values are the intended ones for the following 1.216 + * constants. The decimal values may be used, provided that the 1.217 + * compiler will convert from decimal to binary accurately enough 1.218 + * to produce the hexadecimal values shown. 1.219 + */ 1.220 + 1.221 + 1.222 +static const int init_jk[] = {2,3,4,6}; /* initial value for jk */ 1.223 + 1.224 +static const double PIo2[] = { 1.225 + 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ 1.226 + 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ 1.227 + 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ 1.228 + 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ 1.229 + 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ 1.230 + 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ 1.231 + 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ 1.232 + 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ 1.233 +}; 1.234 + 1.235 +static const double 1.236 +zeroB = 0.0, 1.237 +one = 1.0, 1.238 +two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ 1.239 +twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */ 1.240 + 1.241 +static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) { 1.242 + int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; 1.243 + double z,fw,f[20],fq[20],q[20]; 1.244 + 1.245 + /* initialize jk*/ 1.246 + jk = init_jk[prec]; 1.247 + jp = jk; 1.248 + 1.249 + /* determine jx,jv,q0, note that 3>q0 */ 1.250 + jx = nx-1; 1.251 + jv = (e0-3)/24; if(jv<0) jv=0; 1.252 + q0 = e0-24*(jv+1); 1.253 + 1.254 + /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ 1.255 + j = jv-jx; m = jx+jk; 1.256 + for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j]; 1.257 + 1.258 + /* compute q[0],q[1],...q[jk] */ 1.259 + for (i=0;i<=jk;i++) { 1.260 + for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; 1.261 + } 1.262 + 1.263 + jz = jk; 1.264 +recompute: 1.265 + /* distill q[] into iq[] reversingly */ 1.266 + for(i=0,j=jz,z=q[jz];j>0;i++,j--) { 1.267 + fw = (double)((int)(twon24* z)); 1.268 + iq[i] = (int)(z-two24B*fw); 1.269 + z = q[j-1]+fw; 1.270 + } 1.271 + 1.272 + /* compute n */ 1.273 + z = scalbnA(z,q0); /* actual value of z */ 1.274 + z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ 1.275 + n = (int) z; 1.276 + z -= (double)n; 1.277 + ih = 0; 1.278 + if(q0>0) { /* need iq[jz-1] to determine n */ 1.279 + i = (iq[jz-1]>>(24-q0)); n += i; 1.280 + iq[jz-1] -= i<<(24-q0); 1.281 + ih = iq[jz-1]>>(23-q0); 1.282 + } 1.283 + else if(q0==0) ih = iq[jz-1]>>23; 1.284 + else if(z>=0.5) ih=2; 1.285 + 1.286 + if(ih>0) { /* q > 0.5 */ 1.287 + n += 1; carry = 0; 1.288 + for(i=0;i<jz ;i++) { /* compute 1-q */ 1.289 + j = iq[i]; 1.290 + if(carry==0) { 1.291 + if(j!=0) { 1.292 + carry = 1; iq[i] = 0x1000000- j; 1.293 + } 1.294 + } else iq[i] = 0xffffff - j; 1.295 + } 1.296 + if(q0>0) { /* rare case: chance is 1 in 12 */ 1.297 + switch(q0) { 1.298 + case 1: 1.299 + iq[jz-1] &= 0x7fffff; break; 1.300 + case 2: 1.301 + iq[jz-1] &= 0x3fffff; break; 1.302 + } 1.303 + } 1.304 + if(ih==2) { 1.305 + z = one - z; 1.306 + if(carry!=0) z -= scalbnA(one,q0); 1.307 + } 1.308 + } 1.309 + 1.310 + /* check if recomputation is needed */ 1.311 + if(z==zeroB) { 1.312 + j = 0; 1.313 + for (i=jz-1;i>=jk;i--) j |= iq[i]; 1.314 + if(j==0) { /* need recomputation */ 1.315 + for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ 1.316 + 1.317 + for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ 1.318 + f[jx+i] = (double) ipio2[jv+i]; 1.319 + for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; 1.320 + q[i] = fw; 1.321 + } 1.322 + jz += k; 1.323 + goto recompute; 1.324 + } 1.325 + } 1.326 + 1.327 + /* chop off zero terms */ 1.328 + if(z==0.0) { 1.329 + jz -= 1; q0 -= 24; 1.330 + while(iq[jz]==0) { jz--; q0-=24;} 1.331 + } else { /* break z into 24-bit if neccessary */ 1.332 + z = scalbnA(z,-q0); 1.333 + if(z>=two24B) { 1.334 + fw = (double)((int)(twon24*z)); 1.335 + iq[jz] = (int)(z-two24B*fw); 1.336 + jz += 1; q0 += 24; 1.337 + iq[jz] = (int) fw; 1.338 + } else iq[jz] = (int) z ; 1.339 + } 1.340 + 1.341 + /* convert integer "bit" chunk to floating-point value */ 1.342 + fw = scalbnA(one,q0); 1.343 + for(i=jz;i>=0;i--) { 1.344 + q[i] = fw*(double)iq[i]; fw*=twon24; 1.345 + } 1.346 + 1.347 + /* compute PIo2[0,...,jp]*q[jz,...,0] */ 1.348 + for(i=jz;i>=0;i--) { 1.349 + for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; 1.350 + fq[jz-i] = fw; 1.351 + } 1.352 + 1.353 + /* compress fq[] into y[] */ 1.354 + switch(prec) { 1.355 + case 0: 1.356 + fw = 0.0; 1.357 + for (i=jz;i>=0;i--) fw += fq[i]; 1.358 + y[0] = (ih==0)? fw: -fw; 1.359 + break; 1.360 + case 1: 1.361 + case 2: 1.362 + fw = 0.0; 1.363 + for (i=jz;i>=0;i--) fw += fq[i]; 1.364 + y[0] = (ih==0)? fw: -fw; 1.365 + fw = fq[0]-fw; 1.366 + for (i=1;i<=jz;i++) fw += fq[i]; 1.367 + y[1] = (ih==0)? fw: -fw; 1.368 + break; 1.369 + case 3: /* painful */ 1.370 + for (i=jz;i>0;i--) { 1.371 + fw = fq[i-1]+fq[i]; 1.372 + fq[i] += fq[i-1]-fw; 1.373 + fq[i-1] = fw; 1.374 + } 1.375 + for (i=jz;i>1;i--) { 1.376 + fw = fq[i-1]+fq[i]; 1.377 + fq[i] += fq[i-1]-fw; 1.378 + fq[i-1] = fw; 1.379 + } 1.380 + for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; 1.381 + if(ih==0) { 1.382 + y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; 1.383 + } else { 1.384 + y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; 1.385 + } 1.386 + } 1.387 + return n&7; 1.388 +} 1.389 + 1.390 + 1.391 +/* 1.392 + * ==================================================== 1.393 + * Copyright 13 Dec 1993 Sun Microsystems, Inc. All Rights Reserved. 1.394 + * 1.395 + * Developed at SunPro, a Sun Microsystems, Inc. business. 1.396 + * Permission to use, copy, modify, and distribute this 1.397 + * software is freely granted, provided that this notice 1.398 + * is preserved. 1.399 + * ==================================================== 1.400 + * 1.401 + */ 1.402 + 1.403 +/* __ieee754_rem_pio2(x,y) 1.404 + * 1.405 + * return the remainder of x rem pi/2 in y[0]+y[1] 1.406 + * use __kernel_rem_pio2() 1.407 + */ 1.408 + 1.409 +/* 1.410 + * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi 1.411 + */ 1.412 +static const int two_over_pi[] = { 1.413 + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 1.414 + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 1.415 + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 1.416 + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 1.417 + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, 1.418 + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, 1.419 + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 1.420 + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 1.421 + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 1.422 + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 1.423 + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, 1.424 +}; 1.425 + 1.426 +static const int npio2_hw[] = { 1.427 + 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, 1.428 + 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, 1.429 + 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, 1.430 + 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, 1.431 + 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, 1.432 + 0x404858EB, 0x404921FB, 1.433 +}; 1.434 + 1.435 +/* 1.436 + * invpio2: 53 bits of 2/pi 1.437 + * pio2_1: first 33 bit of pi/2 1.438 + * pio2_1t: pi/2 - pio2_1 1.439 + * pio2_2: second 33 bit of pi/2 1.440 + * pio2_2t: pi/2 - (pio2_1+pio2_2) 1.441 + * pio2_3: third 33 bit of pi/2 1.442 + * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) 1.443 + */ 1.444 + 1.445 +static const double 1.446 +zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ 1.447 +half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ 1.448 +two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ 1.449 +invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ 1.450 +pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ 1.451 +pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ 1.452 +pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */ 1.453 +pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ 1.454 +pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ 1.455 +pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ 1.456 + 1.457 +static int __ieee754_rem_pio2(double x, double *y) { 1.458 + double z,w,t,r,fn; 1.459 + double tx[3]; 1.460 + int e0,i,j,nx,n,ix,hx,i0; 1.461 + 1.462 + i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ 1.463 + hx = *(i0+(int*)&x); /* high word of x */ 1.464 + ix = hx&0x7fffffff; 1.465 + if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */ 1.466 + {y[0] = x; y[1] = 0; return 0;} 1.467 + if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ 1.468 + if(hx>0) { 1.469 + z = x - pio2_1; 1.470 + if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ 1.471 + y[0] = z - pio2_1t; 1.472 + y[1] = (z-y[0])-pio2_1t; 1.473 + } else { /* near pi/2, use 33+33+53 bit pi */ 1.474 + z -= pio2_2; 1.475 + y[0] = z - pio2_2t; 1.476 + y[1] = (z-y[0])-pio2_2t; 1.477 + } 1.478 + return 1; 1.479 + } else { /* negative x */ 1.480 + z = x + pio2_1; 1.481 + if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ 1.482 + y[0] = z + pio2_1t; 1.483 + y[1] = (z-y[0])+pio2_1t; 1.484 + } else { /* near pi/2, use 33+33+53 bit pi */ 1.485 + z += pio2_2; 1.486 + y[0] = z + pio2_2t; 1.487 + y[1] = (z-y[0])+pio2_2t; 1.488 + } 1.489 + return -1; 1.490 + } 1.491 + } 1.492 + if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ 1.493 + t = fabsd(x); 1.494 + n = (int) (t*invpio2+half); 1.495 + fn = (double)n; 1.496 + r = t-fn*pio2_1; 1.497 + w = fn*pio2_1t; /* 1st round good to 85 bit */ 1.498 + if(n<32&&ix!=npio2_hw[n-1]) { 1.499 + y[0] = r-w; /* quick check no cancellation */ 1.500 + } else { 1.501 + j = ix>>20; 1.502 + y[0] = r-w; 1.503 + i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); 1.504 + if(i>16) { /* 2nd iteration needed, good to 118 */ 1.505 + t = r; 1.506 + w = fn*pio2_2; 1.507 + r = t-w; 1.508 + w = fn*pio2_2t-((t-r)-w); 1.509 + y[0] = r-w; 1.510 + i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); 1.511 + if(i>49) { /* 3rd iteration need, 151 bits acc */ 1.512 + t = r; /* will cover all possible cases */ 1.513 + w = fn*pio2_3; 1.514 + r = t-w; 1.515 + w = fn*pio2_3t-((t-r)-w); 1.516 + y[0] = r-w; 1.517 + } 1.518 + } 1.519 + } 1.520 + y[1] = (r-y[0])-w; 1.521 + if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} 1.522 + else return n; 1.523 + } 1.524 + /* 1.525 + * all other (large) arguments 1.526 + */ 1.527 + if(ix>=0x7ff00000) { /* x is inf or NaN */ 1.528 + y[0]=y[1]=x-x; return 0; 1.529 + } 1.530 + /* set z = scalbn(|x|,ilogb(x)-23) */ 1.531 + *(1-i0+(int*)&z) = *(1-i0+(int*)&x); 1.532 + e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ 1.533 + *(i0+(int*)&z) = ix - (e0<<20); 1.534 + for(i=0;i<2;i++) { 1.535 + tx[i] = (double)((int)(z)); 1.536 + z = (z-tx[i])*two24A; 1.537 + } 1.538 + tx[2] = z; 1.539 + nx = 3; 1.540 + while(tx[nx-1]==zeroA) nx--; /* skip zero term */ 1.541 + n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); 1.542 + if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} 1.543 + return n; 1.544 +} 1.545 + 1.546 + 1.547 +/* __kernel_sin( x, y, iy) 1.548 + * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 1.549 + * Input x is assumed to be bounded by ~pi/4 in magnitude. 1.550 + * Input y is the tail of x. 1.551 + * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). 1.552 + * 1.553 + * Algorithm 1.554 + * 1. Since sin(-x) = -sin(x), we need only to consider positive x. 1.555 + * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. 1.556 + * 3. sin(x) is approximated by a polynomial of degree 13 on 1.557 + * [0,pi/4] 1.558 + * 3 13 1.559 + * sin(x) ~ x + S1*x + ... + S6*x 1.560 + * where 1.561 + * 1.562 + * |sin(x) 2 4 6 8 10 12 | -58 1.563 + * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 1.564 + * | x | 1.565 + * 1.566 + * 4. sin(x+y) = sin(x) + sin'(x')*y 1.567 + * ~ sin(x) + (1-x*x/2)*y 1.568 + * For better accuracy, let 1.569 + * 3 2 2 2 2 1.570 + * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) 1.571 + * then 3 2 1.572 + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) 1.573 + */ 1.574 + 1.575 +static const double 1.576 +S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ 1.577 +S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ 1.578 +S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ 1.579 +S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ 1.580 +S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ 1.581 +S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ 1.582 + 1.583 +static double __kernel_sin(double x, double y, int iy) 1.584 +{ 1.585 + double z,r,v; 1.586 + int ix; 1.587 + ix = __HI(x)&0x7fffffff; /* high word of x */ 1.588 + if(ix<0x3e400000) /* |x| < 2**-27 */ 1.589 + {if((int)x==0) return x;} /* generate inexact */ 1.590 + z = x*x; 1.591 + v = z*x; 1.592 + r = S2+z*(S3+z*(S4+z*(S5+z*S6))); 1.593 + if(iy==0) return x+v*(S1+z*r); 1.594 + else return x-((z*(half*y-v*r)-y)-v*S1); 1.595 +} 1.596 + 1.597 +/* 1.598 + * __kernel_cos( x, y ) 1.599 + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 1.600 + * Input x is assumed to be bounded by ~pi/4 in magnitude. 1.601 + * Input y is the tail of x. 1.602 + * 1.603 + * Algorithm 1.604 + * 1. Since cos(-x) = cos(x), we need only to consider positive x. 1.605 + * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. 1.606 + * 3. cos(x) is approximated by a polynomial of degree 14 on 1.607 + * [0,pi/4] 1.608 + * 4 14 1.609 + * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x 1.610 + * where the remez error is 1.611 + * 1.612 + * | 2 4 6 8 10 12 14 | -58 1.613 + * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 1.614 + * | | 1.615 + * 1.616 + * 4 6 8 10 12 14 1.617 + * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then 1.618 + * cos(x) = 1 - x*x/2 + r 1.619 + * since cos(x+y) ~ cos(x) - sin(x)*y 1.620 + * ~ cos(x) - x*y, 1.621 + * a correction term is necessary in cos(x) and hence 1.622 + * cos(x+y) = 1 - (x*x/2 - (r - x*y)) 1.623 + * For better accuracy when x > 0.3, let qx = |x|/4 with 1.624 + * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. 1.625 + * Then 1.626 + * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). 1.627 + * Note that 1-qx and (x*x/2-qx) is EXACT here, and the 1.628 + * magnitude of the latter is at least a quarter of x*x/2, 1.629 + * thus, reducing the rounding error in the subtraction. 1.630 + */ 1.631 + 1.632 +static const double 1.633 +C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ 1.634 +C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ 1.635 +C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ 1.636 +C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ 1.637 +C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ 1.638 +C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ 1.639 + 1.640 +static double __kernel_cos(double x, double y) 1.641 +{ 1.642 + double a,hz,z,r,qx; 1.643 + int ix; 1.644 + ix = __HI(x)&0x7fffffff; /* ix = |x|'s high word*/ 1.645 + if(ix<0x3e400000) { /* if x < 2**27 */ 1.646 + if(((int)x)==0) return one; /* generate inexact */ 1.647 + } 1.648 + z = x*x; 1.649 + r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); 1.650 + if(ix < 0x3FD33333) /* if |x| < 0.3 */ 1.651 + return one - (0.5*z - (z*r - x*y)); 1.652 + else { 1.653 + if(ix > 0x3fe90000) { /* x > 0.78125 */ 1.654 + qx = 0.28125; 1.655 + } else { 1.656 + __HI(qx) = ix-0x00200000; /* x/4 */ 1.657 + __LO(qx) = 0; 1.658 + } 1.659 + hz = 0.5*z-qx; 1.660 + a = one-qx; 1.661 + return a - (hz - (z*r-x*y)); 1.662 + } 1.663 +} 1.664 + 1.665 +/* __kernel_tan( x, y, k ) 1.666 + * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854 1.667 + * Input x is assumed to be bounded by ~pi/4 in magnitude. 1.668 + * Input y is the tail of x. 1.669 + * Input k indicates whether tan (if k=1) or 1.670 + * -1/tan (if k= -1) is returned. 1.671 + * 1.672 + * Algorithm 1.673 + * 1. Since tan(-x) = -tan(x), we need only to consider positive x. 1.674 + * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0. 1.675 + * 3. tan(x) is approximated by a odd polynomial of degree 27 on 1.676 + * [0,0.67434] 1.677 + * 3 27 1.678 + * tan(x) ~ x + T1*x + ... + T13*x 1.679 + * where 1.680 + * 1.681 + * |tan(x) 2 4 26 | -59.2 1.682 + * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 1.683 + * | x | 1.684 + * 1.685 + * Note: tan(x+y) = tan(x) + tan'(x)*y 1.686 + * ~ tan(x) + (1+x*x)*y 1.687 + * Therefore, for better accuracy in computing tan(x+y), let 1.688 + * 3 2 2 2 2 1.689 + * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) 1.690 + * then 1.691 + * 3 2 1.692 + * tan(x+y) = x + (T1*x + (x *(r+y)+y)) 1.693 + * 1.694 + * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then 1.695 + * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) 1.696 + * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) 1.697 + */ 1.698 + 1.699 +static const double 1.700 +pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */ 1.701 +pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */ 1.702 +T[] = { 1.703 + 3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */ 1.704 + 1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */ 1.705 + 5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */ 1.706 + 2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */ 1.707 + 8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */ 1.708 + 3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */ 1.709 + 1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */ 1.710 + 5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */ 1.711 + 2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */ 1.712 + 7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */ 1.713 + 7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */ 1.714 + -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */ 1.715 + 2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */ 1.716 +}; 1.717 + 1.718 +static double __kernel_tan(double x, double y, int iy) 1.719 +{ 1.720 + double z,r,v,w,s; 1.721 + int ix,hx; 1.722 + hx = __HI(x); /* high word of x */ 1.723 + ix = hx&0x7fffffff; /* high word of |x| */ 1.724 + if(ix<0x3e300000) { /* x < 2**-28 */ 1.725 + if((int)x==0) { /* generate inexact */ 1.726 + if (((ix | __LO(x)) | (iy + 1)) == 0) 1.727 + return one / fabsd(x); 1.728 + else { 1.729 + if (iy == 1) 1.730 + return x; 1.731 + else { /* compute -1 / (x+y) carefully */ 1.732 + double a, t; 1.733 + 1.734 + z = w = x + y; 1.735 + __LO(z) = 0; 1.736 + v = y - (z - x); 1.737 + t = a = -one / w; 1.738 + __LO(t) = 0; 1.739 + s = one + t * z; 1.740 + return t + a * (s + t * v); 1.741 + } 1.742 + } 1.743 + } 1.744 + } 1.745 + if(ix>=0x3FE59428) { /* |x|>=0.6744 */ 1.746 + if(hx<0) {x = -x; y = -y;} 1.747 + z = pio4-x; 1.748 + w = pio4lo-y; 1.749 + x = z+w; y = 0.0; 1.750 + } 1.751 + z = x*x; 1.752 + w = z*z; 1.753 + /* Break x^5*(T[1]+x^2*T[2]+...) into 1.754 + * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + 1.755 + * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) 1.756 + */ 1.757 + r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11])))); 1.758 + v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12]))))); 1.759 + s = z*x; 1.760 + r = y + z*(s*(r+v)+y); 1.761 + r += T[0]*s; 1.762 + w = x+r; 1.763 + if(ix>=0x3FE59428) { 1.764 + v = (double)iy; 1.765 + return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r))); 1.766 + } 1.767 + if(iy==1) return w; 1.768 + else { /* if allow error up to 2 ulp, 1.769 + simply return -1.0/(x+r) here */ 1.770 + /* compute -1.0/(x+r) accurately */ 1.771 + double a,t; 1.772 + z = w; 1.773 + __LO(z) = 0; 1.774 + v = r-(z - x); /* z+v = r+x */ 1.775 + t = a = -1.0/w; /* a = -1.0/w */ 1.776 + __LO(t) = 0; 1.777 + s = 1.0+t*z; 1.778 + return t+a*(s+t*v); 1.779 + } 1.780 +} 1.781 + 1.782 + 1.783 +//---------------------------------------------------------------------- 1.784 +// 1.785 +// Routines for new sin/cos implementation 1.786 +// 1.787 +//---------------------------------------------------------------------- 1.788 + 1.789 +/* sin(x) 1.790 + * Return sine function of x. 1.791 + * 1.792 + * kernel function: 1.793 + * __kernel_sin ... sine function on [-pi/4,pi/4] 1.794 + * __kernel_cos ... cose function on [-pi/4,pi/4] 1.795 + * __ieee754_rem_pio2 ... argument reduction routine 1.796 + * 1.797 + * Method. 1.798 + * Let S,C and T denote the sin, cos and tan respectively on 1.799 + * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 1.800 + * in [-pi/4 , +pi/4], and let n = k mod 4. 1.801 + * We have 1.802 + * 1.803 + * n sin(x) cos(x) tan(x) 1.804 + * ---------------------------------------------------------- 1.805 + * 0 S C T 1.806 + * 1 C -S -1/T 1.807 + * 2 -S -C T 1.808 + * 3 -C S -1/T 1.809 + * ---------------------------------------------------------- 1.810 + * 1.811 + * Special cases: 1.812 + * Let trig be any of sin, cos, or tan. 1.813 + * trig(+-INF) is NaN, with signals; 1.814 + * trig(NaN) is that NaN; 1.815 + * 1.816 + * Accuracy: 1.817 + * TRIG(x) returns trig(x) nearly rounded 1.818 + */ 1.819 + 1.820 +JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x)) 1.821 + double y[2],z=0.0; 1.822 + int n, ix; 1.823 + 1.824 + /* High word of x. */ 1.825 + ix = __HI(x); 1.826 + 1.827 + /* |x| ~< pi/4 */ 1.828 + ix &= 0x7fffffff; 1.829 + if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0); 1.830 + 1.831 + /* sin(Inf or NaN) is NaN */ 1.832 + else if (ix>=0x7ff00000) return x-x; 1.833 + 1.834 + /* argument reduction needed */ 1.835 + else { 1.836 + n = __ieee754_rem_pio2(x,y); 1.837 + switch(n&3) { 1.838 + case 0: return __kernel_sin(y[0],y[1],1); 1.839 + case 1: return __kernel_cos(y[0],y[1]); 1.840 + case 2: return -__kernel_sin(y[0],y[1],1); 1.841 + default: 1.842 + return -__kernel_cos(y[0],y[1]); 1.843 + } 1.844 + } 1.845 +JRT_END 1.846 + 1.847 +/* cos(x) 1.848 + * Return cosine function of x. 1.849 + * 1.850 + * kernel function: 1.851 + * __kernel_sin ... sine function on [-pi/4,pi/4] 1.852 + * __kernel_cos ... cosine function on [-pi/4,pi/4] 1.853 + * __ieee754_rem_pio2 ... argument reduction routine 1.854 + * 1.855 + * Method. 1.856 + * Let S,C and T denote the sin, cos and tan respectively on 1.857 + * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 1.858 + * in [-pi/4 , +pi/4], and let n = k mod 4. 1.859 + * We have 1.860 + * 1.861 + * n sin(x) cos(x) tan(x) 1.862 + * ---------------------------------------------------------- 1.863 + * 0 S C T 1.864 + * 1 C -S -1/T 1.865 + * 2 -S -C T 1.866 + * 3 -C S -1/T 1.867 + * ---------------------------------------------------------- 1.868 + * 1.869 + * Special cases: 1.870 + * Let trig be any of sin, cos, or tan. 1.871 + * trig(+-INF) is NaN, with signals; 1.872 + * trig(NaN) is that NaN; 1.873 + * 1.874 + * Accuracy: 1.875 + * TRIG(x) returns trig(x) nearly rounded 1.876 + */ 1.877 + 1.878 +JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x)) 1.879 + double y[2],z=0.0; 1.880 + int n, ix; 1.881 + 1.882 + /* High word of x. */ 1.883 + ix = __HI(x); 1.884 + 1.885 + /* |x| ~< pi/4 */ 1.886 + ix &= 0x7fffffff; 1.887 + if(ix <= 0x3fe921fb) return __kernel_cos(x,z); 1.888 + 1.889 + /* cos(Inf or NaN) is NaN */ 1.890 + else if (ix>=0x7ff00000) return x-x; 1.891 + 1.892 + /* argument reduction needed */ 1.893 + else { 1.894 + n = __ieee754_rem_pio2(x,y); 1.895 + switch(n&3) { 1.896 + case 0: return __kernel_cos(y[0],y[1]); 1.897 + case 1: return -__kernel_sin(y[0],y[1],1); 1.898 + case 2: return -__kernel_cos(y[0],y[1]); 1.899 + default: 1.900 + return __kernel_sin(y[0],y[1],1); 1.901 + } 1.902 + } 1.903 +JRT_END 1.904 + 1.905 +/* tan(x) 1.906 + * Return tangent function of x. 1.907 + * 1.908 + * kernel function: 1.909 + * __kernel_tan ... tangent function on [-pi/4,pi/4] 1.910 + * __ieee754_rem_pio2 ... argument reduction routine 1.911 + * 1.912 + * Method. 1.913 + * Let S,C and T denote the sin, cos and tan respectively on 1.914 + * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 1.915 + * in [-pi/4 , +pi/4], and let n = k mod 4. 1.916 + * We have 1.917 + * 1.918 + * n sin(x) cos(x) tan(x) 1.919 + * ---------------------------------------------------------- 1.920 + * 0 S C T 1.921 + * 1 C -S -1/T 1.922 + * 2 -S -C T 1.923 + * 3 -C S -1/T 1.924 + * ---------------------------------------------------------- 1.925 + * 1.926 + * Special cases: 1.927 + * Let trig be any of sin, cos, or tan. 1.928 + * trig(+-INF) is NaN, with signals; 1.929 + * trig(NaN) is that NaN; 1.930 + * 1.931 + * Accuracy: 1.932 + * TRIG(x) returns trig(x) nearly rounded 1.933 + */ 1.934 + 1.935 +JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x)) 1.936 + double y[2],z=0.0; 1.937 + int n, ix; 1.938 + 1.939 + /* High word of x. */ 1.940 + ix = __HI(x); 1.941 + 1.942 + /* |x| ~< pi/4 */ 1.943 + ix &= 0x7fffffff; 1.944 + if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1); 1.945 + 1.946 + /* tan(Inf or NaN) is NaN */ 1.947 + else if (ix>=0x7ff00000) return x-x; /* NaN */ 1.948 + 1.949 + /* argument reduction needed */ 1.950 + else { 1.951 + n = __ieee754_rem_pio2(x,y); 1.952 + return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even 1.953 + -1 -- n odd */ 1.954 + } 1.955 +JRT_END 1.956 + 1.957 + 1.958 +#ifdef WIN32 1.959 +# pragma optimize ( "", on ) 1.960 +#endif