1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/runtime/sharedRuntimeTrig.cpp Wed Apr 27 01:25:04 2016 +0800 1.3 @@ -0,0 +1,979 @@ 1.4 +/* 1.5 + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.23 + * or visit www.oracle.com if you need additional information or have any 1.24 + * questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +#include "precompiled.hpp" 1.29 +#include "prims/jni.h" 1.30 +#include "runtime/interfaceSupport.hpp" 1.31 +#include "runtime/sharedRuntime.hpp" 1.32 + 1.33 +// This file contains copies of the fdlibm routines used by 1.34 +// StrictMath. It turns out that it is almost always required to use 1.35 +// these runtime routines; the Intel CPU doesn't meet the Java 1.36 +// specification for sin/cos outside a certain limited argument range, 1.37 +// and the SPARC CPU doesn't appear to have sin/cos instructions. It 1.38 +// also turns out that avoiding the indirect call through function 1.39 +// pointer out to libjava.so in SharedRuntime speeds these routines up 1.40 +// by roughly 15% on both Win32/x86 and Solaris/SPARC. 1.41 + 1.42 +// Enabling optimizations in this file causes incorrect code to be 1.43 +// generated; can not figure out how to turn down optimization for one 1.44 +// file in the IDE on Windows 1.45 +#ifdef WIN32 1.46 +# pragma optimize ( "", off ) 1.47 +#endif 1.48 + 1.49 +/* The above workaround now causes more problems with the latest MS compiler. 1.50 + * Visual Studio 2010's /GS option tries to guard against buffer overruns. 1.51 + * /GS is on by default if you specify optimizations, which we do globally 1.52 + * via /W3 /O2. However the above selective turning off of optimizations means 1.53 + * that /GS issues a warning "4748". And since we treat warnings as errors (/WX) 1.54 + * then the compilation fails. There are several possible solutions 1.55 + * (1) Remove that pragma above as obsolete with VS2010 - requires testing. 1.56 + * (2) Stop treating warnings as errors - would be a backward step 1.57 + * (3) Disable /GS - may help performance but you lose the security checks 1.58 + * (4) Disable the warning with "#pragma warning( disable : 4748 )" 1.59 + * (5) Disable planting the code with __declspec(safebuffers) 1.60 + * I've opted for (5) although we should investigate the local performance 1.61 + * benefits of (1) and global performance benefit of (3). 1.62 + */ 1.63 +#if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600)) 1.64 +#define SAFEBUF __declspec(safebuffers) 1.65 +#else 1.66 +#define SAFEBUF 1.67 +#endif 1.68 + 1.69 +#include <math.h> 1.70 + 1.71 +// VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles 1.72 +// [jk] this is not 100% correct because the float word order may different 1.73 +// from the byte order (e.g. on ARM) 1.74 +#ifdef VM_LITTLE_ENDIAN 1.75 +# define __HI(x) *(1+(int*)&x) 1.76 +# define __LO(x) *(int*)&x 1.77 +#else 1.78 +# define __HI(x) *(int*)&x 1.79 +# define __LO(x) *(1+(int*)&x) 1.80 +#endif 1.81 + 1.82 +static double copysignA(double x, double y) { 1.83 + __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000); 1.84 + return x; 1.85 +} 1.86 + 1.87 +/* 1.88 + * scalbn (double x, int n) 1.89 + * scalbn(x,n) returns x* 2**n computed by exponent 1.90 + * manipulation rather than by actually performing an 1.91 + * exponentiation or a multiplication. 1.92 + */ 1.93 + 1.94 +static const double 1.95 +two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */ 1.96 +twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */ 1.97 +hugeX = 1.0e+300, 1.98 +tiny = 1.0e-300; 1.99 + 1.100 +static double scalbnA (double x, int n) { 1.101 + int k,hx,lx; 1.102 + hx = __HI(x); 1.103 + lx = __LO(x); 1.104 + k = (hx&0x7ff00000)>>20; /* extract exponent */ 1.105 + if (k==0) { /* 0 or subnormal x */ 1.106 + if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */ 1.107 + x *= two54; 1.108 + hx = __HI(x); 1.109 + k = ((hx&0x7ff00000)>>20) - 54; 1.110 + if (n< -50000) return tiny*x; /*underflow*/ 1.111 + } 1.112 + if (k==0x7ff) return x+x; /* NaN or Inf */ 1.113 + k = k+n; 1.114 + if (k > 0x7fe) return hugeX*copysignA(hugeX,x); /* overflow */ 1.115 + if (k > 0) /* normal result */ 1.116 + {__HI(x) = (hx&0x800fffff)|(k<<20); return x;} 1.117 + if (k <= -54) { 1.118 + if (n > 50000) /* in case integer overflow in n+k */ 1.119 + return hugeX*copysignA(hugeX,x); /*overflow*/ 1.120 + else return tiny*copysignA(tiny,x); /*underflow*/ 1.121 + } 1.122 + k += 54; /* subnormal result */ 1.123 + __HI(x) = (hx&0x800fffff)|(k<<20); 1.124 + return x*twom54; 1.125 +} 1.126 + 1.127 +/* 1.128 + * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) 1.129 + * double x[],y[]; int e0,nx,prec; int ipio2[]; 1.130 + * 1.131 + * __kernel_rem_pio2 return the last three digits of N with 1.132 + * y = x - N*pi/2 1.133 + * so that |y| < pi/2. 1.134 + * 1.135 + * The method is to compute the integer (mod 8) and fraction parts of 1.136 + * (2/pi)*x without doing the full multiplication. In general we 1.137 + * skip the part of the product that are known to be a huge integer ( 1.138 + * more accurately, = 0 mod 8 ). Thus the number of operations are 1.139 + * independent of the exponent of the input. 1.140 + * 1.141 + * (2/pi) is represented by an array of 24-bit integers in ipio2[]. 1.142 + * 1.143 + * Input parameters: 1.144 + * x[] The input value (must be positive) is broken into nx 1.145 + * pieces of 24-bit integers in double precision format. 1.146 + * x[i] will be the i-th 24 bit of x. The scaled exponent 1.147 + * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 1.148 + * match x's up to 24 bits. 1.149 + * 1.150 + * Example of breaking a double positive z into x[0]+x[1]+x[2]: 1.151 + * e0 = ilogb(z)-23 1.152 + * z = scalbn(z,-e0) 1.153 + * for i = 0,1,2 1.154 + * x[i] = floor(z) 1.155 + * z = (z-x[i])*2**24 1.156 + * 1.157 + * 1.158 + * y[] ouput result in an array of double precision numbers. 1.159 + * The dimension of y[] is: 1.160 + * 24-bit precision 1 1.161 + * 53-bit precision 2 1.162 + * 64-bit precision 2 1.163 + * 113-bit precision 3 1.164 + * The actual value is the sum of them. Thus for 113-bit 1.165 + * precsion, one may have to do something like: 1.166 + * 1.167 + * long double t,w,r_head, r_tail; 1.168 + * t = (long double)y[2] + (long double)y[1]; 1.169 + * w = (long double)y[0]; 1.170 + * r_head = t+w; 1.171 + * r_tail = w - (r_head - t); 1.172 + * 1.173 + * e0 The exponent of x[0] 1.174 + * 1.175 + * nx dimension of x[] 1.176 + * 1.177 + * prec an interger indicating the precision: 1.178 + * 0 24 bits (single) 1.179 + * 1 53 bits (double) 1.180 + * 2 64 bits (extended) 1.181 + * 3 113 bits (quad) 1.182 + * 1.183 + * ipio2[] 1.184 + * integer array, contains the (24*i)-th to (24*i+23)-th 1.185 + * bit of 2/pi after binary point. The corresponding 1.186 + * floating value is 1.187 + * 1.188 + * ipio2[i] * 2^(-24(i+1)). 1.189 + * 1.190 + * External function: 1.191 + * double scalbn(), floor(); 1.192 + * 1.193 + * 1.194 + * Here is the description of some local variables: 1.195 + * 1.196 + * jk jk+1 is the initial number of terms of ipio2[] needed 1.197 + * in the computation. The recommended value is 2,3,4, 1.198 + * 6 for single, double, extended,and quad. 1.199 + * 1.200 + * jz local integer variable indicating the number of 1.201 + * terms of ipio2[] used. 1.202 + * 1.203 + * jx nx - 1 1.204 + * 1.205 + * jv index for pointing to the suitable ipio2[] for the 1.206 + * computation. In general, we want 1.207 + * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 1.208 + * is an integer. Thus 1.209 + * e0-3-24*jv >= 0 or (e0-3)/24 >= jv 1.210 + * Hence jv = max(0,(e0-3)/24). 1.211 + * 1.212 + * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. 1.213 + * 1.214 + * q[] double array with integral value, representing the 1.215 + * 24-bits chunk of the product of x and 2/pi. 1.216 + * 1.217 + * q0 the corresponding exponent of q[0]. Note that the 1.218 + * exponent for q[i] would be q0-24*i. 1.219 + * 1.220 + * PIo2[] double precision array, obtained by cutting pi/2 1.221 + * into 24 bits chunks. 1.222 + * 1.223 + * f[] ipio2[] in floating point 1.224 + * 1.225 + * iq[] integer array by breaking up q[] in 24-bits chunk. 1.226 + * 1.227 + * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] 1.228 + * 1.229 + * ih integer. If >0 it indicats q[] is >= 0.5, hence 1.230 + * it also indicates the *sign* of the result. 1.231 + * 1.232 + */ 1.233 + 1.234 + 1.235 +/* 1.236 + * Constants: 1.237 + * The hexadecimal values are the intended ones for the following 1.238 + * constants. The decimal values may be used, provided that the 1.239 + * compiler will convert from decimal to binary accurately enough 1.240 + * to produce the hexadecimal values shown. 1.241 + */ 1.242 + 1.243 + 1.244 +static const int init_jk[] = {2,3,4,6}; /* initial value for jk */ 1.245 + 1.246 +static const double PIo2[] = { 1.247 + 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ 1.248 + 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ 1.249 + 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ 1.250 + 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ 1.251 + 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ 1.252 + 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ 1.253 + 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ 1.254 + 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ 1.255 +}; 1.256 + 1.257 +static const double 1.258 +zeroB = 0.0, 1.259 +one = 1.0, 1.260 +two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ 1.261 +twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */ 1.262 + 1.263 +static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) { 1.264 + int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; 1.265 + double z,fw,f[20],fq[20],q[20]; 1.266 + 1.267 + /* initialize jk*/ 1.268 + jk = init_jk[prec]; 1.269 + jp = jk; 1.270 + 1.271 + /* determine jx,jv,q0, note that 3>q0 */ 1.272 + jx = nx-1; 1.273 + jv = (e0-3)/24; if(jv<0) jv=0; 1.274 + q0 = e0-24*(jv+1); 1.275 + 1.276 + /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ 1.277 + j = jv-jx; m = jx+jk; 1.278 + for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j]; 1.279 + 1.280 + /* compute q[0],q[1],...q[jk] */ 1.281 + for (i=0;i<=jk;i++) { 1.282 + for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; 1.283 + } 1.284 + 1.285 + jz = jk; 1.286 +recompute: 1.287 + /* distill q[] into iq[] reversingly */ 1.288 + for(i=0,j=jz,z=q[jz];j>0;i++,j--) { 1.289 + fw = (double)((int)(twon24* z)); 1.290 + iq[i] = (int)(z-two24B*fw); 1.291 + z = q[j-1]+fw; 1.292 + } 1.293 + 1.294 + /* compute n */ 1.295 + z = scalbnA(z,q0); /* actual value of z */ 1.296 + z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ 1.297 + n = (int) z; 1.298 + z -= (double)n; 1.299 + ih = 0; 1.300 + if(q0>0) { /* need iq[jz-1] to determine n */ 1.301 + i = (iq[jz-1]>>(24-q0)); n += i; 1.302 + iq[jz-1] -= i<<(24-q0); 1.303 + ih = iq[jz-1]>>(23-q0); 1.304 + } 1.305 + else if(q0==0) ih = iq[jz-1]>>23; 1.306 + else if(z>=0.5) ih=2; 1.307 + 1.308 + if(ih>0) { /* q > 0.5 */ 1.309 + n += 1; carry = 0; 1.310 + for(i=0;i<jz ;i++) { /* compute 1-q */ 1.311 + j = iq[i]; 1.312 + if(carry==0) { 1.313 + if(j!=0) { 1.314 + carry = 1; iq[i] = 0x1000000- j; 1.315 + } 1.316 + } else iq[i] = 0xffffff - j; 1.317 + } 1.318 + if(q0>0) { /* rare case: chance is 1 in 12 */ 1.319 + switch(q0) { 1.320 + case 1: 1.321 + iq[jz-1] &= 0x7fffff; break; 1.322 + case 2: 1.323 + iq[jz-1] &= 0x3fffff; break; 1.324 + } 1.325 + } 1.326 + if(ih==2) { 1.327 + z = one - z; 1.328 + if(carry!=0) z -= scalbnA(one,q0); 1.329 + } 1.330 + } 1.331 + 1.332 + /* check if recomputation is needed */ 1.333 + if(z==zeroB) { 1.334 + j = 0; 1.335 + for (i=jz-1;i>=jk;i--) j |= iq[i]; 1.336 + if(j==0) { /* need recomputation */ 1.337 + for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ 1.338 + 1.339 + for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ 1.340 + f[jx+i] = (double) ipio2[jv+i]; 1.341 + for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; 1.342 + q[i] = fw; 1.343 + } 1.344 + jz += k; 1.345 + goto recompute; 1.346 + } 1.347 + } 1.348 + 1.349 + /* chop off zero terms */ 1.350 + if(z==0.0) { 1.351 + jz -= 1; q0 -= 24; 1.352 + while(iq[jz]==0) { jz--; q0-=24;} 1.353 + } else { /* break z into 24-bit if neccessary */ 1.354 + z = scalbnA(z,-q0); 1.355 + if(z>=two24B) { 1.356 + fw = (double)((int)(twon24*z)); 1.357 + iq[jz] = (int)(z-two24B*fw); 1.358 + jz += 1; q0 += 24; 1.359 + iq[jz] = (int) fw; 1.360 + } else iq[jz] = (int) z ; 1.361 + } 1.362 + 1.363 + /* convert integer "bit" chunk to floating-point value */ 1.364 + fw = scalbnA(one,q0); 1.365 + for(i=jz;i>=0;i--) { 1.366 + q[i] = fw*(double)iq[i]; fw*=twon24; 1.367 + } 1.368 + 1.369 + /* compute PIo2[0,...,jp]*q[jz,...,0] */ 1.370 + for(i=jz;i>=0;i--) { 1.371 + for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; 1.372 + fq[jz-i] = fw; 1.373 + } 1.374 + 1.375 + /* compress fq[] into y[] */ 1.376 + switch(prec) { 1.377 + case 0: 1.378 + fw = 0.0; 1.379 + for (i=jz;i>=0;i--) fw += fq[i]; 1.380 + y[0] = (ih==0)? fw: -fw; 1.381 + break; 1.382 + case 1: 1.383 + case 2: 1.384 + fw = 0.0; 1.385 + for (i=jz;i>=0;i--) fw += fq[i]; 1.386 + y[0] = (ih==0)? fw: -fw; 1.387 + fw = fq[0]-fw; 1.388 + for (i=1;i<=jz;i++) fw += fq[i]; 1.389 + y[1] = (ih==0)? fw: -fw; 1.390 + break; 1.391 + case 3: /* painful */ 1.392 + for (i=jz;i>0;i--) { 1.393 + fw = fq[i-1]+fq[i]; 1.394 + fq[i] += fq[i-1]-fw; 1.395 + fq[i-1] = fw; 1.396 + } 1.397 + for (i=jz;i>1;i--) { 1.398 + fw = fq[i-1]+fq[i]; 1.399 + fq[i] += fq[i-1]-fw; 1.400 + fq[i-1] = fw; 1.401 + } 1.402 + for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; 1.403 + if(ih==0) { 1.404 + y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; 1.405 + } else { 1.406 + y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; 1.407 + } 1.408 + } 1.409 + return n&7; 1.410 +} 1.411 + 1.412 + 1.413 +/* 1.414 + * ==================================================== 1.415 + * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved. 1.416 + * 1.417 + * Developed at SunPro, a Sun Microsystems, Inc. business. 1.418 + * Permission to use, copy, modify, and distribute this 1.419 + * software is freely granted, provided that this notice 1.420 + * is preserved. 1.421 + * ==================================================== 1.422 + * 1.423 + */ 1.424 + 1.425 +/* __ieee754_rem_pio2(x,y) 1.426 + * 1.427 + * return the remainder of x rem pi/2 in y[0]+y[1] 1.428 + * use __kernel_rem_pio2() 1.429 + */ 1.430 + 1.431 +/* 1.432 + * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi 1.433 + */ 1.434 +static const int two_over_pi[] = { 1.435 + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 1.436 + 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 1.437 + 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 1.438 + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 1.439 + 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, 1.440 + 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, 1.441 + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 1.442 + 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 1.443 + 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 1.444 + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 1.445 + 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, 1.446 +}; 1.447 + 1.448 +static const int npio2_hw[] = { 1.449 + 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, 1.450 + 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, 1.451 + 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, 1.452 + 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, 1.453 + 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, 1.454 + 0x404858EB, 0x404921FB, 1.455 +}; 1.456 + 1.457 +/* 1.458 + * invpio2: 53 bits of 2/pi 1.459 + * pio2_1: first 33 bit of pi/2 1.460 + * pio2_1t: pi/2 - pio2_1 1.461 + * pio2_2: second 33 bit of pi/2 1.462 + * pio2_2t: pi/2 - (pio2_1+pio2_2) 1.463 + * pio2_3: third 33 bit of pi/2 1.464 + * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) 1.465 + */ 1.466 + 1.467 +static const double 1.468 +zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ 1.469 +half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ 1.470 +two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */ 1.471 +invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ 1.472 +pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ 1.473 +pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ 1.474 +pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */ 1.475 +pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ 1.476 +pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ 1.477 +pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ 1.478 + 1.479 +static SAFEBUF int __ieee754_rem_pio2(double x, double *y) { 1.480 + double z,w,t,r,fn; 1.481 + double tx[3]; 1.482 + int e0,i,j,nx,n,ix,hx,i0; 1.483 + 1.484 + i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ 1.485 + hx = *(i0+(int*)&x); /* high word of x */ 1.486 + ix = hx&0x7fffffff; 1.487 + if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */ 1.488 + {y[0] = x; y[1] = 0; return 0;} 1.489 + if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ 1.490 + if(hx>0) { 1.491 + z = x - pio2_1; 1.492 + if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ 1.493 + y[0] = z - pio2_1t; 1.494 + y[1] = (z-y[0])-pio2_1t; 1.495 + } else { /* near pi/2, use 33+33+53 bit pi */ 1.496 + z -= pio2_2; 1.497 + y[0] = z - pio2_2t; 1.498 + y[1] = (z-y[0])-pio2_2t; 1.499 + } 1.500 + return 1; 1.501 + } else { /* negative x */ 1.502 + z = x + pio2_1; 1.503 + if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ 1.504 + y[0] = z + pio2_1t; 1.505 + y[1] = (z-y[0])+pio2_1t; 1.506 + } else { /* near pi/2, use 33+33+53 bit pi */ 1.507 + z += pio2_2; 1.508 + y[0] = z + pio2_2t; 1.509 + y[1] = (z-y[0])+pio2_2t; 1.510 + } 1.511 + return -1; 1.512 + } 1.513 + } 1.514 + if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ 1.515 + t = fabsd(x); 1.516 + n = (int) (t*invpio2+half); 1.517 + fn = (double)n; 1.518 + r = t-fn*pio2_1; 1.519 + w = fn*pio2_1t; /* 1st round good to 85 bit */ 1.520 + if(n<32&&ix!=npio2_hw[n-1]) { 1.521 + y[0] = r-w; /* quick check no cancellation */ 1.522 + } else { 1.523 + j = ix>>20; 1.524 + y[0] = r-w; 1.525 + i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); 1.526 + if(i>16) { /* 2nd iteration needed, good to 118 */ 1.527 + t = r; 1.528 + w = fn*pio2_2; 1.529 + r = t-w; 1.530 + w = fn*pio2_2t-((t-r)-w); 1.531 + y[0] = r-w; 1.532 + i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); 1.533 + if(i>49) { /* 3rd iteration need, 151 bits acc */ 1.534 + t = r; /* will cover all possible cases */ 1.535 + w = fn*pio2_3; 1.536 + r = t-w; 1.537 + w = fn*pio2_3t-((t-r)-w); 1.538 + y[0] = r-w; 1.539 + } 1.540 + } 1.541 + } 1.542 + y[1] = (r-y[0])-w; 1.543 + if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} 1.544 + else return n; 1.545 + } 1.546 + /* 1.547 + * all other (large) arguments 1.548 + */ 1.549 + if(ix>=0x7ff00000) { /* x is inf or NaN */ 1.550 + y[0]=y[1]=x-x; return 0; 1.551 + } 1.552 + /* set z = scalbn(|x|,ilogb(x)-23) */ 1.553 + *(1-i0+(int*)&z) = *(1-i0+(int*)&x); 1.554 + e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ 1.555 + *(i0+(int*)&z) = ix - (e0<<20); 1.556 + for(i=0;i<2;i++) { 1.557 + tx[i] = (double)((int)(z)); 1.558 + z = (z-tx[i])*two24A; 1.559 + } 1.560 + tx[2] = z; 1.561 + nx = 3; 1.562 + while(tx[nx-1]==zeroA) nx--; /* skip zero term */ 1.563 + n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); 1.564 + if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} 1.565 + return n; 1.566 +} 1.567 + 1.568 + 1.569 +/* __kernel_sin( x, y, iy) 1.570 + * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 1.571 + * Input x is assumed to be bounded by ~pi/4 in magnitude. 1.572 + * Input y is the tail of x. 1.573 + * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). 1.574 + * 1.575 + * Algorithm 1.576 + * 1. Since sin(-x) = -sin(x), we need only to consider positive x. 1.577 + * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. 1.578 + * 3. sin(x) is approximated by a polynomial of degree 13 on 1.579 + * [0,pi/4] 1.580 + * 3 13 1.581 + * sin(x) ~ x + S1*x + ... + S6*x 1.582 + * where 1.583 + * 1.584 + * |sin(x) 2 4 6 8 10 12 | -58 1.585 + * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 1.586 + * | x | 1.587 + * 1.588 + * 4. sin(x+y) = sin(x) + sin'(x')*y 1.589 + * ~ sin(x) + (1-x*x/2)*y 1.590 + * For better accuracy, let 1.591 + * 3 2 2 2 2 1.592 + * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) 1.593 + * then 3 2 1.594 + * sin(x) = x + (S1*x + (x *(r-y/2)+y)) 1.595 + */ 1.596 + 1.597 +static const double 1.598 +S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ 1.599 +S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ 1.600 +S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ 1.601 +S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ 1.602 +S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ 1.603 +S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ 1.604 + 1.605 +static double __kernel_sin(double x, double y, int iy) 1.606 +{ 1.607 + double z,r,v; 1.608 + int ix; 1.609 + ix = __HI(x)&0x7fffffff; /* high word of x */ 1.610 + if(ix<0x3e400000) /* |x| < 2**-27 */ 1.611 + {if((int)x==0) return x;} /* generate inexact */ 1.612 + z = x*x; 1.613 + v = z*x; 1.614 + r = S2+z*(S3+z*(S4+z*(S5+z*S6))); 1.615 + if(iy==0) return x+v*(S1+z*r); 1.616 + else return x-((z*(half*y-v*r)-y)-v*S1); 1.617 +} 1.618 + 1.619 +/* 1.620 + * __kernel_cos( x, y ) 1.621 + * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 1.622 + * Input x is assumed to be bounded by ~pi/4 in magnitude. 1.623 + * Input y is the tail of x. 1.624 + * 1.625 + * Algorithm 1.626 + * 1. Since cos(-x) = cos(x), we need only to consider positive x. 1.627 + * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. 1.628 + * 3. cos(x) is approximated by a polynomial of degree 14 on 1.629 + * [0,pi/4] 1.630 + * 4 14 1.631 + * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x 1.632 + * where the remez error is 1.633 + * 1.634 + * | 2 4 6 8 10 12 14 | -58 1.635 + * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 1.636 + * | | 1.637 + * 1.638 + * 4 6 8 10 12 14 1.639 + * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then 1.640 + * cos(x) = 1 - x*x/2 + r 1.641 + * since cos(x+y) ~ cos(x) - sin(x)*y 1.642 + * ~ cos(x) - x*y, 1.643 + * a correction term is necessary in cos(x) and hence 1.644 + * cos(x+y) = 1 - (x*x/2 - (r - x*y)) 1.645 + * For better accuracy when x > 0.3, let qx = |x|/4 with 1.646 + * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. 1.647 + * Then 1.648 + * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). 1.649 + * Note that 1-qx and (x*x/2-qx) is EXACT here, and the 1.650 + * magnitude of the latter is at least a quarter of x*x/2, 1.651 + * thus, reducing the rounding error in the subtraction. 1.652 + */ 1.653 + 1.654 +static const double 1.655 +C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ 1.656 +C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ 1.657 +C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ 1.658 +C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ 1.659 +C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ 1.660 +C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ 1.661 + 1.662 +static double __kernel_cos(double x, double y) 1.663 +{ 1.664 + double a,h,z,r,qx; 1.665 + int ix; 1.666 + ix = __HI(x)&0x7fffffff; /* ix = |x|'s high word*/ 1.667 + if(ix<0x3e400000) { /* if x < 2**27 */ 1.668 + if(((int)x)==0) return one; /* generate inexact */ 1.669 + } 1.670 + z = x*x; 1.671 + r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); 1.672 + if(ix < 0x3FD33333) /* if |x| < 0.3 */ 1.673 + return one - (0.5*z - (z*r - x*y)); 1.674 + else { 1.675 + if(ix > 0x3fe90000) { /* x > 0.78125 */ 1.676 + qx = 0.28125; 1.677 + } else { 1.678 + __HI(qx) = ix-0x00200000; /* x/4 */ 1.679 + __LO(qx) = 0; 1.680 + } 1.681 + h = 0.5*z-qx; 1.682 + a = one-qx; 1.683 + return a - (h - (z*r-x*y)); 1.684 + } 1.685 +} 1.686 + 1.687 +/* __kernel_tan( x, y, k ) 1.688 + * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854 1.689 + * Input x is assumed to be bounded by ~pi/4 in magnitude. 1.690 + * Input y is the tail of x. 1.691 + * Input k indicates whether tan (if k=1) or 1.692 + * -1/tan (if k= -1) is returned. 1.693 + * 1.694 + * Algorithm 1.695 + * 1. Since tan(-x) = -tan(x), we need only to consider positive x. 1.696 + * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0. 1.697 + * 3. tan(x) is approximated by a odd polynomial of degree 27 on 1.698 + * [0,0.67434] 1.699 + * 3 27 1.700 + * tan(x) ~ x + T1*x + ... + T13*x 1.701 + * where 1.702 + * 1.703 + * |tan(x) 2 4 26 | -59.2 1.704 + * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 1.705 + * | x | 1.706 + * 1.707 + * Note: tan(x+y) = tan(x) + tan'(x)*y 1.708 + * ~ tan(x) + (1+x*x)*y 1.709 + * Therefore, for better accuracy in computing tan(x+y), let 1.710 + * 3 2 2 2 2 1.711 + * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) 1.712 + * then 1.713 + * 3 2 1.714 + * tan(x+y) = x + (T1*x + (x *(r+y)+y)) 1.715 + * 1.716 + * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then 1.717 + * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) 1.718 + * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) 1.719 + */ 1.720 + 1.721 +static const double 1.722 +pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */ 1.723 +pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */ 1.724 +T[] = { 1.725 + 3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */ 1.726 + 1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */ 1.727 + 5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */ 1.728 + 2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */ 1.729 + 8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */ 1.730 + 3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */ 1.731 + 1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */ 1.732 + 5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */ 1.733 + 2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */ 1.734 + 7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */ 1.735 + 7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */ 1.736 + -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */ 1.737 + 2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */ 1.738 +}; 1.739 + 1.740 +static double __kernel_tan(double x, double y, int iy) 1.741 +{ 1.742 + double z,r,v,w,s; 1.743 + int ix,hx; 1.744 + hx = __HI(x); /* high word of x */ 1.745 + ix = hx&0x7fffffff; /* high word of |x| */ 1.746 + if(ix<0x3e300000) { /* x < 2**-28 */ 1.747 + if((int)x==0) { /* generate inexact */ 1.748 + if (((ix | __LO(x)) | (iy + 1)) == 0) 1.749 + return one / fabsd(x); 1.750 + else { 1.751 + if (iy == 1) 1.752 + return x; 1.753 + else { /* compute -1 / (x+y) carefully */ 1.754 + double a, t; 1.755 + 1.756 + z = w = x + y; 1.757 + __LO(z) = 0; 1.758 + v = y - (z - x); 1.759 + t = a = -one / w; 1.760 + __LO(t) = 0; 1.761 + s = one + t * z; 1.762 + return t + a * (s + t * v); 1.763 + } 1.764 + } 1.765 + } 1.766 + } 1.767 + if(ix>=0x3FE59428) { /* |x|>=0.6744 */ 1.768 + if(hx<0) {x = -x; y = -y;} 1.769 + z = pio4-x; 1.770 + w = pio4lo-y; 1.771 + x = z+w; y = 0.0; 1.772 + } 1.773 + z = x*x; 1.774 + w = z*z; 1.775 + /* Break x^5*(T[1]+x^2*T[2]+...) into 1.776 + * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + 1.777 + * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) 1.778 + */ 1.779 + r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11])))); 1.780 + v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12]))))); 1.781 + s = z*x; 1.782 + r = y + z*(s*(r+v)+y); 1.783 + r += T[0]*s; 1.784 + w = x+r; 1.785 + if(ix>=0x3FE59428) { 1.786 + v = (double)iy; 1.787 + return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r))); 1.788 + } 1.789 + if(iy==1) return w; 1.790 + else { /* if allow error up to 2 ulp, 1.791 + simply return -1.0/(x+r) here */ 1.792 + /* compute -1.0/(x+r) accurately */ 1.793 + double a,t; 1.794 + z = w; 1.795 + __LO(z) = 0; 1.796 + v = r-(z - x); /* z+v = r+x */ 1.797 + t = a = -1.0/w; /* a = -1.0/w */ 1.798 + __LO(t) = 0; 1.799 + s = 1.0+t*z; 1.800 + return t+a*(s+t*v); 1.801 + } 1.802 +} 1.803 + 1.804 + 1.805 +//---------------------------------------------------------------------- 1.806 +// 1.807 +// Routines for new sin/cos implementation 1.808 +// 1.809 +//---------------------------------------------------------------------- 1.810 + 1.811 +/* sin(x) 1.812 + * Return sine function of x. 1.813 + * 1.814 + * kernel function: 1.815 + * __kernel_sin ... sine function on [-pi/4,pi/4] 1.816 + * __kernel_cos ... cose function on [-pi/4,pi/4] 1.817 + * __ieee754_rem_pio2 ... argument reduction routine 1.818 + * 1.819 + * Method. 1.820 + * Let S,C and T denote the sin, cos and tan respectively on 1.821 + * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 1.822 + * in [-pi/4 , +pi/4], and let n = k mod 4. 1.823 + * We have 1.824 + * 1.825 + * n sin(x) cos(x) tan(x) 1.826 + * ---------------------------------------------------------- 1.827 + * 0 S C T 1.828 + * 1 C -S -1/T 1.829 + * 2 -S -C T 1.830 + * 3 -C S -1/T 1.831 + * ---------------------------------------------------------- 1.832 + * 1.833 + * Special cases: 1.834 + * Let trig be any of sin, cos, or tan. 1.835 + * trig(+-INF) is NaN, with signals; 1.836 + * trig(NaN) is that NaN; 1.837 + * 1.838 + * Accuracy: 1.839 + * TRIG(x) returns trig(x) nearly rounded 1.840 + */ 1.841 + 1.842 +JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x)) 1.843 + double y[2],z=0.0; 1.844 + int n, ix; 1.845 + 1.846 + /* High word of x. */ 1.847 + ix = __HI(x); 1.848 + 1.849 + /* |x| ~< pi/4 */ 1.850 + ix &= 0x7fffffff; 1.851 + if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0); 1.852 + 1.853 + /* sin(Inf or NaN) is NaN */ 1.854 + else if (ix>=0x7ff00000) return x-x; 1.855 + 1.856 + /* argument reduction needed */ 1.857 + else { 1.858 + n = __ieee754_rem_pio2(x,y); 1.859 + switch(n&3) { 1.860 + case 0: return __kernel_sin(y[0],y[1],1); 1.861 + case 1: return __kernel_cos(y[0],y[1]); 1.862 + case 2: return -__kernel_sin(y[0],y[1],1); 1.863 + default: 1.864 + return -__kernel_cos(y[0],y[1]); 1.865 + } 1.866 + } 1.867 +JRT_END 1.868 + 1.869 +/* cos(x) 1.870 + * Return cosine function of x. 1.871 + * 1.872 + * kernel function: 1.873 + * __kernel_sin ... sine function on [-pi/4,pi/4] 1.874 + * __kernel_cos ... cosine function on [-pi/4,pi/4] 1.875 + * __ieee754_rem_pio2 ... argument reduction routine 1.876 + * 1.877 + * Method. 1.878 + * Let S,C and T denote the sin, cos and tan respectively on 1.879 + * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 1.880 + * in [-pi/4 , +pi/4], and let n = k mod 4. 1.881 + * We have 1.882 + * 1.883 + * n sin(x) cos(x) tan(x) 1.884 + * ---------------------------------------------------------- 1.885 + * 0 S C T 1.886 + * 1 C -S -1/T 1.887 + * 2 -S -C T 1.888 + * 3 -C S -1/T 1.889 + * ---------------------------------------------------------- 1.890 + * 1.891 + * Special cases: 1.892 + * Let trig be any of sin, cos, or tan. 1.893 + * trig(+-INF) is NaN, with signals; 1.894 + * trig(NaN) is that NaN; 1.895 + * 1.896 + * Accuracy: 1.897 + * TRIG(x) returns trig(x) nearly rounded 1.898 + */ 1.899 + 1.900 +JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x)) 1.901 + double y[2],z=0.0; 1.902 + int n, ix; 1.903 + 1.904 + /* High word of x. */ 1.905 + ix = __HI(x); 1.906 + 1.907 + /* |x| ~< pi/4 */ 1.908 + ix &= 0x7fffffff; 1.909 + if(ix <= 0x3fe921fb) return __kernel_cos(x,z); 1.910 + 1.911 + /* cos(Inf or NaN) is NaN */ 1.912 + else if (ix>=0x7ff00000) return x-x; 1.913 + 1.914 + /* argument reduction needed */ 1.915 + else { 1.916 + n = __ieee754_rem_pio2(x,y); 1.917 + switch(n&3) { 1.918 + case 0: return __kernel_cos(y[0],y[1]); 1.919 + case 1: return -__kernel_sin(y[0],y[1],1); 1.920 + case 2: return -__kernel_cos(y[0],y[1]); 1.921 + default: 1.922 + return __kernel_sin(y[0],y[1],1); 1.923 + } 1.924 + } 1.925 +JRT_END 1.926 + 1.927 +/* tan(x) 1.928 + * Return tangent function of x. 1.929 + * 1.930 + * kernel function: 1.931 + * __kernel_tan ... tangent function on [-pi/4,pi/4] 1.932 + * __ieee754_rem_pio2 ... argument reduction routine 1.933 + * 1.934 + * Method. 1.935 + * Let S,C and T denote the sin, cos and tan respectively on 1.936 + * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 1.937 + * in [-pi/4 , +pi/4], and let n = k mod 4. 1.938 + * We have 1.939 + * 1.940 + * n sin(x) cos(x) tan(x) 1.941 + * ---------------------------------------------------------- 1.942 + * 0 S C T 1.943 + * 1 C -S -1/T 1.944 + * 2 -S -C T 1.945 + * 3 -C S -1/T 1.946 + * ---------------------------------------------------------- 1.947 + * 1.948 + * Special cases: 1.949 + * Let trig be any of sin, cos, or tan. 1.950 + * trig(+-INF) is NaN, with signals; 1.951 + * trig(NaN) is that NaN; 1.952 + * 1.953 + * Accuracy: 1.954 + * TRIG(x) returns trig(x) nearly rounded 1.955 + */ 1.956 + 1.957 +JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x)) 1.958 + double y[2],z=0.0; 1.959 + int n, ix; 1.960 + 1.961 + /* High word of x. */ 1.962 + ix = __HI(x); 1.963 + 1.964 + /* |x| ~< pi/4 */ 1.965 + ix &= 0x7fffffff; 1.966 + if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1); 1.967 + 1.968 + /* tan(Inf or NaN) is NaN */ 1.969 + else if (ix>=0x7ff00000) return x-x; /* NaN */ 1.970 + 1.971 + /* argument reduction needed */ 1.972 + else { 1.973 + n = __ieee754_rem_pio2(x,y); 1.974 + return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even 1.975 + -1 -- n odd */ 1.976 + } 1.977 +JRT_END 1.978 + 1.979 + 1.980 +#ifdef WIN32 1.981 +# pragma optimize ( "", on ) 1.982 +#endif