Mon, 09 Mar 2020 12:54:53 +0000
8240295: hs_err elapsed time in seconds is not accurate enough
Reviewed-by: dholmes, sspitsyn
1 /*
2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "prims/jni.h"
27 #include "runtime/interfaceSupport.hpp"
28 #include "runtime/sharedRuntime.hpp"
30 // This file contains copies of the fdlibm routines used by
31 // StrictMath. It turns out that it is almost always required to use
32 // these runtime routines; the Intel CPU doesn't meet the Java
33 // specification for sin/cos outside a certain limited argument range,
34 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
35 // also turns out that avoiding the indirect call through function
36 // pointer out to libjava.so in SharedRuntime speeds these routines up
37 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
39 // Enabling optimizations in this file causes incorrect code to be
40 // generated; can not figure out how to turn down optimization for one
41 // file in the IDE on Windows
42 #ifdef WIN32
43 # pragma optimize ( "", off )
44 #endif
46 /* The above workaround now causes more problems with the latest MS compiler.
47 * Visual Studio 2010's /GS option tries to guard against buffer overruns.
48 * /GS is on by default if you specify optimizations, which we do globally
49 * via /W3 /O2. However the above selective turning off of optimizations means
50 * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)
51 * then the compilation fails. There are several possible solutions
52 * (1) Remove that pragma above as obsolete with VS2010 - requires testing.
53 * (2) Stop treating warnings as errors - would be a backward step
54 * (3) Disable /GS - may help performance but you lose the security checks
55 * (4) Disable the warning with "#pragma warning( disable : 4748 )"
56 * (5) Disable planting the code with __declspec(safebuffers)
57 * I've opted for (5) although we should investigate the local performance
58 * benefits of (1) and global performance benefit of (3).
59 */
60 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))
61 #define SAFEBUF __declspec(safebuffers)
62 #else
63 #define SAFEBUF
64 #endif
66 #include "runtime/sharedRuntimeMath.hpp"
68 /*
69 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
70 * double x[],y[]; int e0,nx,prec; int ipio2[];
71 *
72 * __kernel_rem_pio2 return the last three digits of N with
73 * y = x - N*pi/2
74 * so that |y| < pi/2.
75 *
76 * The method is to compute the integer (mod 8) and fraction parts of
77 * (2/pi)*x without doing the full multiplication. In general we
78 * skip the part of the product that are known to be a huge integer (
79 * more accurately, = 0 mod 8 ). Thus the number of operations are
80 * independent of the exponent of the input.
81 *
82 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
83 *
84 * Input parameters:
85 * x[] The input value (must be positive) is broken into nx
86 * pieces of 24-bit integers in double precision format.
87 * x[i] will be the i-th 24 bit of x. The scaled exponent
88 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
89 * match x's up to 24 bits.
90 *
91 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
92 * e0 = ilogb(z)-23
93 * z = scalbn(z,-e0)
94 * for i = 0,1,2
95 * x[i] = floor(z)
96 * z = (z-x[i])*2**24
97 *
98 *
99 * y[] ouput result in an array of double precision numbers.
100 * The dimension of y[] is:
101 * 24-bit precision 1
102 * 53-bit precision 2
103 * 64-bit precision 2
104 * 113-bit precision 3
105 * The actual value is the sum of them. Thus for 113-bit
106 * precsion, one may have to do something like:
107 *
108 * long double t,w,r_head, r_tail;
109 * t = (long double)y[2] + (long double)y[1];
110 * w = (long double)y[0];
111 * r_head = t+w;
112 * r_tail = w - (r_head - t);
113 *
114 * e0 The exponent of x[0]
115 *
116 * nx dimension of x[]
117 *
118 * prec an interger indicating the precision:
119 * 0 24 bits (single)
120 * 1 53 bits (double)
121 * 2 64 bits (extended)
122 * 3 113 bits (quad)
123 *
124 * ipio2[]
125 * integer array, contains the (24*i)-th to (24*i+23)-th
126 * bit of 2/pi after binary point. The corresponding
127 * floating value is
128 *
129 * ipio2[i] * 2^(-24(i+1)).
130 *
131 * External function:
132 * double scalbn(), floor();
133 *
134 *
135 * Here is the description of some local variables:
136 *
137 * jk jk+1 is the initial number of terms of ipio2[] needed
138 * in the computation. The recommended value is 2,3,4,
139 * 6 for single, double, extended,and quad.
140 *
141 * jz local integer variable indicating the number of
142 * terms of ipio2[] used.
143 *
144 * jx nx - 1
145 *
146 * jv index for pointing to the suitable ipio2[] for the
147 * computation. In general, we want
148 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
149 * is an integer. Thus
150 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
151 * Hence jv = max(0,(e0-3)/24).
152 *
153 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
154 *
155 * q[] double array with integral value, representing the
156 * 24-bits chunk of the product of x and 2/pi.
157 *
158 * q0 the corresponding exponent of q[0]. Note that the
159 * exponent for q[i] would be q0-24*i.
160 *
161 * PIo2[] double precision array, obtained by cutting pi/2
162 * into 24 bits chunks.
163 *
164 * f[] ipio2[] in floating point
165 *
166 * iq[] integer array by breaking up q[] in 24-bits chunk.
167 *
168 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
169 *
170 * ih integer. If >0 it indicats q[] is >= 0.5, hence
171 * it also indicates the *sign* of the result.
172 *
173 */
176 /*
177 * Constants:
178 * The hexadecimal values are the intended ones for the following
179 * constants. The decimal values may be used, provided that the
180 * compiler will convert from decimal to binary accurately enough
181 * to produce the hexadecimal values shown.
182 */
185 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
187 static const double PIo2[] = {
188 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
189 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
190 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
191 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
192 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
193 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
194 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
195 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
196 };
198 static const double
199 zeroB = 0.0,
200 one = 1.0,
201 two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
202 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
204 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
205 int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
206 double z,fw,f[20],fq[20],q[20];
208 /* initialize jk*/
209 jk = init_jk[prec];
210 jp = jk;
212 /* determine jx,jv,q0, note that 3>q0 */
213 jx = nx-1;
214 jv = (e0-3)/24; if(jv<0) jv=0;
215 q0 = e0-24*(jv+1);
217 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
218 j = jv-jx; m = jx+jk;
219 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
221 /* compute q[0],q[1],...q[jk] */
222 for (i=0;i<=jk;i++) {
223 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
224 }
226 jz = jk;
227 recompute:
228 /* distill q[] into iq[] reversingly */
229 for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
230 fw = (double)((int)(twon24* z));
231 iq[i] = (int)(z-two24B*fw);
232 z = q[j-1]+fw;
233 }
235 /* compute n */
236 z = scalbnA(z,q0); /* actual value of z */
237 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
238 n = (int) z;
239 z -= (double)n;
240 ih = 0;
241 if(q0>0) { /* need iq[jz-1] to determine n */
242 i = (iq[jz-1]>>(24-q0)); n += i;
243 iq[jz-1] -= i<<(24-q0);
244 ih = iq[jz-1]>>(23-q0);
245 }
246 else if(q0==0) ih = iq[jz-1]>>23;
247 else if(z>=0.5) ih=2;
249 if(ih>0) { /* q > 0.5 */
250 n += 1; carry = 0;
251 for(i=0;i<jz ;i++) { /* compute 1-q */
252 j = iq[i];
253 if(carry==0) {
254 if(j!=0) {
255 carry = 1; iq[i] = 0x1000000- j;
256 }
257 } else iq[i] = 0xffffff - j;
258 }
259 if(q0>0) { /* rare case: chance is 1 in 12 */
260 switch(q0) {
261 case 1:
262 iq[jz-1] &= 0x7fffff; break;
263 case 2:
264 iq[jz-1] &= 0x3fffff; break;
265 }
266 }
267 if(ih==2) {
268 z = one - z;
269 if(carry!=0) z -= scalbnA(one,q0);
270 }
271 }
273 /* check if recomputation is needed */
274 if(z==zeroB) {
275 j = 0;
276 for (i=jz-1;i>=jk;i--) j |= iq[i];
277 if(j==0) { /* need recomputation */
278 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
280 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
281 f[jx+i] = (double) ipio2[jv+i];
282 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
283 q[i] = fw;
284 }
285 jz += k;
286 goto recompute;
287 }
288 }
290 /* chop off zero terms */
291 if(z==0.0) {
292 jz -= 1; q0 -= 24;
293 while(iq[jz]==0) { jz--; q0-=24;}
294 } else { /* break z into 24-bit if neccessary */
295 z = scalbnA(z,-q0);
296 if(z>=two24B) {
297 fw = (double)((int)(twon24*z));
298 iq[jz] = (int)(z-two24B*fw);
299 jz += 1; q0 += 24;
300 iq[jz] = (int) fw;
301 } else iq[jz] = (int) z ;
302 }
304 /* convert integer "bit" chunk to floating-point value */
305 fw = scalbnA(one,q0);
306 for(i=jz;i>=0;i--) {
307 q[i] = fw*(double)iq[i]; fw*=twon24;
308 }
310 /* compute PIo2[0,...,jp]*q[jz,...,0] */
311 for(i=jz;i>=0;i--) {
312 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
313 fq[jz-i] = fw;
314 }
316 /* compress fq[] into y[] */
317 switch(prec) {
318 case 0:
319 fw = 0.0;
320 for (i=jz;i>=0;i--) fw += fq[i];
321 y[0] = (ih==0)? fw: -fw;
322 break;
323 case 1:
324 case 2:
325 fw = 0.0;
326 for (i=jz;i>=0;i--) fw += fq[i];
327 y[0] = (ih==0)? fw: -fw;
328 fw = fq[0]-fw;
329 for (i=1;i<=jz;i++) fw += fq[i];
330 y[1] = (ih==0)? fw: -fw;
331 break;
332 case 3: /* painful */
333 for (i=jz;i>0;i--) {
334 fw = fq[i-1]+fq[i];
335 fq[i] += fq[i-1]-fw;
336 fq[i-1] = fw;
337 }
338 for (i=jz;i>1;i--) {
339 fw = fq[i-1]+fq[i];
340 fq[i] += fq[i-1]-fw;
341 fq[i-1] = fw;
342 }
343 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
344 if(ih==0) {
345 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
346 } else {
347 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
348 }
349 }
350 return n&7;
351 }
354 /*
355 * ====================================================
356 * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.
357 *
358 * Developed at SunPro, a Sun Microsystems, Inc. business.
359 * Permission to use, copy, modify, and distribute this
360 * software is freely granted, provided that this notice
361 * is preserved.
362 * ====================================================
363 *
364 */
366 /* __ieee754_rem_pio2(x,y)
367 *
368 * return the remainder of x rem pi/2 in y[0]+y[1]
369 * use __kernel_rem_pio2()
370 */
372 /*
373 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
374 */
375 static const int two_over_pi[] = {
376 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
377 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
378 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
379 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
380 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
381 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
382 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
383 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
384 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
385 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
386 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
387 };
389 static const int npio2_hw[] = {
390 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
391 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
392 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
393 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
394 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
395 0x404858EB, 0x404921FB,
396 };
398 /*
399 * invpio2: 53 bits of 2/pi
400 * pio2_1: first 33 bit of pi/2
401 * pio2_1t: pi/2 - pio2_1
402 * pio2_2: second 33 bit of pi/2
403 * pio2_2t: pi/2 - (pio2_1+pio2_2)
404 * pio2_3: third 33 bit of pi/2
405 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
406 */
408 static const double
409 zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
410 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
411 two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
412 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
413 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
414 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
415 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
416 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
417 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
418 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
420 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {
421 double z,w,t,r,fn;
422 double tx[3];
423 int e0,i,j,nx,n,ix,hx,i0;
425 i0 = ((*(int*)&two24A)>>30)^1; /* high word index */
426 hx = *(i0+(int*)&x); /* high word of x */
427 ix = hx&0x7fffffff;
428 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
429 {y[0] = x; y[1] = 0; return 0;}
430 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
431 if(hx>0) {
432 z = x - pio2_1;
433 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
434 y[0] = z - pio2_1t;
435 y[1] = (z-y[0])-pio2_1t;
436 } else { /* near pi/2, use 33+33+53 bit pi */
437 z -= pio2_2;
438 y[0] = z - pio2_2t;
439 y[1] = (z-y[0])-pio2_2t;
440 }
441 return 1;
442 } else { /* negative x */
443 z = x + pio2_1;
444 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
445 y[0] = z + pio2_1t;
446 y[1] = (z-y[0])+pio2_1t;
447 } else { /* near pi/2, use 33+33+53 bit pi */
448 z += pio2_2;
449 y[0] = z + pio2_2t;
450 y[1] = (z-y[0])+pio2_2t;
451 }
452 return -1;
453 }
454 }
455 if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
456 t = fabsd(x);
457 n = (int) (t*invpio2+half);
458 fn = (double)n;
459 r = t-fn*pio2_1;
460 w = fn*pio2_1t; /* 1st round good to 85 bit */
461 if(n<32&&ix!=npio2_hw[n-1]) {
462 y[0] = r-w; /* quick check no cancellation */
463 } else {
464 j = ix>>20;
465 y[0] = r-w;
466 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
467 if(i>16) { /* 2nd iteration needed, good to 118 */
468 t = r;
469 w = fn*pio2_2;
470 r = t-w;
471 w = fn*pio2_2t-((t-r)-w);
472 y[0] = r-w;
473 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
474 if(i>49) { /* 3rd iteration need, 151 bits acc */
475 t = r; /* will cover all possible cases */
476 w = fn*pio2_3;
477 r = t-w;
478 w = fn*pio2_3t-((t-r)-w);
479 y[0] = r-w;
480 }
481 }
482 }
483 y[1] = (r-y[0])-w;
484 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
485 else return n;
486 }
487 /*
488 * all other (large) arguments
489 */
490 if(ix>=0x7ff00000) { /* x is inf or NaN */
491 y[0]=y[1]=x-x; return 0;
492 }
493 /* set z = scalbn(|x|,ilogb(x)-23) */
494 *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
495 e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */
496 *(i0+(int*)&z) = ix - (e0<<20);
497 for(i=0;i<2;i++) {
498 tx[i] = (double)((int)(z));
499 z = (z-tx[i])*two24A;
500 }
501 tx[2] = z;
502 nx = 3;
503 while(tx[nx-1]==zeroA) nx--; /* skip zero term */
504 n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
505 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
506 return n;
507 }
510 /* __kernel_sin( x, y, iy)
511 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
512 * Input x is assumed to be bounded by ~pi/4 in magnitude.
513 * Input y is the tail of x.
514 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
515 *
516 * Algorithm
517 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
518 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
519 * 3. sin(x) is approximated by a polynomial of degree 13 on
520 * [0,pi/4]
521 * 3 13
522 * sin(x) ~ x + S1*x + ... + S6*x
523 * where
524 *
525 * |sin(x) 2 4 6 8 10 12 | -58
526 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
527 * | x |
528 *
529 * 4. sin(x+y) = sin(x) + sin'(x')*y
530 * ~ sin(x) + (1-x*x/2)*y
531 * For better accuracy, let
532 * 3 2 2 2 2
533 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
534 * then 3 2
535 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
536 */
538 static const double
539 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
540 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
541 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
542 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
543 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
544 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
546 static double __kernel_sin(double x, double y, int iy)
547 {
548 double z,r,v;
549 int ix;
550 ix = high(x)&0x7fffffff; /* high word of x */
551 if(ix<0x3e400000) /* |x| < 2**-27 */
552 {if((int)x==0) return x;} /* generate inexact */
553 z = x*x;
554 v = z*x;
555 r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
556 if(iy==0) return x+v*(S1+z*r);
557 else return x-((z*(half*y-v*r)-y)-v*S1);
558 }
560 /*
561 * __kernel_cos( x, y )
562 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
563 * Input x is assumed to be bounded by ~pi/4 in magnitude.
564 * Input y is the tail of x.
565 *
566 * Algorithm
567 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
568 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
569 * 3. cos(x) is approximated by a polynomial of degree 14 on
570 * [0,pi/4]
571 * 4 14
572 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
573 * where the remez error is
574 *
575 * | 2 4 6 8 10 12 14 | -58
576 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
577 * | |
578 *
579 * 4 6 8 10 12 14
580 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
581 * cos(x) = 1 - x*x/2 + r
582 * since cos(x+y) ~ cos(x) - sin(x)*y
583 * ~ cos(x) - x*y,
584 * a correction term is necessary in cos(x) and hence
585 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
586 * For better accuracy when x > 0.3, let qx = |x|/4 with
587 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
588 * Then
589 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
590 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
591 * magnitude of the latter is at least a quarter of x*x/2,
592 * thus, reducing the rounding error in the subtraction.
593 */
595 static const double
596 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
597 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
598 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
599 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
600 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
601 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
603 static double __kernel_cos(double x, double y)
604 {
605 double a,h,z,r,qx=0;
606 int ix;
607 ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/
608 if(ix<0x3e400000) { /* if x < 2**27 */
609 if(((int)x)==0) return one; /* generate inexact */
610 }
611 z = x*x;
612 r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
613 if(ix < 0x3FD33333) /* if |x| < 0.3 */
614 return one - (0.5*z - (z*r - x*y));
615 else {
616 if(ix > 0x3fe90000) { /* x > 0.78125 */
617 qx = 0.28125;
618 } else {
619 set_high(&qx, ix-0x00200000); /* x/4 */
620 set_low(&qx, 0);
621 }
622 h = 0.5*z-qx;
623 a = one-qx;
624 return a - (h - (z*r-x*y));
625 }
626 }
628 /* __kernel_tan( x, y, k )
629 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
630 * Input x is assumed to be bounded by ~pi/4 in magnitude.
631 * Input y is the tail of x.
632 * Input k indicates whether tan (if k=1) or
633 * -1/tan (if k= -1) is returned.
634 *
635 * Algorithm
636 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
637 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
638 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
639 * [0,0.67434]
640 * 3 27
641 * tan(x) ~ x + T1*x + ... + T13*x
642 * where
643 *
644 * |tan(x) 2 4 26 | -59.2
645 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
646 * | x |
647 *
648 * Note: tan(x+y) = tan(x) + tan'(x)*y
649 * ~ tan(x) + (1+x*x)*y
650 * Therefore, for better accuracy in computing tan(x+y), let
651 * 3 2 2 2 2
652 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
653 * then
654 * 3 2
655 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
656 *
657 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
658 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
659 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
660 */
662 static const double
663 pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
664 pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
665 T[] = {
666 3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
667 1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
668 5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
669 2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
670 8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
671 3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
672 1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
673 5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
674 2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
675 7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
676 7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
677 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
678 2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
679 };
681 static double __kernel_tan(double x, double y, int iy)
682 {
683 double z,r,v,w,s;
684 int ix,hx;
685 hx = high(x); /* high word of x */
686 ix = hx&0x7fffffff; /* high word of |x| */
687 if(ix<0x3e300000) { /* x < 2**-28 */
688 if((int)x==0) { /* generate inexact */
689 if (((ix | low(x)) | (iy + 1)) == 0)
690 return one / fabsd(x);
691 else {
692 if (iy == 1)
693 return x;
694 else { /* compute -1 / (x+y) carefully */
695 double a, t;
697 z = w = x + y;
698 set_low(&z, 0);
699 v = y - (z - x);
700 t = a = -one / w;
701 set_low(&t, 0);
702 s = one + t * z;
703 return t + a * (s + t * v);
704 }
705 }
706 }
707 }
708 if(ix>=0x3FE59428) { /* |x|>=0.6744 */
709 if(hx<0) {x = -x; y = -y;}
710 z = pio4-x;
711 w = pio4lo-y;
712 x = z+w; y = 0.0;
713 }
714 z = x*x;
715 w = z*z;
716 /* Break x^5*(T[1]+x^2*T[2]+...) into
717 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
718 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
719 */
720 r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
721 v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
722 s = z*x;
723 r = y + z*(s*(r+v)+y);
724 r += T[0]*s;
725 w = x+r;
726 if(ix>=0x3FE59428) {
727 v = (double)iy;
728 return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
729 }
730 if(iy==1) return w;
731 else { /* if allow error up to 2 ulp,
732 simply return -1.0/(x+r) here */
733 /* compute -1.0/(x+r) accurately */
734 double a,t;
735 z = w;
736 set_low(&z, 0);
737 v = r-(z - x); /* z+v = r+x */
738 t = a = -1.0/w; /* a = -1.0/w */
739 set_low(&t, 0);
740 s = 1.0+t*z;
741 return t+a*(s+t*v);
742 }
743 }
746 //----------------------------------------------------------------------
747 //
748 // Routines for new sin/cos implementation
749 //
750 //----------------------------------------------------------------------
752 /* sin(x)
753 * Return sine function of x.
754 *
755 * kernel function:
756 * __kernel_sin ... sine function on [-pi/4,pi/4]
757 * __kernel_cos ... cose function on [-pi/4,pi/4]
758 * __ieee754_rem_pio2 ... argument reduction routine
759 *
760 * Method.
761 * Let S,C and T denote the sin, cos and tan respectively on
762 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
763 * in [-pi/4 , +pi/4], and let n = k mod 4.
764 * We have
765 *
766 * n sin(x) cos(x) tan(x)
767 * ----------------------------------------------------------
768 * 0 S C T
769 * 1 C -S -1/T
770 * 2 -S -C T
771 * 3 -C S -1/T
772 * ----------------------------------------------------------
773 *
774 * Special cases:
775 * Let trig be any of sin, cos, or tan.
776 * trig(+-INF) is NaN, with signals;
777 * trig(NaN) is that NaN;
778 *
779 * Accuracy:
780 * TRIG(x) returns trig(x) nearly rounded
781 */
783 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
784 double y[2],z=0.0;
785 int n, ix;
787 /* High word of x. */
788 ix = high(x);
790 /* |x| ~< pi/4 */
791 ix &= 0x7fffffff;
792 if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
794 /* sin(Inf or NaN) is NaN */
795 else if (ix>=0x7ff00000) return x-x;
797 /* argument reduction needed */
798 else {
799 n = __ieee754_rem_pio2(x,y);
800 switch(n&3) {
801 case 0: return __kernel_sin(y[0],y[1],1);
802 case 1: return __kernel_cos(y[0],y[1]);
803 case 2: return -__kernel_sin(y[0],y[1],1);
804 default:
805 return -__kernel_cos(y[0],y[1]);
806 }
807 }
808 JRT_END
810 /* cos(x)
811 * Return cosine function of x.
812 *
813 * kernel function:
814 * __kernel_sin ... sine function on [-pi/4,pi/4]
815 * __kernel_cos ... cosine function on [-pi/4,pi/4]
816 * __ieee754_rem_pio2 ... argument reduction routine
817 *
818 * Method.
819 * Let S,C and T denote the sin, cos and tan respectively on
820 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
821 * in [-pi/4 , +pi/4], and let n = k mod 4.
822 * We have
823 *
824 * n sin(x) cos(x) tan(x)
825 * ----------------------------------------------------------
826 * 0 S C T
827 * 1 C -S -1/T
828 * 2 -S -C T
829 * 3 -C S -1/T
830 * ----------------------------------------------------------
831 *
832 * Special cases:
833 * Let trig be any of sin, cos, or tan.
834 * trig(+-INF) is NaN, with signals;
835 * trig(NaN) is that NaN;
836 *
837 * Accuracy:
838 * TRIG(x) returns trig(x) nearly rounded
839 */
841 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
842 double y[2],z=0.0;
843 int n, ix;
845 /* High word of x. */
846 ix = high(x);
848 /* |x| ~< pi/4 */
849 ix &= 0x7fffffff;
850 if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
852 /* cos(Inf or NaN) is NaN */
853 else if (ix>=0x7ff00000) return x-x;
855 /* argument reduction needed */
856 else {
857 n = __ieee754_rem_pio2(x,y);
858 switch(n&3) {
859 case 0: return __kernel_cos(y[0],y[1]);
860 case 1: return -__kernel_sin(y[0],y[1],1);
861 case 2: return -__kernel_cos(y[0],y[1]);
862 default:
863 return __kernel_sin(y[0],y[1],1);
864 }
865 }
866 JRT_END
868 /* tan(x)
869 * Return tangent function of x.
870 *
871 * kernel function:
872 * __kernel_tan ... tangent function on [-pi/4,pi/4]
873 * __ieee754_rem_pio2 ... argument reduction routine
874 *
875 * Method.
876 * Let S,C and T denote the sin, cos and tan respectively on
877 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
878 * in [-pi/4 , +pi/4], and let n = k mod 4.
879 * We have
880 *
881 * n sin(x) cos(x) tan(x)
882 * ----------------------------------------------------------
883 * 0 S C T
884 * 1 C -S -1/T
885 * 2 -S -C T
886 * 3 -C S -1/T
887 * ----------------------------------------------------------
888 *
889 * Special cases:
890 * Let trig be any of sin, cos, or tan.
891 * trig(+-INF) is NaN, with signals;
892 * trig(NaN) is that NaN;
893 *
894 * Accuracy:
895 * TRIG(x) returns trig(x) nearly rounded
896 */
898 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
899 double y[2],z=0.0;
900 int n, ix;
902 /* High word of x. */
903 ix = high(x);
905 /* |x| ~< pi/4 */
906 ix &= 0x7fffffff;
907 if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
909 /* tan(Inf or NaN) is NaN */
910 else if (ix>=0x7ff00000) return x-x; /* NaN */
912 /* argument reduction needed */
913 else {
914 n = __ieee754_rem_pio2(x,y);
915 return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
916 -1 -- n odd */
917 }
918 JRT_END
921 #ifdef WIN32
922 # pragma optimize ( "", on )
923 #endif