Mon, 18 Jun 2012 12:29:21 -0700
7176856: add the JRE name to the error log
Reviewed-by: coleenp, jrose, kvn, twisti
Contributed-by: Krystal Mok <sajia@taobao.com>
1 /*
2 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "prims/jni.h"
27 #include "runtime/interfaceSupport.hpp"
28 #include "runtime/sharedRuntime.hpp"
30 // This file contains copies of the fdlibm routines used by
31 // StrictMath. It turns out that it is almost always required to use
32 // these runtime routines; the Intel CPU doesn't meet the Java
33 // specification for sin/cos outside a certain limited argument range,
34 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
35 // also turns out that avoiding the indirect call through function
36 // pointer out to libjava.so in SharedRuntime speeds these routines up
37 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
39 // Enabling optimizations in this file causes incorrect code to be
40 // generated; can not figure out how to turn down optimization for one
41 // file in the IDE on Windows
42 #ifdef WIN32
43 # pragma optimize ( "", off )
44 #endif
46 /* The above workaround now causes more problems with the latest MS compiler.
47 * Visual Studio 2010's /GS option tries to guard against buffer overruns.
48 * /GS is on by default if you specify optimizations, which we do globally
49 * via /W3 /O2. However the above selective turning off of optimizations means
50 * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)
51 * then the compilation fails. There are several possible solutions
52 * (1) Remove that pragma above as obsolete with VS2010 - requires testing.
53 * (2) Stop treating warnings as errors - would be a backward step
54 * (3) Disable /GS - may help performance but you lose the security checks
55 * (4) Disable the warning with "#pragma warning( disable : 4748 )"
56 * (5) Disable planting the code with __declspec(safebuffers)
57 * I've opted for (5) although we should investigate the local performance
58 * benefits of (1) and global performance benefit of (3).
59 */
60 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))
61 #define SAFEBUF __declspec(safebuffers)
62 #else
63 #define SAFEBUF
64 #endif
66 #include <math.h>
68 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
69 // [jk] this is not 100% correct because the float word order may different
70 // from the byte order (e.g. on ARM)
71 #ifdef VM_LITTLE_ENDIAN
72 # define __HI(x) *(1+(int*)&x)
73 # define __LO(x) *(int*)&x
74 #else
75 # define __HI(x) *(int*)&x
76 # define __LO(x) *(1+(int*)&x)
77 #endif
79 static double copysignA(double x, double y) {
80 __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
81 return x;
82 }
84 /*
85 * scalbn (double x, int n)
86 * scalbn(x,n) returns x* 2**n computed by exponent
87 * manipulation rather than by actually performing an
88 * exponentiation or a multiplication.
89 */
91 static const double
92 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
93 twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
94 hugeX = 1.0e+300,
95 tiny = 1.0e-300;
97 static double scalbnA (double x, int n) {
98 int k,hx,lx;
99 hx = __HI(x);
100 lx = __LO(x);
101 k = (hx&0x7ff00000)>>20; /* extract exponent */
102 if (k==0) { /* 0 or subnormal x */
103 if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
104 x *= two54;
105 hx = __HI(x);
106 k = ((hx&0x7ff00000)>>20) - 54;
107 if (n< -50000) return tiny*x; /*underflow*/
108 }
109 if (k==0x7ff) return x+x; /* NaN or Inf */
110 k = k+n;
111 if (k > 0x7fe) return hugeX*copysignA(hugeX,x); /* overflow */
112 if (k > 0) /* normal result */
113 {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
114 if (k <= -54) {
115 if (n > 50000) /* in case integer overflow in n+k */
116 return hugeX*copysignA(hugeX,x); /*overflow*/
117 else return tiny*copysignA(tiny,x); /*underflow*/
118 }
119 k += 54; /* subnormal result */
120 __HI(x) = (hx&0x800fffff)|(k<<20);
121 return x*twom54;
122 }
124 /*
125 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
126 * double x[],y[]; int e0,nx,prec; int ipio2[];
127 *
128 * __kernel_rem_pio2 return the last three digits of N with
129 * y = x - N*pi/2
130 * so that |y| < pi/2.
131 *
132 * The method is to compute the integer (mod 8) and fraction parts of
133 * (2/pi)*x without doing the full multiplication. In general we
134 * skip the part of the product that are known to be a huge integer (
135 * more accurately, = 0 mod 8 ). Thus the number of operations are
136 * independent of the exponent of the input.
137 *
138 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
139 *
140 * Input parameters:
141 * x[] The input value (must be positive) is broken into nx
142 * pieces of 24-bit integers in double precision format.
143 * x[i] will be the i-th 24 bit of x. The scaled exponent
144 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
145 * match x's up to 24 bits.
146 *
147 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
148 * e0 = ilogb(z)-23
149 * z = scalbn(z,-e0)
150 * for i = 0,1,2
151 * x[i] = floor(z)
152 * z = (z-x[i])*2**24
153 *
154 *
155 * y[] ouput result in an array of double precision numbers.
156 * The dimension of y[] is:
157 * 24-bit precision 1
158 * 53-bit precision 2
159 * 64-bit precision 2
160 * 113-bit precision 3
161 * The actual value is the sum of them. Thus for 113-bit
162 * precsion, one may have to do something like:
163 *
164 * long double t,w,r_head, r_tail;
165 * t = (long double)y[2] + (long double)y[1];
166 * w = (long double)y[0];
167 * r_head = t+w;
168 * r_tail = w - (r_head - t);
169 *
170 * e0 The exponent of x[0]
171 *
172 * nx dimension of x[]
173 *
174 * prec an interger indicating the precision:
175 * 0 24 bits (single)
176 * 1 53 bits (double)
177 * 2 64 bits (extended)
178 * 3 113 bits (quad)
179 *
180 * ipio2[]
181 * integer array, contains the (24*i)-th to (24*i+23)-th
182 * bit of 2/pi after binary point. The corresponding
183 * floating value is
184 *
185 * ipio2[i] * 2^(-24(i+1)).
186 *
187 * External function:
188 * double scalbn(), floor();
189 *
190 *
191 * Here is the description of some local variables:
192 *
193 * jk jk+1 is the initial number of terms of ipio2[] needed
194 * in the computation. The recommended value is 2,3,4,
195 * 6 for single, double, extended,and quad.
196 *
197 * jz local integer variable indicating the number of
198 * terms of ipio2[] used.
199 *
200 * jx nx - 1
201 *
202 * jv index for pointing to the suitable ipio2[] for the
203 * computation. In general, we want
204 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
205 * is an integer. Thus
206 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
207 * Hence jv = max(0,(e0-3)/24).
208 *
209 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
210 *
211 * q[] double array with integral value, representing the
212 * 24-bits chunk of the product of x and 2/pi.
213 *
214 * q0 the corresponding exponent of q[0]. Note that the
215 * exponent for q[i] would be q0-24*i.
216 *
217 * PIo2[] double precision array, obtained by cutting pi/2
218 * into 24 bits chunks.
219 *
220 * f[] ipio2[] in floating point
221 *
222 * iq[] integer array by breaking up q[] in 24-bits chunk.
223 *
224 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
225 *
226 * ih integer. If >0 it indicats q[] is >= 0.5, hence
227 * it also indicates the *sign* of the result.
228 *
229 */
232 /*
233 * Constants:
234 * The hexadecimal values are the intended ones for the following
235 * constants. The decimal values may be used, provided that the
236 * compiler will convert from decimal to binary accurately enough
237 * to produce the hexadecimal values shown.
238 */
241 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
243 static const double PIo2[] = {
244 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
245 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
246 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
247 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
248 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
249 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
250 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
251 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
252 };
254 static const double
255 zeroB = 0.0,
256 one = 1.0,
257 two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
258 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
260 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
261 int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
262 double z,fw,f[20],fq[20],q[20];
264 /* initialize jk*/
265 jk = init_jk[prec];
266 jp = jk;
268 /* determine jx,jv,q0, note that 3>q0 */
269 jx = nx-1;
270 jv = (e0-3)/24; if(jv<0) jv=0;
271 q0 = e0-24*(jv+1);
273 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
274 j = jv-jx; m = jx+jk;
275 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
277 /* compute q[0],q[1],...q[jk] */
278 for (i=0;i<=jk;i++) {
279 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
280 }
282 jz = jk;
283 recompute:
284 /* distill q[] into iq[] reversingly */
285 for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
286 fw = (double)((int)(twon24* z));
287 iq[i] = (int)(z-two24B*fw);
288 z = q[j-1]+fw;
289 }
291 /* compute n */
292 z = scalbnA(z,q0); /* actual value of z */
293 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
294 n = (int) z;
295 z -= (double)n;
296 ih = 0;
297 if(q0>0) { /* need iq[jz-1] to determine n */
298 i = (iq[jz-1]>>(24-q0)); n += i;
299 iq[jz-1] -= i<<(24-q0);
300 ih = iq[jz-1]>>(23-q0);
301 }
302 else if(q0==0) ih = iq[jz-1]>>23;
303 else if(z>=0.5) ih=2;
305 if(ih>0) { /* q > 0.5 */
306 n += 1; carry = 0;
307 for(i=0;i<jz ;i++) { /* compute 1-q */
308 j = iq[i];
309 if(carry==0) {
310 if(j!=0) {
311 carry = 1; iq[i] = 0x1000000- j;
312 }
313 } else iq[i] = 0xffffff - j;
314 }
315 if(q0>0) { /* rare case: chance is 1 in 12 */
316 switch(q0) {
317 case 1:
318 iq[jz-1] &= 0x7fffff; break;
319 case 2:
320 iq[jz-1] &= 0x3fffff; break;
321 }
322 }
323 if(ih==2) {
324 z = one - z;
325 if(carry!=0) z -= scalbnA(one,q0);
326 }
327 }
329 /* check if recomputation is needed */
330 if(z==zeroB) {
331 j = 0;
332 for (i=jz-1;i>=jk;i--) j |= iq[i];
333 if(j==0) { /* need recomputation */
334 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
336 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
337 f[jx+i] = (double) ipio2[jv+i];
338 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
339 q[i] = fw;
340 }
341 jz += k;
342 goto recompute;
343 }
344 }
346 /* chop off zero terms */
347 if(z==0.0) {
348 jz -= 1; q0 -= 24;
349 while(iq[jz]==0) { jz--; q0-=24;}
350 } else { /* break z into 24-bit if neccessary */
351 z = scalbnA(z,-q0);
352 if(z>=two24B) {
353 fw = (double)((int)(twon24*z));
354 iq[jz] = (int)(z-two24B*fw);
355 jz += 1; q0 += 24;
356 iq[jz] = (int) fw;
357 } else iq[jz] = (int) z ;
358 }
360 /* convert integer "bit" chunk to floating-point value */
361 fw = scalbnA(one,q0);
362 for(i=jz;i>=0;i--) {
363 q[i] = fw*(double)iq[i]; fw*=twon24;
364 }
366 /* compute PIo2[0,...,jp]*q[jz,...,0] */
367 for(i=jz;i>=0;i--) {
368 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
369 fq[jz-i] = fw;
370 }
372 /* compress fq[] into y[] */
373 switch(prec) {
374 case 0:
375 fw = 0.0;
376 for (i=jz;i>=0;i--) fw += fq[i];
377 y[0] = (ih==0)? fw: -fw;
378 break;
379 case 1:
380 case 2:
381 fw = 0.0;
382 for (i=jz;i>=0;i--) fw += fq[i];
383 y[0] = (ih==0)? fw: -fw;
384 fw = fq[0]-fw;
385 for (i=1;i<=jz;i++) fw += fq[i];
386 y[1] = (ih==0)? fw: -fw;
387 break;
388 case 3: /* painful */
389 for (i=jz;i>0;i--) {
390 fw = fq[i-1]+fq[i];
391 fq[i] += fq[i-1]-fw;
392 fq[i-1] = fw;
393 }
394 for (i=jz;i>1;i--) {
395 fw = fq[i-1]+fq[i];
396 fq[i] += fq[i-1]-fw;
397 fq[i-1] = fw;
398 }
399 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
400 if(ih==0) {
401 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
402 } else {
403 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
404 }
405 }
406 return n&7;
407 }
410 /*
411 * ====================================================
412 * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.
413 *
414 * Developed at SunPro, a Sun Microsystems, Inc. business.
415 * Permission to use, copy, modify, and distribute this
416 * software is freely granted, provided that this notice
417 * is preserved.
418 * ====================================================
419 *
420 */
422 /* __ieee754_rem_pio2(x,y)
423 *
424 * return the remainder of x rem pi/2 in y[0]+y[1]
425 * use __kernel_rem_pio2()
426 */
428 /*
429 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
430 */
431 static const int two_over_pi[] = {
432 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
433 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
434 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
435 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
436 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
437 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
438 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
439 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
440 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
441 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
442 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
443 };
445 static const int npio2_hw[] = {
446 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
447 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
448 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
449 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
450 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
451 0x404858EB, 0x404921FB,
452 };
454 /*
455 * invpio2: 53 bits of 2/pi
456 * pio2_1: first 33 bit of pi/2
457 * pio2_1t: pi/2 - pio2_1
458 * pio2_2: second 33 bit of pi/2
459 * pio2_2t: pi/2 - (pio2_1+pio2_2)
460 * pio2_3: third 33 bit of pi/2
461 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
462 */
464 static const double
465 zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
466 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
467 two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
468 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
469 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
470 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
471 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
472 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
473 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
474 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
476 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {
477 double z,w,t,r,fn;
478 double tx[3];
479 int e0,i,j,nx,n,ix,hx,i0;
481 i0 = ((*(int*)&two24A)>>30)^1; /* high word index */
482 hx = *(i0+(int*)&x); /* high word of x */
483 ix = hx&0x7fffffff;
484 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
485 {y[0] = x; y[1] = 0; return 0;}
486 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
487 if(hx>0) {
488 z = x - pio2_1;
489 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
490 y[0] = z - pio2_1t;
491 y[1] = (z-y[0])-pio2_1t;
492 } else { /* near pi/2, use 33+33+53 bit pi */
493 z -= pio2_2;
494 y[0] = z - pio2_2t;
495 y[1] = (z-y[0])-pio2_2t;
496 }
497 return 1;
498 } else { /* negative x */
499 z = x + pio2_1;
500 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
501 y[0] = z + pio2_1t;
502 y[1] = (z-y[0])+pio2_1t;
503 } else { /* near pi/2, use 33+33+53 bit pi */
504 z += pio2_2;
505 y[0] = z + pio2_2t;
506 y[1] = (z-y[0])+pio2_2t;
507 }
508 return -1;
509 }
510 }
511 if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
512 t = fabsd(x);
513 n = (int) (t*invpio2+half);
514 fn = (double)n;
515 r = t-fn*pio2_1;
516 w = fn*pio2_1t; /* 1st round good to 85 bit */
517 if(n<32&&ix!=npio2_hw[n-1]) {
518 y[0] = r-w; /* quick check no cancellation */
519 } else {
520 j = ix>>20;
521 y[0] = r-w;
522 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
523 if(i>16) { /* 2nd iteration needed, good to 118 */
524 t = r;
525 w = fn*pio2_2;
526 r = t-w;
527 w = fn*pio2_2t-((t-r)-w);
528 y[0] = r-w;
529 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
530 if(i>49) { /* 3rd iteration need, 151 bits acc */
531 t = r; /* will cover all possible cases */
532 w = fn*pio2_3;
533 r = t-w;
534 w = fn*pio2_3t-((t-r)-w);
535 y[0] = r-w;
536 }
537 }
538 }
539 y[1] = (r-y[0])-w;
540 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
541 else return n;
542 }
543 /*
544 * all other (large) arguments
545 */
546 if(ix>=0x7ff00000) { /* x is inf or NaN */
547 y[0]=y[1]=x-x; return 0;
548 }
549 /* set z = scalbn(|x|,ilogb(x)-23) */
550 *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
551 e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */
552 *(i0+(int*)&z) = ix - (e0<<20);
553 for(i=0;i<2;i++) {
554 tx[i] = (double)((int)(z));
555 z = (z-tx[i])*two24A;
556 }
557 tx[2] = z;
558 nx = 3;
559 while(tx[nx-1]==zeroA) nx--; /* skip zero term */
560 n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
561 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
562 return n;
563 }
566 /* __kernel_sin( x, y, iy)
567 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
568 * Input x is assumed to be bounded by ~pi/4 in magnitude.
569 * Input y is the tail of x.
570 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
571 *
572 * Algorithm
573 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
574 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
575 * 3. sin(x) is approximated by a polynomial of degree 13 on
576 * [0,pi/4]
577 * 3 13
578 * sin(x) ~ x + S1*x + ... + S6*x
579 * where
580 *
581 * |sin(x) 2 4 6 8 10 12 | -58
582 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
583 * | x |
584 *
585 * 4. sin(x+y) = sin(x) + sin'(x')*y
586 * ~ sin(x) + (1-x*x/2)*y
587 * For better accuracy, let
588 * 3 2 2 2 2
589 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
590 * then 3 2
591 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
592 */
594 static const double
595 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
596 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
597 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
598 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
599 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
600 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
602 static double __kernel_sin(double x, double y, int iy)
603 {
604 double z,r,v;
605 int ix;
606 ix = __HI(x)&0x7fffffff; /* high word of x */
607 if(ix<0x3e400000) /* |x| < 2**-27 */
608 {if((int)x==0) return x;} /* generate inexact */
609 z = x*x;
610 v = z*x;
611 r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
612 if(iy==0) return x+v*(S1+z*r);
613 else return x-((z*(half*y-v*r)-y)-v*S1);
614 }
616 /*
617 * __kernel_cos( x, y )
618 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
619 * Input x is assumed to be bounded by ~pi/4 in magnitude.
620 * Input y is the tail of x.
621 *
622 * Algorithm
623 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
624 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
625 * 3. cos(x) is approximated by a polynomial of degree 14 on
626 * [0,pi/4]
627 * 4 14
628 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
629 * where the remez error is
630 *
631 * | 2 4 6 8 10 12 14 | -58
632 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
633 * | |
634 *
635 * 4 6 8 10 12 14
636 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
637 * cos(x) = 1 - x*x/2 + r
638 * since cos(x+y) ~ cos(x) - sin(x)*y
639 * ~ cos(x) - x*y,
640 * a correction term is necessary in cos(x) and hence
641 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
642 * For better accuracy when x > 0.3, let qx = |x|/4 with
643 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
644 * Then
645 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
646 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
647 * magnitude of the latter is at least a quarter of x*x/2,
648 * thus, reducing the rounding error in the subtraction.
649 */
651 static const double
652 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
653 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
654 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
655 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
656 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
657 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
659 static double __kernel_cos(double x, double y)
660 {
661 double a,hz,z,r,qx;
662 int ix;
663 ix = __HI(x)&0x7fffffff; /* ix = |x|'s high word*/
664 if(ix<0x3e400000) { /* if x < 2**27 */
665 if(((int)x)==0) return one; /* generate inexact */
666 }
667 z = x*x;
668 r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
669 if(ix < 0x3FD33333) /* if |x| < 0.3 */
670 return one - (0.5*z - (z*r - x*y));
671 else {
672 if(ix > 0x3fe90000) { /* x > 0.78125 */
673 qx = 0.28125;
674 } else {
675 __HI(qx) = ix-0x00200000; /* x/4 */
676 __LO(qx) = 0;
677 }
678 hz = 0.5*z-qx;
679 a = one-qx;
680 return a - (hz - (z*r-x*y));
681 }
682 }
684 /* __kernel_tan( x, y, k )
685 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
686 * Input x is assumed to be bounded by ~pi/4 in magnitude.
687 * Input y is the tail of x.
688 * Input k indicates whether tan (if k=1) or
689 * -1/tan (if k= -1) is returned.
690 *
691 * Algorithm
692 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
693 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
694 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
695 * [0,0.67434]
696 * 3 27
697 * tan(x) ~ x + T1*x + ... + T13*x
698 * where
699 *
700 * |tan(x) 2 4 26 | -59.2
701 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
702 * | x |
703 *
704 * Note: tan(x+y) = tan(x) + tan'(x)*y
705 * ~ tan(x) + (1+x*x)*y
706 * Therefore, for better accuracy in computing tan(x+y), let
707 * 3 2 2 2 2
708 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
709 * then
710 * 3 2
711 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
712 *
713 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
714 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
715 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
716 */
718 static const double
719 pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
720 pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
721 T[] = {
722 3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
723 1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
724 5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
725 2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
726 8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
727 3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
728 1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
729 5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
730 2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
731 7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
732 7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
733 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
734 2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
735 };
737 static double __kernel_tan(double x, double y, int iy)
738 {
739 double z,r,v,w,s;
740 int ix,hx;
741 hx = __HI(x); /* high word of x */
742 ix = hx&0x7fffffff; /* high word of |x| */
743 if(ix<0x3e300000) { /* x < 2**-28 */
744 if((int)x==0) { /* generate inexact */
745 if (((ix | __LO(x)) | (iy + 1)) == 0)
746 return one / fabsd(x);
747 else {
748 if (iy == 1)
749 return x;
750 else { /* compute -1 / (x+y) carefully */
751 double a, t;
753 z = w = x + y;
754 __LO(z) = 0;
755 v = y - (z - x);
756 t = a = -one / w;
757 __LO(t) = 0;
758 s = one + t * z;
759 return t + a * (s + t * v);
760 }
761 }
762 }
763 }
764 if(ix>=0x3FE59428) { /* |x|>=0.6744 */
765 if(hx<0) {x = -x; y = -y;}
766 z = pio4-x;
767 w = pio4lo-y;
768 x = z+w; y = 0.0;
769 }
770 z = x*x;
771 w = z*z;
772 /* Break x^5*(T[1]+x^2*T[2]+...) into
773 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
774 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
775 */
776 r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
777 v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
778 s = z*x;
779 r = y + z*(s*(r+v)+y);
780 r += T[0]*s;
781 w = x+r;
782 if(ix>=0x3FE59428) {
783 v = (double)iy;
784 return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
785 }
786 if(iy==1) return w;
787 else { /* if allow error up to 2 ulp,
788 simply return -1.0/(x+r) here */
789 /* compute -1.0/(x+r) accurately */
790 double a,t;
791 z = w;
792 __LO(z) = 0;
793 v = r-(z - x); /* z+v = r+x */
794 t = a = -1.0/w; /* a = -1.0/w */
795 __LO(t) = 0;
796 s = 1.0+t*z;
797 return t+a*(s+t*v);
798 }
799 }
802 //----------------------------------------------------------------------
803 //
804 // Routines for new sin/cos implementation
805 //
806 //----------------------------------------------------------------------
808 /* sin(x)
809 * Return sine function of x.
810 *
811 * kernel function:
812 * __kernel_sin ... sine function on [-pi/4,pi/4]
813 * __kernel_cos ... cose function on [-pi/4,pi/4]
814 * __ieee754_rem_pio2 ... argument reduction routine
815 *
816 * Method.
817 * Let S,C and T denote the sin, cos and tan respectively on
818 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
819 * in [-pi/4 , +pi/4], and let n = k mod 4.
820 * We have
821 *
822 * n sin(x) cos(x) tan(x)
823 * ----------------------------------------------------------
824 * 0 S C T
825 * 1 C -S -1/T
826 * 2 -S -C T
827 * 3 -C S -1/T
828 * ----------------------------------------------------------
829 *
830 * Special cases:
831 * Let trig be any of sin, cos, or tan.
832 * trig(+-INF) is NaN, with signals;
833 * trig(NaN) is that NaN;
834 *
835 * Accuracy:
836 * TRIG(x) returns trig(x) nearly rounded
837 */
839 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
840 double y[2],z=0.0;
841 int n, ix;
843 /* High word of x. */
844 ix = __HI(x);
846 /* |x| ~< pi/4 */
847 ix &= 0x7fffffff;
848 if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
850 /* sin(Inf or NaN) is NaN */
851 else if (ix>=0x7ff00000) return x-x;
853 /* argument reduction needed */
854 else {
855 n = __ieee754_rem_pio2(x,y);
856 switch(n&3) {
857 case 0: return __kernel_sin(y[0],y[1],1);
858 case 1: return __kernel_cos(y[0],y[1]);
859 case 2: return -__kernel_sin(y[0],y[1],1);
860 default:
861 return -__kernel_cos(y[0],y[1]);
862 }
863 }
864 JRT_END
866 /* cos(x)
867 * Return cosine function of x.
868 *
869 * kernel function:
870 * __kernel_sin ... sine function on [-pi/4,pi/4]
871 * __kernel_cos ... cosine function on [-pi/4,pi/4]
872 * __ieee754_rem_pio2 ... argument reduction routine
873 *
874 * Method.
875 * Let S,C and T denote the sin, cos and tan respectively on
876 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
877 * in [-pi/4 , +pi/4], and let n = k mod 4.
878 * We have
879 *
880 * n sin(x) cos(x) tan(x)
881 * ----------------------------------------------------------
882 * 0 S C T
883 * 1 C -S -1/T
884 * 2 -S -C T
885 * 3 -C S -1/T
886 * ----------------------------------------------------------
887 *
888 * Special cases:
889 * Let trig be any of sin, cos, or tan.
890 * trig(+-INF) is NaN, with signals;
891 * trig(NaN) is that NaN;
892 *
893 * Accuracy:
894 * TRIG(x) returns trig(x) nearly rounded
895 */
897 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
898 double y[2],z=0.0;
899 int n, ix;
901 /* High word of x. */
902 ix = __HI(x);
904 /* |x| ~< pi/4 */
905 ix &= 0x7fffffff;
906 if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
908 /* cos(Inf or NaN) is NaN */
909 else if (ix>=0x7ff00000) return x-x;
911 /* argument reduction needed */
912 else {
913 n = __ieee754_rem_pio2(x,y);
914 switch(n&3) {
915 case 0: return __kernel_cos(y[0],y[1]);
916 case 1: return -__kernel_sin(y[0],y[1],1);
917 case 2: return -__kernel_cos(y[0],y[1]);
918 default:
919 return __kernel_sin(y[0],y[1],1);
920 }
921 }
922 JRT_END
924 /* tan(x)
925 * Return tangent function of x.
926 *
927 * kernel function:
928 * __kernel_tan ... tangent function on [-pi/4,pi/4]
929 * __ieee754_rem_pio2 ... argument reduction routine
930 *
931 * Method.
932 * Let S,C and T denote the sin, cos and tan respectively on
933 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
934 * in [-pi/4 , +pi/4], and let n = k mod 4.
935 * We have
936 *
937 * n sin(x) cos(x) tan(x)
938 * ----------------------------------------------------------
939 * 0 S C T
940 * 1 C -S -1/T
941 * 2 -S -C T
942 * 3 -C S -1/T
943 * ----------------------------------------------------------
944 *
945 * Special cases:
946 * Let trig be any of sin, cos, or tan.
947 * trig(+-INF) is NaN, with signals;
948 * trig(NaN) is that NaN;
949 *
950 * Accuracy:
951 * TRIG(x) returns trig(x) nearly rounded
952 */
954 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
955 double y[2],z=0.0;
956 int n, ix;
958 /* High word of x. */
959 ix = __HI(x);
961 /* |x| ~< pi/4 */
962 ix &= 0x7fffffff;
963 if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
965 /* tan(Inf or NaN) is NaN */
966 else if (ix>=0x7ff00000) return x-x; /* NaN */
968 /* argument reduction needed */
969 else {
970 n = __ieee754_rem_pio2(x,y);
971 return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
972 -1 -- n odd */
973 }
974 JRT_END
977 #ifdef WIN32
978 # pragma optimize ( "", on )
979 #endif