Thu, 12 Oct 2017 21:27:07 +0800
merge
1 /*
2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 /*
26 * This file has been modified by Loongson Technology in 2015. These
27 * modifications are Copyright (c) 2015 Loongson Technology, and are made
28 * available on the same license terms set forth above.
29 */
31 #include "precompiled.hpp"
32 #include "prims/jni.h"
33 #include "runtime/interfaceSupport.hpp"
34 #include "runtime/sharedRuntime.hpp"
36 // This file contains copies of the fdlibm routines used by
37 // StrictMath. It turns out that it is almost always required to use
38 // these runtime routines; the Intel CPU doesn't meet the Java
39 // specification for sin/cos outside a certain limited argument range,
40 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
41 // also turns out that avoiding the indirect call through function
42 // pointer out to libjava.so in SharedRuntime speeds these routines up
43 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
45 // Enabling optimizations in this file causes incorrect code to be
46 // generated; can not figure out how to turn down optimization for one
47 // file in the IDE on Windows
48 #ifdef WIN32
49 # pragma optimize ( "", off )
50 #endif
52 /* The above workaround now causes more problems with the latest MS compiler.
53 * Visual Studio 2010's /GS option tries to guard against buffer overruns.
54 * /GS is on by default if you specify optimizations, which we do globally
55 * via /W3 /O2. However the above selective turning off of optimizations means
56 * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)
57 * then the compilation fails. There are several possible solutions
58 * (1) Remove that pragma above as obsolete with VS2010 - requires testing.
59 * (2) Stop treating warnings as errors - would be a backward step
60 * (3) Disable /GS - may help performance but you lose the security checks
61 * (4) Disable the warning with "#pragma warning( disable : 4748 )"
62 * (5) Disable planting the code with __declspec(safebuffers)
63 * I've opted for (5) although we should investigate the local performance
64 * benefits of (1) and global performance benefit of (3).
65 */
66 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))
67 #define SAFEBUF __declspec(safebuffers)
68 #else
69 #define SAFEBUF
70 #endif
72 #include "runtime/sharedRuntimeMath.hpp"
74 /*
75 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
76 * double x[],y[]; int e0,nx,prec; int ipio2[];
77 *
78 * __kernel_rem_pio2 return the last three digits of N with
79 * y = x - N*pi/2
80 * so that |y| < pi/2.
81 *
82 * The method is to compute the integer (mod 8) and fraction parts of
83 * (2/pi)*x without doing the full multiplication. In general we
84 * skip the part of the product that are known to be a huge integer (
85 * more accurately, = 0 mod 8 ). Thus the number of operations are
86 * independent of the exponent of the input.
87 *
88 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
89 *
90 * Input parameters:
91 * x[] The input value (must be positive) is broken into nx
92 * pieces of 24-bit integers in double precision format.
93 * x[i] will be the i-th 24 bit of x. The scaled exponent
94 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
95 * match x's up to 24 bits.
96 *
97 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
98 * e0 = ilogb(z)-23
99 * z = scalbn(z,-e0)
100 * for i = 0,1,2
101 * x[i] = floor(z)
102 * z = (z-x[i])*2**24
103 *
104 *
105 * y[] ouput result in an array of double precision numbers.
106 * The dimension of y[] is:
107 * 24-bit precision 1
108 * 53-bit precision 2
109 * 64-bit precision 2
110 * 113-bit precision 3
111 * The actual value is the sum of them. Thus for 113-bit
112 * precsion, one may have to do something like:
113 *
114 * long double t,w,r_head, r_tail;
115 * t = (long double)y[2] + (long double)y[1];
116 * w = (long double)y[0];
117 * r_head = t+w;
118 * r_tail = w - (r_head - t);
119 *
120 * e0 The exponent of x[0]
121 *
122 * nx dimension of x[]
123 *
124 * prec an interger indicating the precision:
125 * 0 24 bits (single)
126 * 1 53 bits (double)
127 * 2 64 bits (extended)
128 * 3 113 bits (quad)
129 *
130 * ipio2[]
131 * integer array, contains the (24*i)-th to (24*i+23)-th
132 * bit of 2/pi after binary point. The corresponding
133 * floating value is
134 *
135 * ipio2[i] * 2^(-24(i+1)).
136 *
137 * External function:
138 * double scalbn(), floor();
139 *
140 *
141 * Here is the description of some local variables:
142 *
143 * jk jk+1 is the initial number of terms of ipio2[] needed
144 * in the computation. The recommended value is 2,3,4,
145 * 6 for single, double, extended,and quad.
146 *
147 * jz local integer variable indicating the number of
148 * terms of ipio2[] used.
149 *
150 * jx nx - 1
151 *
152 * jv index for pointing to the suitable ipio2[] for the
153 * computation. In general, we want
154 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
155 * is an integer. Thus
156 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
157 * Hence jv = max(0,(e0-3)/24).
158 *
159 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
160 *
161 * q[] double array with integral value, representing the
162 * 24-bits chunk of the product of x and 2/pi.
163 *
164 * q0 the corresponding exponent of q[0]. Note that the
165 * exponent for q[i] would be q0-24*i.
166 *
167 * PIo2[] double precision array, obtained by cutting pi/2
168 * into 24 bits chunks.
169 *
170 * f[] ipio2[] in floating point
171 *
172 * iq[] integer array by breaking up q[] in 24-bits chunk.
173 *
174 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
175 *
176 * ih integer. If >0 it indicats q[] is >= 0.5, hence
177 * it also indicates the *sign* of the result.
178 *
179 */
182 /*
183 * Constants:
184 * The hexadecimal values are the intended ones for the following
185 * constants. The decimal values may be used, provided that the
186 * compiler will convert from decimal to binary accurately enough
187 * to produce the hexadecimal values shown.
188 */
191 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
193 static const double PIo2[] = {
194 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
195 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
196 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
197 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
198 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
199 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
200 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
201 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
202 };
204 static const double
205 zeroB = 0.0,
206 one = 1.0,
207 two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
208 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
210 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
211 int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
212 double z,fw,f[20],fq[20],q[20];
214 /* initialize jk*/
215 jk = init_jk[prec];
216 jp = jk;
218 /* determine jx,jv,q0, note that 3>q0 */
219 jx = nx-1;
220 jv = (e0-3)/24; if(jv<0) jv=0;
221 q0 = e0-24*(jv+1);
223 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
224 j = jv-jx; m = jx+jk;
225 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
227 /* compute q[0],q[1],...q[jk] */
228 for (i=0;i<=jk;i++) {
229 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
230 }
232 jz = jk;
233 recompute:
234 /* distill q[] into iq[] reversingly */
235 for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
236 fw = (double)((int)(twon24* z));
237 iq[i] = (int)(z-two24B*fw);
238 z = q[j-1]+fw;
239 }
241 /* compute n */
242 z = scalbnA(z,q0); /* actual value of z */
243 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
244 n = (int) z;
245 z -= (double)n;
246 ih = 0;
247 if(q0>0) { /* need iq[jz-1] to determine n */
248 i = (iq[jz-1]>>(24-q0)); n += i;
249 iq[jz-1] -= i<<(24-q0);
250 ih = iq[jz-1]>>(23-q0);
251 }
252 else if(q0==0) ih = iq[jz-1]>>23;
253 else if(z>=0.5) ih=2;
255 if(ih>0) { /* q > 0.5 */
256 n += 1; carry = 0;
257 for(i=0;i<jz ;i++) { /* compute 1-q */
258 j = iq[i];
259 if(carry==0) {
260 if(j!=0) {
261 carry = 1; iq[i] = 0x1000000- j;
262 }
263 } else iq[i] = 0xffffff - j;
264 }
265 if(q0>0) { /* rare case: chance is 1 in 12 */
266 switch(q0) {
267 case 1:
268 iq[jz-1] &= 0x7fffff; break;
269 case 2:
270 iq[jz-1] &= 0x3fffff; break;
271 }
272 }
273 if(ih==2) {
274 z = one - z;
275 if(carry!=0) z -= scalbnA(one,q0);
276 }
277 }
279 /* check if recomputation is needed */
280 if(z==zeroB) {
281 j = 0;
282 for (i=jz-1;i>=jk;i--) j |= iq[i];
283 if(j==0) { /* need recomputation */
284 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
286 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
287 f[jx+i] = (double) ipio2[jv+i];
288 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
289 q[i] = fw;
290 }
291 jz += k;
292 goto recompute;
293 }
294 }
296 /* chop off zero terms */
297 if(z==0.0) {
298 jz -= 1; q0 -= 24;
299 while(iq[jz]==0) { jz--; q0-=24;}
300 } else { /* break z into 24-bit if neccessary */
301 z = scalbnA(z,-q0);
302 if(z>=two24B) {
303 fw = (double)((int)(twon24*z));
304 iq[jz] = (int)(z-two24B*fw);
305 jz += 1; q0 += 24;
306 iq[jz] = (int) fw;
307 } else iq[jz] = (int) z ;
308 }
310 /* convert integer "bit" chunk to floating-point value */
311 fw = scalbnA(one,q0);
312 for(i=jz;i>=0;i--) {
313 q[i] = fw*(double)iq[i]; fw*=twon24;
314 }
316 /* compute PIo2[0,...,jp]*q[jz,...,0] */
317 for(i=jz;i>=0;i--) {
318 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
319 fq[jz-i] = fw;
320 }
322 /* compress fq[] into y[] */
323 switch(prec) {
324 case 0:
325 fw = 0.0;
326 for (i=jz;i>=0;i--) fw += fq[i];
327 y[0] = (ih==0)? fw: -fw;
328 break;
329 case 1:
330 case 2:
331 fw = 0.0;
332 for (i=jz;i>=0;i--) fw += fq[i];
333 y[0] = (ih==0)? fw: -fw;
334 fw = fq[0]-fw;
335 for (i=1;i<=jz;i++) fw += fq[i];
336 y[1] = (ih==0)? fw: -fw;
337 break;
338 case 3: /* painful */
339 for (i=jz;i>0;i--) {
340 fw = fq[i-1]+fq[i];
341 fq[i] += fq[i-1]-fw;
342 fq[i-1] = fw;
343 }
344 for (i=jz;i>1;i--) {
345 fw = fq[i-1]+fq[i];
346 fq[i] += fq[i-1]-fw;
347 fq[i-1] = fw;
348 }
349 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
350 if(ih==0) {
351 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
352 } else {
353 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
354 }
355 }
356 return n&7;
357 }
360 /*
361 * ====================================================
362 * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.
363 *
364 * Developed at SunPro, a Sun Microsystems, Inc. business.
365 * Permission to use, copy, modify, and distribute this
366 * software is freely granted, provided that this notice
367 * is preserved.
368 * ====================================================
369 *
370 */
372 /* __ieee754_rem_pio2(x,y)
373 *
374 * return the remainder of x rem pi/2 in y[0]+y[1]
375 * use __kernel_rem_pio2()
376 */
378 /*
379 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
380 */
381 static const int two_over_pi[] = {
382 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
383 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
384 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
385 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
386 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
387 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
388 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
389 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
390 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
391 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
392 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
393 };
395 static const int npio2_hw[] = {
396 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
397 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
398 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
399 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
400 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
401 0x404858EB, 0x404921FB,
402 };
404 /*
405 * invpio2: 53 bits of 2/pi
406 * pio2_1: first 33 bit of pi/2
407 * pio2_1t: pi/2 - pio2_1
408 * pio2_2: second 33 bit of pi/2
409 * pio2_2t: pi/2 - (pio2_1+pio2_2)
410 * pio2_3: third 33 bit of pi/2
411 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
412 */
414 static const double
415 zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
416 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
417 two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
418 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
419 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
420 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
421 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
422 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
423 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
424 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
426 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {
427 double z,w,t,r,fn;
428 double tx[3];
429 int e0,i,j,nx,n,ix,hx,i0;
431 i0 = ((*(int*)&two24A)>>30)^1; /* high word index */
432 hx = *(i0+(int*)&x); /* high word of x */
433 ix = hx&0x7fffffff;
434 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
435 {y[0] = x; y[1] = 0; return 0;}
436 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
437 if(hx>0) {
438 z = x - pio2_1;
439 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
440 y[0] = z - pio2_1t;
441 y[1] = (z-y[0])-pio2_1t;
442 } else { /* near pi/2, use 33+33+53 bit pi */
443 z -= pio2_2;
444 y[0] = z - pio2_2t;
445 y[1] = (z-y[0])-pio2_2t;
446 }
447 return 1;
448 } else { /* negative x */
449 z = x + pio2_1;
450 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
451 y[0] = z + pio2_1t;
452 y[1] = (z-y[0])+pio2_1t;
453 } else { /* near pi/2, use 33+33+53 bit pi */
454 z += pio2_2;
455 y[0] = z + pio2_2t;
456 y[1] = (z-y[0])+pio2_2t;
457 }
458 return -1;
459 }
460 }
461 if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
462 t = fabsd(x);
463 n = (int) (t*invpio2+half);
464 fn = (double)n;
465 r = t-fn*pio2_1;
466 w = fn*pio2_1t; /* 1st round good to 85 bit */
467 if(n<32&&ix!=npio2_hw[n-1]) {
468 y[0] = r-w; /* quick check no cancellation */
469 } else {
470 j = ix>>20;
471 y[0] = r-w;
472 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
473 if(i>16) { /* 2nd iteration needed, good to 118 */
474 t = r;
475 w = fn*pio2_2;
476 r = t-w;
477 w = fn*pio2_2t-((t-r)-w);
478 y[0] = r-w;
479 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
480 if(i>49) { /* 3rd iteration need, 151 bits acc */
481 t = r; /* will cover all possible cases */
482 w = fn*pio2_3;
483 r = t-w;
484 w = fn*pio2_3t-((t-r)-w);
485 y[0] = r-w;
486 }
487 }
488 }
489 y[1] = (r-y[0])-w;
490 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
491 else return n;
492 }
493 /*
494 * all other (large) arguments
495 */
496 if(ix>=0x7ff00000) { /* x is inf or NaN */
497 y[0]=y[1]=x-x; return 0;
498 }
499 /* set z = scalbn(|x|,ilogb(x)-23) */
500 *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
501 e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */
502 *(i0+(int*)&z) = ix - (e0<<20);
503 for(i=0;i<2;i++) {
504 tx[i] = (double)((int)(z));
505 z = (z-tx[i])*two24A;
506 }
507 tx[2] = z;
508 nx = 3;
509 while(tx[nx-1]==zeroA) nx--; /* skip zero term */
510 n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
511 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
512 return n;
513 }
516 /* __kernel_sin( x, y, iy)
517 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
518 * Input x is assumed to be bounded by ~pi/4 in magnitude.
519 * Input y is the tail of x.
520 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
521 *
522 * Algorithm
523 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
524 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
525 * 3. sin(x) is approximated by a polynomial of degree 13 on
526 * [0,pi/4]
527 * 3 13
528 * sin(x) ~ x + S1*x + ... + S6*x
529 * where
530 *
531 * |sin(x) 2 4 6 8 10 12 | -58
532 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
533 * | x |
534 *
535 * 4. sin(x+y) = sin(x) + sin'(x')*y
536 * ~ sin(x) + (1-x*x/2)*y
537 * For better accuracy, let
538 * 3 2 2 2 2
539 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
540 * then 3 2
541 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
542 */
543 #ifdef MIPS64
544 #undef S1
545 #undef S2
546 #undef S3
547 #undef S4
548 #undef S5
549 #undef S6
550 #endif
552 static const double
553 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
554 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
555 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
556 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
557 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
558 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
560 static double __kernel_sin(double x, double y, int iy)
561 {
562 double z,r,v;
563 int ix;
564 ix = high(x)&0x7fffffff; /* high word of x */
565 if(ix<0x3e400000) /* |x| < 2**-27 */
566 {if((int)x==0) return x;} /* generate inexact */
567 z = x*x;
568 v = z*x;
569 r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
570 if(iy==0) return x+v*(S1+z*r);
571 else return x-((z*(half*y-v*r)-y)-v*S1);
572 }
574 /*
575 * __kernel_cos( x, y )
576 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
577 * Input x is assumed to be bounded by ~pi/4 in magnitude.
578 * Input y is the tail of x.
579 *
580 * Algorithm
581 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
582 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
583 * 3. cos(x) is approximated by a polynomial of degree 14 on
584 * [0,pi/4]
585 * 4 14
586 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
587 * where the remez error is
588 *
589 * | 2 4 6 8 10 12 14 | -58
590 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
591 * | |
592 *
593 * 4 6 8 10 12 14
594 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
595 * cos(x) = 1 - x*x/2 + r
596 * since cos(x+y) ~ cos(x) - sin(x)*y
597 * ~ cos(x) - x*y,
598 * a correction term is necessary in cos(x) and hence
599 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
600 * For better accuracy when x > 0.3, let qx = |x|/4 with
601 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
602 * Then
603 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
604 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
605 * magnitude of the latter is at least a quarter of x*x/2,
606 * thus, reducing the rounding error in the subtraction.
607 */
609 static const double
610 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
611 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
612 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
613 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
614 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
615 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
617 static double __kernel_cos(double x, double y)
618 {
619 double a,h,z,r,qx=0;
620 int ix;
621 ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/
622 if(ix<0x3e400000) { /* if x < 2**27 */
623 if(((int)x)==0) return one; /* generate inexact */
624 }
625 z = x*x;
626 r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
627 if(ix < 0x3FD33333) /* if |x| < 0.3 */
628 return one - (0.5*z - (z*r - x*y));
629 else {
630 if(ix > 0x3fe90000) { /* x > 0.78125 */
631 qx = 0.28125;
632 } else {
633 set_high(&qx, ix-0x00200000); /* x/4 */
634 set_low(&qx, 0);
635 }
636 h = 0.5*z-qx;
637 a = one-qx;
638 return a - (h - (z*r-x*y));
639 }
640 }
642 /* __kernel_tan( x, y, k )
643 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
644 * Input x is assumed to be bounded by ~pi/4 in magnitude.
645 * Input y is the tail of x.
646 * Input k indicates whether tan (if k=1) or
647 * -1/tan (if k= -1) is returned.
648 *
649 * Algorithm
650 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
651 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
652 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
653 * [0,0.67434]
654 * 3 27
655 * tan(x) ~ x + T1*x + ... + T13*x
656 * where
657 *
658 * |tan(x) 2 4 26 | -59.2
659 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
660 * | x |
661 *
662 * Note: tan(x+y) = tan(x) + tan'(x)*y
663 * ~ tan(x) + (1+x*x)*y
664 * Therefore, for better accuracy in computing tan(x+y), let
665 * 3 2 2 2 2
666 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
667 * then
668 * 3 2
669 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
670 *
671 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
672 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
673 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
674 */
676 static const double
677 pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
678 pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
679 T[] = {
680 3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
681 1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
682 5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
683 2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
684 8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
685 3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
686 1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
687 5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
688 2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
689 7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
690 7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
691 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
692 2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
693 };
695 static double __kernel_tan(double x, double y, int iy)
696 {
697 double z,r,v,w,s;
698 int ix,hx;
699 hx = high(x); /* high word of x */
700 ix = hx&0x7fffffff; /* high word of |x| */
701 if(ix<0x3e300000) { /* x < 2**-28 */
702 if((int)x==0) { /* generate inexact */
703 if (((ix | low(x)) | (iy + 1)) == 0)
704 return one / fabsd(x);
705 else {
706 if (iy == 1)
707 return x;
708 else { /* compute -1 / (x+y) carefully */
709 double a, t;
711 z = w = x + y;
712 set_low(&z, 0);
713 v = y - (z - x);
714 t = a = -one / w;
715 set_low(&t, 0);
716 s = one + t * z;
717 return t + a * (s + t * v);
718 }
719 }
720 }
721 }
722 if(ix>=0x3FE59428) { /* |x|>=0.6744 */
723 if(hx<0) {x = -x; y = -y;}
724 z = pio4-x;
725 w = pio4lo-y;
726 x = z+w; y = 0.0;
727 }
728 z = x*x;
729 w = z*z;
730 /* Break x^5*(T[1]+x^2*T[2]+...) into
731 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
732 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
733 */
734 r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
735 v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
736 s = z*x;
737 r = y + z*(s*(r+v)+y);
738 r += T[0]*s;
739 w = x+r;
740 if(ix>=0x3FE59428) {
741 v = (double)iy;
742 return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
743 }
744 if(iy==1) return w;
745 else { /* if allow error up to 2 ulp,
746 simply return -1.0/(x+r) here */
747 /* compute -1.0/(x+r) accurately */
748 double a,t;
749 z = w;
750 set_low(&z, 0);
751 v = r-(z - x); /* z+v = r+x */
752 t = a = -1.0/w; /* a = -1.0/w */
753 set_low(&t, 0);
754 s = 1.0+t*z;
755 return t+a*(s+t*v);
756 }
757 }
760 //----------------------------------------------------------------------
761 //
762 // Routines for new sin/cos implementation
763 //
764 //----------------------------------------------------------------------
766 /* sin(x)
767 * Return sine function of x.
768 *
769 * kernel function:
770 * __kernel_sin ... sine function on [-pi/4,pi/4]
771 * __kernel_cos ... cose function on [-pi/4,pi/4]
772 * __ieee754_rem_pio2 ... argument reduction routine
773 *
774 * Method.
775 * Let S,C and T denote the sin, cos and tan respectively on
776 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
777 * in [-pi/4 , +pi/4], and let n = k mod 4.
778 * We have
779 *
780 * n sin(x) cos(x) tan(x)
781 * ----------------------------------------------------------
782 * 0 S C T
783 * 1 C -S -1/T
784 * 2 -S -C T
785 * 3 -C S -1/T
786 * ----------------------------------------------------------
787 *
788 * Special cases:
789 * Let trig be any of sin, cos, or tan.
790 * trig(+-INF) is NaN, with signals;
791 * trig(NaN) is that NaN;
792 *
793 * Accuracy:
794 * TRIG(x) returns trig(x) nearly rounded
795 */
797 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
798 double y[2],z=0.0;
799 int n, ix;
801 /* High word of x. */
802 ix = high(x);
804 /* |x| ~< pi/4 */
805 ix &= 0x7fffffff;
806 if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
808 /* sin(Inf or NaN) is NaN */
809 else if (ix>=0x7ff00000) return x-x;
811 /* argument reduction needed */
812 else {
813 n = __ieee754_rem_pio2(x,y);
814 switch(n&3) {
815 case 0: return __kernel_sin(y[0],y[1],1);
816 case 1: return __kernel_cos(y[0],y[1]);
817 case 2: return -__kernel_sin(y[0],y[1],1);
818 default:
819 return -__kernel_cos(y[0],y[1]);
820 }
821 }
822 JRT_END
824 /* cos(x)
825 * Return cosine function of x.
826 *
827 * kernel function:
828 * __kernel_sin ... sine function on [-pi/4,pi/4]
829 * __kernel_cos ... cosine function on [-pi/4,pi/4]
830 * __ieee754_rem_pio2 ... argument reduction routine
831 *
832 * Method.
833 * Let S,C and T denote the sin, cos and tan respectively on
834 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
835 * in [-pi/4 , +pi/4], and let n = k mod 4.
836 * We have
837 *
838 * n sin(x) cos(x) tan(x)
839 * ----------------------------------------------------------
840 * 0 S C T
841 * 1 C -S -1/T
842 * 2 -S -C T
843 * 3 -C S -1/T
844 * ----------------------------------------------------------
845 *
846 * Special cases:
847 * Let trig be any of sin, cos, or tan.
848 * trig(+-INF) is NaN, with signals;
849 * trig(NaN) is that NaN;
850 *
851 * Accuracy:
852 * TRIG(x) returns trig(x) nearly rounded
853 */
855 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
856 double y[2],z=0.0;
857 int n, ix;
859 /* High word of x. */
860 ix = high(x);
862 /* |x| ~< pi/4 */
863 ix &= 0x7fffffff;
864 if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
866 /* cos(Inf or NaN) is NaN */
867 else if (ix>=0x7ff00000) return x-x;
869 /* argument reduction needed */
870 else {
871 n = __ieee754_rem_pio2(x,y);
872 switch(n&3) {
873 case 0: return __kernel_cos(y[0],y[1]);
874 case 1: return -__kernel_sin(y[0],y[1],1);
875 case 2: return -__kernel_cos(y[0],y[1]);
876 default:
877 return __kernel_sin(y[0],y[1],1);
878 }
879 }
880 JRT_END
882 /* tan(x)
883 * Return tangent function of x.
884 *
885 * kernel function:
886 * __kernel_tan ... tangent function on [-pi/4,pi/4]
887 * __ieee754_rem_pio2 ... argument reduction routine
888 *
889 * Method.
890 * Let S,C and T denote the sin, cos and tan respectively on
891 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
892 * in [-pi/4 , +pi/4], and let n = k mod 4.
893 * We have
894 *
895 * n sin(x) cos(x) tan(x)
896 * ----------------------------------------------------------
897 * 0 S C T
898 * 1 C -S -1/T
899 * 2 -S -C T
900 * 3 -C S -1/T
901 * ----------------------------------------------------------
902 *
903 * Special cases:
904 * Let trig be any of sin, cos, or tan.
905 * trig(+-INF) is NaN, with signals;
906 * trig(NaN) is that NaN;
907 *
908 * Accuracy:
909 * TRIG(x) returns trig(x) nearly rounded
910 */
912 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
913 double y[2],z=0.0;
914 int n, ix;
916 /* High word of x. */
917 ix = high(x);
919 /* |x| ~< pi/4 */
920 ix &= 0x7fffffff;
921 if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
923 /* tan(Inf or NaN) is NaN */
924 else if (ix>=0x7ff00000) return x-x; /* NaN */
926 /* argument reduction needed */
927 else {
928 n = __ieee754_rem_pio2(x,y);
929 return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
930 -1 -- n odd */
931 }
932 JRT_END
935 #ifdef WIN32
936 # pragma optimize ( "", on )
937 #endif