Fri, 29 Apr 2016 00:06:10 +0800
Added MIPS 64-bit port.
1 /*
2 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 /*
26 * This file has been modified by Loongson Technology in 2015. These
27 * modifications are Copyright (c) 2015 Loongson Technology, and are made
28 * available on the same license terms set forth above.
29 */
31 #include "precompiled.hpp"
32 #include "prims/jni.h"
33 #include "runtime/interfaceSupport.hpp"
34 #include "runtime/sharedRuntime.hpp"
36 // This file contains copies of the fdlibm routines used by
37 // StrictMath. It turns out that it is almost always required to use
38 // these runtime routines; the Intel CPU doesn't meet the Java
39 // specification for sin/cos outside a certain limited argument range,
40 // and the SPARC CPU doesn't appear to have sin/cos instructions. It
41 // also turns out that avoiding the indirect call through function
42 // pointer out to libjava.so in SharedRuntime speeds these routines up
43 // by roughly 15% on both Win32/x86 and Solaris/SPARC.
45 // Enabling optimizations in this file causes incorrect code to be
46 // generated; can not figure out how to turn down optimization for one
47 // file in the IDE on Windows
48 #ifdef WIN32
49 # pragma optimize ( "", off )
50 #endif
52 /* The above workaround now causes more problems with the latest MS compiler.
53 * Visual Studio 2010's /GS option tries to guard against buffer overruns.
54 * /GS is on by default if you specify optimizations, which we do globally
55 * via /W3 /O2. However the above selective turning off of optimizations means
56 * that /GS issues a warning "4748". And since we treat warnings as errors (/WX)
57 * then the compilation fails. There are several possible solutions
58 * (1) Remove that pragma above as obsolete with VS2010 - requires testing.
59 * (2) Stop treating warnings as errors - would be a backward step
60 * (3) Disable /GS - may help performance but you lose the security checks
61 * (4) Disable the warning with "#pragma warning( disable : 4748 )"
62 * (5) Disable planting the code with __declspec(safebuffers)
63 * I've opted for (5) although we should investigate the local performance
64 * benefits of (1) and global performance benefit of (3).
65 */
66 #if defined(WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1600))
67 #define SAFEBUF __declspec(safebuffers)
68 #else
69 #define SAFEBUF
70 #endif
72 #include <math.h>
74 // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
75 // [jk] this is not 100% correct because the float word order may different
76 // from the byte order (e.g. on ARM)
77 #ifdef VM_LITTLE_ENDIAN
78 # define __HI(x) *(1+(int*)&x)
79 # define __LO(x) *(int*)&x
80 #else
81 # define __HI(x) *(int*)&x
82 # define __LO(x) *(1+(int*)&x)
83 #endif
85 static double copysignA(double x, double y) {
86 __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
87 return x;
88 }
90 /*
91 * scalbn (double x, int n)
92 * scalbn(x,n) returns x* 2**n computed by exponent
93 * manipulation rather than by actually performing an
94 * exponentiation or a multiplication.
95 */
97 static const double
98 two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
99 twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
100 hugeX = 1.0e+300,
101 tiny = 1.0e-300;
103 static double scalbnA (double x, int n) {
104 int k,hx,lx;
105 hx = __HI(x);
106 lx = __LO(x);
107 k = (hx&0x7ff00000)>>20; /* extract exponent */
108 if (k==0) { /* 0 or subnormal x */
109 if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
110 x *= two54;
111 hx = __HI(x);
112 k = ((hx&0x7ff00000)>>20) - 54;
113 if (n< -50000) return tiny*x; /*underflow*/
114 }
115 if (k==0x7ff) return x+x; /* NaN or Inf */
116 k = k+n;
117 if (k > 0x7fe) return hugeX*copysignA(hugeX,x); /* overflow */
118 if (k > 0) /* normal result */
119 {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
120 if (k <= -54) {
121 if (n > 50000) /* in case integer overflow in n+k */
122 return hugeX*copysignA(hugeX,x); /*overflow*/
123 else return tiny*copysignA(tiny,x); /*underflow*/
124 }
125 k += 54; /* subnormal result */
126 __HI(x) = (hx&0x800fffff)|(k<<20);
127 return x*twom54;
128 }
130 /*
131 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
132 * double x[],y[]; int e0,nx,prec; int ipio2[];
133 *
134 * __kernel_rem_pio2 return the last three digits of N with
135 * y = x - N*pi/2
136 * so that |y| < pi/2.
137 *
138 * The method is to compute the integer (mod 8) and fraction parts of
139 * (2/pi)*x without doing the full multiplication. In general we
140 * skip the part of the product that are known to be a huge integer (
141 * more accurately, = 0 mod 8 ). Thus the number of operations are
142 * independent of the exponent of the input.
143 *
144 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
145 *
146 * Input parameters:
147 * x[] The input value (must be positive) is broken into nx
148 * pieces of 24-bit integers in double precision format.
149 * x[i] will be the i-th 24 bit of x. The scaled exponent
150 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
151 * match x's up to 24 bits.
152 *
153 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
154 * e0 = ilogb(z)-23
155 * z = scalbn(z,-e0)
156 * for i = 0,1,2
157 * x[i] = floor(z)
158 * z = (z-x[i])*2**24
159 *
160 *
161 * y[] ouput result in an array of double precision numbers.
162 * The dimension of y[] is:
163 * 24-bit precision 1
164 * 53-bit precision 2
165 * 64-bit precision 2
166 * 113-bit precision 3
167 * The actual value is the sum of them. Thus for 113-bit
168 * precsion, one may have to do something like:
169 *
170 * long double t,w,r_head, r_tail;
171 * t = (long double)y[2] + (long double)y[1];
172 * w = (long double)y[0];
173 * r_head = t+w;
174 * r_tail = w - (r_head - t);
175 *
176 * e0 The exponent of x[0]
177 *
178 * nx dimension of x[]
179 *
180 * prec an interger indicating the precision:
181 * 0 24 bits (single)
182 * 1 53 bits (double)
183 * 2 64 bits (extended)
184 * 3 113 bits (quad)
185 *
186 * ipio2[]
187 * integer array, contains the (24*i)-th to (24*i+23)-th
188 * bit of 2/pi after binary point. The corresponding
189 * floating value is
190 *
191 * ipio2[i] * 2^(-24(i+1)).
192 *
193 * External function:
194 * double scalbn(), floor();
195 *
196 *
197 * Here is the description of some local variables:
198 *
199 * jk jk+1 is the initial number of terms of ipio2[] needed
200 * in the computation. The recommended value is 2,3,4,
201 * 6 for single, double, extended,and quad.
202 *
203 * jz local integer variable indicating the number of
204 * terms of ipio2[] used.
205 *
206 * jx nx - 1
207 *
208 * jv index for pointing to the suitable ipio2[] for the
209 * computation. In general, we want
210 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
211 * is an integer. Thus
212 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
213 * Hence jv = max(0,(e0-3)/24).
214 *
215 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
216 *
217 * q[] double array with integral value, representing the
218 * 24-bits chunk of the product of x and 2/pi.
219 *
220 * q0 the corresponding exponent of q[0]. Note that the
221 * exponent for q[i] would be q0-24*i.
222 *
223 * PIo2[] double precision array, obtained by cutting pi/2
224 * into 24 bits chunks.
225 *
226 * f[] ipio2[] in floating point
227 *
228 * iq[] integer array by breaking up q[] in 24-bits chunk.
229 *
230 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
231 *
232 * ih integer. If >0 it indicats q[] is >= 0.5, hence
233 * it also indicates the *sign* of the result.
234 *
235 */
238 /*
239 * Constants:
240 * The hexadecimal values are the intended ones for the following
241 * constants. The decimal values may be used, provided that the
242 * compiler will convert from decimal to binary accurately enough
243 * to produce the hexadecimal values shown.
244 */
247 static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
249 static const double PIo2[] = {
250 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
251 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
252 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
253 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
254 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
255 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
256 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
257 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
258 };
260 static const double
261 zeroB = 0.0,
262 one = 1.0,
263 two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
264 twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
266 static SAFEBUF int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
267 int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
268 double z,fw,f[20],fq[20],q[20];
270 /* initialize jk*/
271 jk = init_jk[prec];
272 jp = jk;
274 /* determine jx,jv,q0, note that 3>q0 */
275 jx = nx-1;
276 jv = (e0-3)/24; if(jv<0) jv=0;
277 q0 = e0-24*(jv+1);
279 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
280 j = jv-jx; m = jx+jk;
281 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
283 /* compute q[0],q[1],...q[jk] */
284 for (i=0;i<=jk;i++) {
285 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
286 }
288 jz = jk;
289 recompute:
290 /* distill q[] into iq[] reversingly */
291 for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
292 fw = (double)((int)(twon24* z));
293 iq[i] = (int)(z-two24B*fw);
294 z = q[j-1]+fw;
295 }
297 /* compute n */
298 z = scalbnA(z,q0); /* actual value of z */
299 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
300 n = (int) z;
301 z -= (double)n;
302 ih = 0;
303 if(q0>0) { /* need iq[jz-1] to determine n */
304 i = (iq[jz-1]>>(24-q0)); n += i;
305 iq[jz-1] -= i<<(24-q0);
306 ih = iq[jz-1]>>(23-q0);
307 }
308 else if(q0==0) ih = iq[jz-1]>>23;
309 else if(z>=0.5) ih=2;
311 if(ih>0) { /* q > 0.5 */
312 n += 1; carry = 0;
313 for(i=0;i<jz ;i++) { /* compute 1-q */
314 j = iq[i];
315 if(carry==0) {
316 if(j!=0) {
317 carry = 1; iq[i] = 0x1000000- j;
318 }
319 } else iq[i] = 0xffffff - j;
320 }
321 if(q0>0) { /* rare case: chance is 1 in 12 */
322 switch(q0) {
323 case 1:
324 iq[jz-1] &= 0x7fffff; break;
325 case 2:
326 iq[jz-1] &= 0x3fffff; break;
327 }
328 }
329 if(ih==2) {
330 z = one - z;
331 if(carry!=0) z -= scalbnA(one,q0);
332 }
333 }
335 /* check if recomputation is needed */
336 if(z==zeroB) {
337 j = 0;
338 for (i=jz-1;i>=jk;i--) j |= iq[i];
339 if(j==0) { /* need recomputation */
340 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
342 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
343 f[jx+i] = (double) ipio2[jv+i];
344 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
345 q[i] = fw;
346 }
347 jz += k;
348 goto recompute;
349 }
350 }
352 /* chop off zero terms */
353 if(z==0.0) {
354 jz -= 1; q0 -= 24;
355 while(iq[jz]==0) { jz--; q0-=24;}
356 } else { /* break z into 24-bit if neccessary */
357 z = scalbnA(z,-q0);
358 if(z>=two24B) {
359 fw = (double)((int)(twon24*z));
360 iq[jz] = (int)(z-two24B*fw);
361 jz += 1; q0 += 24;
362 iq[jz] = (int) fw;
363 } else iq[jz] = (int) z ;
364 }
366 /* convert integer "bit" chunk to floating-point value */
367 fw = scalbnA(one,q0);
368 for(i=jz;i>=0;i--) {
369 q[i] = fw*(double)iq[i]; fw*=twon24;
370 }
372 /* compute PIo2[0,...,jp]*q[jz,...,0] */
373 for(i=jz;i>=0;i--) {
374 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
375 fq[jz-i] = fw;
376 }
378 /* compress fq[] into y[] */
379 switch(prec) {
380 case 0:
381 fw = 0.0;
382 for (i=jz;i>=0;i--) fw += fq[i];
383 y[0] = (ih==0)? fw: -fw;
384 break;
385 case 1:
386 case 2:
387 fw = 0.0;
388 for (i=jz;i>=0;i--) fw += fq[i];
389 y[0] = (ih==0)? fw: -fw;
390 fw = fq[0]-fw;
391 for (i=1;i<=jz;i++) fw += fq[i];
392 y[1] = (ih==0)? fw: -fw;
393 break;
394 case 3: /* painful */
395 for (i=jz;i>0;i--) {
396 fw = fq[i-1]+fq[i];
397 fq[i] += fq[i-1]-fw;
398 fq[i-1] = fw;
399 }
400 for (i=jz;i>1;i--) {
401 fw = fq[i-1]+fq[i];
402 fq[i] += fq[i-1]-fw;
403 fq[i-1] = fw;
404 }
405 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
406 if(ih==0) {
407 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
408 } else {
409 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
410 }
411 }
412 return n&7;
413 }
416 /*
417 * ====================================================
418 * Copyright (c) 1993 Oracle and/or its affilates. All rights reserved.
419 *
420 * Developed at SunPro, a Sun Microsystems, Inc. business.
421 * Permission to use, copy, modify, and distribute this
422 * software is freely granted, provided that this notice
423 * is preserved.
424 * ====================================================
425 *
426 */
428 /* __ieee754_rem_pio2(x,y)
429 *
430 * return the remainder of x rem pi/2 in y[0]+y[1]
431 * use __kernel_rem_pio2()
432 */
434 /*
435 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
436 */
437 static const int two_over_pi[] = {
438 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
439 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
440 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
441 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
442 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
443 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
444 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
445 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
446 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
447 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
448 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
449 };
451 static const int npio2_hw[] = {
452 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
453 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
454 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
455 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
456 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
457 0x404858EB, 0x404921FB,
458 };
460 /*
461 * invpio2: 53 bits of 2/pi
462 * pio2_1: first 33 bit of pi/2
463 * pio2_1t: pi/2 - pio2_1
464 * pio2_2: second 33 bit of pi/2
465 * pio2_2t: pi/2 - (pio2_1+pio2_2)
466 * pio2_3: third 33 bit of pi/2
467 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
468 */
470 static const double
471 zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
472 half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
473 two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
474 invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
475 pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
476 pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
477 pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
478 pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
479 pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
480 pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
482 static SAFEBUF int __ieee754_rem_pio2(double x, double *y) {
483 double z,w,t,r,fn;
484 double tx[3];
485 int e0,i,j,nx,n,ix,hx,i0;
487 i0 = ((*(int*)&two24A)>>30)^1; /* high word index */
488 hx = *(i0+(int*)&x); /* high word of x */
489 ix = hx&0x7fffffff;
490 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
491 {y[0] = x; y[1] = 0; return 0;}
492 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
493 if(hx>0) {
494 z = x - pio2_1;
495 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
496 y[0] = z - pio2_1t;
497 y[1] = (z-y[0])-pio2_1t;
498 } else { /* near pi/2, use 33+33+53 bit pi */
499 z -= pio2_2;
500 y[0] = z - pio2_2t;
501 y[1] = (z-y[0])-pio2_2t;
502 }
503 return 1;
504 } else { /* negative x */
505 z = x + pio2_1;
506 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
507 y[0] = z + pio2_1t;
508 y[1] = (z-y[0])+pio2_1t;
509 } else { /* near pi/2, use 33+33+53 bit pi */
510 z += pio2_2;
511 y[0] = z + pio2_2t;
512 y[1] = (z-y[0])+pio2_2t;
513 }
514 return -1;
515 }
516 }
517 if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
518 t = fabsd(x);
519 n = (int) (t*invpio2+half);
520 fn = (double)n;
521 r = t-fn*pio2_1;
522 w = fn*pio2_1t; /* 1st round good to 85 bit */
523 if(n<32&&ix!=npio2_hw[n-1]) {
524 y[0] = r-w; /* quick check no cancellation */
525 } else {
526 j = ix>>20;
527 y[0] = r-w;
528 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
529 if(i>16) { /* 2nd iteration needed, good to 118 */
530 t = r;
531 w = fn*pio2_2;
532 r = t-w;
533 w = fn*pio2_2t-((t-r)-w);
534 y[0] = r-w;
535 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
536 if(i>49) { /* 3rd iteration need, 151 bits acc */
537 t = r; /* will cover all possible cases */
538 w = fn*pio2_3;
539 r = t-w;
540 w = fn*pio2_3t-((t-r)-w);
541 y[0] = r-w;
542 }
543 }
544 }
545 y[1] = (r-y[0])-w;
546 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
547 else return n;
548 }
549 /*
550 * all other (large) arguments
551 */
552 if(ix>=0x7ff00000) { /* x is inf or NaN */
553 y[0]=y[1]=x-x; return 0;
554 }
555 /* set z = scalbn(|x|,ilogb(x)-23) */
556 *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
557 e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */
558 *(i0+(int*)&z) = ix - (e0<<20);
559 for(i=0;i<2;i++) {
560 tx[i] = (double)((int)(z));
561 z = (z-tx[i])*two24A;
562 }
563 tx[2] = z;
564 nx = 3;
565 while(tx[nx-1]==zeroA) nx--; /* skip zero term */
566 n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
567 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
568 return n;
569 }
572 /* __kernel_sin( x, y, iy)
573 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
574 * Input x is assumed to be bounded by ~pi/4 in magnitude.
575 * Input y is the tail of x.
576 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
577 *
578 * Algorithm
579 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
580 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
581 * 3. sin(x) is approximated by a polynomial of degree 13 on
582 * [0,pi/4]
583 * 3 13
584 * sin(x) ~ x + S1*x + ... + S6*x
585 * where
586 *
587 * |sin(x) 2 4 6 8 10 12 | -58
588 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
589 * | x |
590 *
591 * 4. sin(x+y) = sin(x) + sin'(x')*y
592 * ~ sin(x) + (1-x*x/2)*y
593 * For better accuracy, let
594 * 3 2 2 2 2
595 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
596 * then 3 2
597 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
598 */
599 #ifdef MIPS64
600 #undef S1
601 #undef S2
602 #undef S3
603 #undef S4
604 #undef S5
605 #undef S6
606 #endif
608 static const double
609 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
610 S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
611 S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
612 S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
613 S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
614 S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
616 static double __kernel_sin(double x, double y, int iy)
617 {
618 double z,r,v;
619 int ix;
620 ix = __HI(x)&0x7fffffff; /* high word of x */
621 if(ix<0x3e400000) /* |x| < 2**-27 */
622 {if((int)x==0) return x;} /* generate inexact */
623 z = x*x;
624 v = z*x;
625 r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
626 if(iy==0) return x+v*(S1+z*r);
627 else return x-((z*(half*y-v*r)-y)-v*S1);
628 }
630 /*
631 * __kernel_cos( x, y )
632 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
633 * Input x is assumed to be bounded by ~pi/4 in magnitude.
634 * Input y is the tail of x.
635 *
636 * Algorithm
637 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
638 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
639 * 3. cos(x) is approximated by a polynomial of degree 14 on
640 * [0,pi/4]
641 * 4 14
642 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
643 * where the remez error is
644 *
645 * | 2 4 6 8 10 12 14 | -58
646 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
647 * | |
648 *
649 * 4 6 8 10 12 14
650 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
651 * cos(x) = 1 - x*x/2 + r
652 * since cos(x+y) ~ cos(x) - sin(x)*y
653 * ~ cos(x) - x*y,
654 * a correction term is necessary in cos(x) and hence
655 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
656 * For better accuracy when x > 0.3, let qx = |x|/4 with
657 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
658 * Then
659 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
660 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
661 * magnitude of the latter is at least a quarter of x*x/2,
662 * thus, reducing the rounding error in the subtraction.
663 */
665 static const double
666 C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
667 C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
668 C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
669 C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
670 C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
671 C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
673 static double __kernel_cos(double x, double y)
674 {
675 double a,h,z,r,qx;
676 int ix;
677 ix = __HI(x)&0x7fffffff; /* ix = |x|'s high word*/
678 if(ix<0x3e400000) { /* if x < 2**27 */
679 if(((int)x)==0) return one; /* generate inexact */
680 }
681 z = x*x;
682 r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
683 if(ix < 0x3FD33333) /* if |x| < 0.3 */
684 return one - (0.5*z - (z*r - x*y));
685 else {
686 if(ix > 0x3fe90000) { /* x > 0.78125 */
687 qx = 0.28125;
688 } else {
689 __HI(qx) = ix-0x00200000; /* x/4 */
690 __LO(qx) = 0;
691 }
692 h = 0.5*z-qx;
693 a = one-qx;
694 return a - (h - (z*r-x*y));
695 }
696 }
698 /* __kernel_tan( x, y, k )
699 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
700 * Input x is assumed to be bounded by ~pi/4 in magnitude.
701 * Input y is the tail of x.
702 * Input k indicates whether tan (if k=1) or
703 * -1/tan (if k= -1) is returned.
704 *
705 * Algorithm
706 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
707 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
708 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
709 * [0,0.67434]
710 * 3 27
711 * tan(x) ~ x + T1*x + ... + T13*x
712 * where
713 *
714 * |tan(x) 2 4 26 | -59.2
715 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
716 * | x |
717 *
718 * Note: tan(x+y) = tan(x) + tan'(x)*y
719 * ~ tan(x) + (1+x*x)*y
720 * Therefore, for better accuracy in computing tan(x+y), let
721 * 3 2 2 2 2
722 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
723 * then
724 * 3 2
725 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
726 *
727 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
728 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
729 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
730 */
732 static const double
733 pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
734 pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
735 T[] = {
736 3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
737 1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
738 5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
739 2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
740 8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
741 3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
742 1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
743 5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
744 2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
745 7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
746 7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
747 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
748 2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
749 };
751 static double __kernel_tan(double x, double y, int iy)
752 {
753 double z,r,v,w,s;
754 int ix,hx;
755 hx = __HI(x); /* high word of x */
756 ix = hx&0x7fffffff; /* high word of |x| */
757 if(ix<0x3e300000) { /* x < 2**-28 */
758 if((int)x==0) { /* generate inexact */
759 if (((ix | __LO(x)) | (iy + 1)) == 0)
760 return one / fabsd(x);
761 else {
762 if (iy == 1)
763 return x;
764 else { /* compute -1 / (x+y) carefully */
765 double a, t;
767 z = w = x + y;
768 __LO(z) = 0;
769 v = y - (z - x);
770 t = a = -one / w;
771 __LO(t) = 0;
772 s = one + t * z;
773 return t + a * (s + t * v);
774 }
775 }
776 }
777 }
778 if(ix>=0x3FE59428) { /* |x|>=0.6744 */
779 if(hx<0) {x = -x; y = -y;}
780 z = pio4-x;
781 w = pio4lo-y;
782 x = z+w; y = 0.0;
783 }
784 z = x*x;
785 w = z*z;
786 /* Break x^5*(T[1]+x^2*T[2]+...) into
787 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
788 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
789 */
790 r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
791 v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
792 s = z*x;
793 r = y + z*(s*(r+v)+y);
794 r += T[0]*s;
795 w = x+r;
796 if(ix>=0x3FE59428) {
797 v = (double)iy;
798 return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
799 }
800 if(iy==1) return w;
801 else { /* if allow error up to 2 ulp,
802 simply return -1.0/(x+r) here */
803 /* compute -1.0/(x+r) accurately */
804 double a,t;
805 z = w;
806 __LO(z) = 0;
807 v = r-(z - x); /* z+v = r+x */
808 t = a = -1.0/w; /* a = -1.0/w */
809 __LO(t) = 0;
810 s = 1.0+t*z;
811 return t+a*(s+t*v);
812 }
813 }
816 //----------------------------------------------------------------------
817 //
818 // Routines for new sin/cos implementation
819 //
820 //----------------------------------------------------------------------
822 /* sin(x)
823 * Return sine function of x.
824 *
825 * kernel function:
826 * __kernel_sin ... sine function on [-pi/4,pi/4]
827 * __kernel_cos ... cose function on [-pi/4,pi/4]
828 * __ieee754_rem_pio2 ... argument reduction routine
829 *
830 * Method.
831 * Let S,C and T denote the sin, cos and tan respectively on
832 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
833 * in [-pi/4 , +pi/4], and let n = k mod 4.
834 * We have
835 *
836 * n sin(x) cos(x) tan(x)
837 * ----------------------------------------------------------
838 * 0 S C T
839 * 1 C -S -1/T
840 * 2 -S -C T
841 * 3 -C S -1/T
842 * ----------------------------------------------------------
843 *
844 * Special cases:
845 * Let trig be any of sin, cos, or tan.
846 * trig(+-INF) is NaN, with signals;
847 * trig(NaN) is that NaN;
848 *
849 * Accuracy:
850 * TRIG(x) returns trig(x) nearly rounded
851 */
853 JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
854 double y[2],z=0.0;
855 int n, ix;
857 /* High word of x. */
858 ix = __HI(x);
860 /* |x| ~< pi/4 */
861 ix &= 0x7fffffff;
862 if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
864 /* sin(Inf or NaN) is NaN */
865 else if (ix>=0x7ff00000) return x-x;
867 /* argument reduction needed */
868 else {
869 n = __ieee754_rem_pio2(x,y);
870 switch(n&3) {
871 case 0: return __kernel_sin(y[0],y[1],1);
872 case 1: return __kernel_cos(y[0],y[1]);
873 case 2: return -__kernel_sin(y[0],y[1],1);
874 default:
875 return -__kernel_cos(y[0],y[1]);
876 }
877 }
878 JRT_END
880 /* cos(x)
881 * Return cosine function of x.
882 *
883 * kernel function:
884 * __kernel_sin ... sine function on [-pi/4,pi/4]
885 * __kernel_cos ... cosine function on [-pi/4,pi/4]
886 * __ieee754_rem_pio2 ... argument reduction routine
887 *
888 * Method.
889 * Let S,C and T denote the sin, cos and tan respectively on
890 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
891 * in [-pi/4 , +pi/4], and let n = k mod 4.
892 * We have
893 *
894 * n sin(x) cos(x) tan(x)
895 * ----------------------------------------------------------
896 * 0 S C T
897 * 1 C -S -1/T
898 * 2 -S -C T
899 * 3 -C S -1/T
900 * ----------------------------------------------------------
901 *
902 * Special cases:
903 * Let trig be any of sin, cos, or tan.
904 * trig(+-INF) is NaN, with signals;
905 * trig(NaN) is that NaN;
906 *
907 * Accuracy:
908 * TRIG(x) returns trig(x) nearly rounded
909 */
911 JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
912 double y[2],z=0.0;
913 int n, ix;
915 /* High word of x. */
916 ix = __HI(x);
918 /* |x| ~< pi/4 */
919 ix &= 0x7fffffff;
920 if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
922 /* cos(Inf or NaN) is NaN */
923 else if (ix>=0x7ff00000) return x-x;
925 /* argument reduction needed */
926 else {
927 n = __ieee754_rem_pio2(x,y);
928 switch(n&3) {
929 case 0: return __kernel_cos(y[0],y[1]);
930 case 1: return -__kernel_sin(y[0],y[1],1);
931 case 2: return -__kernel_cos(y[0],y[1]);
932 default:
933 return __kernel_sin(y[0],y[1],1);
934 }
935 }
936 JRT_END
938 /* tan(x)
939 * Return tangent function of x.
940 *
941 * kernel function:
942 * __kernel_tan ... tangent function on [-pi/4,pi/4]
943 * __ieee754_rem_pio2 ... argument reduction routine
944 *
945 * Method.
946 * Let S,C and T denote the sin, cos and tan respectively on
947 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
948 * in [-pi/4 , +pi/4], and let n = k mod 4.
949 * We have
950 *
951 * n sin(x) cos(x) tan(x)
952 * ----------------------------------------------------------
953 * 0 S C T
954 * 1 C -S -1/T
955 * 2 -S -C T
956 * 3 -C S -1/T
957 * ----------------------------------------------------------
958 *
959 * Special cases:
960 * Let trig be any of sin, cos, or tan.
961 * trig(+-INF) is NaN, with signals;
962 * trig(NaN) is that NaN;
963 *
964 * Accuracy:
965 * TRIG(x) returns trig(x) nearly rounded
966 */
968 JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
969 double y[2],z=0.0;
970 int n, ix;
972 /* High word of x. */
973 ix = __HI(x);
975 /* |x| ~< pi/4 */
976 ix &= 0x7fffffff;
977 if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
979 /* tan(Inf or NaN) is NaN */
980 else if (ix>=0x7ff00000) return x-x; /* NaN */
982 /* argument reduction needed */
983 else {
984 n = __ieee754_rem_pio2(x,y);
985 return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
986 -1 -- n odd */
987 }
988 JRT_END
991 #ifdef WIN32
992 # pragma optimize ( "", on )
993 #endif