--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon May 14 09:36:00 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Tue May 15 10:10:23 2012 +0200
@@ -3578,6 +3578,21 @@
   emit_byte(0xF1);
 }
 
+void Assembler::frndint() {
+  emit_byte(0xD9);
+  emit_byte(0xFC);
+}
+
+void Assembler::f2xm1() {
+  emit_byte(0xD9);
+  emit_byte(0xF0);
+}
+
+void Assembler::fldl2e() {
+  emit_byte(0xD9);
+  emit_byte(0xEA);
+}
+
 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
@@ -6868,6 +6883,242 @@
   Assembler::fldcw(as_Address(src));
 }
 
+void MacroAssembler::pow_exp_core_encoding() {
+  // kills rax, rcx, rdx
+  subptr(rsp,sizeof(jdouble));
+  // computes 2^X. Stack: X ...
+  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
+  // keep it on the thread's stack to compute 2^int(X) later,
+  // then compute 2^(X-int(X)) as ((2^(X-int(X))-1)+1).
+  // The final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
+  fld_s(0);                 // Stack: X X ...
+  frndint();                // Stack: int(X) X ...
+  fsuba(1);                 // Stack: int(X) X-int(X) ...
+  fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
+  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
+  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
+  faddp(1);                 // Stack: 2^(X-int(X))
+  // computes 2^(int(X)): add the exponent bias (1023) to int(X), then
+  // shift int(X)+1023 to the exponent position.
+  // The exponent is limited to 11 bits: if int(X)+1023 does not fit in
+  // 11 bits, set the result to NaN. 0x000 and 0x7FF are reserved
+  // exponent values, so detect them and set the result to NaN as well.
+  movl(rax,Address(rsp,0));
+  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
+  addl(rax, 1023);
+  movl(rdx,rax);
+  shll(rax,20);
+  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
+  addl(rdx,1);
+  // Check that 1 < int(X)+1023+1 < 2048
+  // in 3 steps:
+  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
+  // 2- (int(X)+1023+1) != 0
+  // 3- (int(X)+1023+1) != 1
+  // Do 2- first because addl just updated the flags.
+  cmov32(Assembler::equal,rax,rcx);
+  cmpl(rdx,1);
+  cmov32(Assembler::equal,rax,rcx);
+  testl(rdx,rcx);
+  cmov32(Assembler::notEqual,rax,rcx);
+  movl(Address(rsp,4),rax);
+  movl(Address(rsp,0),0);
+  fmul_d(Address(rsp,0));   // Stack: 2^X ...
+  addptr(rsp,sizeof(jdouble));
+}
+
+void MacroAssembler::fast_pow() {
+  // computes X^Y = 2^(Y * log2(X))
+  // if fast computation is not possible, the result is NaN. Requires
+  // fallback from the user of this macro.
+  fyl2x();                 // Stack: (Y*log2(X)) ...
+  pow_exp_core_encoding(); // Stack: X^Y ...
+}
+
+void MacroAssembler::fast_exp() {
+  // computes exp(X) = 2^(X * log2(e))
+  // if fast computation is not possible, the result is NaN. Requires
+  // fallback from the user of this macro.
+  fldl2e();                // Stack: log2(e) X ...
+  fmulp(1);                // Stack: (X*log2(e)) ...
+  pow_exp_core_encoding(); // Stack: exp(X) ...
+}
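
The arithmetic this encodes can be modelled on the host side. The sketch below is illustrative only, not HotSpot code: exp2_decomposed is a made-up name, and the int64_t cast stands in for the fistp store (inputs large enough to overflow it would already have produced NaN via the exponent check).

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static double exp2_decomposed(double x) {
      double i = std::nearbyint(x);    // frndint: round to integer in the current mode
      double f = x - i;                // X-int(X) lies in [-1,1], a legal f2xm1 input
      double two_to_f = std::exp2(f);  // f2xm1 + fld1 + faddp: (2^f - 1) + 1

      // Build 2^int(X) by writing int(X)+1023 straight into the exponent
      // field of an IEEE-754 double. The generated code works on the high
      // 32-bit word only, which is why it shifts by 20 = 52 - 32.
      int64_t e = (int64_t)i + 1023;
      uint64_t bits = (e <= 0 || e >= 2047)   // 0x000 and 0x7FF are reserved
          ? UINT64_C(0xFFFFF80000000000)      // -2048 in the high word: a quiet NaN
          : (uint64_t)e << 52;                // zero mantissa, exponent field e
      double two_to_i;
      std::memcpy(&two_to_i, &bits, sizeof bits);
      return two_to_i * two_to_f;  // fmul_d: 2^X = 2^int(X) * 2^(X-int(X))
    }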
+
+void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
+  // kills rax, rcx, rdx
+  // pow and exp need 2 extra registers on the fpu stack.
+  Label slow_case, done;
+  Register tmp = noreg;
+  if (!VM_Version::supports_cmov()) {
+    // fcmp needs a temporary, so preserve rdx
+    tmp = rdx;
+  }
+  Register tmp2 = rax;
+  NOT_LP64(Register tmp3 = rcx;)
+
+  if (is_exp) {
+    // Stack: X
+    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
+    fast_exp();                 // Stack: exp(X) X
+    fcmp(tmp, 0, false, false); // Stack: exp(X) X
+    // exp(X) not equal to itself: exp(X) is NaN, go to slow case.
+    jcc(Assembler::parity, slow_case);
+    // get rid of duplicate argument. Stack: exp(X)
+    if (num_fpu_regs_in_use > 0) {
+      fxch();
+      fpop();
+    } else {
+      ffree(1);
+    }
+    jmp(done);
+  } else {
+    // Stack: X Y
+    Label x_negative, y_odd;
+
+    fldz();                     // Stack: 0 X Y
+    fcmp(tmp, 1, true, false);  // Stack: X Y
+    jcc(Assembler::above, x_negative);
+
+    // X >= 0
+
+    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+    fld_s(1);                   // Stack: X Y X Y
+    fast_pow();                 // Stack: X^Y X Y
+    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
+    // X^Y not equal to itself: X^Y is NaN, go to slow case.
+    jcc(Assembler::parity, slow_case);
+    // get rid of duplicate arguments. Stack: X^Y
+    if (num_fpu_regs_in_use > 0) {
+      fxch(); fpop();
+      fxch(); fpop();
+    } else {
+      ffree(2);
+      ffree(1);
+    }
+    jmp(done);
+
+    // X <= 0
+    bind(x_negative);
+
+    fld_s(1);                   // Stack: Y X Y
+    frndint();                  // Stack: int(Y) X Y
+    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
+    jcc(Assembler::notEqual, slow_case);
+
+    subptr(rsp, 8);
+
+    // For X^Y, when X < 0, Y has to be an integer and the final
+    // result depends on whether it's odd or even. We just checked
+    // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
+    // integer to test its parity. If int(Y) is huge and doesn't fit
+    // in the 64 bit integer range, the integer indefinite value will
+    // end up in the gp registers. Huge numbers are all even, and the
+    // integer indefinite value is even too, so it's fine.
+
+#ifdef ASSERT
+    // Let's check we don't end up with an integer indefinite number
+    // when not expected. First test for huge numbers: check whether
+    // int(Y)+1 == int(Y), which is true for very large numbers, and
+    // those are all even. A 64 bit integer is guaranteed to not
+    // overflow for numbers where y+1 != y (when precision is set to
+    // double precision).
+    Label y_not_huge;
+
+    fld1();                     // Stack: 1 int(Y) X Y
+    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
+
+#ifdef _LP64
+    // trip to memory to force the precision down from double extended
+    // precision
+    fstp_d(Address(rsp, 0));
+    fld_d(Address(rsp, 0));
+#endif
+
+    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
+#endif
+
+    // move int(Y) as 64 bit integer to thread's stack
+    fistp_d(Address(rsp,0));    // Stack: X Y
+
+#ifdef ASSERT
+    jcc(Assembler::notEqual, y_not_huge);
+
+    // Y is huge so we know it's even. It may not fit in a 64 bit
+    // integer and we don't want the debug code below to see the
+    // integer indefinite value, so overwrite int(Y) on the thread's
+    // stack with 0.
+    movl(Address(rsp, 0), 0);
+    movl(Address(rsp, 4), 0);
+
+    bind(y_not_huge);
+#endif
+
+    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+    fld_s(1);                   // Stack: X Y X Y
+    fabs();                     // Stack: abs(X) Y X Y
+    fast_pow();                 // Stack: abs(X)^Y X Y
+    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
+    // abs(X)^Y not equal to itself: abs(X)^Y is NaN, go to slow case.
+
+    pop(tmp2);
+    NOT_LP64(pop(tmp3));
+    jcc(Assembler::parity, slow_case);
+
+#ifdef ASSERT
+    // Check that int(Y) is not the integer indefinite value (int
+    // overflow). Shouldn't happen because for values that would
+    // overflow, 1+int(Y)==Y, which was tested earlier.
+#ifndef _LP64
+    {
+      Label integer;
+      testl(tmp2, tmp2);
+      jcc(Assembler::notZero, integer);
+      cmpl(tmp3, 0x80000000);
+      jcc(Assembler::notZero, integer);
+      stop("integer indefinite value shouldn't be seen here");
+      bind(integer);
+    }
+#else
+    {
+      Label integer;
+      shlq(tmp2, 1);
+      jcc(Assembler::carryClear, integer);
+      jcc(Assembler::notZero, integer);
+      stop("integer indefinite value shouldn't be seen here");
+      bind(integer);
+    }
+#endif
+#endif
+
+    // get rid of duplicate arguments. Stack: X^Y
+    if (num_fpu_regs_in_use > 0) {
+      fxch(); fpop();
+      fxch(); fpop();
+    } else {
+      ffree(2);
+      ffree(1);
+    }
+
+    testl(tmp2, 1);
+    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
+    // X <= 0, Y odd: X^Y = -abs(X)^Y
+
+    fchs();                     // Stack: -abs(X)^Y ...
+    jmp(done);
+  }
+
+  // slow case: runtime call
+  bind(slow_case);
+
+  fpop();                       // pop incorrect result or int(Y)
+
+  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
+                      is_exp ? 1 : 2, num_fpu_regs_in_use);
+
+  // Come here with result in F-TOS
+  bind(done);
+}
+
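For X < 0 the fast path only handles integer Y: it computes abs(X)^Y and fixes the sign from the parity of int(Y); every other case falls through to SharedRuntime::dpow. A host-side model of that branch follows, again as an illustrative sketch (pow_neg_base is a hypothetical name; a C++ cast of an out-of-range double to int64_t is undefined, whereas the fistp_d the generated code relies on deterministically produces the "integer indefinite" value 0x8000000000000000, which is even):

    #include <cmath>
    #include <cstdint>

    static double pow_neg_base(double x, double y) {
      if (std::nearbyint(y) != y)  // frndint + fcmp: Y is not an integer,
        return std::nan("");       // so return NaN and force the slow path
      // Doubles too large for int64_t are all even, and fistp_d maps them to
      // the even integer indefinite value, so the parity test stays correct.
      int64_t iy = (int64_t)y;
      double r = std::pow(std::fabs(x), y);  // fast_pow() on abs(X)
      return (iy & 1) ? -r : r;              // testl(tmp2, 1): odd Y flips the sign
    }

The NaN checks throughout pow_or_exp use the self-comparison idiom in EFLAGS form: comparing a NaN with anything, including itself, sets the parity flag, so fcmp followed by jcc(Assembler::parity, slow_case) is the generated-code analogue of v != v.
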
 void MacroAssembler::fpop() {
   ffree();
   fincstp();
@@ -8045,6 +8296,144 @@
 #endif
 }
 
+void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
+  pusha();
+
+  // if we are coming from c1, xmm registers may be live
+  if (UseSSE >= 1) {
+    subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
+  }
+  int off = 0;
+  if (UseSSE == 1) {
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
+  } else if (UseSSE >= 2) {
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7);
+#ifdef _LP64
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15);
+#endif
+  }
+
+  // Preserve registers across runtime call
+  int incoming_argument_and_return_value_offset = -1;
+  if (num_fpu_regs_in_use > 1) {
+    // Must preserve all other FPU regs (could alternatively convert
+    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
+    // FPU state, but can not trust C compiler)
+    NEEDS_CLEANUP;
+    // NOTE that in this case we also push the incoming argument(s) to
+    // the stack and restore them later; we also use this stack slot to
+    // hold the return value from dsin, dcos etc.
+    for (int i = 0; i < num_fpu_regs_in_use; i++) {
+      subptr(rsp, sizeof(jdouble));
+      fstp_d(Address(rsp, 0));
+    }
+    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
+    for (int i = nb_args-1; i >= 0; i--) {
+      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
+    }
+  }
+
+  subptr(rsp, nb_args*sizeof(jdouble));
+  for (int i = 0; i < nb_args; i++) {
+    fstp_d(Address(rsp, i*sizeof(jdouble)));
+  }
+
+#ifdef _LP64
+  if (nb_args > 0) {
+    movdbl(xmm0, Address(rsp, 0));
+  }
+  if (nb_args > 1) {
+    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
+  }
+  assert(nb_args <= 2, "unsupported number of args");
+#endif // _LP64
+
+  // NOTE: we must not use call_VM_leaf here because that requires a
+  // complete interpreter frame in debug mode -- same bug as 4387334
+  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
+  // do proper 64bit abi
+
+  NEEDS_CLEANUP;
+  // Need to add stack banging before this runtime call if it needs to
+  // be taken; however, there is no generic stack banging routine at
+  // the MacroAssembler level
+
+  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
+
+#ifdef _LP64
+  movsd(Address(rsp, 0), xmm0);
+  fld_d(Address(rsp, 0));
+#endif // _LP64
+  addptr(rsp, sizeof(jdouble) * nb_args);
+  if (num_fpu_regs_in_use > 1) {
+    // Must save return value to stack and then restore entire FPU
+    // stack except incoming arguments
+    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
+    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
+      fld_d(Address(rsp, 0));
+      addptr(rsp, sizeof(jdouble));
+    }
+    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
+    addptr(rsp, sizeof(jdouble) * nb_args);
+  }
+
+  off = 0;
+  if (UseSSE == 1) {
+    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
+  } else if (UseSSE >= 2) {
+    movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
+#ifdef _LP64
+    movdbl(xmm8, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm9, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
+#endif
+  }
+  if (UseSSE >= 1) {
+    addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
+  }
+  popa();
+}
+
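The spill layout in fp_runtime_fallback is easiest to check with concrete numbers. The standalone snippet below (values chosen for illustration: num_fpu_regs_in_use = 3) reproduces the offset arithmetic: each iteration of the spill loop does subptr(rsp, sizeof(jdouble)) then fstp_d(Address(rsp, 0)), so the first value popped, st(0) holding the incoming argument, ends up at the highest address, which is exactly incoming_argument_and_return_value_offset:

    #include <cstdio>

    int main() {
      const int num_fpu_regs_in_use = 3;  // assumed for this example
      const int kJdouble = 8;             // sizeof(jdouble)
      // After the loop, the value popped at iteration i sits at
      // rsp + (num_fpu_regs_in_use - 1 - i) * sizeof(jdouble).
      for (int i = 0; i < num_fpu_regs_in_use; i++)
        std::printf("st(%d) spilled to [rsp + %d]\n",
                    i, (num_fpu_regs_in_use - 1 - i) * kJdouble);
      // The slot reused for the incoming argument and, later, the result:
      std::printf("incoming_argument_and_return_value_offset = %d\n",
                  kJdouble * (num_fpu_regs_in_use - 1));
      return 0;
    }
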
 static const double pi_4 = 0.7853981633974483;
 
 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
@@ -8092,73 +8481,27 @@
 
   // slow case: runtime call
   bind(slow_case);
-  // Preserve registers across runtime call
-  pusha();
-  int incoming_argument_and_return_value_offset = -1;
-  if (num_fpu_regs_in_use > 1) {
-    // Must preserve all other FPU regs (could alternatively convert
-    // SharedRuntime::dsin and dcos into assembly routines known not to trash
-    // FPU state, but can not trust C compiler)
-    NEEDS_CLEANUP;
-    // NOTE that in this case we also push the incoming argument to
-    // the stack and restore it later; we also use this stack slot to
-    // hold the return value from dsin or dcos.
-    for (int i = 0; i < num_fpu_regs_in_use; i++) {
-      subptr(rsp, sizeof(jdouble));
-      fstp_d(Address(rsp, 0));
-    }
-    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
-    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
-  }
-  subptr(rsp, sizeof(jdouble));
-  fstp_d(Address(rsp, 0));
-#ifdef _LP64
-  movdbl(xmm0, Address(rsp, 0));
-#endif // _LP64
-
-  // NOTE: we must not use call_VM_leaf here because that requires a
-  // complete interpreter frame in debug mode -- same bug as 4387334
-  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
-  // do proper 64bit abi
-
-  NEEDS_CLEANUP;
-  // Need to add stack banging before this runtime call if it needs to
-  // be taken; however, there is no generic stack banging routine at
-  // the MacroAssembler level
+
   switch(trig) {
   case 's':
     {
-      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
     }
     break;
   case 'c':
     {
-      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
     }
     break;
   case 't':
     {
-      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
    }
     break;
   default:
     assert(false, "bad intrinsic");
     break;
   }
-#ifdef _LP64
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-#endif // _LP64
-  addptr(rsp, sizeof(jdouble));
-  if (num_fpu_regs_in_use > 1) {
-    // Must save return value to stack and then restore entire FPU stack
-    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
-    for (int i = 0; i < num_fpu_regs_in_use; i++) {
-      fld_d(Address(rsp, 0));
-      addptr(rsp, sizeof(jdouble));
-    }
-  }
-  popa();
 
   // Come here with result in F-TOS
   bind(done);
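
With the save/call/restore sequence factored into fp_runtime_fallback, each intrinsic's slow path reduces to a single call, as the trigfunc hunk above shows. A new caller would follow the same shape; the fragment below is a hypothetical sketch (generate_pow_stub and its context are invented for illustration; only pow_or_exp and its contract, arguments on the FPU stack and the result in F-TOS, come from this patch):

    // Hypothetical user of the new entry point, X in st(1) and Y in st(0):
    void generate_pow_stub(MacroAssembler* masm) {
      masm->pow_or_exp(false /* is_exp: pow, not exp */,
                       0     /* num_fpu_regs_in_use: nothing else live */);
      // On return the result is in F-TOS; NaN outcomes were already routed
      // through SharedRuntime::dpow inside pow_or_exp.
    }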