src/cpu/x86/vm/assembler_x86.cpp

changeset 3787: 6759698e3140
parent 3687: fd09f2d8283e
child 3790: e2961d14584b
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon May 14 09:36:00 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Tue May 15 10:10:23 2012 +0200
@@ -3578,6 +3578,21 @@
   emit_byte(0xF1);
 }
 
+void Assembler::frndint() {
+  emit_byte(0xD9);
+  emit_byte(0xFC);
+}
+
+void Assembler::f2xm1() {
+  emit_byte(0xD9);
+  emit_byte(0xF0);
+}
+
+void Assembler::fldl2e() {
+  emit_byte(0xD9);
+  emit_byte(0xEA);
+}
+
 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
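
The three new Assembler methods emit two-byte x87 encodings; per the Intel SDM, FRNDINT (D9 FC) rounds ST(0) to an integer under the current rounding mode, F2XM1 (D9 F0) computes 2^ST(0)-1 for ST(0) in [-1,1], and FLDL2E (D9 EA) pushes the constant log2(e). A rough scalar model of their semantics (a sketch for reference, not HotSpot code):

    #include <cmath>

    // Plain-C++ models of the three x87 instructions encoded above.
    double frndint_model(double x) { return std::nearbyint(x); }  // rounds using the current mode
    double f2xm1_model(double x)   { return std::exp2(x) - 1.0; } // only valid for -1 <= x <= 1
    double fldl2e_model()          { return 1.4426950408889634; } // log2(e)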
@@ -6868,6 +6883,242 @@
   Assembler::fldcw(as_Address(src));
 }
 
+void MacroAssembler::pow_exp_core_encoding() {
+  // kills rax, rcx, rdx
+  subptr(rsp,sizeof(jdouble));
+  // computes 2^X. Stack: X ...
+  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
+  // keep it on the thread's stack to compute 2^int(X) later
+  // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
+  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
+  fld_s(0);                 // Stack: X X ...
+  frndint();                // Stack: int(X) X ...
+  fsuba(1);                 // Stack: int(X) X-int(X) ...
+  fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
+  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
+  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
+  faddp(1);                 // Stack: 2^(X-int(X))
+  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
+  // shift int(X)+1023 to the exponent position.
+  // The exponent is limited to 11 bits: if int(X)+1023 does not fit
+  // in 11 bits, set the result to NaN. 0x000 and 0x7FF are reserved
+  // exponent values, so detect them and set the result to NaN too.
+  movl(rax,Address(rsp,0));
+  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
+  addl(rax, 1023);
+  movl(rdx,rax);
+  shll(rax,20);
+  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
+  addl(rdx,1);
+  // Check that 1 < int(X)+1023+1 < 2048
+  // in 3 steps:
+  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
+  // 2- (int(X)+1023+1) != 0 (rules out exponent -1)
+  // 3- (int(X)+1023+1) != 1 (rules out the reserved exponent 0x000)
+  // Do 2- first because addl just updated the flags.
+  cmov32(Assembler::equal,rax,rcx);
+  cmpl(rdx,1);
+  cmov32(Assembler::equal,rax,rcx);
+  testl(rdx,rcx);
+  cmov32(Assembler::notEqual,rax,rcx);
+  movl(Address(rsp,4),rax);
+  movl(Address(rsp,0),0);
+  fmul_d(Address(rsp,0));   // Stack: 2^X ...
+  addptr(rsp,sizeof(jdouble));
+}
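
The subtle part of pow_exp_core_encoding is building 2^int(X) with integer stores alone: int(X)+1023 is shifted into the IEEE-754 exponent field (shll by 20 places it in the high 32-bit word, i.e. bits 52..62 of the double), and the three-step range check maps any out-of-range exponent to a NaN pattern that callers can detect. A minimal scalar sketch of the same computation (assumes IEEE-754 doubles; names are illustrative):

    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    // Sketch of 2^X = 2^int(X) * 2^(X-int(X)), mirroring pow_exp_core_encoding.
    double exp2_sketch(double x) {
      double n = std::nearbyint(x);            // frndint
      double frac = std::exp2(x - n);          // f2xm1; fld1; faddp
      if (std::fabs(n) > 2048.0)               // exponent can't fit; fast path fails
        return std::numeric_limits<double>::quiet_NaN();
      int64_t e = (int64_t)n + 1023;           // addl(rax, 1023): exponent bias
      if (e <= 0 || e >= 0x7FF)                // 0x000 and 0x7FF are reserved
        return std::numeric_limits<double>::quiet_NaN();
      uint64_t bits = (uint64_t)e << 52;       // shll(rax, 20) on the high word
      double p2n;
      std::memcpy(&p2n, &bits, sizeof p2n);    // the two movl stores at rsp+0/rsp+4
      return p2n * frac;                       // fmul_d
    }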
+
+void MacroAssembler::fast_pow() {
+  // computes X^Y = 2^(Y * log2(X))
+  // If fast computation is not possible, the result is NaN; the
+  // caller of this macro must provide a fallback.
+  fyl2x();                 // Stack: (Y*log2(X)) ...
+  pow_exp_core_encoding(); // Stack: X^Y ...
+}
+
+void MacroAssembler::fast_exp() {
+  // computes exp(X) = 2^(X * log2(e))
+  // If fast computation is not possible, the result is NaN; the
+  // caller of this macro must provide a fallback.
+  fldl2e();                // Stack: log2(e) X ...
+  fmulp(1);                // Stack: (X*log2(e)) ...
+  pow_exp_core_encoding(); // Stack: exp(X) ...
+}
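
Both fast paths reduce to that 2^x core through standard identities: fast_pow uses X^Y = 2^(Y*log2(X)), which fyl2x computes directly but which only holds for X > 0, and fast_exp uses exp(X) = 2^(X*log2(e)), with fldl2e supplying the constant. A quick self-contained check of the identities:

    #include <cmath>
    #include <cstdio>

    int main() {
      double x = 3.0, y = 1.7;
      // fast_pow identity: X^Y == 2^(Y*log2(X)) for X > 0
      printf("%.17g %.17g\n", std::pow(x, y), std::exp2(y * std::log2(x)));
      // fast_exp identity: exp(Y) == 2^(Y*log2(e))
      printf("%.17g %.17g\n", std::exp(y), std::exp2(y * 1.4426950408889634));
      return 0;
    }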
+
+void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
+  // kills rax, rcx, rdx
+  // pow and exp need 2 extra registers on the fpu stack.
+  Label slow_case, done;
+  Register tmp = noreg;
+  if (!VM_Version::supports_cmov()) {
+    // fcmp needs a temporary, so preserve rdx.
+    tmp = rdx;
+  }
+  Register tmp2 = rax;
+  NOT_LP64(Register tmp3 = rcx;)
+
+  if (is_exp) {
+    // Stack: X
+    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
+    fast_exp();                 // Stack: exp(X) X
+    fcmp(tmp, 0, false, false); // Stack: exp(X) X
+    // exp(X) not equal to itself: exp(X) is NaN, go to the slow case.
+    jcc(Assembler::parity, slow_case);
+    // get rid of duplicate argument. Stack: exp(X)
+    if (num_fpu_regs_in_use > 0) {
+      fxch();
+      fpop();
+    } else {
+      ffree(1);
+    }
+    jmp(done);
+  } else {
+    // Stack: X Y
+    Label x_negative, y_odd;
+
+    fldz();                     // Stack: 0 X Y
+    fcmp(tmp, 1, true, false);  // Stack: X Y
+    jcc(Assembler::above, x_negative);
+
+    // X >= 0
+
+    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+    fld_s(1);                   // Stack: X Y X Y
+    fast_pow();                 // Stack: X^Y X Y
+    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
+    // X^Y not equal to itself: X^Y is NaN, go to the slow case.
+    jcc(Assembler::parity, slow_case);
+    // get rid of duplicate arguments. Stack: X^Y
+    if (num_fpu_regs_in_use > 0) {
+      fxch(); fpop();
+      fxch(); fpop();
+    } else {
+      ffree(2);
+      ffree(1);
+    }
+    jmp(done);
+
+    // X < 0
+    bind(x_negative);
+
+    fld_s(1);                   // Stack: Y X Y
+    frndint();                  // Stack: int(Y) X Y
+    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
+    jcc(Assembler::notEqual, slow_case);
+
+    subptr(rsp, 8);
+
+    // For X^Y, when X < 0, Y has to be an integer and the final
+    // result depends on whether it's odd or even. We just checked
+    // that int(Y) == Y. We move int(Y) to the gp registers as a
+    // 64-bit integer to test its parity. If int(Y) is huge and
+    // doesn't fit in the 64-bit integer range, the integer
+    // indefinite value ends up in the gp registers instead. Huge
+    // numbers are all even and the integer indefinite value is even
+    // too, so it's fine.
+
+#ifdef ASSERT
+    // Let's check we don't end up with an integer indefinite number
+    // when not expected. First test for huge numbers: check whether
+    // int(Y)+1 == int(Y) which is true for very large numbers and
+    // those are all even. A 64 bit integer is guaranteed to not
+    // overflow for numbers where y+1 != y (when precision is set to
+    // double precision).
+    Label y_not_huge;
+
+    fld1();                     // Stack: 1 int(Y) X Y
+    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
+
+#ifdef _LP64
+    // trip to memory to force the precision down from double extended
+    // precision
+    fstp_d(Address(rsp, 0));
+    fld_d(Address(rsp, 0));
+#endif
+
+    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
+#endif
+
+    // move int(Y) as 64 bit integer to thread's stack
+    fistp_d(Address(rsp,0));    // Stack: X Y
+
+#ifdef ASSERT
+    jcc(Assembler::notEqual, y_not_huge);
+
+    // Y is huge, so we know it's even. It may not fit in a 64-bit
+    // integer and we don't want the debug code below to see the
+    // integer indefinite value, so overwrite int(Y) on the thread's
+    // stack with 0.
+    movl(Address(rsp, 0), 0);
+    movl(Address(rsp, 4), 0);
+
+    bind(y_not_huge);
+#endif
+
+    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+    fld_s(1);                   // Stack: X Y X Y
+    fabs();                     // Stack: abs(X) Y X Y
+    fast_pow();                 // Stack: abs(X)^Y X Y
+    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
+    // abs(X)^Y not equal to itself: abs(X)^Y is NaN, go to the slow case.
+
+    pop(tmp2);
+    NOT_LP64(pop(tmp3));
+    jcc(Assembler::parity, slow_case);
+
+#ifdef ASSERT
+    // Check that int(Y) is not the integer indefinite value (integer
+    // overflow). This shouldn't happen: for values that would
+    // overflow, 1+int(Y) == int(Y), which was tested for above.
+#ifndef _LP64
+    {
+      Label integer;
+      testl(tmp2, tmp2);
+      jcc(Assembler::notZero, integer);
+      cmpl(tmp3, 0x80000000);
+      jcc(Assembler::notZero, integer);
+      stop("integer indefinite value shouldn't be seen here");
+      bind(integer);
+    }
+#else
+    {
+      Label integer;
+      // tmp2 still holds int(Y) and the parity test below needs it,
+      // so shift a copy (rcx is killed by this code anyway).
+      mov(rcx, tmp2);
+      shlq(rcx, 1);
+      jcc(Assembler::carryClear, integer);
+      jcc(Assembler::notZero, integer);
+      stop("integer indefinite value shouldn't be seen here");
+      bind(integer);
+    }
+#endif
+#endif
+
+    // get rid of duplicate arguments. Stack: X^Y
+    if (num_fpu_regs_in_use > 0) {
+      fxch(); fpop();
+      fxch(); fpop();
+    } else {
+      ffree(2);
+      ffree(1);
+    }
+
+    testl(tmp2, 1);
+    jcc(Assembler::zero, done); // X < 0, Y even: X^Y = abs(X)^Y
+    // X < 0, Y odd: X^Y = -abs(X)^Y
+
+    fchs();                     // Stack: -abs(X)^Y ...
+    jmp(done);
+  }
+
+  // slow case: runtime call
+  bind(slow_case);
+
+  fpop();                       // pop incorrect result or int(Y)
+
+  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
+                      is_exp ? 1 : 2, num_fpu_regs_in_use);
+
+  // Come here with result in F-TOS
+  bind(done);
+}
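
The X < 0 branch encodes the usual pow contract: a negative base is only meaningful for an integer exponent, and the sign of the result follows the parity of Y. Any Y too large for the 53-bit mantissa is necessarily an even integer, so the integer-indefinite pattern that fistp_d produces for huge values (itself even) still yields the right sign. A scalar sketch of that control flow, with std::pow standing in for the fast path:

    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Sketch of pow_or_exp's X < 0 branch: Y must be an integer; compute
    // |X|^Y on the fast path and fix the sign from Y's parity.
    double pow_negative_base(double x, double y) {
      if (std::nearbyint(y) != y)               // frndint + fcmp -> slow case
        return std::numeric_limits<double>::quiet_NaN();
      // Every double with |Y| >= 2^53 is an even integer, so huge Y can be
      // treated as parity 0, matching what the assembly gets from fistp_d.
      int64_t iy = (std::fabs(y) < 9007199254740992.0) ? (int64_t)y : 0;
      double r = std::pow(std::fabs(x), y);     // fabs + fast_pow
      return (iy & 1) ? -r : r;                 // testl(tmp2, 1); fchs if odd
    }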
+
 void MacroAssembler::fpop() {
   ffree();
   fincstp();
@@ -8045,6 +8296,144 @@
 #endif
 }
 
+void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
+  pusha();
+
+  // if we are coming from c1, xmm registers may be live
+  if (UseSSE >= 1) {
+    subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
+  }
+  int off = 0;
+  if (UseSSE == 1)  {
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
+    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
+  } else if (UseSSE >= 2)  {
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7);
+#ifdef _LP64
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14);
+    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15);
+#endif
+  }
+
+  // Preserve registers across runtime call
+  int incoming_argument_and_return_value_offset = -1;
+  if (num_fpu_regs_in_use > 1) {
+    // Must preserve all other FPU regs (could alternatively convert
+    // SharedRuntime::dsin, dcos etc. into assembly routines known not
+    // to trash FPU state, but we cannot trust the C compiler)
+    NEEDS_CLEANUP;
+    // NOTE that in this case we also push the incoming argument(s) to
+    // the stack and restore them later; we also use this stack area to
+    // hold the return value from dsin, dcos etc.
+    for (int i = 0; i < num_fpu_regs_in_use; i++) {
+      subptr(rsp, sizeof(jdouble));
+      fstp_d(Address(rsp, 0));
+    }
+    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
+    for (int i = nb_args-1; i >= 0; i--) {
+      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
+    }
+  }
+
+  subptr(rsp, nb_args*sizeof(jdouble));
+  for (int i = 0; i < nb_args; i++) {
+    fstp_d(Address(rsp, i*sizeof(jdouble)));
+  }
+
+#ifdef _LP64
+  if (nb_args > 0) {
+    movdbl(xmm0, Address(rsp, 0));
+  }
+  if (nb_args > 1) {
+    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
+  }
+  assert(nb_args <= 2, "unsupported number of args");
+#endif // _LP64
+
+  // NOTE: we must not use call_VM_leaf here because that requires a
+  // complete interpreter frame in debug mode -- same bug as 4387334
+  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
+  // do proper 64bit abi
+
+  NEEDS_CLEANUP;
+  // Need to add stack banging before this runtime call if it needs to
+  // be taken; however, there is no generic stack banging routine at
+  // the MacroAssembler level
+
+  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
+
+#ifdef _LP64
+  movsd(Address(rsp, 0), xmm0);
+  fld_d(Address(rsp, 0));
+#endif // _LP64
+  addptr(rsp, sizeof(jdouble) * nb_args);
+  if (num_fpu_regs_in_use > 1) {
+    // Must save return value to stack and then restore entire FPU
+    // stack except incoming arguments
+    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
+    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
+      fld_d(Address(rsp, 0));
+      addptr(rsp, sizeof(jdouble));
+    }
+    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
+    addptr(rsp, sizeof(jdouble) * nb_args);
+  }
+
+  off = 0;
+  if (UseSSE == 1)  {
+    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
+    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
+  } else if (UseSSE >= 2)  {
+    movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
+#ifdef _LP64
+    movdbl(xmm8, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm9, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
+    movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
+#endif
+  }
+  if (UseSSE >= 1) {
+    addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
+  }
+  popa();
+}
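
fp_runtime_fallback juggles three save areas: the pusha GP spill, an XMM spill area sized by UseSSE, and, when num_fpu_regs_in_use > 1, a spill of the live x87 stack whose topmost slots double as the argument and return-value slots. The offset arithmetic is the easiest part to get wrong; this small standalone check prints the reload offsets for one illustrative configuration (the numbers are assumptions, not from the source):

    #include <cstdio>

    // Reproduce the x87 spill/reload offsets for num_fpu_regs_in_use = 3,
    // nb_args = 2. The first fstp_d in the spill loop stores the original
    // ST(0), which ends up at the highest offset once all slots are pushed.
    int main() {
      const int jdouble_size = 8;
      int num_fpu_regs_in_use = 3, nb_args = 2;
      int incoming_argument_and_return_value_offset =
          jdouble_size * (num_fpu_regs_in_use - 1);          // rsp+16: original ST(0)
      for (int i = nb_args - 1; i >= 0; i--)                 // reload args, deepest first
        printf("reload ST for arg %d from rsp+%d\n",
               i, incoming_argument_and_return_value_offset - i * jdouble_size);
      // prints rsp+8 (original ST(1)) then rsp+16 (original ST(0)),
      // restoring the two arguments in their original stack order
      return 0;
    }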
+
 static const double     pi_4 =  0.7853981633974483;
 
 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
@@ -8092,73 +8481,27 @@
 
   // slow case: runtime call
   bind(slow_case);
-  // Preserve registers across runtime call
-  pusha();
-  int incoming_argument_and_return_value_offset = -1;
-  if (num_fpu_regs_in_use > 1) {
-    // Must preserve all other FPU regs (could alternatively convert
-    // SharedRuntime::dsin and dcos into assembly routines known not to trash
-    // FPU state, but can not trust C compiler)
-    NEEDS_CLEANUP;
-    // NOTE that in this case we also push the incoming argument to
-    // the stack and restore it later; we also use this stack slot to
-    // hold the return value from dsin or dcos.
-    for (int i = 0; i < num_fpu_regs_in_use; i++) {
-      subptr(rsp, sizeof(jdouble));
-      fstp_d(Address(rsp, 0));
-    }
-    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
-    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
-  }
-  subptr(rsp, sizeof(jdouble));
-  fstp_d(Address(rsp, 0));
-#ifdef _LP64
-  movdbl(xmm0, Address(rsp, 0));
-#endif // _LP64
-
-  // NOTE: we must not use call_VM_leaf here because that requires a
-  // complete interpreter frame in debug mode -- same bug as 4387334
-  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
-  // do proper 64bit abi
-
-  NEEDS_CLEANUP;
-  // Need to add stack banging before this runtime call if it needs to
-  // be taken; however, there is no generic stack banging routine at
-  // the MacroAssembler level
+
   switch(trig) {
   case 's':
     {
-      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
    }
    break;
  case 'c':
    {
-      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
    }
    break;
  case 't':
    {
-      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
+      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
-#ifdef _LP64
-    movsd(Address(rsp, 0), xmm0);
-    fld_d(Address(rsp, 0));
-#endif // _LP64
-  addptr(rsp, sizeof(jdouble));
-  if (num_fpu_regs_in_use > 1) {
-    // Must save return value to stack and then restore entire FPU stack
-    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
-    for (int i = 0; i < num_fpu_regs_in_use; i++) {
-      fld_d(Address(rsp, 0));
-      addptr(rsp, sizeof(jdouble));
-    }
-  }
-  popa();
 
   // Come here with result in F-TOS
   bind(done);