1.1 --- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Fri Sep 14 22:00:41 2012 -0700 1.2 +++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Mon Sep 17 12:57:58 2012 -0700 1.3 @@ -364,9 +364,9 @@ 1.4 // --------------------------------------------------------------------------- 1.5 // The compiled Java calling convention. The Java convention always passes 1.6 // 64-bit values in adjacent aligned locations (either registers or stack), 1.7 -// floats in float registers and doubles in aligned float pairs. Values are 1.8 -// packed in the registers. There is no backing varargs store for values in 1.9 -// registers. In the 32-bit build, longs are passed in G1 and G4 (cannot be 1.10 +// floats in float registers and doubles in aligned float pairs. There is 1.11 +// no backing varargs store for values in registers. 1.12 +// In the 32-bit build, longs are passed on the stack (cannot be 1.13 // passed in I's, because longs in I's get their heads chopped off at 1.14 // interrupt). 1.15 int SharedRuntime::java_calling_convention(const BasicType *sig_bt, 1.16 @@ -375,76 +375,13 @@ 1.17 int is_outgoing) { 1.18 assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers"); 1.19 1.20 - // Convention is to pack the first 6 int/oop args into the first 6 registers 1.21 - // (I0-I5), extras spill to the stack. Then pack the first 8 float args 1.22 - // into F0-F7, extras spill to the stack. Then pad all register sets to 1.23 - // align. Then put longs and doubles into the same registers as they fit, 1.24 - // else spill to the stack. 1.25 const int int_reg_max = SPARC_ARGS_IN_REGS_NUM; 1.26 const int flt_reg_max = 8; 1.27 - // 1.28 - // Where 32-bit 1-reg longs start being passed 1.29 - // In tiered we must pass on stack because c1 can't use a "pair" in a single reg. 1.30 - // So make it look like we've filled all the G regs that c2 wants to use. 1.31 - Register g_reg = TieredCompilation ? noreg : G1; 1.32 - 1.33 - // Count int/oop and float args. See how many stack slots we'll need and 1.34 - // where the longs & doubles will go. 1.35 - int int_reg_cnt = 0; 1.36 - int flt_reg_cnt = 0; 1.37 - // int stk_reg_pairs = frame::register_save_words*(wordSize>>2); 1.38 - // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots(); 1.39 - int stk_reg_pairs = 0; 1.40 - for (int i = 0; i < total_args_passed; i++) { 1.41 - switch (sig_bt[i]) { 1.42 - case T_LONG: // LP64, longs compete with int args 1.43 - assert(sig_bt[i+1] == T_VOID, ""); 1.44 -#ifdef _LP64 1.45 - if (int_reg_cnt < int_reg_max) int_reg_cnt++; 1.46 -#endif 1.47 - break; 1.48 - case T_OBJECT: 1.49 - case T_ARRAY: 1.50 - case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address 1.51 - if (int_reg_cnt < int_reg_max) int_reg_cnt++; 1.52 -#ifndef _LP64 1.53 - else stk_reg_pairs++; 1.54 -#endif 1.55 - break; 1.56 - case T_INT: 1.57 - case T_SHORT: 1.58 - case T_CHAR: 1.59 - case T_BYTE: 1.60 - case T_BOOLEAN: 1.61 - if (int_reg_cnt < int_reg_max) int_reg_cnt++; 1.62 - else stk_reg_pairs++; 1.63 - break; 1.64 - case T_FLOAT: 1.65 - if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++; 1.66 - else stk_reg_pairs++; 1.67 - break; 1.68 - case T_DOUBLE: 1.69 - assert(sig_bt[i+1] == T_VOID, ""); 1.70 - break; 1.71 - case T_VOID: 1.72 - break; 1.73 - default: 1.74 - ShouldNotReachHere(); 1.75 - } 1.76 - } 1.77 - 1.78 - // This is where the longs/doubles start on the stack. 1.79 - stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round 1.80 - 1.81 - int flt_reg_pairs = (flt_reg_cnt+1) & ~1; 1.82 - 1.83 - // int stk_reg = frame::register_save_words*(wordSize>>2); 1.84 - // int stk_reg = SharedRuntime::out_preserve_stack_slots(); 1.85 - int stk_reg = 0; 1.86 + 1.87 int int_reg = 0; 1.88 int flt_reg = 0; 1.89 - 1.90 - // Now do the signature layout 1.91 + int slot = 0; 1.92 + 1.93 for (int i = 0; i < total_args_passed; i++) { 1.94 switch (sig_bt[i]) { 1.95 case T_INT: 1.96 @@ -461,11 +398,14 @@ 1.97 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); 1.98 regs[i].set1(r->as_VMReg()); 1.99 } else { 1.100 - regs[i].set1(VMRegImpl::stack2reg(stk_reg++)); 1.101 + regs[i].set1(VMRegImpl::stack2reg(slot++)); 1.102 } 1.103 break; 1.104 1.105 #ifdef _LP64 1.106 + case T_LONG: 1.107 + assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half"); 1.108 + // fall-through 1.109 case T_OBJECT: 1.110 case T_ARRAY: 1.111 case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address 1.112 @@ -473,78 +413,57 @@ 1.113 Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); 1.114 regs[i].set2(r->as_VMReg()); 1.115 } else { 1.116 - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); 1.117 - stk_reg_pairs += 2; 1.118 + slot = round_to(slot, 2); // align 1.119 + regs[i].set2(VMRegImpl::stack2reg(slot)); 1.120 + slot += 2; 1.121 } 1.122 break; 1.123 -#endif // _LP64 1.124 - 1.125 +#else 1.126 case T_LONG: 1.127 assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half"); 1.128 -#ifdef _LP64 1.129 - if (int_reg < int_reg_max) { 1.130 - Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); 1.131 - regs[i].set2(r->as_VMReg()); 1.132 - } else { 1.133 - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); 1.134 - stk_reg_pairs += 2; 1.135 - } 1.136 -#else 1.137 -#ifdef COMPILER2 1.138 - // For 32-bit build, can't pass longs in O-regs because they become 1.139 - // I-regs and get trashed. Use G-regs instead. G1 and G4 are almost 1.140 - // spare and available. This convention isn't used by the Sparc ABI or 1.141 - // anywhere else. If we're tiered then we don't use G-regs because c1 1.142 - // can't deal with them as a "pair". (Tiered makes this code think g's are filled) 1.143 - // G0: zero 1.144 - // G1: 1st Long arg 1.145 - // G2: global allocated to TLS 1.146 - // G3: used in inline cache check 1.147 - // G4: 2nd Long arg 1.148 - // G5: used in inline cache check 1.149 - // G6: used by OS 1.150 - // G7: used by OS 1.151 - 1.152 - if (g_reg == G1) { 1.153 - regs[i].set2(G1->as_VMReg()); // This long arg in G1 1.154 - g_reg = G4; // Where the next arg goes 1.155 - } else if (g_reg == G4) { 1.156 - regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4 1.157 - g_reg = noreg; // No more longs in registers 1.158 - } else { 1.159 - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); 1.160 - stk_reg_pairs += 2; 1.161 - } 1.162 -#else // COMPILER2 1.163 - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); 1.164 - stk_reg_pairs += 2; 1.165 -#endif // COMPILER2 1.166 -#endif // _LP64 1.167 + // On 32-bit SPARC put longs always on the stack to keep the pressure off 1.168 + // integer argument registers. They should be used for oops. 1.169 + slot = round_to(slot, 2); // align 1.170 + regs[i].set2(VMRegImpl::stack2reg(slot)); 1.171 + slot += 2; 1.172 +#endif 1.173 break; 1.174 1.175 case T_FLOAT: 1.176 - if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg()); 1.177 - else regs[i].set1(VMRegImpl::stack2reg(stk_reg++)); 1.178 + if (flt_reg < flt_reg_max) { 1.179 + FloatRegister r = as_FloatRegister(flt_reg++); 1.180 + regs[i].set1(r->as_VMReg()); 1.181 + } else { 1.182 + regs[i].set1(VMRegImpl::stack2reg(slot++)); 1.183 + } 1.184 break; 1.185 + 1.186 case T_DOUBLE: 1.187 assert(sig_bt[i+1] == T_VOID, "expecting half"); 1.188 - if (flt_reg_pairs + 1 < flt_reg_max) { 1.189 - regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg()); 1.190 - flt_reg_pairs += 2; 1.191 + if (round_to(flt_reg, 2) + 1 < flt_reg_max) { 1.192 + flt_reg = round_to(flt_reg, 2); // align 1.193 + FloatRegister r = as_FloatRegister(flt_reg); 1.194 + regs[i].set2(r->as_VMReg()); 1.195 + flt_reg += 2; 1.196 } else { 1.197 - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); 1.198 - stk_reg_pairs += 2; 1.199 + slot = round_to(slot, 2); // align 1.200 + regs[i].set2(VMRegImpl::stack2reg(slot)); 1.201 + slot += 2; 1.202 } 1.203 break; 1.204 - case T_VOID: regs[i].set_bad(); break; // Halves of longs & doubles 1.205 + 1.206 + case T_VOID: 1.207 + regs[i].set_bad(); // Halves of longs & doubles 1.208 + break; 1.209 + 1.210 default: 1.211 - ShouldNotReachHere(); 1.212 + fatal(err_msg_res("unknown basic type %d", sig_bt[i])); 1.213 + break; 1.214 } 1.215 } 1.216 1.217 // retun the amount of stack space these arguments will need. 1.218 - return stk_reg_pairs; 1.219 - 1.220 + return slot; 1.221 } 1.222 1.223 // Helper class mostly to avoid passing masm everywhere, and handle 1.224 @@ -601,8 +520,7 @@ 1.225 Label L; 1.226 __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch); 1.227 __ br_null(G3_scratch, false, Assembler::pt, L); 1.228 - // Schedule the branch target address early. 1.229 - __ delayed()->ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch); 1.230 + __ delayed()->nop(); 1.231 // Call into the VM to patch the caller, then jump to compiled callee 1.232 __ save_frame(4); // Args in compiled layout; do not blow them 1.233 1.234 @@ -645,7 +563,6 @@ 1.235 __ ldx(FP, -8 + STACK_BIAS, G1); 1.236 __ ldx(FP, -16 + STACK_BIAS, G4); 1.237 __ mov(L5, G5_method); 1.238 - __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch); 1.239 #endif /* _LP64 */ 1.240 1.241 __ restore(); // Restore args 1.242 @@ -726,7 +643,7 @@ 1.243 int comp_args_on_stack, // VMRegStackSlots 1.244 const BasicType *sig_bt, 1.245 const VMRegPair *regs, 1.246 - Label& skip_fixup) { 1.247 + Label& L_skip_fixup) { 1.248 1.249 // Before we get into the guts of the C2I adapter, see if we should be here 1.250 // at all. We've come from compiled code and are attempting to jump to the 1.251 @@ -747,7 +664,7 @@ 1.252 1.253 patch_callers_callsite(); 1.254 1.255 - __ bind(skip_fixup); 1.256 + __ bind(L_skip_fixup); 1.257 1.258 // Since all args are passed on the stack, total_args_passed*wordSize is the 1.259 // space we need. Add in varargs area needed by the interpreter. Round up 1.260 @@ -757,46 +674,18 @@ 1.261 (frame::varargs_offset - frame::register_save_words)*wordSize; 1.262 const int extraspace = round_to(arg_size + varargs_area, 2*wordSize); 1.263 1.264 - int bias = STACK_BIAS; 1.265 + const int bias = STACK_BIAS; 1.266 const int interp_arg_offset = frame::varargs_offset*wordSize + 1.267 (total_args_passed-1)*Interpreter::stackElementSize; 1.268 1.269 - Register base = SP; 1.270 - 1.271 -#ifdef _LP64 1.272 - // In the 64bit build because of wider slots and STACKBIAS we can run 1.273 - // out of bits in the displacement to do loads and stores. Use g3 as 1.274 - // temporary displacement. 1.275 - if (!Assembler::is_simm13(extraspace)) { 1.276 - __ set(extraspace, G3_scratch); 1.277 - __ sub(SP, G3_scratch, SP); 1.278 - } else { 1.279 - __ sub(SP, extraspace, SP); 1.280 - } 1.281 + const Register base = SP; 1.282 + 1.283 + // Make some extra space on the stack. 1.284 + __ sub(SP, __ ensure_simm13_or_reg(extraspace, G3_scratch), SP); 1.285 set_Rdisp(G3_scratch); 1.286 -#else 1.287 - __ sub(SP, extraspace, SP); 1.288 -#endif // _LP64 1.289 - 1.290 - // First write G1 (if used) to where ever it must go 1.291 - for (int i=0; i<total_args_passed; i++) { 1.292 - const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias; 1.293 - VMReg r_1 = regs[i].first(); 1.294 - VMReg r_2 = regs[i].second(); 1.295 - if (r_1 == G1_scratch->as_VMReg()) { 1.296 - if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { 1.297 - store_c2i_object(G1_scratch, base, st_off); 1.298 - } else if (sig_bt[i] == T_LONG) { 1.299 - assert(!TieredCompilation, "should not use register args for longs"); 1.300 - store_c2i_long(G1_scratch, base, st_off, false); 1.301 - } else { 1.302 - store_c2i_int(G1_scratch, base, st_off); 1.303 - } 1.304 - } 1.305 - } 1.306 - 1.307 - // Now write the args into the outgoing interpreter space 1.308 - for (int i=0; i<total_args_passed; i++) { 1.309 + 1.310 + // Write the args into the outgoing interpreter space. 1.311 + for (int i = 0; i < total_args_passed; i++) { 1.312 const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias; 1.313 VMReg r_1 = regs[i].first(); 1.314 VMReg r_2 = regs[i].second(); 1.315 @@ -804,23 +693,9 @@ 1.316 assert(!r_2->is_valid(), ""); 1.317 continue; 1.318 } 1.319 - // Skip G1 if found as we did it first in order to free it up 1.320 - if (r_1 == G1_scratch->as_VMReg()) { 1.321 - continue; 1.322 - } 1.323 -#ifdef ASSERT 1.324 - bool G1_forced = false; 1.325 -#endif // ASSERT 1.326 if (r_1->is_stack()) { // Pretend stack targets are loaded into G1 1.327 -#ifdef _LP64 1.328 - Register ld_off = Rdisp; 1.329 - __ set(reg2offset(r_1) + extraspace + bias, ld_off); 1.330 -#else 1.331 - int ld_off = reg2offset(r_1) + extraspace + bias; 1.332 -#endif // _LP64 1.333 -#ifdef ASSERT 1.334 - G1_forced = true; 1.335 -#endif // ASSERT 1.336 + RegisterOrConstant ld_off = reg2offset(r_1) + extraspace + bias; 1.337 + ld_off = __ ensure_simm13_or_reg(ld_off, Rdisp); 1.338 r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle 1.339 if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch); 1.340 else __ ldx(base, ld_off, G1_scratch); 1.341 @@ -831,11 +706,6 @@ 1.342 if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { 1.343 store_c2i_object(r, base, st_off); 1.344 } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { 1.345 -#ifndef _LP64 1.346 - if (TieredCompilation) { 1.347 - assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs"); 1.348 - } 1.349 -#endif // _LP64 1.350 store_c2i_long(r, base, st_off, r_2->is_stack()); 1.351 } else { 1.352 store_c2i_int(r, base, st_off); 1.353 @@ -851,19 +721,12 @@ 1.354 } 1.355 } 1.356 1.357 -#ifdef _LP64 1.358 - // Need to reload G3_scratch, used for temporary displacements. 1.359 + // Load the interpreter entry point. 1.360 __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch); 1.361 1.362 // Pass O5_savedSP as an argument to the interpreter. 1.363 // The interpreter will restore SP to this value before returning. 1.364 - __ set(extraspace, G1); 1.365 - __ add(SP, G1, O5_savedSP); 1.366 -#else 1.367 - // Pass O5_savedSP as an argument to the interpreter. 1.368 - // The interpreter will restore SP to this value before returning. 1.369 - __ add(SP, extraspace, O5_savedSP); 1.370 -#endif // _LP64 1.371 + __ add(SP, __ ensure_simm13_or_reg(extraspace, G1), O5_savedSP); 1.372 1.373 __ mov((frame::varargs_offset)*wordSize - 1.374 1*Interpreter::stackElementSize+bias+BytesPerWord, G1); 1.375 @@ -971,7 +834,6 @@ 1.376 1.377 // Outputs: 1.378 // G2_thread - TLS 1.379 - // G1, G4 - Outgoing long args in 32-bit build 1.380 // O0-O5 - Outgoing args in compiled layout 1.381 // O6 - Adjusted or restored SP 1.382 // O7 - Valid return address 1.383 @@ -1016,10 +878,10 @@ 1.384 // +--------------+ <--- start of outgoing args 1.385 // | pad, align | | 1.386 // +--------------+ | 1.387 - // | ints, floats | |---Outgoing stack args, packed low. 1.388 - // +--------------+ | First few args in registers. 1.389 - // : doubles : | 1.390 - // | longs | | 1.391 + // | ints, longs, | | 1.392 + // | floats, | |---Outgoing stack args. 1.393 + // : doubles : | First few args in registers. 1.394 + // | | | 1.395 // +--------------+ <--- SP' + 16*wordsize 1.396 // | | 1.397 // : window : 1.398 @@ -1033,7 +895,6 @@ 1.399 // Cut-out for having no stack args. Since up to 6 args are passed 1.400 // in registers, we will commonly have no stack args. 1.401 if (comp_args_on_stack > 0) { 1.402 - 1.403 // Convert VMReg stack slots to words. 1.404 int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; 1.405 // Round up to miminum stack alignment, in wordSize 1.406 @@ -1044,13 +905,9 @@ 1.407 __ sub(SP, (comp_words_on_stack)*wordSize, SP); 1.408 } 1.409 1.410 - // Will jump to the compiled code just as if compiled code was doing it. 1.411 - // Pre-load the register-jump target early, to schedule it better. 1.412 - __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3); 1.413 - 1.414 // Now generate the shuffle code. Pick up all register args and move the 1.415 // rest through G1_scratch. 1.416 - for (int i=0; i<total_args_passed; i++) { 1.417 + for (int i = 0; i < total_args_passed; i++) { 1.418 if (sig_bt[i] == T_VOID) { 1.419 // Longs and doubles are passed in native word order, but misaligned 1.420 // in the 32-bit build. 1.421 @@ -1088,14 +945,13 @@ 1.422 next_arg_slot(ld_off) : arg_slot(ld_off); 1.423 __ ldx(Gargs, slot, r); 1.424 #else 1.425 - // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the 1.426 - // stack shuffle. Load the first 2 longs into G1/G4 later. 1.427 + fatal("longs should be on stack"); 1.428 #endif 1.429 } 1.430 } else { 1.431 assert(r_1->is_FloatRegister(), ""); 1.432 if (!r_2->is_valid()) { 1.433 - __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister()); 1.434 + __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister()); 1.435 } else { 1.436 #ifdef _LP64 1.437 // In V9, doubles are given 2 64-bit slots in the interpreter, but the 1.438 @@ -1104,11 +960,11 @@ 1.439 // spare float register. 1.440 RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 1.441 next_arg_slot(ld_off) : arg_slot(ld_off); 1.442 - __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister()); 1.443 + __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister()); 1.444 #else 1.445 // Need to marshal 64-bit value from misaligned Lesp loads 1.446 __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister()); 1.447 - __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister()); 1.448 + __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister()); 1.449 #endif 1.450 } 1.451 } 1.452 @@ -1124,76 +980,35 @@ 1.453 else __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot); 1.454 } 1.455 } 1.456 - bool made_space = false; 1.457 -#ifndef _LP64 1.458 - // May need to pick up a few long args in G1/G4 1.459 - bool g4_crushed = false; 1.460 - bool g3_crushed = false; 1.461 - for (int i=0; i<total_args_passed; i++) { 1.462 - if (regs[i].first()->is_Register() && regs[i].second()->is_valid()) { 1.463 - // Load in argument order going down 1.464 - int ld_off = (total_args_passed-i)*Interpreter::stackElementSize; 1.465 - // Need to marshal 64-bit value from misaligned Lesp loads 1.466 - Register r = regs[i].first()->as_Register()->after_restore(); 1.467 - if (r == G1 || r == G4) { 1.468 - assert(!g4_crushed, "ordering problem"); 1.469 - if (r == G4){ 1.470 - g4_crushed = true; 1.471 - __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits 1.472 - __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits 1.473 - } else { 1.474 - // better schedule this way 1.475 - __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits 1.476 - __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits 1.477 - } 1.478 - g3_crushed = true; 1.479 - __ sllx(r, 32, r); 1.480 - __ or3(G3_scratch, r, r); 1.481 - } else { 1.482 - assert(r->is_out(), "longs passed in two O registers"); 1.483 - __ ld (Gargs, arg_slot(ld_off) , r->successor()); // Load lo bits 1.484 - __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits 1.485 - } 1.486 - } 1.487 + 1.488 + // Jump to the compiled code just as if compiled code was doing it. 1.489 + __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3); 1.490 + 1.491 + // 6243940 We might end up in handle_wrong_method if 1.492 + // the callee is deoptimized as we race thru here. If that 1.493 + // happens we don't want to take a safepoint because the 1.494 + // caller frame will look interpreted and arguments are now 1.495 + // "compiled" so it is much better to make this transition 1.496 + // invisible to the stack walking code. Unfortunately if 1.497 + // we try and find the callee by normal means a safepoint 1.498 + // is possible. So we stash the desired callee in the thread 1.499 + // and the vm will find there should this case occur. 1.500 + Address callee_target_addr(G2_thread, JavaThread::callee_target_offset()); 1.501 + __ st_ptr(G5_method, callee_target_addr); 1.502 + 1.503 + if (StressNonEntrant) { 1.504 + // Open a big window for deopt failure 1.505 + __ save_frame(0); 1.506 + __ mov(G0, L0); 1.507 + Label loop; 1.508 + __ bind(loop); 1.509 + __ sub(L0, 1, L0); 1.510 + __ br_null_short(L0, Assembler::pt, loop); 1.511 + __ restore(); 1.512 } 1.513 -#endif 1.514 - 1.515 - // Jump to the compiled code just as if compiled code was doing it. 1.516 - // 1.517 -#ifndef _LP64 1.518 - if (g3_crushed) { 1.519 - // Rats load was wasted, at least it is in cache... 1.520 - __ ld_ptr(G5_method, Method::from_compiled_offset(), G3); 1.521 - } 1.522 -#endif /* _LP64 */ 1.523 - 1.524 - // 6243940 We might end up in handle_wrong_method if 1.525 - // the callee is deoptimized as we race thru here. If that 1.526 - // happens we don't want to take a safepoint because the 1.527 - // caller frame will look interpreted and arguments are now 1.528 - // "compiled" so it is much better to make this transition 1.529 - // invisible to the stack walking code. Unfortunately if 1.530 - // we try and find the callee by normal means a safepoint 1.531 - // is possible. So we stash the desired callee in the thread 1.532 - // and the vm will find there should this case occur. 1.533 - Address callee_target_addr(G2_thread, JavaThread::callee_target_offset()); 1.534 - __ st_ptr(G5_method, callee_target_addr); 1.535 - 1.536 - if (StressNonEntrant) { 1.537 - // Open a big window for deopt failure 1.538 - __ save_frame(0); 1.539 - __ mov(G0, L0); 1.540 - Label loop; 1.541 - __ bind(loop); 1.542 - __ sub(L0, 1, L0); 1.543 - __ br_null_short(L0, Assembler::pt, loop); 1.544 - 1.545 - __ restore(); 1.546 - } 1.547 - 1.548 - 1.549 - __ jmpl(G3, 0, G0); 1.550 - __ delayed()->nop(); 1.551 + 1.552 + __ jmpl(G3, 0, G0); 1.553 + __ delayed()->nop(); 1.554 } 1.555 1.556 // --------------------------------------------------------------- 1.557 @@ -1221,28 +1036,17 @@ 1.558 // compiled code, which relys solely on SP and not FP, get sick). 1.559 1.560 address c2i_unverified_entry = __ pc(); 1.561 - Label skip_fixup; 1.562 + Label L_skip_fixup; 1.563 { 1.564 -#if !defined(_LP64) && defined(COMPILER2) 1.565 - Register R_temp = L0; // another scratch register 1.566 -#else 1.567 - Register R_temp = G1; // another scratch register 1.568 -#endif 1.569 + Register R_temp = G1; // another scratch register 1.570 1.571 AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); 1.572 1.573 __ verify_oop(O0); 1.574 __ load_klass(O0, G3_scratch); 1.575 1.576 -#if !defined(_LP64) && defined(COMPILER2) 1.577 - __ save(SP, -frame::register_save_words*wordSize, SP); 1.578 __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp); 1.579 __ cmp(G3_scratch, R_temp); 1.580 - __ restore(); 1.581 -#else 1.582 - __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp); 1.583 - __ cmp(G3_scratch, R_temp); 1.584 -#endif 1.585 1.586 Label ok, ok2; 1.587 __ brx(Assembler::equal, false, Assembler::pt, ok); 1.588 @@ -1256,8 +1060,8 @@ 1.589 // the call site corrected. 1.590 __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch); 1.591 __ bind(ok2); 1.592 - __ br_null(G3_scratch, false, Assembler::pt, skip_fixup); 1.593 - __ delayed()->ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch); 1.594 + __ br_null(G3_scratch, false, Assembler::pt, L_skip_fixup); 1.595 + __ delayed()->nop(); 1.596 __ jump_to(ic_miss, G3_scratch); 1.597 __ delayed()->nop(); 1.598 1.599 @@ -1265,7 +1069,7 @@ 1.600 1.601 address c2i_entry = __ pc(); 1.602 1.603 - agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); 1.604 + agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup); 1.605 1.606 __ flush(); 1.607 return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); 1.608 @@ -1985,12 +1789,12 @@ 1.609 } 1.610 1.611 static void verify_oop_args(MacroAssembler* masm, 1.612 - int total_args_passed, 1.613 + methodHandle method, 1.614 const BasicType* sig_bt, 1.615 const VMRegPair* regs) { 1.616 Register temp_reg = G5_method; // not part of any compiled calling seq 1.617 if (VerifyOops) { 1.618 - for (int i = 0; i < total_args_passed; i++) { 1.619 + for (int i = 0; i < method->size_of_parameters(); i++) { 1.620 if (sig_bt[i] == T_OBJECT || 1.621 sig_bt[i] == T_ARRAY) { 1.622 VMReg r = regs[i].first(); 1.623 @@ -2009,35 +1813,32 @@ 1.624 } 1.625 1.626 static void gen_special_dispatch(MacroAssembler* masm, 1.627 - int total_args_passed, 1.628 - int comp_args_on_stack, 1.629 - vmIntrinsics::ID special_dispatch, 1.630 + methodHandle method, 1.631 const BasicType* sig_bt, 1.632 const VMRegPair* regs) { 1.633 - verify_oop_args(masm, total_args_passed, sig_bt, regs); 1.634 + verify_oop_args(masm, method, sig_bt, regs); 1.635 + vmIntrinsics::ID iid = method->intrinsic_id(); 1.636 1.637 // Now write the args into the outgoing interpreter space 1.638 bool has_receiver = false; 1.639 Register receiver_reg = noreg; 1.640 int member_arg_pos = -1; 1.641 Register member_reg = noreg; 1.642 - int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch); 1.643 + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1.644 if (ref_kind != 0) { 1.645 - member_arg_pos = total_args_passed - 1; // trailing MemberName argument 1.646 + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1.647 member_reg = G5_method; // known to be free at this point 1.648 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1.649 - } else if (special_dispatch == vmIntrinsics::_invokeBasic) { 1.650 + } else if (iid == vmIntrinsics::_invokeBasic) { 1.651 has_receiver = true; 1.652 } else { 1.653 - fatal(err_msg("special_dispatch=%d", special_dispatch)); 1.654 + fatal(err_msg_res("unexpected intrinsic id %d", iid)); 1.655 } 1.656 1.657 if (member_reg != noreg) { 1.658 // Load the member_arg into register, if necessary. 1.659 - assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); 1.660 - assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); 1.661 + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1.662 VMReg r = regs[member_arg_pos].first(); 1.663 - assert(r->is_valid(), "bad member arg"); 1.664 if (r->is_stack()) { 1.665 RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; 1.666 ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); 1.667 @@ -2050,7 +1851,7 @@ 1.668 1.669 if (has_receiver) { 1.670 // Make sure the receiver is loaded into a register. 1.671 - assert(total_args_passed > 0, "oob"); 1.672 + assert(method->size_of_parameters() > 0, "oob"); 1.673 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1.674 VMReg r = regs[0].first(); 1.675 assert(r->is_valid(), "bad receiver arg"); 1.676 @@ -2058,7 +1859,7 @@ 1.677 // Porting note: This assumes that compiled calling conventions always 1.678 // pass the receiver oop in a register. If this is not true on some 1.679 // platform, pick a temp and load the receiver from stack. 1.680 - assert(false, "receiver always in a register"); 1.681 + fatal("receiver always in a register"); 1.682 receiver_reg = G3_scratch; // known to be free at this point 1.683 RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; 1.684 ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); 1.685 @@ -2070,7 +1871,7 @@ 1.686 } 1.687 1.688 // Figure out which address we are really jumping to: 1.689 - MethodHandles::generate_method_handle_dispatch(masm, special_dispatch, 1.690 + MethodHandles::generate_method_handle_dispatch(masm, iid, 1.691 receiver_reg, member_reg, /*for_compiler_entry:*/ true); 1.692 } 1.693 1.694 @@ -2103,11 +1904,9 @@ 1.695 // transition back to thread_in_Java 1.696 // return to caller 1.697 // 1.698 -nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 1.699 +nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 1.700 methodHandle method, 1.701 int compile_id, 1.702 - int total_in_args, 1.703 - int comp_args_on_stack, // in VMRegStackSlots 1.704 BasicType* in_sig_bt, 1.705 VMRegPair* in_regs, 1.706 BasicType ret_type) { 1.707 @@ -2116,9 +1915,7 @@ 1.708 intptr_t start = (intptr_t)__ pc(); 1.709 int vep_offset = ((intptr_t)__ pc()) - start; 1.710 gen_special_dispatch(masm, 1.711 - total_in_args, 1.712 - comp_args_on_stack, 1.713 - method->intrinsic_id(), 1.714 + method, 1.715 in_sig_bt, 1.716 in_regs); 1.717 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 1.718 @@ -2220,6 +2017,7 @@ 1.719 // we convert the java signature to a C signature by inserting 1.720 // the hidden arguments as arg[0] and possibly arg[1] (static method) 1.721 1.722 + const int total_in_args = method->size_of_parameters(); 1.723 int total_c_args = total_in_args; 1.724 int total_save_slots = 6 * VMRegImpl::slots_per_word; 1.725 if (!is_critical_native) {