src/cpu/sparc/vm/sharedRuntime_sparc.cpp

changeset 4101:2cb2f30450c7
parent    4051:8a02ca5e5576
child     4103:137868b7aa6f
     1.1 --- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Fri Sep 14 22:00:41 2012 -0700
     1.2 +++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Mon Sep 17 12:57:58 2012 -0700
     1.3 @@ -364,9 +364,9 @@
     1.4  // ---------------------------------------------------------------------------
     1.5  // The compiled Java calling convention.  The Java convention always passes
     1.6  // 64-bit values in adjacent aligned locations (either registers or stack),
     1.7 -// floats in float registers and doubles in aligned float pairs.  Values are
     1.8 -// packed in the registers.  There is no backing varargs store for values in
     1.9 -// registers.  In the 32-bit build, longs are passed in G1 and G4 (cannot be
    1.10 +// floats in float registers and doubles in aligned float pairs.  There is
    1.11 +// no backing varargs store for values in registers.
    1.12 +// In the 32-bit build, longs are passed on the stack (cannot be
    1.13  // passed in I's, because longs in I's get their heads chopped off at
    1.14  // interrupt).
    1.15  int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
    1.16 @@ -375,76 +375,13 @@
    1.17                                             int is_outgoing) {
    1.18    assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");
    1.19  
    1.20 -  // Convention is to pack the first 6 int/oop args into the first 6 registers
    1.21 -  // (I0-I5), extras spill to the stack.  Then pack the first 8 float args
    1.22 -  // into F0-F7, extras spill to the stack.  Then pad all register sets to
    1.23 -  // align.  Then put longs and doubles into the same registers as they fit,
    1.24 -  // else spill to the stack.
    1.25    const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
    1.26    const int flt_reg_max = 8;
    1.27 -  //
    1.28 -  // Where 32-bit 1-reg longs start being passed
    1.29 -  // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
    1.30 -  // So make it look like we've filled all the G regs that c2 wants to use.
    1.31 -  Register g_reg = TieredCompilation ? noreg : G1;
    1.32 -
    1.33 -  // Count int/oop and float args.  See how many stack slots we'll need and
    1.34 -  // where the longs & doubles will go.
    1.35 -  int int_reg_cnt   = 0;
    1.36 -  int flt_reg_cnt   = 0;
    1.37 -  // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
    1.38 -  // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
    1.39 -  int stk_reg_pairs = 0;
    1.40 -  for (int i = 0; i < total_args_passed; i++) {
    1.41 -    switch (sig_bt[i]) {
    1.42 -    case T_LONG:                // LP64, longs compete with int args
    1.43 -      assert(sig_bt[i+1] == T_VOID, "");
    1.44 -#ifdef _LP64
    1.45 -      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
    1.46 -#endif
    1.47 -      break;
    1.48 -    case T_OBJECT:
    1.49 -    case T_ARRAY:
    1.50 -    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
    1.51 -      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
    1.52 -#ifndef _LP64
    1.53 -      else                            stk_reg_pairs++;
    1.54 -#endif
    1.55 -      break;
    1.56 -    case T_INT:
    1.57 -    case T_SHORT:
    1.58 -    case T_CHAR:
    1.59 -    case T_BYTE:
    1.60 -    case T_BOOLEAN:
    1.61 -      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
    1.62 -      else                            stk_reg_pairs++;
    1.63 -      break;
    1.64 -    case T_FLOAT:
    1.65 -      if (flt_reg_cnt < flt_reg_max)  flt_reg_cnt++;
    1.66 -      else                            stk_reg_pairs++;
    1.67 -      break;
    1.68 -    case T_DOUBLE:
    1.69 -      assert(sig_bt[i+1] == T_VOID, "");
    1.70 -      break;
    1.71 -    case T_VOID:
    1.72 -      break;
    1.73 -    default:
    1.74 -      ShouldNotReachHere();
    1.75 -    }
    1.76 -  }
    1.77 -
    1.78 -  // This is where the longs/doubles start on the stack.
    1.79 -  stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
    1.80 -
    1.81 -  int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
    1.82 -
    1.83 -  // int stk_reg = frame::register_save_words*(wordSize>>2);
    1.84 -  // int stk_reg = SharedRuntime::out_preserve_stack_slots();
    1.85 -  int stk_reg = 0;
    1.86 +
    1.87    int int_reg = 0;
    1.88    int flt_reg = 0;
    1.89 -
    1.90 -  // Now do the signature layout
    1.91 +  int slot = 0;
    1.92 +
    1.93    for (int i = 0; i < total_args_passed; i++) {
    1.94      switch (sig_bt[i]) {
    1.95      case T_INT:
    1.96 @@ -461,11 +398,14 @@
    1.97          Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
    1.98          regs[i].set1(r->as_VMReg());
    1.99        } else {
   1.100 -        regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
   1.101 +        regs[i].set1(VMRegImpl::stack2reg(slot++));
   1.102        }
   1.103        break;
   1.104  
   1.105  #ifdef _LP64
   1.106 +    case T_LONG:
   1.107 +      assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
   1.108 +      // fall-through
   1.109      case T_OBJECT:
   1.110      case T_ARRAY:
   1.111      case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
   1.112 @@ -473,78 +413,57 @@
   1.113          Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   1.114          regs[i].set2(r->as_VMReg());
   1.115        } else {
   1.116 -        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   1.117 -        stk_reg_pairs += 2;
   1.118 +        slot = round_to(slot, 2);  // align
   1.119 +        regs[i].set2(VMRegImpl::stack2reg(slot));
   1.120 +        slot += 2;
   1.121        }
   1.122        break;
   1.123 -#endif // _LP64
   1.124 -
   1.125 +#else
   1.126      case T_LONG:
   1.127        assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
   1.128 -#ifdef _LP64
   1.129 -        if (int_reg < int_reg_max) {
   1.130 -          Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
   1.131 -          regs[i].set2(r->as_VMReg());
   1.132 -        } else {
   1.133 -          regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   1.134 -          stk_reg_pairs += 2;
   1.135 -        }
   1.136 -#else
   1.137 -#ifdef COMPILER2
   1.138 -        // For 32-bit build, can't pass longs in O-regs because they become
   1.139 -        // I-regs and get trashed.  Use G-regs instead.  G1 and G4 are almost
   1.140 -        // spare and available.  This convention isn't used by the Sparc ABI or
   1.141 -        // anywhere else. If we're tiered then we don't use G-regs because c1
   1.142 -        // can't deal with them as a "pair". (Tiered makes this code think g's are filled)
   1.143 -        // G0: zero
   1.144 -        // G1: 1st Long arg
   1.145 -        // G2: global allocated to TLS
   1.146 -        // G3: used in inline cache check
   1.147 -        // G4: 2nd Long arg
   1.148 -        // G5: used in inline cache check
   1.149 -        // G6: used by OS
   1.150 -        // G7: used by OS
   1.151 -
   1.152 -        if (g_reg == G1) {
   1.153 -          regs[i].set2(G1->as_VMReg()); // This long arg in G1
   1.154 -          g_reg = G4;                  // Where the next arg goes
   1.155 -        } else if (g_reg == G4) {
   1.156 -          regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
   1.157 -          g_reg = noreg;               // No more longs in registers
   1.158 -        } else {
   1.159 -          regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   1.160 -          stk_reg_pairs += 2;
   1.161 -        }
   1.162 -#else // COMPILER2
   1.163 -          regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   1.164 -          stk_reg_pairs += 2;
   1.165 -#endif // COMPILER2
   1.166 -#endif // _LP64
    1.167 +      // On 32-bit SPARC, always put longs on the stack to keep the pressure
    1.168 +      // off the integer argument registers; those should be used for oops.
   1.169 +      slot = round_to(slot, 2);  // align
   1.170 +      regs[i].set2(VMRegImpl::stack2reg(slot));
   1.171 +      slot += 2;
   1.172 +#endif
   1.173        break;
   1.174  
   1.175      case T_FLOAT:
   1.176 -      if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
   1.177 -      else                       regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
   1.178 +      if (flt_reg < flt_reg_max) {
   1.179 +        FloatRegister r = as_FloatRegister(flt_reg++);
   1.180 +        regs[i].set1(r->as_VMReg());
   1.181 +      } else {
   1.182 +        regs[i].set1(VMRegImpl::stack2reg(slot++));
   1.183 +      }
   1.184        break;
   1.185 +
   1.186      case T_DOUBLE:
   1.187        assert(sig_bt[i+1] == T_VOID, "expecting half");
   1.188 -      if (flt_reg_pairs + 1 < flt_reg_max) {
   1.189 -        regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
   1.190 -        flt_reg_pairs += 2;
   1.191 +      if (round_to(flt_reg, 2) + 1 < flt_reg_max) {
   1.192 +        flt_reg = round_to(flt_reg, 2);  // align
   1.193 +        FloatRegister r = as_FloatRegister(flt_reg);
   1.194 +        regs[i].set2(r->as_VMReg());
   1.195 +        flt_reg += 2;
   1.196        } else {
   1.197 -        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
   1.198 -        stk_reg_pairs += 2;
   1.199 +        slot = round_to(slot, 2);  // align
   1.200 +        regs[i].set2(VMRegImpl::stack2reg(slot));
   1.201 +        slot += 2;
   1.202        }
   1.203        break;
   1.204 -    case T_VOID: regs[i].set_bad();  break; // Halves of longs & doubles
   1.205 +
   1.206 +    case T_VOID:
   1.207 +      regs[i].set_bad();   // Halves of longs & doubles
   1.208 +      break;
   1.209 +
   1.210      default:
   1.211 -      ShouldNotReachHere();
   1.212 +      fatal(err_msg_res("unknown basic type %d", sig_bt[i]));
   1.213 +      break;
   1.214      }
   1.215    }
   1.216  
    1.217    // return the amount of stack space these arguments will need.
   1.218 -  return stk_reg_pairs;
   1.219 -
   1.220 +  return slot;
   1.221  }
   1.222  
   1.223  // Helper class mostly to avoid passing masm everywhere, and handle
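A note for readers tracking the rewritten convention: round_to(x, 2) is the same alignment idiom as the deleted open-coded (stk_reg_pairs+1) & ~1. The following standalone sketch (hypothetical and simplified — the real code operates on BasicType and VMRegPair) mimics how the new single loop hands out registers and aligned stack slots:

  #include <cstdio>

  static int round_to(int x, int align) { return (x + align - 1) & ~(align - 1); }

  int main() {
    const int int_reg_max = 6, flt_reg_max = 8;   // I0-I5 / O0-O5 and F0-F7
    int int_reg = 0, flt_reg = 0, slot = 0;
    const char* sig[] = { "object", "long", "double", "float" };  // sample 64-bit signature
    for (const char* t : sig) {
      if (t[0] == 'o' || t[0] == 'l') {           // oops and longs compete for int regs (LP64)
        if (int_reg < int_reg_max) { std::printf("%-6s -> int reg %d\n", t, int_reg++); }
        else { slot = round_to(slot, 2); std::printf("%-6s -> slots %d-%d\n", t, slot, slot+1); slot += 2; }
      } else if (t[0] == 'd') {                   // doubles need an aligned float pair
        if (round_to(flt_reg, 2) + 1 < flt_reg_max) {
          flt_reg = round_to(flt_reg, 2);
          std::printf("%-6s -> F%d:F%d\n", t, flt_reg, flt_reg + 1);
          flt_reg += 2;
        } else { slot = round_to(slot, 2); std::printf("%-6s -> slots %d-%d\n", t, slot, slot+1); slot += 2; }
      } else {                                    // floats take single float regs
        if (flt_reg < flt_reg_max) { std::printf("%-6s -> F%d\n", t, flt_reg++); }
        else { std::printf("%-6s -> slot %d\n", t, slot++); }
      }
    }
    return 0;  // in the real routine, 'slot' is the stack space returned to the caller
  }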
   1.224 @@ -601,8 +520,7 @@
   1.225    Label L;
   1.226    __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
   1.227    __ br_null(G3_scratch, false, Assembler::pt, L);
   1.228 -  // Schedule the branch target address early.
   1.229 -  __ delayed()->ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);
   1.230 +  __ delayed()->nop();
   1.231    // Call into the VM to patch the caller, then jump to compiled callee
   1.232    __ save_frame(4);     // Args in compiled layout; do not blow them
   1.233  
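For context on the delayed()->nop() above: SPARC branches have a delay slot, one instruction after the branch that executes before the transfer completes. The old code scheduled an early load of the interpreter entry point into that slot; the reworked adapters load entry points explicitly later, so the slot is left deliberately empty. The pattern as it appears in this file:

  __ br_null(G3_scratch, false, Assembler::pt, L);  // branch if G3_scratch is null
  __ delayed()->nop();                              // delay slot: intentionally no work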
   1.234 @@ -645,7 +563,6 @@
   1.235    __ ldx(FP, -8 + STACK_BIAS, G1);
   1.236    __ ldx(FP, -16 + STACK_BIAS, G4);
   1.237    __ mov(L5, G5_method);
   1.238 -  __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);
   1.239  #endif /* _LP64 */
   1.240  
   1.241    __ restore();      // Restore args
   1.242 @@ -726,7 +643,7 @@
   1.243                              int comp_args_on_stack, // VMRegStackSlots
   1.244                              const BasicType *sig_bt,
   1.245                              const VMRegPair *regs,
   1.246 -                            Label& skip_fixup) {
   1.247 +                            Label& L_skip_fixup) {
   1.248  
   1.249    // Before we get into the guts of the C2I adapter, see if we should be here
   1.250    // at all.  We've come from compiled code and are attempting to jump to the
   1.251 @@ -747,7 +664,7 @@
   1.252  
   1.253    patch_callers_callsite();
   1.254  
   1.255 -  __ bind(skip_fixup);
   1.256 +  __ bind(L_skip_fixup);
   1.257  
   1.258    // Since all args are passed on the stack, total_args_passed*wordSize is the
   1.259    // space we need.  Add in varargs area needed by the interpreter. Round up
   1.260 @@ -757,46 +674,18 @@
   1.261                   (frame::varargs_offset - frame::register_save_words)*wordSize;
   1.262    const int extraspace = round_to(arg_size + varargs_area, 2*wordSize);
   1.263  
   1.264 -  int bias = STACK_BIAS;
   1.265 +  const int bias = STACK_BIAS;
   1.266    const int interp_arg_offset = frame::varargs_offset*wordSize +
   1.267                          (total_args_passed-1)*Interpreter::stackElementSize;
   1.268  
   1.269 -  Register base = SP;
   1.270 -
   1.271 -#ifdef _LP64
   1.272 -  // In the 64bit build because of wider slots and STACKBIAS we can run
   1.273 -  // out of bits in the displacement to do loads and stores.  Use g3 as
   1.274 -  // temporary displacement.
   1.275 -  if (!Assembler::is_simm13(extraspace)) {
   1.276 -    __ set(extraspace, G3_scratch);
   1.277 -    __ sub(SP, G3_scratch, SP);
   1.278 -  } else {
   1.279 -    __ sub(SP, extraspace, SP);
   1.280 -  }
   1.281 +  const Register base = SP;
   1.282 +
   1.283 +  // Make some extra space on the stack.
   1.284 +  __ sub(SP, __ ensure_simm13_or_reg(extraspace, G3_scratch), SP);
   1.285    set_Rdisp(G3_scratch);
   1.286 -#else
   1.287 -  __ sub(SP, extraspace, SP);
   1.288 -#endif // _LP64
   1.289 -
   1.290 -  // First write G1 (if used) to where ever it must go
   1.291 -  for (int i=0; i<total_args_passed; i++) {
   1.292 -    const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
   1.293 -    VMReg r_1 = regs[i].first();
   1.294 -    VMReg r_2 = regs[i].second();
   1.295 -    if (r_1 == G1_scratch->as_VMReg()) {
   1.296 -      if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
   1.297 -        store_c2i_object(G1_scratch, base, st_off);
   1.298 -      } else if (sig_bt[i] == T_LONG) {
   1.299 -        assert(!TieredCompilation, "should not use register args for longs");
   1.300 -        store_c2i_long(G1_scratch, base, st_off, false);
   1.301 -      } else {
   1.302 -        store_c2i_int(G1_scratch, base, st_off);
   1.303 -      }
   1.304 -    }
   1.305 -  }
   1.306 -
   1.307 -  // Now write the args into the outgoing interpreter space
   1.308 -  for (int i=0; i<total_args_passed; i++) {
   1.309 +
   1.310 +  // Write the args into the outgoing interpreter space.
   1.311 +  for (int i = 0; i < total_args_passed; i++) {
   1.312      const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
   1.313      VMReg r_1 = regs[i].first();
   1.314      VMReg r_2 = regs[i].second();
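ensure_simm13_or_reg, used above for extraspace and ld_off, deals with SPARC's 13-bit signed immediate displacement on loads and stores: only offsets in [-4096, 4095] fit in the instruction encoding. A minimal sketch of the idea (the real helper returns a RegisterOrConstant and emits a set() into the scratch register only when needed):

  static bool is_simm13(intptr_t value) {
    return -4096 <= value && value <= 4095;  // fits the 13-bit signed field
  }
  // If is_simm13(offset), use it directly as an immediate displacement;
  // otherwise materialize it in the scratch register and use the
  // register-plus-register addressing form.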
   1.315 @@ -804,23 +693,9 @@
   1.316        assert(!r_2->is_valid(), "");
   1.317        continue;
   1.318      }
   1.319 -    // Skip G1 if found as we did it first in order to free it up
   1.320 -    if (r_1 == G1_scratch->as_VMReg()) {
   1.321 -      continue;
   1.322 -    }
   1.323 -#ifdef ASSERT
   1.324 -    bool G1_forced = false;
   1.325 -#endif // ASSERT
   1.326      if (r_1->is_stack()) {        // Pretend stack targets are loaded into G1
   1.327 -#ifdef _LP64
   1.328 -      Register ld_off = Rdisp;
   1.329 -      __ set(reg2offset(r_1) + extraspace + bias, ld_off);
   1.330 -#else
   1.331 -      int ld_off = reg2offset(r_1) + extraspace + bias;
   1.332 -#endif // _LP64
   1.333 -#ifdef ASSERT
   1.334 -      G1_forced = true;
   1.335 -#endif // ASSERT
   1.336 +      RegisterOrConstant ld_off = reg2offset(r_1) + extraspace + bias;
   1.337 +      ld_off = __ ensure_simm13_or_reg(ld_off, Rdisp);
   1.338        r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle
   1.339        if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch);
   1.340        else                  __ ldx(base, ld_off, G1_scratch);
   1.341 @@ -831,11 +706,6 @@
   1.342        if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
   1.343          store_c2i_object(r, base, st_off);
   1.344        } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
   1.345 -#ifndef _LP64
   1.346 -        if (TieredCompilation) {
   1.347 -          assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs");
   1.348 -        }
   1.349 -#endif // _LP64
   1.350          store_c2i_long(r, base, st_off, r_2->is_stack());
   1.351        } else {
   1.352          store_c2i_int(r, base, st_off);
   1.353 @@ -851,19 +721,12 @@
   1.354      }
   1.355    }
   1.356  
   1.357 -#ifdef _LP64
   1.358 -  // Need to reload G3_scratch, used for temporary displacements.
   1.359 +  // Load the interpreter entry point.
   1.360    __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);
   1.361  
   1.362    // Pass O5_savedSP as an argument to the interpreter.
   1.363    // The interpreter will restore SP to this value before returning.
   1.364 -  __ set(extraspace, G1);
   1.365 -  __ add(SP, G1, O5_savedSP);
   1.366 -#else
   1.367 -  // Pass O5_savedSP as an argument to the interpreter.
   1.368 -  // The interpreter will restore SP to this value before returning.
   1.369 -  __ add(SP, extraspace, O5_savedSP);
   1.370 -#endif // _LP64
   1.371 +  __ add(SP, __ ensure_simm13_or_reg(extraspace, G1), O5_savedSP);
   1.372  
   1.373    __ mov((frame::varargs_offset)*wordSize -
   1.374           1*Interpreter::stackElementSize+bias+BytesPerWord, G1);
   1.375 @@ -971,7 +834,6 @@
   1.376  
   1.377    // Outputs:
   1.378    // G2_thread      - TLS
   1.379 -  // G1, G4         - Outgoing long args in 32-bit build
   1.380    // O0-O5          - Outgoing args in compiled layout
   1.381    // O6             - Adjusted or restored SP
   1.382    // O7             - Valid return address
   1.383 @@ -1016,10 +878,10 @@
   1.384    // +--------------+ <--- start of outgoing args
   1.385    // |  pad, align  |   |
   1.386    // +--------------+   |
   1.387 -  // | ints, floats |   |---Outgoing stack args, packed low.
   1.388 -  // +--------------+   |   First few args in registers.
   1.389 -  // :   doubles    :   |
   1.390 -  // |   longs      |   |
   1.391 +  // | ints, longs, |   |
   1.392 +  // |    floats,   |   |---Outgoing stack args.
   1.393 +  // :    doubles   :   |   First few args in registers.
   1.394 +  // |              |   |
   1.395    // +--------------+ <--- SP' + 16*wordsize
   1.396    // |              |
   1.397    // :    window    :
   1.398 @@ -1033,7 +895,6 @@
   1.399    // Cut-out for having no stack args.  Since up to 6 args are passed
   1.400    // in registers, we will commonly have no stack args.
   1.401    if (comp_args_on_stack > 0) {
   1.402 -
   1.403      // Convert VMReg stack slots to words.
   1.404      int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    1.405      // Round up to minimum stack alignment, in wordSize

   1.406 @@ -1044,13 +905,9 @@
   1.407      __ sub(SP, (comp_words_on_stack)*wordSize, SP);
   1.408    }
   1.409  
   1.410 -  // Will jump to the compiled code just as if compiled code was doing it.
   1.411 -  // Pre-load the register-jump target early, to schedule it better.
   1.412 -  __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
   1.413 -
   1.414    // Now generate the shuffle code.  Pick up all register args and move the
   1.415    // rest through G1_scratch.
   1.416 -  for (int i=0; i<total_args_passed; i++) {
   1.417 +  for (int i = 0; i < total_args_passed; i++) {
   1.418      if (sig_bt[i] == T_VOID) {
   1.419        // Longs and doubles are passed in native word order, but misaligned
   1.420        // in the 32-bit build.
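As the T_VOID comment in the loop suggests, two-slot values occupy two entries in sig_bt: the value's type followed by T_VOID for the other half. An illustration (not from this file) for a method taking a long and a double:

  // Signature (JD)V as the adapters see it: four sig_bt entries.
  BasicType sig_bt[] = { T_LONG, T_VOID, T_DOUBLE, T_VOID };
  // The layout loop assigns a location at T_LONG/T_DOUBLE and calls
  // regs[i].set_bad() (no location) for each trailing T_VOID half.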
   1.421 @@ -1088,14 +945,13 @@
   1.422                next_arg_slot(ld_off) : arg_slot(ld_off);
   1.423          __ ldx(Gargs, slot, r);
   1.424  #else
   1.425 -        // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the
   1.426 -        // stack shuffle.  Load the first 2 longs into G1/G4 later.
   1.427 +        fatal("longs should be on stack");
   1.428  #endif
   1.429        }
   1.430      } else {
   1.431        assert(r_1->is_FloatRegister(), "");
   1.432        if (!r_2->is_valid()) {
   1.433 -        __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
   1.434 +        __ ldf(FloatRegisterImpl::S, Gargs,      arg_slot(ld_off), r_1->as_FloatRegister());
   1.435        } else {
   1.436  #ifdef _LP64
   1.437          // In V9, doubles are given 2 64-bit slots in the interpreter, but the
   1.438 @@ -1104,11 +960,11 @@
   1.439          // spare float register.
   1.440          RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
   1.441                next_arg_slot(ld_off) : arg_slot(ld_off);
   1.442 -        __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
   1.443 +        __ ldf(FloatRegisterImpl::D, Gargs,                  slot, r_1->as_FloatRegister());
   1.444  #else
   1.445          // Need to marshal 64-bit value from misaligned Lesp loads
   1.446          __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
   1.447 -        __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
   1.448 +        __ ldf(FloatRegisterImpl::S, Gargs,      arg_slot(ld_off), r_2->as_FloatRegister());
   1.449  #endif
   1.450        }
   1.451      }
   1.452 @@ -1124,76 +980,35 @@
   1.453        else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
   1.454      }
   1.455    }
   1.456 -  bool made_space = false;
   1.457 -#ifndef _LP64
   1.458 -  // May need to pick up a few long args in G1/G4
   1.459 -  bool g4_crushed = false;
   1.460 -  bool g3_crushed = false;
   1.461 -  for (int i=0; i<total_args_passed; i++) {
   1.462 -    if (regs[i].first()->is_Register() && regs[i].second()->is_valid()) {
   1.463 -      // Load in argument order going down
   1.464 -      int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
   1.465 -      // Need to marshal 64-bit value from misaligned Lesp loads
   1.466 -      Register r = regs[i].first()->as_Register()->after_restore();
   1.467 -      if (r == G1 || r == G4) {
   1.468 -        assert(!g4_crushed, "ordering problem");
   1.469 -        if (r == G4){
   1.470 -          g4_crushed = true;
   1.471 -          __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
   1.472 -          __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
   1.473 -        } else {
   1.474 -          // better schedule this way
   1.475 -          __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
   1.476 -          __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
   1.477 -        }
   1.478 -        g3_crushed = true;
   1.479 -        __ sllx(r, 32, r);
   1.480 -        __ or3(G3_scratch, r, r);
   1.481 -      } else {
   1.482 -        assert(r->is_out(), "longs passed in two O registers");
   1.483 -        __ ld  (Gargs, arg_slot(ld_off)     , r->successor()); // Load lo bits
   1.484 -        __ ld  (Gargs, next_arg_slot(ld_off), r);              // Load hi bits
   1.485 -      }
   1.486 -    }
   1.487 +
   1.488 +  // Jump to the compiled code just as if compiled code was doing it.
   1.489 +  __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
   1.490 +
    1.491 +  // 6243940 We might end up in handle_wrong_method if
    1.492 +  // the callee is deoptimized as we race through here. If that
    1.493 +  // happens we don't want to take a safepoint, because the
    1.494 +  // caller frame will look interpreted and the arguments are now
    1.495 +  // "compiled", so it is much better to make this transition
    1.496 +  // invisible to the stack walking code. Unfortunately, if
    1.497 +  // we try to find the callee by normal means, a safepoint
    1.498 +  // is possible. So we stash the desired callee in the thread
    1.499 +  // and the VM will find it there should this case occur.
   1.500 +  Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
   1.501 +  __ st_ptr(G5_method, callee_target_addr);
   1.502 +
   1.503 +  if (StressNonEntrant) {
   1.504 +    // Open a big window for deopt failure
   1.505 +    __ save_frame(0);
   1.506 +    __ mov(G0, L0);
   1.507 +    Label loop;
   1.508 +    __ bind(loop);
   1.509 +    __ sub(L0, 1, L0);
   1.510 +    __ br_null_short(L0, Assembler::pt, loop);
   1.511 +    __ restore();
   1.512    }
   1.513 -#endif
   1.514 -
   1.515 -  // Jump to the compiled code just as if compiled code was doing it.
   1.516 -  //
   1.517 -#ifndef _LP64
   1.518 -    if (g3_crushed) {
   1.519 -      // Rats load was wasted, at least it is in cache...
   1.520 -      __ ld_ptr(G5_method, Method::from_compiled_offset(), G3);
   1.521 -    }
   1.522 -#endif /* _LP64 */
   1.523 -
   1.524 -    // 6243940 We might end up in handle_wrong_method if
   1.525 -    // the callee is deoptimized as we race thru here. If that
   1.526 -    // happens we don't want to take a safepoint because the
   1.527 -    // caller frame will look interpreted and arguments are now
   1.528 -    // "compiled" so it is much better to make this transition
   1.529 -    // invisible to the stack walking code. Unfortunately if
   1.530 -    // we try and find the callee by normal means a safepoint
   1.531 -    // is possible. So we stash the desired callee in the thread
   1.532 -    // and the vm will find there should this case occur.
   1.533 -    Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
   1.534 -    __ st_ptr(G5_method, callee_target_addr);
   1.535 -
   1.536 -    if (StressNonEntrant) {
   1.537 -      // Open a big window for deopt failure
   1.538 -      __ save_frame(0);
   1.539 -      __ mov(G0, L0);
   1.540 -      Label loop;
   1.541 -      __ bind(loop);
   1.542 -      __ sub(L0, 1, L0);
   1.543 -      __ br_null_short(L0, Assembler::pt, loop);
   1.544 -
   1.545 -      __ restore();
   1.546 -    }
   1.547 -
   1.548 -
   1.549 -    __ jmpl(G3, 0, G0);
   1.550 -    __ delayed()->nop();
   1.551 +
   1.552 +  __ jmpl(G3, 0, G0);
   1.553 +  __ delayed()->nop();
   1.554  }
   1.555  
   1.556  // ---------------------------------------------------------------
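On the consuming side of the 6243940 stash above: the wrong-method handler in the VM is expected to look in JavaThread's callee_target before attempting a stack walk. Roughly (a paraphrase of the assumed recovery path, not the actual runtime source):

  Method* callee = thread->callee_target();
  if (callee != NULL) {
    thread->set_callee_target(NULL);  // the stash is one-shot
    // dispatch to 'callee' without walking the (inconsistent) caller frame
  }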
   1.557 @@ -1221,28 +1036,17 @@
    1.558    // compiled code, which relies solely on SP and not FP, get sick).
   1.559  
   1.560    address c2i_unverified_entry = __ pc();
   1.561 -  Label skip_fixup;
   1.562 +  Label L_skip_fixup;
   1.563    {
   1.564 -#if !defined(_LP64) && defined(COMPILER2)
   1.565 -    Register R_temp   = L0;   // another scratch register
   1.566 -#else
   1.567 -    Register R_temp   = G1;   // another scratch register
   1.568 -#endif
   1.569 +    Register R_temp = G1;  // another scratch register
   1.570  
   1.571      AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
   1.572  
   1.573      __ verify_oop(O0);
   1.574      __ load_klass(O0, G3_scratch);
   1.575  
   1.576 -#if !defined(_LP64) && defined(COMPILER2)
   1.577 -    __ save(SP, -frame::register_save_words*wordSize, SP);
   1.578      __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp);
   1.579      __ cmp(G3_scratch, R_temp);
   1.580 -    __ restore();
   1.581 -#else
   1.582 -    __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp);
   1.583 -    __ cmp(G3_scratch, R_temp);
   1.584 -#endif
   1.585  
   1.586      Label ok, ok2;
   1.587      __ brx(Assembler::equal, false, Assembler::pt, ok);
   1.588 @@ -1256,8 +1060,8 @@
   1.589      // the call site corrected.
   1.590      __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
   1.591      __ bind(ok2);
   1.592 -    __ br_null(G3_scratch, false, Assembler::pt, skip_fixup);
   1.593 -    __ delayed()->ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);
   1.594 +    __ br_null(G3_scratch, false, Assembler::pt, L_skip_fixup);
   1.595 +    __ delayed()->nop();
   1.596      __ jump_to(ic_miss, G3_scratch);
   1.597      __ delayed()->nop();
   1.598  
   1.599 @@ -1265,7 +1069,7 @@
   1.600  
   1.601    address c2i_entry = __ pc();
   1.602  
   1.603 -  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
   1.604 +  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup);
   1.605  
   1.606    __ flush();
   1.607    return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
   1.608 @@ -1985,12 +1789,12 @@
   1.609  }
   1.610  
   1.611  static void verify_oop_args(MacroAssembler* masm,
   1.612 -                            int total_args_passed,
   1.613 +                            methodHandle method,
   1.614                              const BasicType* sig_bt,
   1.615                              const VMRegPair* regs) {
   1.616    Register temp_reg = G5_method;  // not part of any compiled calling seq
   1.617    if (VerifyOops) {
   1.618 -    for (int i = 0; i < total_args_passed; i++) {
   1.619 +    for (int i = 0; i < method->size_of_parameters(); i++) {
   1.620        if (sig_bt[i] == T_OBJECT ||
   1.621            sig_bt[i] == T_ARRAY) {
   1.622          VMReg r = regs[i].first();
   1.623 @@ -2009,35 +1813,32 @@
   1.624  }
   1.625  
   1.626  static void gen_special_dispatch(MacroAssembler* masm,
   1.627 -                                 int total_args_passed,
   1.628 -                                 int comp_args_on_stack,
   1.629 -                                 vmIntrinsics::ID special_dispatch,
   1.630 +                                 methodHandle method,
   1.631                                   const BasicType* sig_bt,
   1.632                                   const VMRegPair* regs) {
   1.633 -  verify_oop_args(masm, total_args_passed, sig_bt, regs);
   1.634 +  verify_oop_args(masm, method, sig_bt, regs);
   1.635 +  vmIntrinsics::ID iid = method->intrinsic_id();
   1.636  
   1.637    // Now write the args into the outgoing interpreter space
   1.638    bool     has_receiver   = false;
   1.639    Register receiver_reg   = noreg;
   1.640    int      member_arg_pos = -1;
   1.641    Register member_reg     = noreg;
   1.642 -  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
   1.643 +  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
   1.644    if (ref_kind != 0) {
   1.645 -    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
   1.646 +    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
   1.647      member_reg = G5_method;  // known to be free at this point
   1.648      has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
   1.649 -  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
   1.650 +  } else if (iid == vmIntrinsics::_invokeBasic) {
   1.651      has_receiver = true;
   1.652    } else {
   1.653 -    fatal(err_msg("special_dispatch=%d", special_dispatch));
   1.654 +    fatal(err_msg_res("unexpected intrinsic id %d", iid));
   1.655    }
   1.656  
   1.657    if (member_reg != noreg) {
   1.658      // Load the member_arg into register, if necessary.
   1.659 -    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
   1.660 -    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
   1.661 +    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
   1.662      VMReg r = regs[member_arg_pos].first();
   1.663 -    assert(r->is_valid(), "bad member arg");
   1.664      if (r->is_stack()) {
   1.665        RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
   1.666        ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
   1.667 @@ -2050,7 +1851,7 @@
   1.668  
   1.669    if (has_receiver) {
   1.670      // Make sure the receiver is loaded into a register.
   1.671 -    assert(total_args_passed > 0, "oob");
   1.672 +    assert(method->size_of_parameters() > 0, "oob");
   1.673      assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
   1.674      VMReg r = regs[0].first();
   1.675      assert(r->is_valid(), "bad receiver arg");
   1.676 @@ -2058,7 +1859,7 @@
   1.677        // Porting note:  This assumes that compiled calling conventions always
   1.678        // pass the receiver oop in a register.  If this is not true on some
   1.679        // platform, pick a temp and load the receiver from stack.
   1.680 -      assert(false, "receiver always in a register");
   1.681 +      fatal("receiver always in a register");
   1.682        receiver_reg = G3_scratch;  // known to be free at this point
   1.683        RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
   1.684        ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
   1.685 @@ -2070,7 +1871,7 @@
   1.686    }
   1.687  
   1.688    // Figure out which address we are really jumping to:
   1.689 -  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
   1.690 +  MethodHandles::generate_method_handle_dispatch(masm, iid,
   1.691                                                   receiver_reg, member_reg, /*for_compiler_entry:*/ true);
   1.692  }
   1.693  
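The call to SharedRuntime::check_member_name_argument_is_last_argument above subsumes the open-coded asserts deleted earlier in this change; for reference, those asserts checked approximately:

  assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
  assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
  assert(r->is_valid(), "bad member arg");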
   1.694 @@ -2103,11 +1904,9 @@
   1.695  //    transition back to thread_in_Java
   1.696  //    return to caller
   1.697  //
   1.698 -nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   1.699 +nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   1.700                                                  methodHandle method,
   1.701                                                  int compile_id,
   1.702 -                                                int total_in_args,
   1.703 -                                                int comp_args_on_stack, // in VMRegStackSlots
   1.704                                                  BasicType* in_sig_bt,
   1.705                                                  VMRegPair* in_regs,
   1.706                                                  BasicType ret_type) {
   1.707 @@ -2116,9 +1915,7 @@
   1.708      intptr_t start = (intptr_t)__ pc();
   1.709      int vep_offset = ((intptr_t)__ pc()) - start;
   1.710      gen_special_dispatch(masm,
   1.711 -                         total_in_args,
   1.712 -                         comp_args_on_stack,
   1.713 -                         method->intrinsic_id(),
   1.714 +                         method,
   1.715                           in_sig_bt,
   1.716                           in_regs);
   1.717      int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
   1.718 @@ -2220,6 +2017,7 @@
   1.719    // we convert the java signature to a C signature by inserting
   1.720    // the hidden arguments as arg[0] and possibly arg[1] (static method)
   1.721  
   1.722 +  const int total_in_args = method->size_of_parameters();
   1.723    int total_c_args = total_in_args;
   1.724    int total_save_slots = 6 * VMRegImpl::slots_per_word;
   1.725    if (!is_critical_native) {
