src/cpu/x86/vm/macroAssembler_x86.cpp

changeset 6429:606acabe7b5c
parent    6356:4d4ea046d32a
child     6557:526acaf3626f
     1.1 --- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Sat Mar 22 00:26:48 2014 +0400
     1.2 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Mar 20 17:49:27 2014 -0700
     1.3 @@ -301,7 +301,9 @@
     1.4    mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
     1.5  }
     1.6  
     1.7 -void MacroAssembler::movptr(Register dst, AddressLiteral src) {
     1.8 +void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
     1.9 +  // The scratch register is not used;
    1.10 +  // it is defined only to match the parameters of the 64-bit version of this method.
    1.11    if (src.is_lval()) {
    1.12      mov_literal32(dst, (intptr_t)src.target(), src.rspec());
    1.13    } else {
    1.14 @@ -613,6 +615,15 @@
    1.15    /* else */      { subq(dst, value)       ; return; }
    1.16  }
    1.17  
    1.18 +void MacroAssembler::incrementq(AddressLiteral dst) {
    1.19 +  if (reachable(dst)) {
    1.20 +    incrementq(as_Address(dst));
    1.21 +  } else {
    1.22 +    lea(rscratch1, dst);
    1.23 +    incrementq(Address(rscratch1, 0));
    1.24 +  }
    1.25 +}
    1.26 +
    1.27  void MacroAssembler::incrementq(Register reg, int value) {
    1.28    if (value == min_jint) { addq(reg, value); return; }
    1.29    if (value <  0) { decrementq(reg, -value); return; }
    1.30 @@ -681,15 +692,15 @@
    1.31    movq(dst, rscratch1);
    1.32  }
    1.33  
    1.34 -void MacroAssembler::movptr(Register dst, AddressLiteral src) {
    1.35 +void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
    1.36    if (src.is_lval()) {
    1.37      mov_literal64(dst, (intptr_t)src.target(), src.rspec());
    1.38    } else {
    1.39      if (reachable(src)) {
    1.40        movq(dst, as_Address(src));
    1.41      } else {
    1.42 -      lea(rscratch1, src);
    1.43 -      movq(dst, Address(rscratch1,0));
    1.44 +      lea(scratch, src);
    1.45 +      movq(dst, Address(scratch, 0));
    1.46      }
    1.47    }
    1.48  }
    1.49 @@ -988,20 +999,37 @@
    1.50    LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
    1.51  }
    1.52  
    1.53 -void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
    1.54 -  pushf();
    1.55 +void MacroAssembler::atomic_incl(Address counter_addr) {
    1.56 +  if (os::is_MP())
    1.57 +    lock();
    1.58 +  incrementl(counter_addr);
    1.59 +}
    1.60 +
    1.61 +void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
    1.62    if (reachable(counter_addr)) {
    1.63 -    if (os::is_MP())
    1.64 -      lock();
    1.65 -    incrementl(as_Address(counter_addr));
    1.66 +    atomic_incl(as_Address(counter_addr));
    1.67    } else {
    1.68 -    lea(rscratch1, counter_addr);
    1.69 -    if (os::is_MP())
    1.70 -      lock();
    1.71 -    incrementl(Address(rscratch1, 0));
    1.72 -  }
    1.73 -  popf();
    1.74 -}
    1.75 +    lea(scr, counter_addr);
    1.76 +    atomic_incl(Address(scr, 0));
    1.77 +  }
    1.78 +}
    1.79 +
    1.80 +#ifdef _LP64
    1.81 +void MacroAssembler::atomic_incq(Address counter_addr) {
    1.82 +  if (os::is_MP())
    1.83 +    lock();
    1.84 +  incrementq(counter_addr);
    1.85 +}
    1.86 +
    1.87 +void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
    1.88 +  if (reachable(counter_addr)) {
    1.89 +    atomic_incq(as_Address(counter_addr));
    1.90 +  } else {
    1.91 +    lea(scr, counter_addr);
    1.92 +    atomic_incq(Address(scr, 0));
    1.93 +  }
    1.94 +}
    1.95 +#endif
    1.96  
    1.97  // Writes to stack successive pages until offset reached to check for
    1.98  // stack overflow + shadow pages.  This clobbers tmp.
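The new atomic_incl/atomic_incq helpers separate the address-literal handling from the locked increment itself, and drop the old pushf/popf pair (flag preservation moves into cond_inc32; see the final hunk). What the emitted lock-prefixed increment provides is an ordinary atomic read-modify-write; a minimal C++ sketch of the same semantics, independent of any VM types:

    #include <atomic>

    std::atomic<long> counter{0};

    void atomic_inc() {
      // On x86 this compiles to a single lock-prefixed RMW instruction;
      // the LOCK prefix is what the os::is_MP() guard above emits.
      counter.fetch_add(1, std::memory_order_seq_cst);
    }
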
    1.99 @@ -1274,6 +1302,325 @@
   1.100  }
   1.101  
   1.102  #ifdef COMPILER2
   1.103 +
   1.104 +#if INCLUDE_RTM_OPT
   1.105 +
   1.106 +// Update rtm_counters based on abort status
   1.107 +// input: abort_status
   1.108 +//        rtm_counters (RTMLockingCounters*)
   1.109 +// flags are killed
   1.110 +void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
   1.111 +
   1.112 +  atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
   1.113 +  if (PrintPreciseRTMLockingStatistics) {
   1.114 +    for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
   1.115 +      Label check_abort;
   1.116 +      testl(abort_status, (1<<i));
   1.117 +      jccb(Assembler::equal, check_abort);
   1.118 +      atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
   1.119 +      bind(check_abort);
   1.120 +    }
   1.121 +  }
   1.122 +}
   1.123 +
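After an RTM transaction aborts, EAX holds one bit per abort cause, and the loop above increments a per-cause counter for every set bit. A hedged C++ restatement of the same logic; the struct layout and the six-cause limit are assumptions for illustration, not the exact RTMLockingCounters definition:

    #include <atomic>
    #include <cstdint>

    struct RTMCountersSketch {                      // illustrative layout only
      std::atomic<uintptr_t> abort_count{0};
      std::atomic<uintptr_t> abortX_count[6] = {};  // one slot per abort cause
    };

    void counters_update(uint32_t abort_status, RTMCountersSketch* c) {
      c->abort_count.fetch_add(1);                  // the atomic_incptr above
      for (int i = 0; i < 6; i++) {
        if (abort_status & (1u << i)) {             // the testl + jccb skip above
          c->abortX_count[i].fetch_add(1);
        }
      }
    }
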
   1.124 +// Branch if (random & (count-1) != 0), count is 2^n
   1.125 +// tmp, scr and flags are killed
   1.126 +void MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) {
   1.127 +  assert(tmp == rax, "");
   1.128 +  assert(scr == rdx, "");
   1.129 +  rdtsc(); // modifies EDX:EAX
   1.130 +  andptr(tmp, count-1);
   1.131 +  jccb(Assembler::notZero, brLabel);
   1.132 +}
   1.133 +
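The "random" source here is just the low bits of the time-stamp counter: with count a power of two, tsc & (count-1) is zero roughly once per count calls, so the expensive counter update is sampled rather than taken every time. The same predicate as plain C++, assuming the compiler's __rdtsc intrinsic:

    #include <cstdint>
    #include <x86intrin.h>                 // __rdtsc

    bool skip_increment(uint32_t count) {  // count must be a power of two
      uint64_t tsc = __rdtsc();            // the rdtsc (EDX:EAX) above
      return (tsc & (count - 1)) != 0;     // true ~ (count-1)/count of the time
    }
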
   1.134 +// Perform abort ratio calculation, set no_rtm bit if high ratio
   1.135 +// input:  rtm_counters_Reg (RTMLockingCounters* address)
   1.136 +// tmpReg, rtm_counters_Reg and flags are killed
   1.137 +void MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
   1.138 +                                                 Register rtm_counters_Reg,
   1.139 +                                                 RTMLockingCounters* rtm_counters,
   1.140 +                                                 Metadata* method_data) {
   1.141 +  Label L_done, L_check_always_rtm1, L_check_always_rtm2;
   1.142 +
   1.143 +  if (RTMLockingCalculationDelay > 0) {
   1.144 +    // Delay calculation
   1.145 +    movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
   1.146 +    testptr(tmpReg, tmpReg);
   1.147 +    jccb(Assembler::equal, L_done);
   1.148 +  }
   1.149 +  // Abort ratio calculation only if abort_count > RTMAbortThreshold
   1.150 +  //   Aborted transactions = abort_count * 100
   1.151 +  //   All transactions = total_count *  RTMTotalCountIncrRate
   1.152 +  //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
   1.153 +
   1.154 +  movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
   1.155 +  cmpptr(tmpReg, RTMAbortThreshold);
   1.156 +  jccb(Assembler::below, L_check_always_rtm2);
   1.157 +  imulptr(tmpReg, tmpReg, 100);
   1.158 +
   1.159 +  Register scrReg = rtm_counters_Reg;
   1.160 +  movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
   1.161 +  imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
   1.162 +  imulptr(scrReg, scrReg, RTMAbortRatio);
   1.163 +  cmpptr(tmpReg, scrReg);
   1.164 +  jccb(Assembler::below, L_check_always_rtm1);
   1.165 +  if (method_data != NULL) {
   1.166 +    // set rtm_state to "no rtm" in MDO
   1.167 +    mov_metadata(tmpReg, method_data);
   1.168 +    if (os::is_MP()) {
   1.169 +      lock();
   1.170 +    }
   1.171 +    orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
   1.172 +  }
   1.173 +  jmpb(L_done);
   1.174 +  bind(L_check_always_rtm1);
   1.175 +  // Reload RTMLockingCounters* address
   1.176 +  lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
   1.177 +  bind(L_check_always_rtm2);
   1.178 +  movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
   1.179 +  cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
   1.180 +  jccb(Assembler::below, L_done);
   1.181 +  if (method_data != NULL) {
   1.182 +    // set rtm_state to "always rtm" in MDO
   1.183 +    mov_metadata(tmpReg, method_data);
   1.184 +    if (os::is_MP()) {
   1.185 +      lock();
   1.186 +    }
   1.187 +    orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
   1.188 +  }
   1.189 +  bind(L_done);
   1.190 +}
   1.191 +
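The scaling is easy to misread: total_count is only bumped once per RTMTotalCountIncrRate transactions, so the number of transactions is approximately total_count * RTMTotalCountIncrRate, and comparing abort_count * 100 against that product times RTMAbortRatio expresses "abort percentage >= RTMAbortRatio" without a division. A plain-C++ restatement of the no-RTM decision; the flag values are illustrative stand-ins for the -XX options:

    #include <cstdint>

    const intptr_t RTMAbortThreshold     = 1000;  // illustrative values only
    const intptr_t RTMTotalCountIncrRate = 64;    // total_count ticks once per 64 txns
    const intptr_t RTMAbortRatio         = 50;    // percent

    bool should_set_no_rtm(intptr_t abort_count, intptr_t total_count) {
      if (abort_count < RTMAbortThreshold) return false;  // too few samples yet
      // aborted*100 >= total*rate*ratio  <=>  aborted/(total*rate) >= ratio/100
      return abort_count * 100 >= total_count * RTMTotalCountIncrRate * RTMAbortRatio;
    }
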
   1.192 +// Update counters and perform abort ratio calculation
   1.193 +// input:  abort_status_Reg
   1.194 +// rtm_counters_Reg, flags are killed
   1.195 +void MacroAssembler::rtm_profiling(Register abort_status_Reg,
   1.196 +                                   Register rtm_counters_Reg,
   1.197 +                                   RTMLockingCounters* rtm_counters,
   1.198 +                                   Metadata* method_data,
   1.199 +                                   bool profile_rtm) {
   1.200 +
   1.201 +  assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
   1.202 +  // update rtm counters based on rax value at abort
   1.203 +  // reads abort_status_Reg, updates flags
   1.204 +  lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
   1.205 +  rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
   1.206 +  if (profile_rtm) {
   1.207 +    // Save abort status because abort_status_Reg is used by following code.
   1.208 +    if (RTMRetryCount > 0) {
   1.209 +      push(abort_status_Reg);
   1.210 +    }
   1.211 +    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
   1.212 +    rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data);
   1.213 +    // restore abort status
   1.214 +    if (RTMRetryCount > 0) {
   1.215 +      pop(abort_status_Reg);
   1.216 +    }
   1.217 +  }
   1.218 +}
   1.219 +
   1.220 +// Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4)
   1.221 +// inputs: retry_count_Reg
   1.222 +//       : abort_status_Reg
   1.223 +// output: retry_count_Reg decremented by 1
   1.224 +// flags are killed
   1.225 +void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) {
   1.226 +  Label doneRetry;
   1.227 +  assert(abort_status_Reg == rax, "");
   1.228 +  // The abort reason bits are in eax (see all states in rtmLocking.hpp)
   1.229 +  // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4)
   1.230 +  // if reason is in 0x6 and retry count != 0 then retry
   1.231 +  andptr(abort_status_Reg, 0x6);
   1.232 +  jccb(Assembler::zero, doneRetry);
   1.233 +  testl(retry_count_Reg, retry_count_Reg);
   1.234 +  jccb(Assembler::zero, doneRetry);
   1.235 +  pause();
   1.236 +  decrementl(retry_count_Reg);
   1.237 +  jmp(retryLabel);
   1.238 +  bind(doneRetry);
   1.239 +}
   1.240 +
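Only two abort causes are worth retrying: 0x2 (the processor indicates the transaction may succeed on retry) and 0x4 (a memory conflict, which may clear). The remaining status bits (explicit xabort, capacity, debug, nested) go straight to the slow path. The same predicate in C++, with _mm_pause standing in for the pause instruction:

    #include <cstdint>
    #include <immintrin.h>                 // _mm_pause

    bool retry_on_abort(uint32_t abort_status, int& retry_count) {
      if ((abort_status & 0x6) == 0) return false;  // permanent abort cause
      if (retry_count == 0)          return false;  // retries exhausted
      _mm_pause();                   // be polite to the sibling hyperthread
      --retry_count;
      return true;                   // jump back to retryLabel
    }
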
    1.241 +// Spin and retry if the lock is busy.
   1.242 +// inputs: box_Reg (monitor address)
   1.243 +//       : retry_count_Reg
   1.244 +// output: retry_count_Reg decremented by 1
   1.245 +//       : clear z flag if retry count exceeded
   1.246 +// tmp_Reg, scr_Reg, flags are killed
   1.247 +void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
   1.248 +                                            Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
   1.249 +  Label SpinLoop, SpinExit, doneRetry;
   1.250 +  // Clean monitor_value bit to get valid pointer
   1.251 +  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
   1.252 +
   1.253 +  testl(retry_count_Reg, retry_count_Reg);
   1.254 +  jccb(Assembler::zero, doneRetry);
   1.255 +  decrementl(retry_count_Reg);
   1.256 +  movptr(scr_Reg, RTMSpinLoopCount);
   1.257 +
   1.258 +  bind(SpinLoop);
   1.259 +  pause();
   1.260 +  decrementl(scr_Reg);
   1.261 +  jccb(Assembler::lessEqual, SpinExit);
   1.262 +  movptr(tmp_Reg, Address(box_Reg, owner_offset));
   1.263 +  testptr(tmp_Reg, tmp_Reg);
   1.264 +  jccb(Assembler::notZero, SpinLoop);
   1.265 +
   1.266 +  bind(SpinExit);
   1.267 +  jmp(retryLabel);
   1.268 +  bind(doneRetry);
   1.269 +  incrementl(retry_count_Reg); // clear z flag
   1.270 +}
   1.271 +
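A sketch of the busy-wait above, assuming the owner field were a std::atomic: spin a bounded number of iterations waiting for the monitor's owner to clear, then retry the transaction whether or not it did; the trailing incrementl in the assembly exists only to clear ZF for the caller once retries run out:

    #include <atomic>
    #include <immintrin.h>

    const int RTMSpinLoopCount = 100;    // illustrative; set by an -XX flag

    // Returns true to retry the transaction, false when retries ran out.
    bool spin_then_retry(std::atomic<void*>& owner, int& retry_count) {
      if (retry_count == 0) return false;
      --retry_count;
      for (int spins = RTMSpinLoopCount; spins > 0; --spins) {
        _mm_pause();
        if (owner.load(std::memory_order_relaxed) == nullptr) break;
      }
      return true;                       // the jmp retryLabel above
    }
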
   1.272 +// Use RTM for normal stack locks
   1.273 +// Input: objReg (object to lock)
   1.274 +void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
   1.275 +                                       Register retry_on_abort_count_Reg,
   1.276 +                                       RTMLockingCounters* stack_rtm_counters,
   1.277 +                                       Metadata* method_data, bool profile_rtm,
   1.278 +                                       Label& DONE_LABEL, Label& IsInflated) {
   1.279 +  assert(UseRTMForStackLocks, "why call this otherwise?");
   1.280 +  assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
   1.281 +  assert(tmpReg == rax, "");
   1.282 +  assert(scrReg == rdx, "");
   1.283 +  Label L_rtm_retry, L_decrement_retry, L_on_abort;
   1.284 +
   1.285 +  if (RTMRetryCount > 0) {
   1.286 +    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
   1.287 +    bind(L_rtm_retry);
   1.288 +  }
   1.289 +  if (!UseRTMXendForLockBusy) {
   1.290 +    movptr(tmpReg, Address(objReg, 0));
   1.291 +    testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
   1.292 +    jcc(Assembler::notZero, IsInflated);
   1.293 +  }
   1.294 +  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
   1.295 +    Label L_noincrement;
   1.296 +    if (RTMTotalCountIncrRate > 1) {
   1.297 +      // tmpReg, scrReg and flags are killed
   1.298 +      branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
   1.299 +    }
   1.300 +    assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
   1.301 +    atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
   1.302 +    bind(L_noincrement);
   1.303 +  }
   1.304 +  xbegin(L_on_abort);
   1.305 +  movptr(tmpReg, Address(objReg, 0));       // fetch markword
   1.306 +  andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
   1.307 +  cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
   1.308 +  jcc(Assembler::equal, DONE_LABEL);        // all done if unlocked
   1.309 +
   1.310 +  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
   1.311 +  if (UseRTMXendForLockBusy) {
   1.312 +    xend();
   1.313 +    movptr(tmpReg, Address(objReg, 0));
   1.314 +    testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
   1.315 +    jcc(Assembler::notZero, IsInflated);
   1.316 +    movptr(abort_status_Reg, 0x1);                // Set the abort status to 1 (as xabort does)
   1.317 +    jmp(L_decrement_retry);
   1.318 +  }
   1.319 +  else {
   1.320 +    xabort(0);
   1.321 +  }
   1.322 +  bind(L_on_abort);
   1.323 +  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
   1.324 +    rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm);
   1.325 +  }
   1.326 +  bind(L_decrement_retry);
   1.327 +  if (RTMRetryCount > 0) {
   1.328 +    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
   1.329 +    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
   1.330 +  }
   1.331 +}
   1.332 +
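The overall shape of the stack-lock elision, reduced to the RTM intrinsics (a sketch, compiled with -mrtm; the mask and value mirror the markOopDesc constants used above): start a transaction, pull the mark word into the read set, and write nothing if the object is unlocked, so any competing locker aborts us back to L_on_abort:

    #include <cstdint>
    #include <immintrin.h>               // _xbegin/_xend/_xabort, needs -mrtm

    const uintptr_t lock_mask      = 0x7;  // markOopDesc::biased_lock_mask_in_place
    const uintptr_t unlocked_value = 0x1;  // markOopDesc::unlocked_value

    // Returns true if the lock was elided; the matching _xend() is issued
    // by the unlock path. On abort, status carries the EAX reason bits.
    bool try_elide_stack_lock(volatile uintptr_t* mark_word, unsigned& status) {
      status = _xbegin();
      if (status == _XBEGIN_STARTED) {
        if ((*mark_word & lock_mask) == unlocked_value) {
          return true;                 // stay transactional until unlock
        }
        _xabort(0);                    // already locked: abort to the slow path
      }
      return false;                    // aborted; caller profiles and retries
    }
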
    1.333 +// Use RTM for inflated locks
   1.334 +// inputs: objReg (object to lock)
   1.335 +//         boxReg (on-stack box address (displaced header location) - KILLED)
   1.336 +//         tmpReg (ObjectMonitor address + 2(monitor_value))
   1.337 +void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
   1.338 +                                          Register scrReg, Register retry_on_busy_count_Reg,
   1.339 +                                          Register retry_on_abort_count_Reg,
   1.340 +                                          RTMLockingCounters* rtm_counters,
   1.341 +                                          Metadata* method_data, bool profile_rtm,
   1.342 +                                          Label& DONE_LABEL) {
   1.343 +  assert(UseRTMLocking, "why call this otherwise?");
   1.344 +  assert(tmpReg == rax, "");
   1.345 +  assert(scrReg == rdx, "");
   1.346 +  Label L_rtm_retry, L_decrement_retry, L_on_abort;
   1.347 +  // Clean monitor_value bit to get valid pointer
   1.348 +  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
   1.349 +
    1.350 +  // Without the cast to int32_t, movptr would destroy r10, which typically holds obj
   1.351 +  movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
   1.352 +  movptr(boxReg, tmpReg); // Save ObjectMonitor address
   1.353 +
   1.354 +  if (RTMRetryCount > 0) {
   1.355 +    movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
   1.356 +    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
   1.357 +    bind(L_rtm_retry);
   1.358 +  }
   1.359 +  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
   1.360 +    Label L_noincrement;
   1.361 +    if (RTMTotalCountIncrRate > 1) {
   1.362 +      // tmpReg, scrReg and flags are killed
   1.363 +      branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
   1.364 +    }
   1.365 +    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
   1.366 +    atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
   1.367 +    bind(L_noincrement);
   1.368 +  }
   1.369 +  xbegin(L_on_abort);
   1.370 +  movptr(tmpReg, Address(objReg, 0));
   1.371 +  movptr(tmpReg, Address(tmpReg, owner_offset));
   1.372 +  testptr(tmpReg, tmpReg);
   1.373 +  jcc(Assembler::zero, DONE_LABEL);
   1.374 +  if (UseRTMXendForLockBusy) {
   1.375 +    xend();
   1.376 +    jmp(L_decrement_retry);
   1.377 +  }
   1.378 +  else {
   1.379 +    xabort(0);
   1.380 +  }
   1.381 +  bind(L_on_abort);
   1.382 +  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
   1.383 +  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
   1.384 +    rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
   1.385 +  }
   1.386 +  if (RTMRetryCount > 0) {
   1.387 +    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
   1.388 +    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
   1.389 +  }
   1.390 +
   1.391 +  movptr(tmpReg, Address(boxReg, owner_offset)) ;
   1.392 +  testptr(tmpReg, tmpReg) ;
   1.393 +  jccb(Assembler::notZero, L_decrement_retry) ;
   1.394 +
   1.395 +  // Appears unlocked - try to swing _owner from null to non-null.
   1.396 +  // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
   1.397 +#ifdef _LP64
   1.398 +  Register threadReg = r15_thread;
   1.399 +#else
   1.400 +  get_thread(scrReg);
   1.401 +  Register threadReg = scrReg;
   1.402 +#endif
   1.403 +  if (os::is_MP()) {
   1.404 +    lock();
   1.405 +  }
   1.406 +  cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
   1.407 +
   1.408 +  if (RTMRetryCount > 0) {
    1.409 +    // success: done, else retry
   1.410 +    jccb(Assembler::equal, DONE_LABEL) ;
   1.411 +    bind(L_decrement_retry);
   1.412 +    // Spin and retry if lock is busy.
   1.413 +    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
   1.414 +  }
   1.415 +  else {
   1.416 +    bind(L_decrement_retry);
   1.417 +  }
   1.418 +}
   1.419 +
   1.420 +#endif //  INCLUDE_RTM_OPT
   1.421 +
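The tail of rtm_inflated_locking falls back to the classic monitor acquire: with tmpReg (rax) known to be zero, cmpxchgptr attempts to swing _owner from NULL to the current thread. The equivalent CAS sketched with std::atomic:

    #include <atomic>

    bool try_acquire_owner(std::atomic<void*>& owner, void* self_thread) {
      void* expected = nullptr;        // rax == 0 is the implicit comparand
      return owner.compare_exchange_strong(expected, self_thread,
                                           std::memory_order_seq_cst);
    }
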
   1.422  // Fast_Lock and Fast_Unlock used by C2
   1.423  
   1.424  // Because the transitions from emitted code to the runtime
   1.425 @@ -1350,17 +1697,26 @@
   1.426  // box: on-stack box address (displaced header location) - KILLED
   1.427  // rax,: tmp -- KILLED
   1.428  // scr: tmp -- KILLED
   1.429 -void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) {
   1.430 +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
   1.431 +                               Register scrReg, Register cx1Reg, Register cx2Reg,
   1.432 +                               BiasedLockingCounters* counters,
   1.433 +                               RTMLockingCounters* rtm_counters,
   1.434 +                               RTMLockingCounters* stack_rtm_counters,
   1.435 +                               Metadata* method_data,
   1.436 +                               bool use_rtm, bool profile_rtm) {
    1.437    // Ensure the register assignments are disjoint
   1.438 -  guarantee (objReg != boxReg, "");
   1.439 -  guarantee (objReg != tmpReg, "");
   1.440 -  guarantee (objReg != scrReg, "");
   1.441 -  guarantee (boxReg != tmpReg, "");
   1.442 -  guarantee (boxReg != scrReg, "");
   1.443 -  guarantee (tmpReg == rax, "");
   1.444 +  assert(tmpReg == rax, "");
   1.445 +
   1.446 +  if (use_rtm) {
   1.447 +    assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
   1.448 +  } else {
   1.449 +    assert(cx1Reg == noreg, "");
   1.450 +    assert(cx2Reg == noreg, "");
   1.451 +    assert_different_registers(objReg, boxReg, tmpReg, scrReg);
   1.452 +  }
   1.453  
   1.454    if (counters != NULL) {
   1.455 -    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
   1.456 +    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
   1.457    }
   1.458    if (EmitSync & 1) {
   1.459        // set box->dhw = unused_mark (3)
   1.460 @@ -1419,12 +1775,20 @@
   1.461        biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
   1.462      }
   1.463  
   1.464 +#if INCLUDE_RTM_OPT
   1.465 +    if (UseRTMForStackLocks && use_rtm) {
   1.466 +      rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
   1.467 +                        stack_rtm_counters, method_data, profile_rtm,
   1.468 +                        DONE_LABEL, IsInflated);
   1.469 +    }
   1.470 +#endif // INCLUDE_RTM_OPT
   1.471 +
   1.472      movptr(tmpReg, Address(objReg, 0));          // [FETCH]
   1.473 -    testl (tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
   1.474 -    jccb  (Assembler::notZero, IsInflated);
   1.475 +    testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
   1.476 +    jccb(Assembler::notZero, IsInflated);
   1.477  
   1.478      // Attempt stack-locking ...
   1.479 -    orptr (tmpReg, 0x1);
   1.480 +    orptr (tmpReg, markOopDesc::unlocked_value);
   1.481      movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
   1.482      if (os::is_MP()) {
   1.483        lock();
   1.484 @@ -1434,19 +1798,32 @@
   1.485        cond_inc32(Assembler::equal,
   1.486                   ExternalAddress((address)counters->fast_path_entry_count_addr()));
   1.487      }
   1.488 -    jccb(Assembler::equal, DONE_LABEL);
   1.489 -
   1.490 -    // Recursive locking
   1.491 +    jcc(Assembler::equal, DONE_LABEL);           // Success
   1.492 +
   1.493 +    // Recursive locking.
   1.494 +    // The object is stack-locked: markword contains stack pointer to BasicLock.
   1.495 +    // Locked by current thread if difference with current SP is less than one page.
   1.496      subptr(tmpReg, rsp);
    1.497 +    // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
   1.498      andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
   1.499      movptr(Address(boxReg, 0), tmpReg);
   1.500      if (counters != NULL) {
   1.501        cond_inc32(Assembler::equal,
   1.502                   ExternalAddress((address)counters->fast_path_entry_count_addr()));
   1.503      }
   1.504 -    jmpb(DONE_LABEL);
   1.505 +    jmp(DONE_LABEL);
   1.506  
   1.507      bind(IsInflated);
    1.508 +    // The object is inflated. tmpReg contains the ObjectMonitor address + 2 (monitor_value).
   1.509 +
   1.510 +#if INCLUDE_RTM_OPT
   1.511 +    // Use the same RTM locking code in 32- and 64-bit VM.
   1.512 +    if (use_rtm) {
   1.513 +      rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
   1.514 +                           rtm_counters, method_data, profile_rtm, DONE_LABEL);
   1.515 +    } else {
   1.516 +#endif // INCLUDE_RTM_OPT
   1.517 +
   1.518  #ifndef _LP64
   1.519      // The object is inflated.
   1.520      //
   1.521 @@ -1576,7 +1953,7 @@
   1.522      // Without cast to int32_t a movptr will destroy r10 which is typically obj
   1.523      movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
   1.524  
   1.525 -    mov    (boxReg, tmpReg);
   1.526 +    movptr (boxReg, tmpReg);
   1.527      movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
   1.528      testptr(tmpReg, tmpReg);
   1.529      jccb   (Assembler::notZero, DONE_LABEL);
   1.530 @@ -1587,9 +1964,11 @@
   1.531      }
   1.532      cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
   1.533      // Intentional fall-through into DONE_LABEL ...
   1.534 -
   1.535 +#endif // _LP64
   1.536 +
   1.537 +#if INCLUDE_RTM_OPT
   1.538 +    } // use_rtm()
   1.539  #endif
   1.540 -
   1.541      // DONE_LABEL is a hot target - we'd really like to place it at the
   1.542      // start of cache line by padding with NOPs.
   1.543      // See the AMD and Intel software optimization manuals for the
   1.544 @@ -1631,11 +2010,9 @@
   1.545  // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
   1.546  // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
   1.547  
   1.548 -void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
   1.549 -  guarantee (objReg != boxReg, "");
   1.550 -  guarantee (objReg != tmpReg, "");
   1.551 -  guarantee (boxReg != tmpReg, "");
   1.552 -  guarantee (boxReg == rax, "");
   1.553 +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
   1.554 +  assert(boxReg == rax, "");
   1.555 +  assert_different_registers(objReg, boxReg, tmpReg);
   1.556  
   1.557    if (EmitSync & 4) {
   1.558      // Disable - inhibit all inlining.  Force control through the slow-path
   1.559 @@ -1667,14 +2044,41 @@
   1.560         biased_locking_exit(objReg, tmpReg, DONE_LABEL);
   1.561      }
   1.562  
   1.563 +#if INCLUDE_RTM_OPT
   1.564 +    if (UseRTMForStackLocks && use_rtm) {
   1.565 +      assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
   1.566 +      Label L_regular_unlock;
   1.567 +      movptr(tmpReg, Address(objReg, 0));           // fetch markword
   1.568 +      andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
   1.569 +      cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
   1.570 +      jccb(Assembler::notEqual, L_regular_unlock);  // if !HLE RegularLock
   1.571 +      xend();                                       // otherwise end...
   1.572 +      jmp(DONE_LABEL);                              // ... and we're done
   1.573 +      bind(L_regular_unlock);
   1.574 +    }
   1.575 +#endif
   1.576 +
   1.577      cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
   1.578 +    jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
   1.579      movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
   1.580 -    jccb  (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
   1.581 -
   1.582 -    testptr(tmpReg, 0x02);                          // Inflated?
   1.583 +    testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?
   1.584      jccb  (Assembler::zero, Stacked);
   1.585  
   1.586      // It's inflated.
   1.587 +#if INCLUDE_RTM_OPT
   1.588 +    if (use_rtm) {
   1.589 +      Label L_regular_inflated_unlock;
   1.590 +      // Clean monitor_value bit to get valid pointer
   1.591 +      int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
   1.592 +      movptr(boxReg, Address(tmpReg, owner_offset));
   1.593 +      testptr(boxReg, boxReg);
   1.594 +      jccb(Assembler::notZero, L_regular_inflated_unlock);
   1.595 +      xend();
   1.596 +      jmpb(DONE_LABEL);
   1.597 +      bind(L_regular_inflated_unlock);
   1.598 +    }
   1.599 +#endif
   1.600 +
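The unlock-side test above is the mirror image of the lock elision: a thread that really owns the inflated monitor would have written itself into _owner, so finding _owner NULL means this thread is still inside the transaction begun at lock time and can simply commit it. A hedged sketch:

    #include <atomic>
    #include <immintrin.h>

    bool try_transactional_unlock(std::atomic<void*>& owner) {
      if (owner.load(std::memory_order_relaxed) == nullptr) {
        _xend();                       // commit the region begun by xbegin
        return true;
      }
      return false;                    // fall through to the regular unlock
    }
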
   1.601      // Despite our balanced locking property we still check that m->_owner == Self
   1.602      // as java routines or native JNI code called by this thread might
   1.603      // have released the lock.
   1.604 @@ -2448,7 +2852,9 @@
   1.605    Condition negated_cond = negate_condition(cond);
   1.606    Label L;
   1.607    jcc(negated_cond, L);
   1.608 +  pushf(); // Preserve flags
   1.609    atomic_incl(counter_addr);
   1.610 +  popf();
   1.611    bind(L);
   1.612  }
   1.613  