--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Sat Mar 22 00:26:48 2014 +0400
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Mar 20 17:49:27 2014 -0700
@@ -301,7 +301,9 @@
   mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 }
 
-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
+  // The scratch register is not used;
+  // it is defined to match the parameters of the 64-bit version of this method.
   if (src.is_lval()) {
     mov_literal32(dst, (intptr_t)src.target(), src.rspec());
   } else {
@@ -613,6 +615,15 @@
   /* else */      { subq(dst, value)       ; return; }
 }
 
+void MacroAssembler::incrementq(AddressLiteral dst) {
+  if (reachable(dst)) {
+    incrementq(as_Address(dst));
+  } else {
+    lea(rscratch1, dst);
+    incrementq(Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::incrementq(Register reg, int value) {
   if (value == min_jint) { addq(reg, value); return; }
   if (value < 0) { decrementq(reg, -value); return; }
@@ -681,15 +692,15 @@
   movq(dst, rscratch1);
 }
 
-void MacroAssembler::movptr(Register dst, AddressLiteral src) {
+void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
   if (src.is_lval()) {
     mov_literal64(dst, (intptr_t)src.target(), src.rspec());
   } else {
     if (reachable(src)) {
       movq(dst, as_Address(src));
     } else {
-      lea(rscratch1, src);
-      movq(dst, Address(rscratch1,0));
+      lea(scratch, src);
+      movq(dst, Address(scratch, 0));
     }
   }
 }
@@ -988,20 +999,37 @@
   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
 }
 
-void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
-  pushf();
+void MacroAssembler::atomic_incl(Address counter_addr) {
+  if (os::is_MP())
+    lock();
+  incrementl(counter_addr);
+}
+
+void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
   if (reachable(counter_addr)) {
-    if (os::is_MP())
-      lock();
-    incrementl(as_Address(counter_addr));
+    atomic_incl(as_Address(counter_addr));
   } else {
-    lea(rscratch1, counter_addr);
-    if (os::is_MP())
-      lock();
-    incrementl(Address(rscratch1, 0));
-  }
-  popf();
-}
+    lea(scr, counter_addr);
+    atomic_incl(Address(scr, 0));
+  }
+}
+
+#ifdef _LP64
+void MacroAssembler::atomic_incq(Address counter_addr) {
+  if (os::is_MP())
+    lock();
+  incrementq(counter_addr);
+}
+
+void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
+  if (reachable(counter_addr)) {
+    atomic_incq(as_Address(counter_addr));
+  } else {
+    lea(scr, counter_addr);
+    atomic_incq(Address(scr, 0));
+  }
+}
+#endif
 
 // Writes to stack successive pages until offset reached to check for
 // stack overflow + shadow pages.  This clobbers tmp.
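Note on the hunk above: atomic_incl used to preserve flags itself with pushf/popf; that pair is removed here, so the common path is a bare lock-prefixed increment (the lock prefix is skipped on uniprocessors), and the one caller that needs flags preserved, cond_inc32 at the end of this patch, now saves them itself. A rough user-level sketch of what the emitted sequence amounts to (names illustrative, GCC/Clang builtin, not HotSpot API):

    #include <cstdint>

    // Roughly what "lock incl [counter]" gives you at the C++ level.
    static inline void atomic_incl_sketch(volatile int32_t* counter) {
      __atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST);
    }
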
@@ -1274,6 +1302,325 @@
 }
 
 #ifdef COMPILER2
+
+#if INCLUDE_RTM_OPT
+
+// Update rtm_counters based on abort status
+// input: abort_status
+//        rtm_counters (RTMLockingCounters*)
+// flags are killed
+void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
+
+  atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
+  if (PrintPreciseRTMLockingStatistics) {
+    for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
+      Label check_abort;
+      testl(abort_status, (1<<i));
+      jccb(Assembler::equal, check_abort);
+      atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
+      bind(check_abort);
+    }
+  }
+}
+
+// Branch if (random & (count-1) != 0), count is 2^n
+// tmp, scr and flags are killed
+void MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) {
+  assert(tmp == rax, "");
+  assert(scr == rdx, "");
+  rdtsc(); // modifies EDX:EAX
+  andptr(tmp, count-1);
+  jccb(Assembler::notZero, brLabel);
+}
+
+// Perform abort ratio calculation, set no_rtm bit if high ratio
+// input:  rtm_counters_Reg (RTMLockingCounters* address)
+// tmpReg, rtm_counters_Reg and flags are killed
+void MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
+                                                 Register rtm_counters_Reg,
+                                                 RTMLockingCounters* rtm_counters,
+                                                 Metadata* method_data) {
+  Label L_done, L_check_always_rtm1, L_check_always_rtm2;
+
+  if (RTMLockingCalculationDelay > 0) {
+    // Delay calculation
+    movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
+    testptr(tmpReg, tmpReg);
+    jccb(Assembler::equal, L_done);
+  }
+  // Abort ratio calculation only if abort_count > RTMAbortThreshold
+  //   Aborted transactions = abort_count * 100
+  //   All transactions = total_count * RTMTotalCountIncrRate
+  //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
+
+  movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
+  cmpptr(tmpReg, RTMAbortThreshold);
+  jccb(Assembler::below, L_check_always_rtm2);
+  imulptr(tmpReg, tmpReg, 100);
+
+  Register scrReg = rtm_counters_Reg;
+  movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
+  imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
+  imulptr(scrReg, scrReg, RTMAbortRatio);
+  cmpptr(tmpReg, scrReg);
+  jccb(Assembler::below, L_check_always_rtm1);
+  if (method_data != NULL) {
+    // set rtm_state to "no rtm" in MDO
+    mov_metadata(tmpReg, method_data);
+    if (os::is_MP()) {
+      lock();
+    }
+    orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
+  }
+  jmpb(L_done);
+  bind(L_check_always_rtm1);
+  // Reload RTMLockingCounters* address
+  lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
+  bind(L_check_always_rtm2);
+  movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
+  cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
+  jccb(Assembler::below, L_done);
+  if (method_data != NULL) {
+    // set rtm_state to "always rtm" in MDO
+    mov_metadata(tmpReg, method_data);
+    if (os::is_MP()) {
+      lock();
+    }
+    orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
+  }
+  bind(L_done);
+}
+
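The policy encoded in rtm_abort_ratio_calculation is easier to see in scalar form. A minimal sketch under the same semantics (total_count is sampled at a 1/RTMTotalCountIncrRate rate, RTMAbortRatio is a percentage); the function and enum names here are illustrative:

    #include <cstdint>

    enum RTMStateBits { UseRTMBit = 1, NoRTMBit = 2 };  // illustrative values

    static int rtm_state_update(uint64_t abort_count, uint64_t total_count,
                                uint64_t abort_threshold,    // RTMAbortThreshold
                                uint64_t incr_rate,          // RTMTotalCountIncrRate
                                uint64_t abort_ratio,        // RTMAbortRatio, percent
                                uint64_t locking_threshold)  // RTMLockingThreshold
    {
      uint64_t aborted = abort_count * 100;        // aborted transactions
      uint64_t all     = total_count * incr_rate;  // estimated total transactions
      if (abort_count >= abort_threshold && aborted >= all * abort_ratio)
        return NoRTMBit;                           // too many aborts: disable RTM
      if (total_count >= locking_threshold / incr_rate)
        return UseRTMBit;                          // enough clean history: always RTM
      return 0;                                    // keep profiling
    }
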
+// Update counters and perform abort ratio calculation
+// input:  abort_status_Reg
+// rtm_counters_Reg, flags are killed
+void MacroAssembler::rtm_profiling(Register abort_status_Reg,
+                                   Register rtm_counters_Reg,
+                                   RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data,
+                                   bool profile_rtm) {
+
+  assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+  // update rtm counters based on rax value at abort
+  // reads abort_status_Reg, updates flags
+  lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
+  rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
+  if (profile_rtm) {
+    // Save abort status because abort_status_Reg is used by following code.
+    if (RTMRetryCount > 0) {
+      push(abort_status_Reg);
+    }
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data);
+    // restore abort status
+    if (RTMRetryCount > 0) {
+      pop(abort_status_Reg);
+    }
+  }
+}
+
+// Retry on abort if the abort status is 0x6: can retry (0x2) | memory conflict (0x4)
+// inputs: retry_count_Reg
+//       : abort_status_Reg
+// output: retry_count_Reg decremented by 1
+// flags are killed
+void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) {
+  Label doneRetry;
+  assert(abort_status_Reg == rax, "");
+  // The abort reason bits are in eax (see all states in rtmLocking.hpp)
+  // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4)
+  // if reason is in 0x6 and retry count != 0 then retry
+  andptr(abort_status_Reg, 0x6);
+  jccb(Assembler::zero, doneRetry);
+  testl(retry_count_Reg, retry_count_Reg);
+  jccb(Assembler::zero, doneRetry);
+  pause();
+  decrementl(retry_count_Reg);
+  jmp(retryLabel);
+  bind(doneRetry);
+}
+
+// Spin and retry if the lock is busy.
+// inputs: box_Reg (monitor address)
+//       : retry_count_Reg
+// output: retry_count_Reg decremented by 1
+//       : clear z flag if retry count exceeded
+// tmp_Reg, scr_Reg, flags are killed
+void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
+                                            Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
+  Label SpinLoop, SpinExit, doneRetry;
+  // Clean monitor_value bit to get valid pointer
+  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+  testl(retry_count_Reg, retry_count_Reg);
+  jccb(Assembler::zero, doneRetry);
+  decrementl(retry_count_Reg);
+  movptr(scr_Reg, RTMSpinLoopCount);
+
+  bind(SpinLoop);
+  pause();
+  decrementl(scr_Reg);
+  jccb(Assembler::lessEqual, SpinExit);
+  movptr(tmp_Reg, Address(box_Reg, owner_offset));
+  testptr(tmp_Reg, tmp_Reg);
+  jccb(Assembler::notZero, SpinLoop);
+
+  bind(SpinExit);
+  jmp(retryLabel);
+  bind(doneRetry);
+  incrementl(retry_count_Reg); // clear z flag
+}
+
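The abort-status test in rtm_retry_lock_on_abort mirrors the RTM status word defined by the hardware: _XABORT_RETRY (bit 1, value 0x2) and _XABORT_CONFLICT (bit 2, value 0x4) together form the 0x6 mask above. A minimal user-level sketch of the same retry loop using the compiler's RTM intrinsics (compile with -mrtm; the lock-word layout is illustrative):

    #include <immintrin.h>

    // Run the critical section transactionally, retrying only on transient
    // aborts, exactly as the 0x6 mask in the assembly does.
    static bool try_transactional(volatile long* lock_word, int retry_count) {
      while (true) {
        unsigned status = _xbegin();
        if (status == _XBEGIN_STARTED) {
          if (*lock_word == 0) return true;   // lock looks free: stay transactional
          _xabort(0x1);                       // lock busy: explicit abort
        }
        if ((status & (_XABORT_RETRY | _XABORT_CONFLICT)) == 0) break;
        if (retry_count-- == 0) break;
        _mm_pause();                          // same role as pause() above
      }
      return false;                           // caller falls back to the real lock
    }
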
+// Use RTM for normal stack locks
+// Input: objReg (object to lock)
+void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
+                                       Register retry_on_abort_count_Reg,
+                                       RTMLockingCounters* stack_rtm_counters,
+                                       Metadata* method_data, bool profile_rtm,
+                                       Label& DONE_LABEL, Label& IsInflated) {
+  assert(UseRTMForStackLocks, "why call this otherwise?");
+  assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+  assert(tmpReg == rax, "");
+  assert(scrReg == rdx, "");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+
+  if (RTMRetryCount > 0) {
+    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+    bind(L_rtm_retry);
+  }
+  if (!UseRTMXendForLockBusy) {
+    movptr(tmpReg, Address(objReg, 0));
+    testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+    jcc(Assembler::notZero, IsInflated);
+  }
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      // tmpReg, scrReg and flags are killed
+      branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
+    atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
+    bind(L_noincrement);
+  }
+  xbegin(L_on_abort);
+  movptr(tmpReg, Address(objReg, 0));                      // fetch markword
+  andptr(tmpReg, markOopDesc::biased_lock_mask_in_place);  // look at 3 lock bits
+  cmpptr(tmpReg, markOopDesc::unlocked_value);             // bits = 001 unlocked
+  jcc(Assembler::equal, DONE_LABEL);                       // all done if unlocked
+
+  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
+  if (UseRTMXendForLockBusy) {
+    xend();
+    movptr(tmpReg, Address(objReg, 0));
+    testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+    jcc(Assembler::notZero, IsInflated);
+    movptr(abort_status_Reg, 0x1);                // Set the abort status to 1 (as xabort does)
+    jmp(L_decrement_retry);
+  }
+  else {
+    xabort(0);
+  }
+  bind(L_on_abort);
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm);
+  }
+  bind(L_decrement_retry);
+  if (RTMRetryCount > 0) {
+    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+  }
+}
+
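rtm_stack_locking keys entirely off the markword's low lock bits: the transaction may proceed only while the object still looks unlocked (pattern 001). A sketch of the two bit tests used here, with the bit values as defined in markOop.hpp:

    #include <cstdint>

    // Low markword bits (markOop.hpp): 01 = unlocked/neutral, 00 = stack-locked,
    // 10 = inflated monitor, 11 = GC-marked; 101 = biased.
    static inline bool mark_is_inflated(intptr_t mark) {
      return (mark & 2 /* monitor_value */) != 0;
    }
    static inline bool mark_is_unlocked(intptr_t mark) {
      return (mark & 7 /* biased_lock_mask */) == 1 /* unlocked_value */;
    }
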
+// Use RTM for inflating locks
+// inputs: objReg (object to lock)
+//         boxReg (on-stack box address (displaced header location) - KILLED)
+//         tmpReg (ObjectMonitor address + 2(monitor_value))
+void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
+                                          Register scrReg, Register retry_on_busy_count_Reg,
+                                          Register retry_on_abort_count_Reg,
+                                          RTMLockingCounters* rtm_counters,
+                                          Metadata* method_data, bool profile_rtm,
+                                          Label& DONE_LABEL) {
+  assert(UseRTMLocking, "why call this otherwise?");
+  assert(tmpReg == rax, "");
+  assert(scrReg == rdx, "");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+  // Clean monitor_value bit to get valid pointer
+  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+  // Without cast to int32_t a movptr will destroy r10 which is typically obj
+  movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+  movptr(boxReg, tmpReg); // Save ObjectMonitor address
+
+  if (RTMRetryCount > 0) {
+    movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
+    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+    bind(L_rtm_retry);
+  }
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      // tmpReg, scrReg and flags are killed
+      branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
+    bind(L_noincrement);
+  }
+  xbegin(L_on_abort);
+  movptr(tmpReg, Address(objReg, 0));
+  movptr(tmpReg, Address(tmpReg, owner_offset));
+  testptr(tmpReg, tmpReg);
+  jcc(Assembler::zero, DONE_LABEL);
+  if (UseRTMXendForLockBusy) {
+    xend();
+    jmp(L_decrement_retry);
+  }
+  else {
+    xabort(0);
+  }
+  bind(L_on_abort);
+  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
+  }
+  if (RTMRetryCount > 0) {
+    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+  }
+
+  movptr(tmpReg, Address(boxReg, owner_offset)) ;
+  testptr(tmpReg, tmpReg) ;
+  jccb(Assembler::notZero, L_decrement_retry) ;
+
+  // Appears unlocked - try to swing _owner from null to non-null.
+  // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+#ifdef _LP64
+  Register threadReg = r15_thread;
+#else
+  get_thread(scrReg);
+  Register threadReg = scrReg;
+#endif
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
+
+  if (RTMRetryCount > 0) {
+    // success done else retry
+    jccb(Assembler::equal, DONE_LABEL) ;
+    bind(L_decrement_retry);
+    // Spin and retry if lock is busy.
+    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
+  }
+  else {
+    bind(L_decrement_retry);
+  }
+}
+
+#endif // INCLUDE_RTM_OPT
+
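When the transaction cannot be used, rtm_inflated_locking falls back to the classic monitor acquisition: CAS the ObjectMonitor's _owner field from NULL to the current thread (tmpReg is rax, the implicit cmpxchg comparand, and holds 0 at that point). A standalone sketch of that fallback, with illustrative types:

    #include <atomic>

    struct MonitorSketch {
      std::atomic<void*> owner{nullptr};  // stands in for ObjectMonitor::_owner
    };

    // Equivalent of "lock cmpxchg threadReg, [boxReg + owner_offset]".
    static bool try_acquire_owner(MonitorSketch* m, void* self) {
      void* expected = nullptr;           // the rax comparand: owner must be NULL
      return m->owner.compare_exchange_strong(expected, self);
    }
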
 // Fast_Lock and Fast_Unlock used by C2
 
 // Because the transitions from emitted code to the runtime
@@ -1350,17 +1697,26 @@
 // box: on-stack box address (displaced header location) - KILLED
 // rax,: tmp -- KILLED
 // scr: tmp -- KILLED
-void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg, BiasedLockingCounters* counters) {
+void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
+                               Register scrReg, Register cx1Reg, Register cx2Reg,
+                               BiasedLockingCounters* counters,
+                               RTMLockingCounters* rtm_counters,
+                               RTMLockingCounters* stack_rtm_counters,
+                               Metadata* method_data,
+                               bool use_rtm, bool profile_rtm) {
   // Ensure the register assignments are disjoint
-  guarantee (objReg != boxReg, "");
-  guarantee (objReg != tmpReg, "");
-  guarantee (objReg != scrReg, "");
-  guarantee (boxReg != tmpReg, "");
-  guarantee (boxReg != scrReg, "");
-  guarantee (tmpReg == rax, "");
+  assert(tmpReg == rax, "");
+
+  if (use_rtm) {
+    assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
+  } else {
+    assert(cx1Reg == noreg, "");
+    assert(cx2Reg == noreg, "");
+    assert_different_registers(objReg, boxReg, tmpReg, scrReg);
+  }
 
   if (counters != NULL) {
-    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()));
+    atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
   }
   if (EmitSync & 1) {
     // set box->dhw = unused_mark (3)
@@ -1419,12 +1775,20 @@
     biased_locking_enter(boxReg, objReg, tmpReg, scrReg, true, DONE_LABEL, NULL, counters);
   }
 
+#if INCLUDE_RTM_OPT
+  if (UseRTMForStackLocks && use_rtm) {
+    rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
+                      stack_rtm_counters, method_data, profile_rtm,
+                      DONE_LABEL, IsInflated);
+  }
+#endif // INCLUDE_RTM_OPT
+
   movptr(tmpReg, Address(objReg, 0));          // [FETCH]
-  testl (tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
-  jccb  (Assembler::notZero, IsInflated);
+  testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
+  jccb(Assembler::notZero, IsInflated);
 
   // Attempt stack-locking ...
-  orptr (tmpReg, 0x1);
+  orptr (tmpReg, markOopDesc::unlocked_value);
   movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
   if (os::is_MP()) {
     lock();
@@ -1434,19 +1798,32 @@
     cond_inc32(Assembler::equal,
                ExternalAddress((address)counters->fast_path_entry_count_addr()));
   }
-  jccb(Assembler::equal, DONE_LABEL);
-
-  // Recursive locking
+  jcc(Assembler::equal, DONE_LABEL);           // Success
+
+  // Recursive locking.
+  // The object is stack-locked: the markword contains a stack pointer to a BasicLock.
+  // It is locked by the current thread if the difference from the current SP is less than one page.
   subptr(tmpReg, rsp);
+  // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
   andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
   movptr(Address(boxReg, 0), tmpReg);
   if (counters != NULL) {
     cond_inc32(Assembler::equal,
               ExternalAddress((address)counters->fast_path_entry_count_addr()));
   }
-  jmpb(DONE_LABEL);
+  jmp(DONE_LABEL);
 
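The recursive-lock test is the dense part: after subtracting RSP from the fetched markword, a single AND decides whether the displaced-header pointer lies in the current thread's stack. On 64-bit, 7 - os::vm_page_size() equals ~(page_size - 8), so the mask clears exactly the aligned in-page offset bits. A sketch of the predicate, assuming a power-of-two page size (the 32-bit constant 0xFFFFF003 differs slightly and is not modeled here):

    #include <cstdint>

    // Zero result <=> markword - sp is 8-byte aligned and within one page,
    // i.e. the object is stack-locked by the current thread (64-bit variant).
    static bool is_recursive_stack_lock(intptr_t mark, intptr_t sp, intptr_t page_size) {
      return ((mark - sp) & (7 - page_size)) == 0;  // 7 - page_size == ~(page_size - 8)
    }
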
   bind(IsInflated);
+  // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value)
+
+#if INCLUDE_RTM_OPT
+  // Use the same RTM locking code in 32- and 64-bit VM.
+  if (use_rtm) {
+    rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
+                         rtm_counters, method_data, profile_rtm, DONE_LABEL);
+  } else {
+#endif // INCLUDE_RTM_OPT
+
 #ifndef _LP64
   // The object is inflated.
   //
@@ -1576,7 +1953,7 @@
   // Without cast to int32_t a movptr will destroy r10 which is typically obj
   movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
 
-  mov    (boxReg, tmpReg);
+  movptr (boxReg, tmpReg);
   movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
   testptr(tmpReg, tmpReg);
   jccb   (Assembler::notZero, DONE_LABEL);
@@ -1587,9 +1964,11 @@
   }
   cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
   // Intentional fall-through into DONE_LABEL ...
-
+#endif // _LP64
+
+#if INCLUDE_RTM_OPT
+  } // use_rtm()
 #endif
-
   // DONE_LABEL is a hot target - we'd really like to place it at the
   // start of cache line by padding with NOPs.
   // See the AMD and Intel software optimization manuals for the
@@ -1631,11 +2010,9 @@
 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
 
-void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
-  guarantee (objReg != boxReg, "");
-  guarantee (objReg != tmpReg, "");
-  guarantee (boxReg != tmpReg, "");
-  guarantee (boxReg == rax, "");
+void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
+  assert(boxReg == rax, "");
+  assert_different_registers(objReg, boxReg, tmpReg);
 
   if (EmitSync & 4) {
     // Disable - inhibit all inlining.  Force control through the slow-path
@@ -1667,14 +2044,41 @@
     biased_locking_exit(objReg, tmpReg, DONE_LABEL);
   }
 
+#if INCLUDE_RTM_OPT
+  if (UseRTMForStackLocks && use_rtm) {
+    assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+    Label L_regular_unlock;
+    movptr(tmpReg, Address(objReg, 0));                      // fetch markword
+    andptr(tmpReg, markOopDesc::biased_lock_mask_in_place);  // look at 3 lock bits
+    cmpptr(tmpReg, markOopDesc::unlocked_value);             // bits = 001 unlocked
+    jccb(Assembler::notEqual, L_regular_unlock);             // if !HLE RegularLock
+    xend();                                                  // otherwise end...
+    jmp(DONE_LABEL);                                         // ... and we're done
+    bind(L_regular_unlock);
+  }
+#endif
+
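The unlock side is the mirror image: inside a successful transaction the markword still reads "unlocked", so observing pattern 001 at fast_unlock time means we are executing transactionally and must commit with xend() rather than write a header back. A user-level sketch of that test, valid only under the same balanced lock/unlock assumption the VM relies on (illustrative lock word, -mrtm):

    #include <immintrin.h>

    // If the lock word still reads "free", we must be inside the transaction
    // that elided the lock: commit it. Otherwise release the lock normally.
    static void elided_unlock(volatile long* lock_word) {
      if (*lock_word == 0) {
        _xend();         // transactional path: commit, nothing was written
      } else {
        *lock_word = 0;  // regular unlock path
      }
    }
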
   cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);  // Examine the displaced header
+  jcc   (Assembler::zero, DONE_LABEL);             // 0 indicates recursive stack-lock
   movptr(tmpReg, Address(objReg, 0));              // Examine the object's markword
-  jccb  (Assembler::zero, DONE_LABEL);             // 0 indicates recursive stack-lock
-
-  testptr(tmpReg, 0x02);                           // Inflated?
+  testptr(tmpReg, markOopDesc::monitor_value);     // Inflated?
   jccb  (Assembler::zero, Stacked);
 
   // It's inflated.
+#if INCLUDE_RTM_OPT
+  if (use_rtm) {
+    Label L_regular_inflated_unlock;
+    // Clean monitor_value bit to get valid pointer
+    int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+    movptr(boxReg, Address(tmpReg, owner_offset));
+    testptr(boxReg, boxReg);
+    jccb(Assembler::notZero, L_regular_inflated_unlock);
+    xend();
+    jmpb(DONE_LABEL);
+    bind(L_regular_inflated_unlock);
+  }
+#endif
+
   // Despite our balanced locking property we still check that m->_owner == Self
   // as java routines or native JNI code called by this thread might
   // have released the lock.
@@ -2448,7 +2852,9 @@
   Condition negated_cond = negate_condition(cond);
   Label L;
   jcc(negated_cond, L);
+  pushf(); // Preserve flags
   atomic_incl(counter_addr);
+  popf();
   bind(L);
 }