1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/mips/vm/macroAssembler_mips.cpp Thu Sep 07 09:12:16 2017 +0800 1.3 @@ -0,0 +1,3844 @@ 1.4 +/* 1.5 + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 1.6 + * Copyright (c) 2017, Loongson Technology. All rights reserved. 1.7 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 + * 1.9 + * This code is free software; you can redistribute it and/or modify it 1.10 + * under the terms of the GNU General Public License version 2 only, as 1.11 + * published by the Free Software Foundation. 1.12 + * 1.13 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.15 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.16 + * version 2 for more details (a copy is included in the LICENSE file that 1.17 + * accompanied this code). 1.18 + * 1.19 + * You should have received a copy of the GNU General Public License version 1.20 + * 2 along with this work; if not, write to the Free Software Foundation, 1.21 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.22 + * 1.23 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 1.24 + * or visit www.oracle.com if you need additional information or have any 1.25 + * questions. 
1.26 + * 1.27 + */ 1.28 + 1.29 +#include "precompiled.hpp" 1.30 +#include "asm/assembler.hpp" 1.31 +#include "asm/assembler.inline.hpp" 1.32 +#include "asm/macroAssembler.inline.hpp" 1.33 +#include "compiler/disassembler.hpp" 1.34 +#include "gc_interface/collectedHeap.inline.hpp" 1.35 +#include "interpreter/interpreter.hpp" 1.36 +#include "memory/cardTableModRefBS.hpp" 1.37 +#include "memory/resourceArea.hpp" 1.38 +#include "memory/universe.hpp" 1.39 +#include "prims/methodHandles.hpp" 1.40 +#include "runtime/biasedLocking.hpp" 1.41 +#include "runtime/interfaceSupport.hpp" 1.42 +#include "runtime/objectMonitor.hpp" 1.43 +#include "runtime/os.hpp" 1.44 +#include "runtime/sharedRuntime.hpp" 1.45 +#include "runtime/stubRoutines.hpp" 1.46 +#include "utilities/macros.hpp" 1.47 +#if INCLUDE_ALL_GCS 1.48 +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 1.49 +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 1.50 +#include "gc_implementation/g1/heapRegion.hpp" 1.51 +#endif // INCLUDE_ALL_GCS 1.52 + 1.53 +// Implementation of MacroAssembler 1.54 + 1.55 +intptr_t MacroAssembler::i[32] = {0}; 1.56 +float MacroAssembler::f[32] = {0.0}; 1.57 + 1.58 +void MacroAssembler::print(outputStream *s) { 1.59 + unsigned int k; 1.60 + for(k=0; k<sizeof(i)/sizeof(i[0]); k++) { 1.61 + s->print_cr("i%d = 0x%.16lx", k, i[k]); 1.62 + } 1.63 + s->cr(); 1.64 + 1.65 + for(k=0; k<sizeof(f)/sizeof(f[0]); k++) { 1.66 + s->print_cr("f%d = %f", k, f[k]); 1.67 + } 1.68 + s->cr(); 1.69 +} 1.70 + 1.71 +int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } 1.72 +int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } 1.73 + 1.74 +void MacroAssembler::save_registers(MacroAssembler *masm) { 1.75 +#define __ masm-> 1.76 + for(int k=0; k<32; k++) { 1.77 + __ sw (as_Register(k), A0, i_offset(k)); 1.78 + } 1.79 + 1.80 + for(int k=0; k<32; k++) { 1.81 + __ swc1 (as_FloatRegister(k), A0, f_offset(k)); 1.82 + } 
1.83 +#undef __ 1.84 +} 1.85 + 1.86 +void MacroAssembler::restore_registers(MacroAssembler *masm) { 1.87 +#define __ masm-> 1.88 + for(int k=0; k<32; k++) { 1.89 + __ lw (as_Register(k), A0, i_offset(k)); 1.90 + } 1.91 + 1.92 + for(int k=0; k<32; k++) { 1.93 + __ lwc1 (as_FloatRegister(k), A0, f_offset(k)); 1.94 + } 1.95 +#undef __ 1.96 +} 1.97 + 1.98 + 1.99 +void MacroAssembler::pd_patch_instruction(address branch, address target) { 1.100 + jint& stub_inst = *(jint*) branch; 1.101 + 1.102 +/* * 1.103 + move(AT, RA); // dadd 1.104 + emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); 1.105 + nop(); 1.106 + lui(T9, 0); // to be patched 1.107 + ori(T9, 0); 1.108 + daddu(T9, T9, RA); 1.109 + move(RA, AT); 1.110 + jr(T9); 1.111 + */ 1.112 + if(special(stub_inst) == dadd_op) { 1.113 + jint *pc = (jint *)branch; 1.114 + 1.115 + assert(opcode(pc[3]) == lui_op 1.116 + && opcode(pc[4]) == ori_op 1.117 + && special(pc[5]) == daddu_op, "Not a branch label patch"); 1.118 + if(!(opcode(pc[3]) == lui_op 1.119 + && opcode(pc[4]) == ori_op 1.120 + && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } 1.121 + 1.122 + int offset = target - branch; 1.123 + if (!is_simm16(offset)) 1.124 + { 1.125 + pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); 1.126 + pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); 1.127 + } 1.128 + else 1.129 + { 1.130 + /* revert to "beq + nop" */ 1.131 + CodeBuffer cb(branch, 4 * 10); 1.132 + MacroAssembler masm(&cb); 1.133 +#define __ masm. 
1.134 + __ b(target); 1.135 + __ nop(); 1.136 + __ nop(); 1.137 + __ nop(); 1.138 + __ nop(); 1.139 + __ nop(); 1.140 + __ nop(); 1.141 + __ nop(); 1.142 + } 1.143 + return; 1.144 + } 1.145 + 1.146 +#ifndef PRODUCT 1.147 + if (!is_simm16((target - branch - 4) >> 2)) 1.148 + { 1.149 + tty->print_cr("Illegal patching: target=0x%lx", target); 1.150 + int *p = (int *)branch; 1.151 + for (int i = -10; i < 10; i++) 1.152 + { 1.153 + tty->print("0x%lx, ", p[i]); 1.154 + } 1.155 + tty->print_cr(""); 1.156 + } 1.157 +#endif 1.158 + 1.159 + stub_inst = patched_branch(target - branch, stub_inst, 0); 1.160 +} 1.161 + 1.162 +static inline address first_cache_address() { 1.163 + return CodeCache::low_bound() + sizeof(HeapBlock::Header); 1.164 +} 1.165 + 1.166 +static inline address last_cache_address() { 1.167 + return CodeCache::high_bound() - Assembler::InstructionSize; 1.168 +} 1.169 + 1.170 +int MacroAssembler::call_size(address target, bool far, bool patchable) { 1.171 + if (patchable) return 6 << Assembler::LogInstructionSize; 1.172 + if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop 1.173 + return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; 1.174 +} 1.175 + 1.176 +// Can we reach target using jal/j from anywhere 1.177 +// in the code cache (because code can be relocated)? 
1.178 +bool MacroAssembler::reachable_from_cache(address target) { 1.179 + address cl = first_cache_address(); 1.180 + address ch = last_cache_address(); 1.181 + 1.182 + return fit_in_jal(target, cl) && fit_in_jal(target, ch); 1.183 +} 1.184 + 1.185 +void MacroAssembler::general_jump(address target) { 1.186 + if (reachable_from_cache(target)) { 1.187 + j(target); 1.188 + nop(); 1.189 + } else { 1.190 + set64(T9, (long)target); 1.191 + jr(T9); 1.192 + nop(); 1.193 + } 1.194 +} 1.195 + 1.196 +int MacroAssembler::insts_for_general_jump(address target) { 1.197 + if (reachable_from_cache(target)) { 1.198 + //j(target); 1.199 + //nop(); 1.200 + return 2; 1.201 + } else { 1.202 + //set64(T9, (long)target); 1.203 + //jr(T9); 1.204 + //nop(); 1.205 + return insts_for_set64((jlong)target) + 2; 1.206 + } 1.207 +} 1.208 + 1.209 +void MacroAssembler::patchable_jump(address target) { 1.210 + if (reachable_from_cache(target)) { 1.211 + nop(); 1.212 + nop(); 1.213 + nop(); 1.214 + nop(); 1.215 + j(target); 1.216 + nop(); 1.217 + } else { 1.218 + patchable_set48(T9, (long)target); 1.219 + jr(T9); 1.220 + nop(); 1.221 + } 1.222 +} 1.223 + 1.224 +int MacroAssembler::insts_for_patchable_jump(address target) { 1.225 + return 6; 1.226 +} 1.227 + 1.228 +void MacroAssembler::general_call(address target) { 1.229 + if (reachable_from_cache(target)) { 1.230 + jal(target); 1.231 + nop(); 1.232 + } else { 1.233 + set64(T9, (long)target); 1.234 + jalr(T9); 1.235 + nop(); 1.236 + } 1.237 +} 1.238 + 1.239 +int MacroAssembler::insts_for_general_call(address target) { 1.240 + if (reachable_from_cache(target)) { 1.241 + //jal(target); 1.242 + //nop(); 1.243 + return 2; 1.244 + } else { 1.245 + //set64(T9, (long)target); 1.246 + //jalr(T9); 1.247 + //nop(); 1.248 + return insts_for_set64((jlong)target) + 2; 1.249 + } 1.250 +} 1.251 + 1.252 +void MacroAssembler::patchable_call(address target) { 1.253 + if (reachable_from_cache(target)) { 1.254 + nop(); 1.255 + nop(); 1.256 + nop(); 1.257 + nop(); 
1.258 + jal(target); 1.259 + nop(); 1.260 + } else { 1.261 + patchable_set48(T9, (long)target); 1.262 + jalr(T9); 1.263 + nop(); 1.264 + } 1.265 +} 1.266 + 1.267 +int MacroAssembler::insts_for_patchable_call(address target) { 1.268 + return 6; 1.269 +} 1.270 + 1.271 +void MacroAssembler::beq_far(Register rs, Register rt, address entry) 1.272 +{ 1.273 + u_char * cur_pc = pc(); 1.274 + 1.275 + /* Jin: Near/Far jump */ 1.276 + if(is_simm16((entry - pc() - 4) / 4)) 1.277 + { 1.278 + Assembler::beq(rs, rt, offset(entry)); 1.279 + } 1.280 + else 1.281 + { 1.282 + Label not_jump; 1.283 + bne(rs, rt, not_jump); 1.284 + delayed()->nop(); 1.285 + 1.286 + b_far(entry); 1.287 + delayed()->nop(); 1.288 + 1.289 + bind(not_jump); 1.290 + has_delay_slot(); 1.291 + } 1.292 +} 1.293 + 1.294 +void MacroAssembler::beq_far(Register rs, Register rt, Label& L) 1.295 +{ 1.296 + if (L.is_bound()) { 1.297 + beq_far(rs, rt, target(L)); 1.298 + } else { 1.299 + u_char * cur_pc = pc(); 1.300 + Label not_jump; 1.301 + bne(rs, rt, not_jump); 1.302 + delayed()->nop(); 1.303 + 1.304 + b_far(L); 1.305 + delayed()->nop(); 1.306 + 1.307 + bind(not_jump); 1.308 + has_delay_slot(); 1.309 + } 1.310 +} 1.311 + 1.312 +void MacroAssembler::bne_far(Register rs, Register rt, address entry) 1.313 +{ 1.314 + u_char * cur_pc = pc(); 1.315 + 1.316 + /* Jin: Near/Far jump */ 1.317 + if(is_simm16((entry - pc() - 4) / 4)) 1.318 + { 1.319 + Assembler::bne(rs, rt, offset(entry)); 1.320 + } 1.321 + else 1.322 + { 1.323 + Label not_jump; 1.324 + beq(rs, rt, not_jump); 1.325 + delayed()->nop(); 1.326 + 1.327 + b_far(entry); 1.328 + delayed()->nop(); 1.329 + 1.330 + bind(not_jump); 1.331 + has_delay_slot(); 1.332 + } 1.333 +} 1.334 + 1.335 +void MacroAssembler::bne_far(Register rs, Register rt, Label& L) 1.336 +{ 1.337 + if (L.is_bound()) { 1.338 + bne_far(rs, rt, target(L)); 1.339 + } else { 1.340 + u_char * cur_pc = pc(); 1.341 + Label not_jump; 1.342 + beq(rs, rt, not_jump); 1.343 + delayed()->nop(); 1.344 + 1.345 + 
b_far(L); 1.346 + delayed()->nop(); 1.347 + 1.348 + bind(not_jump); 1.349 + has_delay_slot(); 1.350 + } 1.351 +} 1.352 + 1.353 +void MacroAssembler::b_far(Label& L) 1.354 +{ 1.355 + if (L.is_bound()) { 1.356 + b_far(target(L)); 1.357 + } else { 1.358 + volatile address dest = target(L); 1.359 +/* 1.360 +MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 1.361 + 0x00000055651ed514: dadd at, ra, zero 1.362 + 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 1.363 + 1.364 + 0x00000055651ed51c: sll zero, zero, 0 1.365 + 0x00000055651ed520: lui t9, 0x0 1.366 + 0x00000055651ed524: ori t9, t9, 0x21b8 1.367 + 0x00000055651ed528: daddu t9, t9, ra 1.368 + 0x00000055651ed52c: dadd ra, at, zero 1.369 + 0x00000055651ed530: jr t9 1.370 + 0x00000055651ed534: sll zero, zero, 0 1.371 +*/ 1.372 + move(AT, RA); 1.373 + emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); 1.374 + nop(); 1.375 + lui(T9, 0); // to be patched 1.376 + ori(T9, T9, 0); 1.377 + daddu(T9, T9, RA); 1.378 + move(RA, AT); 1.379 + jr(T9); 1.380 + } 1.381 +} 1.382 + 1.383 +void MacroAssembler::b_far(address entry) 1.384 +{ 1.385 + u_char * cur_pc = pc(); 1.386 + 1.387 + /* Jin: Near/Far jump */ 1.388 + if(is_simm16((entry - pc() - 4) / 4)) 1.389 + { 1.390 + b(offset(entry)); 1.391 + } 1.392 + else 1.393 + { 1.394 + /* address must be bounded */ 1.395 + move(AT, RA); 1.396 + emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); 1.397 + nop(); 1.398 + li32(T9, entry - pc()); 1.399 + daddu(T9, T9, RA); 1.400 + move(RA, AT); 1.401 + jr(T9); 1.402 + } 1.403 +} 1.404 + 1.405 +void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) { 1.406 + addu_long(AT, base, offset); 1.407 + ld_ptr(rt, 0, AT); 1.408 +} 1.409 + 1.410 +void MacroAssembler::st_ptr(Register rt, Register offset, Register base) { 1.411 + addu_long(AT, base, offset); 1.412 + st_ptr(rt, 0, AT); 1.413 +} 1.414 + 1.415 +void MacroAssembler::ld_long(Register rt, Register offset, Register base) { 1.416 + addu_long(AT, 
// Register-indexed store: compute base+offset into AT, then defer to the
// immediate-offset form.
void MacroAssembler::st_long(Register rt, Register offset, Register base) {
  addu_long(AT, base, offset);
  st_long(rt, 0, AT);
}

// Convert an AddressLiteral into a plain Address, preserving relocation info.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// Atomically add 'inc' to the 32-bit counter at 'counter_addr' using an
// ll/sc retry loop.
// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved).
void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) {
  Label again;

  li(tmp_reg1, counter_addr);
  bind(again);
  // Extra sync before ll on older cores; presumably not needed on Loongson
  // 3A2000+ (hence the Use3A2000 guard) -- confirm against the CPU manual.
  if(!Use3A2000) sync();
  ll(tmp_reg2, tmp_reg1, 0);
  addi(tmp_reg2, tmp_reg2, inc);
  sc(tmp_reg2, tmp_reg1, 0);
  // sc leaves 0 in tmp_reg2 on failure: retry the whole ll/sc sequence.
  beq(tmp_reg2, R0, again);
  delayed()->nop();
}

// Attempt to bias the lock for 'obj_reg' toward the current thread, jumping
// to 'done' on success and to 'slow_case' (if non-NULL) when runtime
// revocation is required. Falls through to 'cas_label' (end of this code)
// when the normal CAS-based locking path should be taken.
// Returns the code offset of the first mark-word load, for implicit null
// check bookkeeping.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  // If the caller gave us no temp, borrow T9 and preserve it around uses.
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = T9;
  }
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    ld_ptr(swap_reg, mark_addr);
  }

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  // AT := biased_lock_pattern - (mark & biased_lock_mask); zero iff the
  // header carries the bias pattern.
  move(tmp_reg, swap_reg);
  andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place);
#ifdef _LP64
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
  dsub(AT, AT, tmp_reg);
#else
  addi(AT, R0, markOopDesc::biased_lock_pattern);
  sub(AT, AT, tmp_reg);
#endif
  if (need_tmp_reg) {
    pop(tmp_reg);
  }

  bne(AT, R0, cas_label);
  delayed()->nop();


  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on MIPS we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  st_ptr(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  // swap_reg := (prototype ^ mark ^ thread) with the age bits masked out.
  // A zero result means the bias owner and epoch are both current.
  load_prototype_header(tmp_reg, obj_reg);
  xorr(tmp_reg, tmp_reg, swap_reg);
  get_thread(swap_reg);
  xorr(swap_reg, swap_reg, tmp_reg);

  move(AT, ~((int) markOopDesc::age_mask_in_place));
  andr(swap_reg, swap_reg, AT);

  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(swap_reg, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  beq(swap_reg, R0, done);
  delayed()->nop();
  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  move(AT, markOopDesc::biased_lock_mask_in_place);
  andr(AT,swap_reg, AT);
  bne(AT, R0, try_revoke_bias);
  delayed()->nop();
  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  move(AT, markOopDesc::epoch_mask_in_place);
  andr(AT,swap_reg, AT);
  bne(AT, R0, try_rebias);
  delayed()->nop();
  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  ld_ptr(swap_reg, saved_mark_addr);

  move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  andr(swap_reg, swap_reg, AT);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(tmp_reg);
    push(A0);
    atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg);
    pop(A0);
    pop(tmp_reg);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }
  b(done);
  delayed()->nop();

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  get_thread(swap_reg);
  orr(tmp_reg, tmp_reg, swap_reg);
  ld_ptr(swap_reg, saved_mark_addr);

  //if (os::is_MP()) {
  //  sync();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }
  if (slow_case != NULL) {
    beq_far(AT, R0, *slow_case);
    delayed()->nop();
  }

  b(done);
  delayed()->nop();
  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  ld_ptr(swap_reg, saved_mark_addr);

  if (need_tmp_reg) {
    push(tmp_reg);
  }
  load_prototype_header(tmp_reg, obj_reg);
  //if (os::is_MP()) {
  //  lock();
  //}
  cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (PrintBiasedLockingStatistics) {
    Label L;
    bne(AT, R0, L);
    delayed()->nop();
    push(AT);
    push(tmp_reg);
    atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg);
    pop(tmp_reg);
    pop(AT);
    bind(L);
  }

  bind(cas_label);
  return null_check_offset;
}

// Branch to 'done' if the object's header still carries the bias pattern --
// a biased unlock is a no-op.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
#ifdef _LP64
  ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  daddi(AT, R0, markOopDesc::biased_lock_pattern);
#else
  lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place);
  addi(AT, R0, markOopDesc::biased_lock_pattern);
#endif

  beq(AT, temp_reg, done);
  delayed()->nop();
}

// NOTE: we dont increment the SP after call like the x86 version, maybe this is a problem, FIXME.
// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf
// this method will handle the stack problem, you need not to preserve the stack space for the argument now
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  Label L, E;

  assert(number_of_arguments <= 4, "just check");

  // Keep SP 16-byte aligned across the call: if it is currently misaligned,
  // carve an extra 8 bytes first and restore afterwards.
  andi(AT, SP, 0xf);
  beq(AT, R0, L);
  delayed()->nop();
  daddi(SP, SP, -8);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  daddi(SP, SP, 8);
  b(E);
  delayed()->nop();

  bind(L);
  call(entry_point, relocInfo::runtime_call_type);
  delayed()->nop();
  bind(E);
}


// Patchable jump through T9 (no relocation recorded).
void MacroAssembler::jmp(address entry) {
  patchable_set48(T9, (long)entry);
  jr(T9);
}

// Patchable jump, recording relocation info for types other than
// runtime_call/none.
void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      jmp(entry);
      break;
    default:
      {
      InstructionMark im(this);
      relocate(rtype);
      patchable_set48(T9, (long)entry);
      jr(T9);
      }
      break;
  }
}

void MacroAssembler::call(address entry) {
// c/c++ code assume T9 is entry point, so we just always move entry to t9
// maybe there is some more graceful method to handle this. FIXME
// For more info, see class NativeCall.
#ifndef _LP64
  move(T9, (int)entry);
#else
  patchable_set48(T9, (long)entry);
#endif
  jalr(T9);
}

// Call with relocation info recorded for types other than runtime_call/none.
void MacroAssembler::call(address entry, relocInfo::relocType rtype) {
  switch (rtype) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
      InstructionMark im(this);
      relocate(rtype);
      call(entry);
      }
      break;
  }
}

// RelocationHolder flavor of the relocated call above.
void MacroAssembler::call(address entry, RelocationHolder& rh)
{
  switch (rh.type()) {
    case relocInfo::runtime_call_type:
    case relocInfo::none:
      call(entry);
      break;
    default:
      {
      InstructionMark im(this);
      relocate(rh);
      call(entry);
      }
      break;
  }
}

// Inline-cache call: load the IC klass register with the non-oop sentinel
// and emit a patchable call with virtual-call relocation.
void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  patchable_set48(IC_Klass, (long)Universe::non_oop_word());
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  relocate(rh);
  patchable_call(entry);
}

// Normalize r to a C boolean: any nonzero value becomes 1.
void MacroAssembler::c2bool(Register r) {
  Label L;
  Assembler::beq(r, R0, L);
  delayed()->nop();
  move(r, 1);
  bind(L);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Debug trap target: dump register state and optionally pop a message box.
void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake a in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    // NOTE(review): these shadow the outer thread/saved_state and redo the
    // same transition -- harmless but redundant; kept as in the original.
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      tty->cr();
      findpc(eip);
      tty->cr();
#endif
      tty->print_cr("rax, = 0x%08x", rax);
      tty->print_cr("rbx, = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp, = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

// Runtime target of stop()/warn(): print the message (and bytecode counters
// when enabled), optionally under a message box.
void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) {
  if ( ShowMessageBoxOnError ) {
    JavaThreadState saved_state = JavaThread::current()->thread_state();
    JavaThread::current()->set_thread_state(_thread_in_vm);
    {
      // In order to get locks work, we need to fake a in_VM state
      ttyLocker ttyl;
      ::tty->print_cr("EXECUTION STOPPED: %s\n", msg);
      if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
        BytecodeCounter::print();
      }
    }
    ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state);
  }
  else
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
}


// Emit code that reports 'msg' via MacroAssembler::debug and then traps.
void MacroAssembler::stop(const char* msg) {
  li(A0, (long)msg);
#ifndef _LP64
  // reserve space for the single outgoing argument (O32 ABI)
  addiu(SP, SP, - 1 * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  // restore space for argument
  addiu(SP, SP, 1 * wordSize);
#endif
  brk(17);
}

// Like stop() but preserves all registers and continues execution.
void MacroAssembler::warn(const char* msg) {
#ifdef _LP64
  pushad();
  li(A0, (long)msg);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  popad();
#else
  pushad();
  addi(SP, SP, -4);
  sw(A0, SP, -1 * wordSize);
  li(A0, (long)msg);
  addi(SP, SP, -1 * wordSize);
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
  addi(SP, SP, 1 * wordSize);
  lw(A0, SP, -1 * wordSize);
  addi(SP, SP, 4);
  popad();
#endif
}

// Emit code that prints the name, value and emission pc of 'reg' at runtime.
void MacroAssembler::print_reg(Register reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)

  li(A0, (long)reg->name());
  if (reg == SP)
    addiu(A1, SP, wordSize * 23); //23 registers saved in pushad()
  else if (reg == A0)
    ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code!
  else
    move(A1, reg);
  li(A2, (long)cur_pc);
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();
}

// Emit code that prints the name and value of FP register 'reg' at runtime.
void MacroAssembler::print_reg(FloatRegister reg) {
  void * cur_pc = pc();
  pushad();
  NOT_LP64(push(FP);)
  // First print the register's name...
  li(A0, (long)reg->name());
  push(S2);
  move(AT, -(StackAlignmentInBytes));
  move(S2, SP);     // use S2 as a sender SP holder
  andr(SP, SP, AT); // align stack as required by ABI
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, S2);     // use S2 as a sender SP holder
  pop(S2);
  NOT_LP64(pop(FP);)
  popad();

  // ...then its value, passed in F12 per the ABI.
  pushad();
  NOT_LP64(push(FP);)
#if 1
  move(FP, SP);
  move(AT, -(StackAlignmentInBytes));
  andr(SP , SP , AT);
  mov_d(F12, reg);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type);
  delayed()->nop();
  move(SP, FP);
#else
  mov_s(F12, reg);
  //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type);
  //delayed()->nop();
#endif
  NOT_LP64(pop(FP);)
  popad();

#if 0
  pushad();
  NOT_LP64(push(FP);)
  char* b = new char[50];
  sprintf(b, " pc: %p\n", cur_pc);
  li(A0, (long)b);
  call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type);
  delayed()->nop();
  NOT_LP64(pop(FP);)
  popad();
#endif
}

// Add the signed immediate 'imm' to 'reg', going through AT when the value
// does not fit a 16-bit immediate. No-op for imm == 0.
void MacroAssembler::increment(Register reg, int imm) {
  if (!imm) return;
  if (is_simm16(imm)) {
#ifdef _LP64
    daddiu(reg, reg, imm);
#else
    addiu(reg, reg, imm);
#endif
  } else {
    move(AT, imm);
#ifdef _LP64
    daddu(reg, reg, AT);
#else
    addu(reg, reg, AT);
#endif
  }
}

void MacroAssembler::decrement(Register reg, int imm) {
  increment(reg, -imm);
}


// Zero-argument call_VM convenience wrapper.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}
MacroAssembler::call_VM(Register oop_result, 1.1088 + address entry_point, 1.1089 + Register arg_1, 1.1090 + bool check_exceptions) { 1.1091 + if (arg_1!=A1) move(A1, arg_1); 1.1092 + call_VM_helper(oop_result, entry_point, 1, check_exceptions); 1.1093 +} 1.1094 + 1.1095 +void MacroAssembler::call_VM(Register oop_result, 1.1096 + address entry_point, 1.1097 + Register arg_1, 1.1098 + Register arg_2, 1.1099 + bool check_exceptions) { 1.1100 + if (arg_1!=A1) move(A1, arg_1); 1.1101 + if (arg_2!=A2) move(A2, arg_2); 1.1102 + assert(arg_2 != A1, "smashed argument"); 1.1103 + call_VM_helper(oop_result, entry_point, 2, check_exceptions); 1.1104 +} 1.1105 + 1.1106 +void MacroAssembler::call_VM(Register oop_result, 1.1107 + address entry_point, 1.1108 + Register arg_1, 1.1109 + Register arg_2, 1.1110 + Register arg_3, 1.1111 + bool check_exceptions) { 1.1112 + if (arg_1!=A1) move(A1, arg_1); 1.1113 + if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); 1.1114 + if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); 1.1115 + call_VM_helper(oop_result, entry_point, 3, check_exceptions); 1.1116 +} 1.1117 + 1.1118 +void MacroAssembler::call_VM(Register oop_result, 1.1119 + Register last_java_sp, 1.1120 + address entry_point, 1.1121 + int number_of_arguments, 1.1122 + bool check_exceptions) { 1.1123 + call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); 1.1124 +} 1.1125 + 1.1126 +void MacroAssembler::call_VM(Register oop_result, 1.1127 + Register last_java_sp, 1.1128 + address entry_point, 1.1129 + Register arg_1, 1.1130 + bool check_exceptions) { 1.1131 + if (arg_1 != A1) move(A1, arg_1); 1.1132 + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 1.1133 +} 1.1134 + 1.1135 +void MacroAssembler::call_VM(Register oop_result, 1.1136 + Register last_java_sp, 1.1137 + address entry_point, 1.1138 + Register arg_1, 1.1139 + Register arg_2, 1.1140 + bool 
check_exceptions) { 1.1141 + if (arg_1 != A1) move(A1, arg_1); 1.1142 + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); 1.1143 + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 1.1144 +} 1.1145 + 1.1146 +void MacroAssembler::call_VM(Register oop_result, 1.1147 + Register last_java_sp, 1.1148 + address entry_point, 1.1149 + Register arg_1, 1.1150 + Register arg_2, 1.1151 + Register arg_3, 1.1152 + bool check_exceptions) { 1.1153 + if (arg_1 != A1) move(A1, arg_1); 1.1154 + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); 1.1155 + if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); 1.1156 + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 1.1157 +} 1.1158 + 1.1159 +void MacroAssembler::call_VM_base(Register oop_result, 1.1160 + Register java_thread, 1.1161 + Register last_java_sp, 1.1162 + address entry_point, 1.1163 + int number_of_arguments, 1.1164 + bool check_exceptions) { 1.1165 + 1.1166 + address before_call_pc; 1.1167 + // determine java_thread register 1.1168 + if (!java_thread->is_valid()) { 1.1169 +#ifndef OPT_THREAD 1.1170 + java_thread = T2; 1.1171 + get_thread(java_thread); 1.1172 +#else 1.1173 + java_thread = TREG; 1.1174 +#endif 1.1175 + } 1.1176 + // determine last_java_sp register 1.1177 + if (!last_java_sp->is_valid()) { 1.1178 + last_java_sp = SP; 1.1179 + } 1.1180 + // debugging support 1.1181 + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 1.1182 + assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); 1.1183 + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 1.1184 + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 1.1185 + 1.1186 + assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 
doesn't save ebp"); 1.1187 + 1.1188 + // set last Java frame before call 1.1189 + before_call_pc = (address)pc(); 1.1190 + set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); 1.1191 + 1.1192 + // do the call 1.1193 + move(A0, java_thread); 1.1194 + call(entry_point, relocInfo::runtime_call_type); 1.1195 + delayed()->nop(); 1.1196 + 1.1197 + // restore the thread (cannot use the pushed argument since arguments 1.1198 + // may be overwritten by C code generated by an optimizing compiler); 1.1199 + // however can use the register value directly if it is callee saved. 1.1200 +#ifndef OPT_THREAD 1.1201 + if (java_thread >=S0 && java_thread <=S7) { 1.1202 +#ifdef ASSERT 1.1203 + { Label L; 1.1204 + get_thread(AT); 1.1205 + beq(java_thread, AT, L); 1.1206 + delayed()->nop(); 1.1207 + stop("MacroAssembler::call_VM_base: edi not callee saved?"); 1.1208 + bind(L); 1.1209 + } 1.1210 +#endif 1.1211 + } else { 1.1212 + get_thread(java_thread); 1.1213 + } 1.1214 +#endif 1.1215 + 1.1216 + // discard thread and arguments 1.1217 + ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); 1.1218 + // reset last Java frame 1.1219 + reset_last_Java_frame(java_thread, false, true); 1.1220 + 1.1221 + check_and_handle_popframe(java_thread); 1.1222 + check_and_handle_earlyret(java_thread); 1.1223 + if (check_exceptions) { 1.1224 + // check for pending exceptions (java_thread is set upon return) 1.1225 + Label L; 1.1226 +#ifdef _LP64 1.1227 + ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); 1.1228 +#else 1.1229 + lw(AT, java_thread, in_bytes(Thread::pending_exception_offset())); 1.1230 +#endif 1.1231 + beq(AT, R0, L); 1.1232 + delayed()->nop(); 1.1233 + li(AT, before_call_pc); 1.1234 + push(AT); 1.1235 + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); 1.1236 + delayed()->nop(); 1.1237 + bind(L); 1.1238 + } 1.1239 + 1.1240 + // get oop result if there is one and reset the value in the thread 1.1241 + if 
(oop_result->is_valid()) { 1.1242 +#ifdef _LP64 1.1243 + ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); 1.1244 + sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); 1.1245 +#else 1.1246 + lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); 1.1247 + sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); 1.1248 +#endif 1.1249 + verify_oop(oop_result); 1.1250 + } 1.1251 +} 1.1252 + 1.1253 +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 1.1254 + 1.1255 + move(V0, SP); 1.1256 + //we also reserve space for java_thread here 1.1257 +#ifndef _LP64 1.1258 + daddi(SP, SP, (1 + number_of_arguments) * (- wordSize)); 1.1259 +#endif 1.1260 + move(AT, -(StackAlignmentInBytes)); 1.1261 + andr(SP, SP, AT); 1.1262 + call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); 1.1263 + 1.1264 +} 1.1265 + 1.1266 +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 1.1267 + call_VM_leaf_base(entry_point, number_of_arguments); 1.1268 +} 1.1269 + 1.1270 +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 1.1271 + if (arg_0 != A0) move(A0, arg_0); 1.1272 + call_VM_leaf(entry_point, 1); 1.1273 +} 1.1274 + 1.1275 +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 1.1276 + if (arg_0 != A0) move(A0, arg_0); 1.1277 + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); 1.1278 + call_VM_leaf(entry_point, 2); 1.1279 +} 1.1280 + 1.1281 +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 1.1282 + if (arg_0 != A0) move(A0, arg_0); 1.1283 + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); 1.1284 + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); 1.1285 + call_VM_leaf(entry_point, 3); 1.1286 +} 1.1287 
+void MacroAssembler::super_call_VM_leaf(address entry_point) { 1.1288 + MacroAssembler::call_VM_leaf_base(entry_point, 0); 1.1289 +} 1.1290 + 1.1291 + 1.1292 +void MacroAssembler::super_call_VM_leaf(address entry_point, 1.1293 + Register arg_1) { 1.1294 + if (arg_1 != A0) move(A0, arg_1); 1.1295 + MacroAssembler::call_VM_leaf_base(entry_point, 1); 1.1296 +} 1.1297 + 1.1298 + 1.1299 +void MacroAssembler::super_call_VM_leaf(address entry_point, 1.1300 + Register arg_1, 1.1301 + Register arg_2) { 1.1302 + if (arg_1 != A0) move(A0, arg_1); 1.1303 + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); 1.1304 + MacroAssembler::call_VM_leaf_base(entry_point, 2); 1.1305 +} 1.1306 +void MacroAssembler::super_call_VM_leaf(address entry_point, 1.1307 + Register arg_1, 1.1308 + Register arg_2, 1.1309 + Register arg_3) { 1.1310 + if (arg_1 != A0) move(A0, arg_1); 1.1311 + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); 1.1312 + if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); 1.1313 + MacroAssembler::call_VM_leaf_base(entry_point, 3); 1.1314 +} 1.1315 + 1.1316 +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 1.1317 +} 1.1318 + 1.1319 +void MacroAssembler::check_and_handle_popframe(Register java_thread) { 1.1320 +} 1.1321 + 1.1322 +void MacroAssembler::null_check(Register reg, int offset) { 1.1323 + if (needs_explicit_null_check(offset)) { 1.1324 + // provoke OS NULL exception if reg = NULL by 1.1325 + // accessing M[reg] w/o changing any (non-CC) registers 1.1326 + // NOTE: cmpl is plenty here to provoke a segv 1.1327 + lw(AT, reg, 0); 1.1328 +/* Jin 1.1329 + nop(); 1.1330 + nop(); 1.1331 + nop(); 1.1332 +*/ 1.1333 + // Note: should probably use testl(rax, Address(reg, 0)); 1.1334 + // may be shorter code (however, this version of 1.1335 + // testl needs to be implemented first) 1.1336 + } else { 1.1337 + // nothing to do, (later) access of M[reg + offset] 1.1338 + 
// will provoke OS NULL exception if reg = NULL 1.1339 + } 1.1340 +} 1.1341 + 1.1342 +void MacroAssembler::enter() { 1.1343 + push2(RA, FP); 1.1344 + move(FP, SP); 1.1345 +} 1.1346 + 1.1347 +void MacroAssembler::leave() { 1.1348 +#ifndef _LP64 1.1349 + //move(SP, FP); 1.1350 + //pop2(FP, RA); 1.1351 + addi(SP, FP, 2 * wordSize); 1.1352 + lw(RA, SP, - 1 * wordSize); 1.1353 + lw(FP, SP, - 2 * wordSize); 1.1354 +#else 1.1355 + daddi(SP, FP, 2 * wordSize); 1.1356 + ld(RA, SP, - 1 * wordSize); 1.1357 + ld(FP, SP, - 2 * wordSize); 1.1358 +#endif 1.1359 +} 1.1360 +/* 1.1361 +void MacroAssembler::os_breakpoint() { 1.1362 + // instead of directly emitting a breakpoint, call os:breakpoint for better debugability 1.1363 + // (e.g., MSVC can't call ps() otherwise) 1.1364 + call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 1.1365 +} 1.1366 +*/ 1.1367 +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 1.1368 + // determine java_thread register 1.1369 + if (!java_thread->is_valid()) { 1.1370 +#ifndef OPT_THREAD 1.1371 + java_thread = T1; 1.1372 + get_thread(java_thread); 1.1373 +#else 1.1374 + java_thread = TREG; 1.1375 +#endif 1.1376 + } 1.1377 + // we must set sp to zero to clear frame 1.1378 + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); 1.1379 + // must clear fp, so that compiled frames are not confused; it is possible 1.1380 + // that we need it only for debugging 1.1381 + if(clear_fp) 1.1382 + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); 1.1383 + 1.1384 + if (clear_pc) 1.1385 + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); 1.1386 +} 1.1387 + 1.1388 +void MacroAssembler::reset_last_Java_frame(bool clear_fp, 1.1389 + bool clear_pc) { 1.1390 + Register thread = TREG; 1.1391 +#ifndef OPT_THREAD 1.1392 + get_thread(thread); 1.1393 +#endif 1.1394 + // we must set sp to zero to clear frame 1.1395 + sd(R0, Address(thread, 
JavaThread::last_Java_sp_offset())); 1.1396 + // must clear fp, so that compiled frames are not confused; it is 1.1397 + // possible that we need it only for debugging 1.1398 + if (clear_fp) { 1.1399 + sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); 1.1400 + } 1.1401 + 1.1402 + if (clear_pc) { 1.1403 + sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); 1.1404 + } 1.1405 +} 1.1406 + 1.1407 +// Write serialization page so VM thread can do a pseudo remote membar. 1.1408 +// We use the current thread pointer to calculate a thread specific 1.1409 +// offset to write to within the page. This minimizes bus traffic 1.1410 +// due to cache line collision. 1.1411 +void MacroAssembler::serialize_memory(Register thread, Register tmp) { 1.1412 + move(tmp, thread); 1.1413 + srl(tmp, tmp,os::get_serialize_page_shift_count()); 1.1414 + move(AT, (os::vm_page_size() - sizeof(int))); 1.1415 + andr(tmp, tmp,AT); 1.1416 + sw(tmp,Address(tmp, (intptr_t)os::get_memory_serialize_page())); 1.1417 +} 1.1418 + 1.1419 +// Calls to C land 1.1420 +// 1.1421 +// When entering C land, the rbp, & rsp of the last Java frame have to be recorded 1.1422 +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp 1.1423 +// has to be reset to 0. This is required to allow proper stack traversal. 
1.1424 +void MacroAssembler::set_last_Java_frame(Register java_thread, 1.1425 + Register last_java_sp, 1.1426 + Register last_java_fp, 1.1427 + address last_java_pc) { 1.1428 + // determine java_thread register 1.1429 + if (!java_thread->is_valid()) { 1.1430 +#ifndef OPT_THREAD 1.1431 + java_thread = T2; 1.1432 + get_thread(java_thread); 1.1433 +#else 1.1434 + java_thread = TREG; 1.1435 +#endif 1.1436 + } 1.1437 + // determine last_java_sp register 1.1438 + if (!last_java_sp->is_valid()) { 1.1439 + last_java_sp = SP; 1.1440 + } 1.1441 + 1.1442 + // last_java_fp is optional 1.1443 + 1.1444 + if (last_java_fp->is_valid()) { 1.1445 + st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); 1.1446 + } 1.1447 + 1.1448 + // last_java_pc is optional 1.1449 + 1.1450 + if (last_java_pc != NULL) { 1.1451 + relocate(relocInfo::internal_pc_type); 1.1452 + patchable_set48(AT, (long)last_java_pc); 1.1453 + st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); 1.1454 + } 1.1455 + st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); 1.1456 +} 1.1457 + 1.1458 +void MacroAssembler::set_last_Java_frame(Register last_java_sp, 1.1459 + Register last_java_fp, 1.1460 + address last_java_pc) { 1.1461 + // determine last_java_sp register 1.1462 + if (!last_java_sp->is_valid()) { 1.1463 + last_java_sp = SP; 1.1464 + } 1.1465 + 1.1466 + Register thread = TREG; 1.1467 +#ifndef OPT_THREAD 1.1468 + get_thread(thread); 1.1469 +#endif 1.1470 + // last_java_fp is optional 1.1471 + if (last_java_fp->is_valid()) { 1.1472 + sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); 1.1473 + } 1.1474 + 1.1475 + // last_java_pc is optional 1.1476 + if (last_java_pc != NULL) { 1.1477 + Address java_pc(thread, 1.1478 + JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); 1.1479 + li(AT, (intptr_t)(last_java_pc)); 1.1480 + sd(AT, java_pc); 1.1481 + } 1.1482 + 1.1483 + sd(last_java_sp, Address(thread, 
JavaThread::last_Java_sp_offset())); 1.1484 +} 1.1485 + 1.1486 +////////////////////////////////////////////////////////////////////////////////// 1.1487 +#if INCLUDE_ALL_GCS 1.1488 + 1.1489 +void MacroAssembler::g1_write_barrier_pre(Register obj, 1.1490 +#ifndef _LP64 1.1491 + Register thread, 1.1492 +#endif 1.1493 + Register tmp, 1.1494 + Register tmp2, 1.1495 + bool tosca_live) { 1.1496 + Unimplemented(); 1.1497 +} 1.1498 + 1.1499 +void MacroAssembler::g1_write_barrier_post(Register store_addr, 1.1500 + Register new_val, 1.1501 +#ifndef _LP64 1.1502 + Register thread, 1.1503 +#endif 1.1504 + Register tmp, 1.1505 + Register tmp2) { 1.1506 + 1.1507 + Unimplemented(); 1.1508 +} 1.1509 + 1.1510 +#endif // INCLUDE_ALL_GCS 1.1511 +////////////////////////////////////////////////////////////////////////////////// 1.1512 + 1.1513 + 1.1514 +void MacroAssembler::store_check(Register obj) { 1.1515 + // Does a store check for the oop in register obj. The content of 1.1516 + // register obj is destroyed afterwards. 
1.1517 + store_check_part_1(obj); 1.1518 + store_check_part_2(obj); 1.1519 +} 1.1520 + 1.1521 +void MacroAssembler::store_check(Register obj, Address dst) { 1.1522 + store_check(obj); 1.1523 +} 1.1524 + 1.1525 + 1.1526 +// split the store check operation so that other instructions can be scheduled inbetween 1.1527 +void MacroAssembler::store_check_part_1(Register obj) { 1.1528 + BarrierSet* bs = Universe::heap()->barrier_set(); 1.1529 + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 1.1530 +#ifdef _LP64 1.1531 + dsrl(obj, obj, CardTableModRefBS::card_shift); 1.1532 +#else 1.1533 + shr(obj, CardTableModRefBS::card_shift); 1.1534 +#endif 1.1535 +} 1.1536 + 1.1537 +void MacroAssembler::store_check_part_2(Register obj) { 1.1538 + BarrierSet* bs = Universe::heap()->barrier_set(); 1.1539 + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 1.1540 + CardTableModRefBS* ct = (CardTableModRefBS*)bs; 1.1541 + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 1.1542 + 1.1543 + li(AT, (long)ct->byte_map_base); 1.1544 +#ifdef _LP64 1.1545 + dadd(AT, AT, obj); 1.1546 +#else 1.1547 + add(AT, AT, obj); 1.1548 +#endif 1.1549 + sb(R0, AT, 0); 1.1550 + sync(); 1.1551 +} 1.1552 + 1.1553 +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
1.1554 +void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, 1.1555 + Register t1, Register t2, Label& slow_case) { 1.1556 + assert_different_registers(obj, var_size_in_bytes, t1, t2, AT); 1.1557 + 1.1558 + Register end = t2; 1.1559 +#ifndef OPT_THREAD 1.1560 + Register thread = t1; 1.1561 + get_thread(thread); 1.1562 +#else 1.1563 + Register thread = TREG; 1.1564 +#endif 1.1565 + verify_tlab(t1, t2);//blows t1&t2 1.1566 + 1.1567 + ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset())); 1.1568 + 1.1569 + if (var_size_in_bytes == NOREG) { 1.1570 + // i dont think we need move con_size_in_bytes to a register first. 1.1571 + // by yjl 8/17/2005 1.1572 + assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first"); 1.1573 + addi(end, obj, con_size_in_bytes); 1.1574 + } else { 1.1575 + add(end, obj, var_size_in_bytes); 1.1576 + } 1.1577 + 1.1578 + ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset())); 1.1579 + sltu(AT, AT, end); 1.1580 + bne_far(AT, R0, slow_case); 1.1581 + delayed()->nop(); 1.1582 + 1.1583 + 1.1584 + // update the tlab top pointer 1.1585 + st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset())); 1.1586 + 1.1587 + // recover var_size_in_bytes if necessary 1.1588 + /*if (var_size_in_bytes == end) { 1.1589 + sub(var_size_in_bytes, end, obj); 1.1590 + }*/ 1.1591 + 1.1592 + verify_tlab(t1, t2); 1.1593 +} 1.1594 + 1.1595 +// Defines obj, preserves var_size_in_bytes 1.1596 +void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, 1.1597 + Register t1, Register t2, Label& slow_case) { 1.1598 + assert_different_registers(obj, var_size_in_bytes, t1, AT); 1.1599 + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq 1.1600 + // No allocation in the shared eden. 
1.1601 + b_far(slow_case); 1.1602 + delayed()->nop(); 1.1603 + } else { 1.1604 + 1.1605 +#ifndef _LP64 1.1606 + Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr())); 1.1607 + lui(t1, split_high((intptr_t)Universe::heap()->top_addr())); 1.1608 +#else 1.1609 + Address heap_top(t1); 1.1610 + li(t1, (long)Universe::heap()->top_addr()); 1.1611 +#endif 1.1612 + ld_ptr(obj, heap_top); 1.1613 + 1.1614 + Register end = t2; 1.1615 + Label retry; 1.1616 + 1.1617 + bind(retry); 1.1618 + if (var_size_in_bytes == NOREG) { 1.1619 + // i dont think we need move con_size_in_bytes to a register first. 1.1620 + assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first"); 1.1621 + addi(end, obj, con_size_in_bytes); 1.1622 + } else { 1.1623 + add(end, obj, var_size_in_bytes); 1.1624 + } 1.1625 + // if end < obj then we wrapped around => object too long => slow case 1.1626 + sltu(AT, end, obj); 1.1627 + bne_far(AT, R0, slow_case); 1.1628 + delayed()->nop(); 1.1629 + 1.1630 + li(AT, (long)Universe::heap()->end_addr()); 1.1631 + sltu(AT, AT, end); 1.1632 + bne_far(AT, R0, slow_case); 1.1633 + delayed()->nop(); 1.1634 + // Compare obj with the top addr, and if still equal, store the new top addr in 1.1635 + // end at the address of the top addr pointer. Sets ZF if was equal, and clears 1.1636 + // it otherwise. Use lock prefix for atomicity on MPs. 1.1637 + //if (os::is_MP()) { 1.1638 + // sync(); 1.1639 + //} 1.1640 + 1.1641 + // if someone beat us on the allocation, try again, otherwise continue 1.1642 + cmpxchg(end, heap_top, obj); 1.1643 + beq_far(AT, R0, retry); //by yyq 1.1644 + delayed()->nop(); 1.1645 + 1.1646 + } 1.1647 +} 1.1648 + 1.1649 +// C2 doesn't invoke this one. 
1.1650 +void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { 1.1651 + Register top = T0; 1.1652 + Register t1 = T1; 1.1653 +/* Jin: tlab_refill() is called in 1.1654 + 1.1655 + [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id); 1.1656 + 1.1657 + In generate_code_for(), T2 has been assigned as a register(length), which is used 1.1658 + after calling tlab_refill(); 1.1659 + Therefore, tlab_refill() should not use T2. 1.1660 + 1.1661 + Source: 1.1662 + 1.1663 +Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException 1.1664 + at java.lang.System.arraycopy(Native Method) 1.1665 + at java.util.Arrays.copyOf(Arrays.java:2799) <-- alloc_array 1.1666 + at sun.misc.Resource.getBytes(Resource.java:117) 1.1667 + at java.net.URLClassLoader.defineClass(URLClassLoader.java:273) 1.1668 + at java.net.URLClassLoader.findClass(URLClassLoader.java:205) 1.1669 + at java.lang.ClassLoader.loadClass(ClassLoader.java:321) 1.1670 + */ 1.1671 + Register t2 = T9; 1.1672 + Register t3 = T3; 1.1673 + Register thread_reg = T8; 1.1674 + Label do_refill, discard_tlab; 1.1675 + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq 1.1676 + // No allocation in the shared eden. 1.1677 + b(slow_case); 1.1678 + delayed()->nop(); 1.1679 + } 1.1680 + 1.1681 + get_thread(thread_reg); 1.1682 + 1.1683 + ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); 1.1684 + ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset())); 1.1685 + 1.1686 + // calculate amount of free space 1.1687 + sub(t1, t1, top); 1.1688 + shr(t1, LogHeapWordSize); 1.1689 + 1.1690 + // Retain tlab and allocate object in shared space if 1.1691 + // the amount free in the tlab is too large to discard. 
1.1692 + ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); 1.1693 + slt(AT, t2, t1); 1.1694 + beq(AT, R0, discard_tlab); 1.1695 + delayed()->nop(); 1.1696 + 1.1697 + // Retain 1.1698 + 1.1699 +#ifndef _LP64 1.1700 + move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment()); 1.1701 +#else 1.1702 + li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment()); 1.1703 +#endif 1.1704 + add(t2, t2, AT); 1.1705 + st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); 1.1706 + 1.1707 + if (TLABStats) { 1.1708 + // increment number of slow_allocations 1.1709 + lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); 1.1710 + addiu(AT, AT, 1); 1.1711 + sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); 1.1712 + } 1.1713 + b(try_eden); 1.1714 + delayed()->nop(); 1.1715 + 1.1716 + bind(discard_tlab); 1.1717 + if (TLABStats) { 1.1718 + // increment number of refills 1.1719 + lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); 1.1720 + addi(AT, AT, 1); 1.1721 + sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); 1.1722 + // accumulate wastage -- t1 is amount free in tlab 1.1723 + lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); 1.1724 + add(AT, AT, t1); 1.1725 + sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); 1.1726 + } 1.1727 + 1.1728 + // if tlab is currently allocated (top or end != null) then 1.1729 + // fill [top, end + alignment_reserve) with array object 1.1730 + beq(top, R0, do_refill); 1.1731 + delayed()->nop(); 1.1732 + 1.1733 + // set up the mark word 1.1734 + li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2)); 1.1735 + st_ptr(AT, top, oopDesc::mark_offset_in_bytes()); 1.1736 + 1.1737 + // set the length to the remaining space 1.1738 + addi(t1, t1, - typeArrayOopDesc::header_size(T_INT)); 1.1739 + addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve()); 
1.1740 + shl(t1, log2_intptr(HeapWordSize/sizeof(jint))); 1.1741 + sw(t1, top, arrayOopDesc::length_offset_in_bytes()); 1.1742 + 1.1743 + // set klass to intArrayKlass 1.1744 +#ifndef _LP64 1.1745 + lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr())); 1.1746 + lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr())); 1.1747 +#else 1.1748 + li(AT, (intptr_t)Universe::intArrayKlassObj_addr()); 1.1749 + ld_ptr(t1, AT, 0); 1.1750 +#endif 1.1751 + //st_ptr(t1, top, oopDesc::klass_offset_in_bytes()); 1.1752 + store_klass(top, t1); 1.1753 + 1.1754 + // refill the tlab with an eden allocation 1.1755 + bind(do_refill); 1.1756 + ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset())); 1.1757 + shl(t1, LogHeapWordSize); 1.1758 + // add object_size ?? 1.1759 + eden_allocate(top, t1, 0, t2, t3, slow_case); 1.1760 + 1.1761 + // Check that t1 was preserved in eden_allocate. 1.1762 +#ifdef ASSERT 1.1763 + if (UseTLAB) { 1.1764 + Label ok; 1.1765 + assert_different_registers(thread_reg, t1); 1.1766 + ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset())); 1.1767 + shl(AT, LogHeapWordSize); 1.1768 + beq(AT, t1, ok); 1.1769 + delayed()->nop(); 1.1770 + stop("assert(t1 != tlab size)"); 1.1771 + should_not_reach_here(); 1.1772 + 1.1773 + bind(ok); 1.1774 + } 1.1775 +#endif 1.1776 + st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset())); 1.1777 + st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); 1.1778 + add(top, top, t1); 1.1779 + addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 1.1780 + st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset())); 1.1781 + verify_tlab(t1, t2); 1.1782 + b(retry); 1.1783 + delayed()->nop(); 1.1784 +} 1.1785 + 1.1786 +static const double pi_4 = 0.7853981633974483; 1.1787 + 1.1788 +// the x86 version is to clumsy, i dont think we need that fuss. 
maybe i'm wrong, FIXME 1.1789 +// must get argument(a double) in F12/F13 1.1790 +//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { 1.1791 +//We need to preseve the register which maybe modified during the Call @Jerome 1.1792 +void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 1.1793 +//save all modified register here 1.1794 +// if (preserve_cpu_regs) { 1.1795 +// } 1.1796 +//FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9 1.1797 + pushad(); 1.1798 +//we should preserve the stack space before we call 1.1799 + addi(SP, SP, -wordSize * 2); 1.1800 + switch (trig){ 1.1801 + case 's' : 1.1802 + call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); 1.1803 + delayed()->nop(); 1.1804 + break; 1.1805 + case 'c': 1.1806 + call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); 1.1807 + delayed()->nop(); 1.1808 + break; 1.1809 + case 't': 1.1810 + call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); 1.1811 + delayed()->nop(); 1.1812 + break; 1.1813 + default:assert (false, "bad intrinsic"); 1.1814 + break; 1.1815 + 1.1816 + } 1.1817 + 1.1818 + addi(SP, SP, wordSize * 2); 1.1819 + popad(); 1.1820 +// if (preserve_cpu_regs) { 1.1821 +// } 1.1822 +} 1.1823 + 1.1824 +#ifdef _LP64 1.1825 +void MacroAssembler::li(Register rd, long imm) { 1.1826 + if (imm <= max_jint && imm >= min_jint) { 1.1827 + li32(rd, (int)imm); 1.1828 + } else if (julong(imm) <= 0xFFFFFFFF) { 1.1829 + assert_not_delayed(); 1.1830 + // lui sign-extends, so we can't use that. 
1.1831 + ori(rd, R0, julong(imm) >> 16); 1.1832 + dsll(rd, rd, 16); 1.1833 + ori(rd, rd, split_low(imm)); 1.1834 + //aoqi_test 1.1835 + //} else if ((imm > 0) && ((imm >> 48) == 0)) { 1.1836 + } else if ((imm > 0) && is_simm16(imm >> 32)) { 1.1837 + /* A 48-bit address */ 1.1838 + li48(rd, imm); 1.1839 + } else { 1.1840 + li64(rd, imm); 1.1841 + } 1.1842 +} 1.1843 +#else 1.1844 +void MacroAssembler::li(Register rd, long imm) { 1.1845 + li32(rd, (int)imm); 1.1846 +} 1.1847 +#endif 1.1848 + 1.1849 +void MacroAssembler::li32(Register reg, int imm) { 1.1850 + if (is_simm16(imm)) { 1.1851 + /* Jin: for imm < 0, we should use addi instead of addiu. 1.1852 + * 1.1853 + * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint) 1.1854 + * 1.1855 + * 78 move [int:-1|I] [a0|I] 1.1856 + * : daddi a0, zero, 0xffffffff (correct) 1.1857 + * : daddiu a0, zero, 0xffffffff (incorrect) 1.1858 + */ 1.1859 + if (imm >= 0) 1.1860 + addiu(reg, R0, imm); 1.1861 + else 1.1862 + addi(reg, R0, imm); 1.1863 + } else { 1.1864 + lui(reg, split_low(imm >> 16)); 1.1865 + if (split_low(imm)) 1.1866 + ori(reg, reg, split_low(imm)); 1.1867 + } 1.1868 +} 1.1869 + 1.1870 +#ifdef _LP64 1.1871 +void MacroAssembler::set64(Register d, jlong value) { 1.1872 + assert_not_delayed(); 1.1873 + 1.1874 + int hi = (int)(value >> 32); 1.1875 + int lo = (int)(value & ~0); 1.1876 + 1.1877 + if (value == lo) { // 32-bit integer 1.1878 + if (is_simm16(value)) { 1.1879 + daddiu(d, R0, value); 1.1880 + } else { 1.1881 + lui(d, split_low(value >> 16)); 1.1882 + if (split_low(value)) { 1.1883 + ori(d, d, split_low(value)); 1.1884 + } 1.1885 + } 1.1886 + } else if (hi == 0) { // hardware zero-extends to upper 32 1.1887 + ori(d, R0, julong(value) >> 16); 1.1888 + dsll(d, d, 16); 1.1889 + if (split_low(value)) { 1.1890 + ori(d, d, split_low(value)); 1.1891 + } 1.1892 + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 1.1893 + // 4 insts 1.1894 + li48(d, value); 1.1895 + } else { // li64 1.1896 + // 6 insts 
1.1897 + li64(d, value); 1.1898 + } 1.1899 +} 1.1900 + 1.1901 + 1.1902 +int MacroAssembler::insts_for_set64(jlong value) { 1.1903 + int hi = (int)(value >> 32); 1.1904 + int lo = (int)(value & ~0); 1.1905 + 1.1906 + int count = 0; 1.1907 + 1.1908 + if (value == lo) { // 32-bit integer 1.1909 + if (is_simm16(value)) { 1.1910 + //daddiu(d, R0, value); 1.1911 + count++; 1.1912 + } else { 1.1913 + //lui(d, split_low(value >> 16)); 1.1914 + count++; 1.1915 + if (split_low(value)) { 1.1916 + //ori(d, d, split_low(value)); 1.1917 + count++; 1.1918 + } 1.1919 + } 1.1920 + } else if (hi == 0) { // hardware zero-extends to upper 32 1.1921 + //ori(d, R0, julong(value) >> 16); 1.1922 + //dsll(d, d, 16); 1.1923 + count += 2; 1.1924 + if (split_low(value)) { 1.1925 + //ori(d, d, split_low(value)); 1.1926 + count++; 1.1927 + } 1.1928 + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 1.1929 + // 4 insts 1.1930 + //li48(d, value); 1.1931 + count += 4; 1.1932 + } else { // li64 1.1933 + // 6 insts 1.1934 + //li64(d, value); 1.1935 + count += 6; 1.1936 + } 1.1937 + 1.1938 + return count; 1.1939 +} 1.1940 + 1.1941 +void MacroAssembler::patchable_set48(Register d, jlong value) { 1.1942 + assert_not_delayed(); 1.1943 + 1.1944 + int hi = (int)(value >> 32); 1.1945 + int lo = (int)(value & ~0); 1.1946 + 1.1947 + int count = 0; 1.1948 + 1.1949 + if (value == lo) { // 32-bit integer 1.1950 + if (is_simm16(value)) { 1.1951 + daddiu(d, R0, value); 1.1952 + count += 1; 1.1953 + } else { 1.1954 + lui(d, split_low(value >> 16)); 1.1955 + count += 1; 1.1956 + if (split_low(value)) { 1.1957 + ori(d, d, split_low(value)); 1.1958 + count += 1; 1.1959 + } 1.1960 + } 1.1961 + } else if (hi == 0) { // hardware zero-extends to upper 32 1.1962 + ori(d, R0, julong(value) >> 16); 1.1963 + dsll(d, d, 16); 1.1964 + count += 2; 1.1965 + if (split_low(value)) { 1.1966 + ori(d, d, split_low(value)); 1.1967 + count += 1; 1.1968 + } 1.1969 + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 
1.1970 + // 4 insts 1.1971 + li48(d, value); 1.1972 + count += 4; 1.1973 + } else { // li64 1.1974 + tty->print_cr("value = 0x%x", value); 1.1975 + guarantee(false, "Not supported yet !"); 1.1976 + } 1.1977 + 1.1978 + for (count; count < 4; count++) { 1.1979 + nop(); 1.1980 + } 1.1981 +} 1.1982 + 1.1983 +void MacroAssembler::patchable_set32(Register d, jlong value) { 1.1984 + assert_not_delayed(); 1.1985 + 1.1986 + int hi = (int)(value >> 32); 1.1987 + int lo = (int)(value & ~0); 1.1988 + 1.1989 + int count = 0; 1.1990 + 1.1991 + if (value == lo) { // 32-bit integer 1.1992 + if (is_simm16(value)) { 1.1993 + daddiu(d, R0, value); 1.1994 + count += 1; 1.1995 + } else { 1.1996 + lui(d, split_low(value >> 16)); 1.1997 + count += 1; 1.1998 + if (split_low(value)) { 1.1999 + ori(d, d, split_low(value)); 1.2000 + count += 1; 1.2001 + } 1.2002 + } 1.2003 + } else if (hi == 0) { // hardware zero-extends to upper 32 1.2004 + ori(d, R0, julong(value) >> 16); 1.2005 + dsll(d, d, 16); 1.2006 + count += 2; 1.2007 + if (split_low(value)) { 1.2008 + ori(d, d, split_low(value)); 1.2009 + count += 1; 1.2010 + } 1.2011 + } else { 1.2012 + tty->print_cr("value = 0x%x", value); 1.2013 + guarantee(false, "Not supported yet !"); 1.2014 + } 1.2015 + 1.2016 + for (count; count < 3; count++) { 1.2017 + nop(); 1.2018 + } 1.2019 +} 1.2020 + 1.2021 +void MacroAssembler::patchable_call32(Register d, jlong value) { 1.2022 + assert_not_delayed(); 1.2023 + 1.2024 + int hi = (int)(value >> 32); 1.2025 + int lo = (int)(value & ~0); 1.2026 + 1.2027 + int count = 0; 1.2028 + 1.2029 + if (value == lo) { // 32-bit integer 1.2030 + if (is_simm16(value)) { 1.2031 + daddiu(d, R0, value); 1.2032 + count += 1; 1.2033 + } else { 1.2034 + lui(d, split_low(value >> 16)); 1.2035 + count += 1; 1.2036 + if (split_low(value)) { 1.2037 + ori(d, d, split_low(value)); 1.2038 + count += 1; 1.2039 + } 1.2040 + } 1.2041 + } else { 1.2042 + tty->print_cr("value = 0x%x", value); 1.2043 + guarantee(false, "Not supported yet 
!"); 1.2044 + } 1.2045 + 1.2046 + for (count; count < 2; count++) { 1.2047 + nop(); 1.2048 + } 1.2049 +} 1.2050 + 1.2051 +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 1.2052 + assert(UseCompressedClassPointers, "should only be used for compressed header"); 1.2053 + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1.2054 + 1.2055 + int klass_index = oop_recorder()->find_index(k); 1.2056 + RelocationHolder rspec = metadata_Relocation::spec(klass_index); 1.2057 + long narrowKlass = (long)Klass::encode_klass(k); 1.2058 + 1.2059 + relocate(rspec, Assembler::narrow_oop_operand); 1.2060 + patchable_set48(dst, narrowKlass); 1.2061 +} 1.2062 + 1.2063 + 1.2064 +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 1.2065 + assert(UseCompressedOops, "should only be used for compressed header"); 1.2066 + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 1.2067 + 1.2068 + int oop_index = oop_recorder()->find_index(obj); 1.2069 + RelocationHolder rspec = oop_Relocation::spec(oop_index); 1.2070 + 1.2071 + relocate(rspec, Assembler::narrow_oop_operand); 1.2072 + patchable_set48(dst, oop_index); 1.2073 +} 1.2074 + 1.2075 +void MacroAssembler::li64(Register rd, long imm) { 1.2076 + assert_not_delayed(); 1.2077 + lui(rd, imm >> 48); 1.2078 + ori(rd, rd, split_low(imm >> 32)); 1.2079 + dsll(rd, rd, 16); 1.2080 + ori(rd, rd, split_low(imm >> 16)); 1.2081 + dsll(rd, rd, 16); 1.2082 + ori(rd, rd, split_low(imm)); 1.2083 +} 1.2084 + 1.2085 +void MacroAssembler::li48(Register rd, long imm) { 1.2086 + assert_not_delayed(); 1.2087 + assert(is_simm16(imm >> 32), "Not a 48-bit address"); 1.2088 + lui(rd, imm >> 32); 1.2089 + ori(rd, rd, split_low(imm >> 16)); 1.2090 + dsll(rd, rd, 16); 1.2091 + ori(rd, rd, split_low(imm)); 1.2092 +} 1.2093 +#endif 1.2094 +// NOTE: i dont push eax as i486. 
// Unlike the x86 version we do not push a scratch register here
// (x86 saves eax because it uses eax as the jump register).

// Emit a run-time check that 'reg' holds a plausible oop, by calling the
// shared verify_oop subroutine (generated by verify_oop_subroutine() below)
// with the oop in A1 and an error string in A0.  No-op unless -XX:+VerifyOops.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;
  const char * b = NULL;
  stringStream ss;
  ss.print("verify_oop: %s: %s", reg->name(), s);
  b = code_string(ss.as_string());
#ifdef _LP64
  pushad();
  move(A1, reg);
  li(A0, (long)b);
  // Call indirectly through the stub-entry address cell to solve the
  // code-generation ordering problem (the stub may not exist yet).
  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  ld(T9, AT, 0);
  jalr(T9);
  delayed()->nop();
  popad();
#else
  // Manually save every register the subroutine may clobber.
  sw(T0, SP, - wordSize);
  sw(T1, SP, - 2*wordSize);
  sw(RA, SP, - 3*wordSize);
  sw(A0, SP, - 4*wordSize);
  sw(A1, SP, - 5*wordSize);
  sw(AT, SP, - 6*wordSize);
  sw(T9, SP, - 7*wordSize);
  addiu(SP, SP, - 7 * wordSize);
  move(A1, reg);
  li(A0, (long)b);
  // Call indirectly to solve the code-generation ordering problem.
  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  lw(T9, AT, 0);
  jalr(T9);
  delayed()->nop();
  lw(T0, SP, 6* wordSize);
  lw(T1, SP, 5* wordSize);
  lw(RA, SP, 4* wordSize);
  lw(A0, SP, 3* wordSize);
  lw(A1, SP, 2* wordSize);
  lw(AT, SP, 1* wordSize);
  lw(T9, SP, 0* wordSize);
  addiu(SP, SP, 7 * wordSize);
#endif
}


// Same as verify_oop(), but the oop is loaded from memory address 'addr'.
// When VerifyOops is off a single nop keeps the code size stable.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) {
    nop();
    return;
  }
  // Build the error message passed to verify_oop_subroutine in A0.
  const char * b = NULL;
  stringStream ss;
  ss.print("verify_oop_addr: %s", s);
  b = code_string(ss.as_string());

  st_ptr(T0, SP, - wordSize);
  st_ptr(T1, SP, - 2*wordSize);
  st_ptr(RA, SP, - 3*wordSize);
  st_ptr(A0, SP, - 4*wordSize);
  st_ptr(A1, SP, - 5*wordSize);
  st_ptr(AT, SP, - 6*wordSize);
  st_ptr(T9, SP, - 7*wordSize);
  ld_ptr(A1, addr); // addr may use SP, so load from it before change SP
  addiu(SP, SP, - 7 * wordSize);

  li(A0, (long)b);
  // Call indirectly to solve the code-generation ordering problem.
  li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address());
  ld_ptr(T9, AT, 0);
  jalr(T9);
  delayed()->nop();
  ld_ptr(T0, SP, 6* wordSize);
  ld_ptr(T1, SP, 5* wordSize);
  ld_ptr(RA, SP, 4* wordSize);
  ld_ptr(A0, SP, 3* wordSize);
  ld_ptr(A1, SP, 2* wordSize);
  ld_ptr(AT, SP, 1* wordSize);
  ld_ptr(T9, SP, 0* wordSize);
  addiu(SP, SP, 7 * wordSize);
}

// Body of the shared oop-verification subroutine.
// used registers : T0, T1
void MacroAssembler::verify_oop_subroutine() {
  // RA: return address
  // A0: char* error message
  // A1: oop object to verify

  Label exit, error;
  // Increment the global verify_oop counter.
  li(T0, (long)StubRoutines::verify_oop_count_addr());
  lw(AT, T0, 0);
#ifdef _LP64
  daddi(AT, AT, 1);
#else
  addi(AT, AT, 1);
#endif
  sw(AT, T0, 0);

  // Make sure the object is 'reasonable'.
  beq(A1, R0, exit); // if obj is NULL it is ok
  delayed()->nop();

  // Check if the oop is in the right area of memory.
  const uintptr_t oop_mask = Universe::verify_oop_mask();
  const uintptr_t oop_bits = Universe::verify_oop_bits();
  li(AT, oop_mask);
  andr(T0, A1, AT);
  li(AT, oop_bits);
  bne(T0, AT, error);
  delayed()->nop();

  // Make sure the klass is 'reasonable'.
  // Re-establish the heap base register for compressed-oop decoding.
  reinit_heapbase();
  load_klass(T0, A1);
  beq(T0, R0, error); // if klass is NULL it is broken
  delayed()->nop();
  #if 0
  //FIXME:wuhui.
  // Check if the klass is in the right area of memory
  //const int klass_mask = Universe::verify_klass_mask();
  //const int klass_bits = Universe::verify_klass_bits();
  const uintptr_t klass_mask = Universe::verify_klass_mask();
  const uintptr_t klass_bits = Universe::verify_klass_bits();

  li(AT, klass_mask);
  andr(T1, T0, AT);
  li(AT, klass_bits);
  bne(T1, AT, error);
  delayed()->nop();
  // make sure klass' klass is 'reasonable'
  //add for compressedoops
  load_klass(T0, T0);
  beq(T0, R0, error); // if klass' klass is NULL it is broken
  delayed()->nop();

  li(AT, klass_mask);
  andr(T1, T0, AT);
  li(AT, klass_bits);
  bne(T1, AT, error);
  delayed()->nop(); // if klass not in right area of memory it is broken too.
#endif
  // Return if everything seems ok.
  bind(exit);

  jr(RA);
  delayed()->nop();

  // Handle errors: report via MacroAssembler::debug with all registers saved.
  bind(error);
  pushad();
#ifndef _LP64
  addi(SP, SP, (-1) * wordSize);
#endif
  call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type);
  delayed()->nop();
#ifndef _LP64
  addiu(SP, SP, 1 * wordSize);
#endif
  popad();
  jr(RA);
  delayed()->nop();
}

// Debug-only sanity check of the current thread's TLAB:
// asserts start <= top <= end.  t1/t2 are scratch.
void MacroAssembler::verify_tlab(Register t1, Register t2) {
#ifdef ASSERT
  assert_different_registers(t1, t2, AT);
  if (UseTLAB && VerifyOops) {
    Label next, ok;

    get_thread(t1);

    ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset()));
    ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset()));
    sltu(AT, t2, AT);
    beq(AT, R0, next);
    delayed()->nop();

    stop("assert(top >= start)");

    bind(next);
    ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset()));
    sltu(AT, AT, t2);
    beq(AT, R0, ok);
    delayed()->nop();

    stop("assert(top <= end)");

    bind(ok);

  }
#endif
}

// Return the delayed value at delayed_value_addr (+offset): as a constant
// if it is already known, otherwise as a register expression in tmp.
// NOTE(review): the indirect load of *delayed_value_addr into tmp is
// commented out below, so tmp is never written here before being returned
// -- confirm callers pre-load tmp, otherwise the register path returns an
// undefined value.
 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                       Register tmp,
                                                       int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);
  AddressLiteral a(delayed_value_addr);
  // load indirectly to solve generation ordering problem
  //movptr(tmp, ExternalAddress((address) delayed_value_addr));
  //ld(tmp, a);
  if (offset != 0)
    daddi(tmp,tmp, offset);

  return RegisterOrConstant(tmp);
 }

// Byte-swap a halfword in place, producing a sign-extended result
// (sll 24 / sra 16 leaves the new high byte sign-extended).
void MacroAssembler::hswap(Register reg) {
  srl(AT, reg, 8);
  sll(reg, reg, 24);
  sra(reg, reg, 16);
  orr(reg, reg, AT);
}

// Byte-swap a halfword in place, producing a zero-extended result.
void MacroAssembler::huswap(Register reg) {
#ifdef _LP64
  dsrl(AT, reg, 8);
  dsll(reg, reg, 24);
  dsrl(reg, reg, 16);
  orr(reg, reg, AT);
  andi(reg, reg, 0xffff);  // mask to an unsigned 16-bit result
#else
  srl(AT, reg, 8);
  sll(reg, reg, 24);
  srl(reg, reg, 16);
  orr(reg, reg, AT);
#endif
}

// Byte-swap a 32-bit word in place using only one extra register (AT).
void MacroAssembler::swap(Register reg) {
  srl(AT, reg, 8);
  sll(reg, reg, 24);
  orr(reg, reg, AT);
  //reg : 4 1 2 3
  srl(AT, AT, 16);
  xorr(AT, AT, reg);
  andi(AT, AT, 0xff);
  //AT : 0 0 0 1^3
  xorr(reg, reg, AT);
  //reg : 4 1 2 1
  sll(AT, AT, 16);
  xorr(reg, reg, AT);
  //reg : 4 3 2 1
}

#ifdef _LP64

/* 32-bit compare-and-swap on MIPS64.
 *
 * cas_int must compare only 32 bits of the memory value.  MIPS64 provides
 * ll/sc (32-bit load-linked/store-conditional), which this implementation
 * uses directly; the longer lld/scd-based simulation (splitting a 64-bit
 * load into halves and merging the low 32 bits of the new value) is
 * therefore not needed here.
 */

// 32-bit CAS: if *dest == c_reg, store x_reg; on success AT != 0 and the
// loop exits via 'done', on failure c_reg receives the observed value and
// AT is cleared.  sync() is emitted around the ll/sc unless Use3A2000.
void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) {
  // MIPS64 can use ll/sc for 32-bit atomic memory access.
  Label done, again, nequal;

  bind(again);

  if(!Use3A2000) sync();
  ll(AT, dest);
  bne(AT, c_reg, nequal);
  delayed()->nop();

  move(AT, x_reg);
  sc(AT, dest);
  beq(AT, R0, again);  // store-conditional failed: retry
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged: return the observed value in c_reg and clear AT
  bind(nequal);
  sync();
  move(c_reg, AT);
  move(AT, R0);

  bind(done);
}
#endif // cmpxchg32

// Pointer-width CAS (lld/scd on LP64, ll/sc on 32-bit).  Same protocol as
// cmpxchg32: AT != 0 on success; on failure c_reg gets the observed value.
void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {
  Label done, again, nequal;

  bind(again);
#ifdef _LP64
  if(!Use3A2000) sync();
  lld(AT, dest);
#else
  if(!Use3A2000) sync();
  ll(AT, dest);
#endif
  bne(AT, c_reg, nequal);
  delayed()->nop();

  move(AT, x_reg);
#ifdef _LP64
  scd(AT, dest);
#else
  sc(AT, dest);
#endif
  beq(AT, R0, again);  // store-conditional failed: retry
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  sync();
  move(c_reg, AT);
  move(AT, R0);

  bind(done);
}

// 64-bit CAS with the value and the expected value each split across a
// lo/hi register pair.  The pairs are first packed into single 64-bit
// registers (hi in the upper 32 bits), then lld/scd performs the CAS.
void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) {
  Label done, again, nequal;

  // Pack x_regHi:x_regLo into x_reg (aliases x_regLo).
  Register x_reg = x_regLo;
  dsll32(x_regHi, x_regHi, 0);
  dsll32(x_regLo, x_regLo, 0);
  dsrl32(x_regLo, x_regLo, 0);   // zero-extend the low half
  orr(x_reg, x_regLo, x_regHi);

  // Pack c_regHi:c_regLo into c_reg (aliases c_regLo).
  Register c_reg = c_regLo;
  dsll32(c_regHi, c_regHi, 0);
  dsll32(c_regLo, c_regLo, 0);
  dsrl32(c_regLo, c_regLo, 0);   // zero-extend the low half
  orr(c_reg, c_regLo, c_regHi);

  bind(again);

  if(!Use3A2000) sync();
  lld(AT, dest);
  bne(AT, c_reg, nequal);
  delayed()->nop();

  dadd(AT, x_reg, R0);
  scd(AT, dest);
  beq(AT, R0, again);  // store-conditional failed: retry
  delayed()->nop();
  b(done);
  delayed()->nop();

  // not xchged
  bind(nequal);
  sync();
  dadd(c_reg, AT, R0);
  dadd(AT, R0, R0);
  bind(done);
}

// Single-precision floating remainder: fd = fs - trunc(fs/ft)*ft.
// All three float registers must be distinct from tmp.
void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  assert_different_registers(tmp, fs, ft);
  div_s(tmp, fs, ft);
  trunc_l_s(tmp, tmp);
  cvt_s_l(tmp, tmp);
  mul_s(tmp, tmp, ft);
  sub_s(fd, fs, tmp);
}

// Double-precision floating remainder: fd = fs - trunc(fs/ft)*ft.
// All three float registers must be distinct from tmp.
void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) {
  assert_different_registers(tmp, fs, ft);
  div_d(tmp, fs, ft);
  trunc_l_d(tmp, tmp);
  cvt_d_l(tmp, tmp);
  mul_d(tmp, tmp, ft);
  sub_d(fd, fs, tmp);
}

// Fast_Lock and Fast_Unlock used by C2

// Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that
// we inline both the stack-locking fast-path and the inflated fast path.
//
// See also: cmpFastLock and cmpFastUnlock.
//
// What follows is a specialized inline transliteration of the code
// in slow_enter() and slow_exit().  If we're concerned about I$ bloat
// another option would be to emit TrySlowEnter and TrySlowExit methods
// at startup-time.  These methods would accept arguments as
// (Obj, Self, box, Scratch) and return success-failure indications in
// a condition register.  Fast_Lock and Fast_Unlock would simply
// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
// In practice, however, the # of lock sites is bounded and is usually small.
1.2554 +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer 1.2555 +// if the processor uses simple bimodal branch predictors keyed by EIP 1.2556 +// Since the helper routines would be called from multiple synchronization 1.2557 +// sites. 1.2558 +// 1.2559 +// An even better approach would be write "MonitorEnter()" and "MonitorExit()" 1.2560 +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites 1.2561 +// to those specialized methods. That'd give us a mostly platform-independent 1.2562 +// implementation that the JITs could optimize and inline at their pleasure. 1.2563 +// Done correctly, the only time we'd need to cross to native could would be 1.2564 +// to park() or unpark() threads. We'd also need a few more unsafe operators 1.2565 +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and 1.2566 +// (b) explicit barriers or fence operations. 1.2567 +// 1.2568 +// TODO: 1.2569 +// 1.2570 +// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). 1.2571 +// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. 1.2572 +// Given TLAB allocation, Self is usually manifested in a register, so passing it into 1.2573 +// the lock operators would typically be faster than reifying Self. 1.2574 +// 1.2575 +// * Ideally I'd define the primitives as: 1.2576 +// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. 1.2577 +// fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED 1.2578 +// Unfortunately ADLC bugs prevent us from expressing the ideal form. 1.2579 +// Instead, we're stuck with a rather awkward and brittle register assignments below. 1.2580 +// Furthermore the register assignments are overconstrained, possibly resulting in 1.2581 +// sub-optimal code near the synchronization site. 1.2582 +// 1.2583 +// * Eliminate the sp-proximity tests and just use "== Self" tests instead. 
1.2584 +// Alternately, use a better sp-proximity test. 1.2585 +// 1.2586 +// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. 1.2587 +// Either one is sufficient to uniquely identify a thread. 1.2588 +// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. 1.2589 +// 1.2590 +// * Intrinsify notify() and notifyAll() for the common cases where the 1.2591 +// object is locked by the calling thread but the waitlist is empty. 1.2592 +// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). 1.2593 +// 1.2594 +// * use jccb and jmpb instead of jcc and jmp to improve code density. 1.2595 +// But beware of excessive branch density on AMD Opterons. 1.2596 +// 1.2597 +// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success 1.2598 +// or failure of the fast-path. If the fast-path fails then we pass 1.2599 +// control to the slow-path, typically in C. In Fast_Lock and 1.2600 +// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 1.2601 +// will emit a conditional branch immediately after the node. 1.2602 +// So we have branches to branches and lots of ICC.ZF games. 1.2603 +// Instead, it might be better to have C2 pass a "FailureLabel" 1.2604 +// into Fast_Lock and Fast_Unlock. In the case of success, control 1.2605 +// will drop through the node. ICC.ZF is undefined at exit. 
// (continuation of the Fast_Lock/Fast_Unlock commentary above: in the case
// of failure, the node would branch directly to the FailureLabel)


// Inline fast-path monitor enter, used by C2's cmpFastLock.
//
// obj: object to lock
// box: on-stack box address (displaced header location) - KILLED
// tmp: scratch -- KILLED
// scr: scratch -- KILLED
//
// On exit AT == 1 indicates success; AT == 0 forces the slow path.
void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {

  // Ensure the register assignments are disjoint.
  guarantee (objReg != boxReg, "") ;
  guarantee (objReg != tmpReg, "") ;
  guarantee (objReg != scrReg, "") ;
  guarantee (boxReg != tmpReg, "") ;
  guarantee (boxReg != scrReg, "") ;


  block_comment("FastLock");
  if (PrintBiasedLockingStatistics) {
    push(tmpReg);
    atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
    pop(tmpReg);
  }

  if (EmitSync & 1) {
    // Force the slow path unconditionally.
    move(AT, 0x0);
    return;
  } else
  if (EmitSync & 2) {
    // Simple stack-lock-only variant.
    Label DONE_LABEL ;
    if (UseBiasedLocking) {
      // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
    }

    ld(tmpReg, Address(objReg, 0)) ; // fetch markword
    ori(tmpReg, tmpReg, 0x1);
    sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS

    cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
    bne(AT, R0, DONE_LABEL);
    delayed()->nop();

    // Recursive locking
    dsubu(tmpReg, tmpReg, SP);
    li(AT, (7 - os::vm_page_size() ));
    andr(tmpReg, tmpReg, AT);
    sd(tmpReg, Address(boxReg, 0));
    bind(DONE_LABEL) ;
  } else {
    // Possible cases that we'll encounter in fast_lock
    // ------------------------------------------------
    // * Inflated
    //    -- unlocked
    //    -- Locked
    //       = by self
    //       = by other
    // * biased
    //    -- by Self
    //    -- by other
    // * neutral
    // * stack-locked
    //    -- by self
    //       = sp-proximity test hits
    //       = sp-proximity test generates false-negative
    //    -- by other
    //

    Label IsInflated, DONE_LABEL, PopDone ;

    // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
    // order to reduce the number of conditional branches in the most common cases.
    // Beware -- there's a subtle invariant that fetch of the markword
    // at [FETCH], below, will never observe a biased encoding (*101b).
    // If this invariant is not held we risk exclusion (safety) failure.
    if (UseBiasedLocking && !UseOptoBiasInlining) {
      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
    }

    ld(tmpReg, Address(objReg, 0)) ; // [FETCH] the markword of the object
    andi(AT, tmpReg, markOopDesc::monitor_value);
    bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
    delayed()->nop();

    // Attempt stack-locking ...
    ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
    sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS

    cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg
    //AT == 1: unlocked

    if (PrintBiasedLockingStatistics) {
      Label L;
      beq(AT, R0, L);
      delayed()->nop();
      push(T0);
      push(T1);
      atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
      pop(T1);
      pop(T0);
      bind(L);
    }
    bne(AT, R0, DONE_LABEL);
    delayed()->nop();

    // Recursive locking
    // The object is stack-locked: markword contains stack pointer to BasicLock.
    // Locked by current thread if difference with current SP is less than one page.
    dsubu(tmpReg, tmpReg, SP);
    li(AT, 7 - os::vm_page_size() );
    andr(tmpReg, tmpReg, AT);
    sd(tmpReg, Address(boxReg, 0));
    if (PrintBiasedLockingStatistics) {
      Label L;
      // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
      bne(tmpReg, R0, L);
      delayed()->nop();
      push(T0);
      push(T1);
      atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
      pop(T1);
      pop(T0);
      bind(L);
    }
    sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0

    b(DONE_LABEL) ;
    delayed()->nop();

    bind(IsInflated) ;
    // The object's monitor m is unlocked iff m->owner == NULL,
    // otherwise m->owner may contain a thread or a stack address.

    // TODO: someday avoid the ST-before-CAS penalty by
    // relocating (deferring) the following ST.
    // We should also think about trying a CAS without having
    // fetched _owner.  If the CAS is successful we may
    // avoid an RTO->RTS upgrade on the $line.
    // Without cast to int32_t a movptr would destroy the register holding obj.
    li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
    sd(AT, Address(boxReg, 0));

    move(boxReg, tmpReg) ;
    ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
    // if (m->owner != 0) => AT = 0, goto slow path.
    move(AT, R0);
    bne(tmpReg, R0, DONE_LABEL);
    delayed()->nop();

#ifndef OPT_THREAD
    get_thread (TREG) ;
#endif
    // It's inflated and appears unlocked: try to CAS our thread into _owner.
    cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
    // Intentional fall-through into DONE_LABEL ...


    // DONE_LABEL is a hot target - we'd really like to place it at the
    // start of cache line by padding with NOPs.
    // See the AMD and Intel software optimization manuals for the
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    bind(DONE_LABEL);

    // At DONE_LABEL the AT is set as follows ...
    // Fast_Unlock uses the same protocol.
    // AT == 1 -> Success
    // AT == 0 -> Failure - force control through the slow-path

    // Avoid branch-to-branch on AMD processors
    // This appears to be superstition.
    if (EmitSync & 32) nop() ;

  }
}

// obj: object to unlock
// box: box address (displaced header location), killed. Must be EAX.
// rbx,: killed tmp; cannot be obj nor box.
//
// Some commentary on balanced locking:
//
// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
// Methods that don't have provably balanced locking are forced to run in the
// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
// The interpreter provides two properties:
// I1: At return-time the interpreter automatically and quietly unlocks any
//     objects acquired the current activation (frame).  Recall that the
//     interpreter maintains an on-stack list of locks currently held by
//     a frame.
// I2: If a method attempts to unlock an object that is not held by the
//     the frame the interpreter throws IMSX.
//
// Lets say A(), which has provably balanced locking, acquires O and then calls B().
// B() doesn't have provably balanced locking so it runs in the interpreter.
// Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
// is still locked by A().
//
// The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
// should not be unlocked by "normal" java-level locking and vice-versa.  The specification
// doesn't specify what will occur if a program engages in such mixed-mode locking, however.

// Emit the in-line fast path for monitor exit (companion of fast_lock).
// objReg: object being unlocked; boxReg: BasicLock box address (killed);
// tmpReg: scratch (killed).  All three must be distinct.
// On exit AT == 1 -> success, AT == 0 -> caller must take the slow path
// (same AT protocol the fast_lock code above establishes at DONE_LABEL).
void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {

  guarantee (objReg != boxReg, "") ;
  guarantee (objReg != tmpReg, "") ;
  guarantee (boxReg != tmpReg, "") ;

  block_comment("FastUnlock");

  if (EmitSync & 4) {
    // Disable - inhibit all inlining.  Force control through the slow-path
    move(AT, 0x0);
    return;
  } else
  if (EmitSync & 8) {
    Label DONE_LABEL ;
    if (UseBiasedLocking) {
      biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }
    // classic stack-locking code ...
    // A zero displaced header means recursive stack-lock: nothing to undo.
    ld(tmpReg, Address(boxReg, 0)) ;
    beq(tmpReg, R0, DONE_LABEL) ;
    move(AT, 0x1);  // delay slot
    // NOTE(review): this move is emitted after beq without delayed();
    // presumably the assembler places it in the branch delay slot — confirm.

    cmpxchg(tmpReg, Address(objReg, 0), boxReg);   // Uses EAX which is box
    bind(DONE_LABEL);
  } else {
    Label DONE_LABEL, Stacked, CheckSucc, Inflated ;

    // Critically, the biased locking test must have precedence over
    // and appear before the (box->dhw == 0) recursive stack-lock test.
    if (UseBiasedLocking && !UseOptoBiasInlining) {
      biased_locking_exit(objReg, tmpReg, DONE_LABEL);
    }

    ld(AT, Address(boxReg, 0)) ;       // Examine the displaced header
    beq(AT, R0, DONE_LABEL) ;          // 0 indicates recursive stack-lock
    delayed()->daddiu(AT, R0, 0x1);    // delay slot: preset AT = 1 (success)

    ld(tmpReg, Address(objReg, 0)) ;   // Examine the object's markword
    andi(AT, tmpReg, markOopDesc::monitor_value) ;  // Inflated?
    beq(AT, R0, Stacked) ;             // monitor bit clear -> stack-locked
    delayed()->nop();

    bind(Inflated) ;
    // It's inflated.
    // Despite our balanced locking property we still check that m->_owner == Self
    // as java routines or native JNI code called by this thread might
    // have released the lock.
    // Refer to the comments in synchronizer.cpp for how we might encode extra
    // state in _succ so we can avoid fetching EntryList|cxq.
    //
    // I'd like to add more cases in fast_lock() and fast_unlock() --
    // such as recursive enter and exit -- but we have to be wary of
    // I$ bloat, T$ effects and BP$ effects.
    //
    // If there's no contention try a 1-0 exit.  That is, exit without
    // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
    // we detect and recover from the race that the 1-0 exit admits.
    //
    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
    // before it STs null into _owner, releasing the lock.  Updates
    // to data protected by the critical section must be visible before
    // we drop the lock (and thus before any other thread could acquire
    // the lock and observe the fields protected by the lock).
    // IA32's memory-model is SPO, so STs are ordered with respect to
    // each other and there's no need for an explicit barrier (fence).
    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
#ifndef OPT_THREAD
    get_thread (TREG) ;
#endif

    // It's inflated
    // boxReg = (_owner ^ Self) | _recursions; nonzero means either another
    // owner or a recursive enter -> slow path with AT preset to 0.
    ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
    xorr(boxReg, boxReg, TREG);

    ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
    orr(boxReg, boxReg, AT);

    move(AT, R0);
    bne(boxReg, R0, DONE_LABEL);
    delayed()->nop();

    // 1-0 exit is only legal when both cxq and EntryList are empty.
    ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
    ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
    orr(boxReg, boxReg, AT);

    move(AT, R0);
    bne(boxReg, R0, DONE_LABEL);
    delayed()->nop();

    sync();                            // release barrier before clearing _owner
    sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
    move(AT, 0x1);
    b(DONE_LABEL);
    delayed()->nop();

    bind  (Stacked);
    // Stack-locked: restore the displaced header with CAS.
    ld(tmpReg, Address(boxReg, 0)) ;
    //if (os::is_MP()) { sync(); }
    cmpxchg(tmpReg, Address(objReg, 0), boxReg);

    if (EmitSync & 65536) {
      bind (CheckSucc);
    }

    bind(DONE_LABEL);

    // Avoid branch to branch on AMD processors
    if (EmitSync & 32768) { nop() ; }
  }
}

// Pad the code buffer with nops until the current offset is a multiple of
// modulus.
void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) nop();
}


// FPU-state verification is not implemented on this port; intentional no-op.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  //Unimplemented();
}

#ifdef _LP64
Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};

/* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
#else
Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};

// NOTE(review): declared as Register[] (not FloatRegister[]) — on a !_LP64
// build the swc1/lwc1 calls below would receive the wrong type; confirm the
// 32-bit configuration is ever built.
Register caller_saved_fpu_registers[] = {};
#endif

//We preserve all caller-saved register
// Push all caller-saved GPRs then all caller-saved FPRs onto the stack.
// Must be kept in exact mirror order with popad() below.
void MacroAssembler::pushad(){
  int i;

  /* Fixed-point registers */
  int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  // NOTE(review): daddi is used even in the !_LP64 branch below — verify
  // this is acceptable for a 32-bit target.
  daddi(SP, SP, -1 * len * wordSize);
  for (i = 0; i < len; i++)
  {
#ifdef _LP64
    sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#else
    sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }

  /* Floating-point registers */
  len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  daddi(SP, SP, -1 * len * wordSize);
  for (i = 0; i < len; i++)
  {
#ifdef _LP64
    sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#else
    swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }
};

// Pop all caller-saved FPRs then all caller-saved GPRs; exact inverse of
// pushad() above.
void MacroAssembler::popad(){
  int i;

  /* Floating-point registers */
  int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  for (i = 0; i < len; i++)
  {
#ifdef _LP64
    ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#else
    lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }
  daddi(SP, SP, len * wordSize);

  /* Fixed-point registers */
  len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  for (i = 0; i < len; i++)
  {
#ifdef _LP64
    ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#else
    lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }
  daddi(SP, SP, len * wordSize);
};

// Push two registers as one aligned 2-word frame (reg1 ends up above reg2).
void MacroAssembler::push2(Register reg1, Register reg2) {
#ifdef _LP64
  daddi(SP, SP, -16);
  sd(reg2, SP, 0);
  sd(reg1, SP, 8);
#else
  addi(SP, SP, -8);
  sw(reg2, SP, 0);
  sw(reg1, SP, 4);
#endif
}

// Pop two registers pushed by push2() (same order: reg1 from the higher slot).
void MacroAssembler::pop2(Register reg1, Register reg2) {
#ifdef _LP64
  ld(reg1, SP, 0);
  ld(reg2, SP, 8);
  daddi(SP, SP, 16);
#else
  lw(reg1, SP, 0);
  lw(reg2, SP, 4);
  addi(SP, SP, 8);
#endif
}

//for UseCompressedOops Option
// Load the klass pointer of the object in src into dst, decompressing it
// when compressed class pointers are in use.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if(UseCompressedClassPointers){
    lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_klass_not_null(dst);
  } else
#endif
  ld(dst, src, oopDesc::klass_offset_in_bytes());
}

// Store the klass pointer src into the object dst, compressing it first when
// compressed class pointers are in use.  Note: src is clobbered (encoded in
// place) on the compressed path.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if(UseCompressedClassPointers){
    encode_klass_not_null(src);
    sw(src, dst, oopDesc::klass_offset_in_bytes());
  } else {
#endif
    // NOTE(review): on a !_LP64 build the `} else {` above is compiled out
    // but the closing `}` below is not, leaving unbalanced braces — confirm
    // whether the 32-bit configuration is ever built (cf. x86 which uses a
    // braceless `} else` for this pattern).
    sd(src, dst, oopDesc::klass_offset_in_bytes());
  }
}

// Load the prototype mark word of src's klass into dst (used by biased
// locking).
void MacroAssembler::load_prototype_header(Register dst, Register src) {
  load_klass(dst, src);
  ld(dst, Address(dst, Klass::prototype_header_offset()));
}

#ifdef _LP64
// Zero/overwrite the 32-bit klass-gap field that exists only when compressed
// class pointers are enabled.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedClassPointers) {
    sw(src, dst, oopDesc::klass_gap_offset_in_bytes());
  }
}

// Load an oop field from src into dst, decompressing when compressed oops
// are in use.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
  if(UseCompressedOops){
    lwu(dst, src);
    decode_heap_oop(dst);
  } else{
    ld(dst, src);
  }
}

// Store oop src into field dst, compressing when compressed oops are in use.
// src is clobbered (encoded in place) on the compressed path.
void MacroAssembler::store_heap_oop(Address dst, Register src){
  if(UseCompressedOops){
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    sw(src, dst);
  } else{
    sd(src, dst);
  }
}

#ifdef ASSERT
// Debug-build sanity checks used by the encode/decode helpers below.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
}
#endif


// Algorithm must match oop.inline.hpp encode_heap_oop.
// Compress the oop in r in place: subtract the heap base (kept in
// S5_heapbase) and shift right by the object-alignment shift.  NULL is
// preserved via the movz (r == 0 selects S5_heapbase so the subtraction
// yields 0).
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: only the shift is needed.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(r, LogMinObjAlignmentInBytes);
    }
    return;
  }

  movz(r, S5_heapbase, r);   // map NULL -> heapbase so (r - base) == 0
  dsub(r, r, S5_heapbase);
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }
}

// Two-register form of encode_heap_oop: dst = compressed(src), src preserved
// when dst != src.  NULL src produces NULL dst (final movz).
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(src, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      movz(dst, S5_heapbase, dst);
      dsub(dst, dst, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
    } else {
      dsub(dst, src, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
      movz(dst, R0, src);    // src == NULL -> dst = 0
    }
  }
}

// Compress a known-non-NULL oop in place; skips the NULL-preserving movz.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(r, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    dsub(r, r, S5_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }

}

// Two-register form: dst = compressed(src), src known non-NULL.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(src, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");

  if (Universe::narrow_oop_base() != NULL) {
    dsub(dst, src, S5_heapbase);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(dst, LogMinObjAlignmentInBytes);
    }
  } else {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  }
}

// Decompress the narrow oop in r in place: shift left and add the heap base.
// A zero narrow oop stays NULL (original value saved in AT, restored by movz).
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
  } else {
    move(AT, r);             // remember the narrow value for the NULL test
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
    dadd(r, r, S5_heapbase);
    movz(r, R0, AT);         // narrow == 0 -> r = NULL
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}

// Two-register form: dst = decompressed(src); NULL-preserving.
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (dst != src) nop(); // DON'T DELETE THIS GUY.
      dsll(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      move(AT, dst);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shl(dst, LogMinObjAlignmentInBytes);
      }
      dadd(dst, dst, S5_heapbase);
      movz(dst, R0, AT);
    } else {
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        dsll(dst, src, LogMinObjAlignmentInBytes);
        daddu(dst, dst, S5_heapbase);
      } else {
        daddu(dst, src, S5_heapbase);
      }
      movz(dst, R0, src);    // src == 0 -> dst = NULL
    }
  }
  verify_oop(dst, "broken oop in decode_heap_oop");
}

// Decompress a known-non-NULL narrow oop in place (no NULL check emitted).
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shl(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      daddu(r, r, S5_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}

// Two-register form: dst = decompressed(src), src known non-NULL.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");

  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      dsll(dst, src, LogMinObjAlignmentInBytes);
      daddu(dst, dst, S5_heapbase);
    } else {
      dsll(dst, src, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        daddu(dst, dst, S5_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      move(dst, src);
    }
  }
}

// Compress a known-non-NULL klass pointer in place.  Unlike oops, the klass
// base is materialized into AT with set64 (it is not kept in S5_heapbase),
// so r must not be AT.
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    assert(r != AT, "Encoding a klass in AT");
    set64(AT, (int64_t)Universe::narrow_klass_base());
    dsub(r, r, AT);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shr(r, LogKlassAlignmentInBytes);
  }
  // Not neccessary for MIPS at all.
  //if (Universe::narrow_klass_base() != NULL) {
  //  reinit_heapbase();
  //}
}

// Two-register form: dst = compressed(src); uses dst itself as the scratch
// for the klass base so AT is untouched on this path.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
  } else {
    if (Universe::narrow_klass_base() != NULL) {
      set64(dst, (int64_t)Universe::narrow_klass_base());
      dsub(dst, src, dst);
      if (Universe::narrow_klass_shift() != 0) {
        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        shr(dst, LogKlassAlignmentInBytes);
      }
    } else {
      if (Universe::narrow_klass_shift() != 0) {
        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        dsrl(dst, src, LogKlassAlignmentInBytes);
      } else {
        move(dst, src);
      }
    }
  }
}

// Function instr_size_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL).  Hence, if the instructions they
// generate change, then this method needs to be updated.
int MacroAssembler::instr_size_for_decode_klass_not_null() {
  assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  if (Universe::narrow_klass_base() != NULL) {
    // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
    return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10);
  } else {
    // longest load decode klass function, mov64, leaq
    return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  }
}

// Decompress a known-non-NULL compressed klass pointer in place.
// Clobbers AT (holds the klass base), so r must not be AT.
void MacroAssembler::decode_klass_not_null(Register r) {
  assert (UseCompressedClassPointers, "should only be used for compressed headers");
  assert(r != AT, "Decoding a klass in AT");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shl(r, LogKlassAlignmentInBytes);
  }
  if (Universe::narrow_klass_base() != NULL) {
    set64(AT, (int64_t)Universe::narrow_klass_base());
    daddu(r, r, AT);
    //Not neccessary for MIPS at all.
    //reinit_heapbase();
  }
}

// Two-register form: dst = decompressed(src).  Materializes the klass base
// into dst first; when the base is NULL set64 loads 0, so the final daddu is
// still correct.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  assert (UseCompressedClassPointers, "should only be used for compressed headers");

  if (dst == src) {
    decode_klass_not_null(dst);
  } else {
    // Cannot assert, unverified entry point counts instructions (see .ad file)
    // vtableStubs also counts instructions in pd_code_size_limit.
    // Also do not verify_oop as this is called by verify_oop.
    set64(dst, (int64_t)Universe::narrow_klass_base());
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
      dsll(AT, src, Address::times_8);
      daddu(dst, dst, AT);
    } else {
      daddu(dst, src, dst);
    }
  }
}

// Add a 32-bit immediate to reg (32-bit arithmetic even on LP64 via addu32).
// Clobbers AT.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {
    // -value would overflow, so handle min_jint directly.
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
    return;
  }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) { ;                        return; }

  if(Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, value));
    LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  }
}

// Subtract a 32-bit immediate from reg; mirror image of incrementl.
// Clobbers AT.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
    return;
  }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) { ;                        return; }

  if(Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, -value));
    LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  }
}

// Reload the compressed-oop/klass base register (S5_heapbase).  Before the
// heap exists the base is loaded indirectly through narrow_ptrs_base_addr().
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      if (Universe::narrow_oop_base() == NULL) {
        move(S5_heapbase, R0);
      } else {
        set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base());
      }
    } else {
      set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr());
      ld(S5_heapbase, S5_heapbase, 0);
    }
  }
}
#endif // _LP64

// Full subtype check: falls through to L_success only via the fast or slow
// path helpers; any failure path lands on the local L_failure and returns.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
//implement ind gen_subtype_check
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}

// RAII-style helper: emits a branch over the guarded region when the byte at
// flag_addr equals `value`; the destructor (elsewhere) binds _label.
// Clobbers AT.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->li(AT, (address)flag_addr);
  _masm->lb(AT,AT,0);
  _masm->addi(AT,AT,-value);
  _masm->beq(AT,R0,_label);
  _masm->delayed()->nop();
}

// Fast path of the subtype check: identity test, then the supertype display /
// secondary-super-cache probe.  At most one of the three labels may be NULL
// (it then falls through).  Clobbers AT and (if must_load_sco) temp_reg.
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  //cmpptr(sub_klass, super_klass);
  //local_jcc(Assembler::equal, *L_success);
  beq(sub_klass, super_klass, *L_success);
  delayed()->nop();
  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    lwu(temp_reg, super_klass, sco_offset);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  // AT = *(sub_klass + super_check_offset): either a primary-super display
  // slot or the secondary-super cache, depending on the offset.
  dsll(AT, super_check_offset.register_or_noreg(), Address::times_1);
  daddu(AT, sub_klass, AT);
  ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1);

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    beq(super_klass, AT, *L_success);
    delayed()->nop();
    addi(AT, super_check_offset.as_register(), -sc_offset);
    if (L_failure == &L_fallthrough) {
      beq(AT, R0, *L_slow_path);
      delayed()->nop();
    } else {
      bne(AT, R0, *L_failure);
      delayed()->nop();
      b(*L_slow_path);
      delayed()->nop();
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_slow_path);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      beq(super_klass, AT, *L_success);
      delayed()->nop();
    } else {
      bne(super_klass, AT, *L_failure);
      delayed()->nop();
      b(*L_success);
      delayed()->nop();
    }
  }

  bind(L_fallthrough);

}

// Slow path of the subtype check: linear scan of the secondary-supers array;
// on a hit the result is stored into the secondary-super cache.  Clobbers AT,
// temp_reg and temp2_reg (T9 is used if temp2_reg is noreg).
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  else
    temp2_reg = T9;
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

#if 0
  assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter)
#endif

  // Get super_klass value into rax (even if it was in rdi or rcx).
#ifndef PRODUCT
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  //LP64_ONLY( lea(rcx, pst_counter_addr) );
  //LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  ld(temp_reg, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  lw(temp2_reg, Address(temp_reg, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  daddiu(temp_reg, temp_reg, Array<Klass*>::base_offset_in_bytes());

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

  /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. */
  // Loop invariant: temp_reg -> current element, temp2_reg = remaining count.
  // The delay slots advance the pointer / decrement the count.
  Label Loop, subtype;
  bind(Loop);
  beq(temp2_reg, R0, *L_failure);
  delayed()->nop();
  ld(AT, temp_reg, 0);
  beq(AT, super_klass, subtype);
  delayed()->daddi(temp_reg, temp_reg, 1 * wordSize);
  b(Loop);
  delayed()->daddi(temp2_reg, temp2_reg, -1);

  bind(subtype);
  sd(super_klass, super_cache_addr);
  if (L_success != &L_fallthrough) {
    b(*L_success);
    delayed()->nop();
  }

  // Success.  Cache the super we found and proceed in triumph.
#undef IS_A_TEMP

  bind(L_fallthrough);
}

// Fetch and clear the pending oop result of a VM call from the thread.
void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  sd(R0, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop(oop_result, "broken oop in call_VM_base");
}

// Fetch and clear the pending metadata result of a VM call from the thread.
void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  sd(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
1.3660 + int stackElementSize = Interpreter::stackElementSize; 1.3661 + int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 1.3662 +#ifdef ASSERT 1.3663 + int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 1.3664 + assert(offset1 - offset == stackElementSize, "correct arithmetic"); 1.3665 +#endif 1.3666 + Register scale_reg = NOREG; 1.3667 + Address::ScaleFactor scale_factor = Address::no_scale; 1.3668 + if (arg_slot.is_constant()) { 1.3669 + offset += arg_slot.as_constant() * stackElementSize; 1.3670 + } else { 1.3671 + scale_reg = arg_slot.as_register(); 1.3672 + scale_factor = Address::times_8; 1.3673 + } 1.3674 + // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke. 1.3675 + // offset += wordSize; // return PC is on stack 1.3676 + if(scale_reg==NOREG) return Address(SP, offset); 1.3677 + else { 1.3678 + dsll(scale_reg, scale_reg, scale_factor); 1.3679 + daddu(scale_reg, SP, scale_reg); 1.3680 + return Address(scale_reg, offset); 1.3681 + } 1.3682 +} 1.3683 + 1.3684 +SkipIfEqual::~SkipIfEqual() { 1.3685 + _masm->bind(_label); 1.3686 +} 1.3687 + 1.3688 +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 1.3689 + switch (size_in_bytes) { 1.3690 +#ifndef _LP64 1.3691 + case 8: 1.3692 + assert(dst2 != noreg, "second dest register required"); 1.3693 + lw(dst, src); 1.3694 + lw(dst2, src.plus_disp(BytesPerInt)); 1.3695 + break; 1.3696 +#else 1.3697 + case 8: ld(dst, src); break; 1.3698 +#endif 1.3699 + case 4: lw(dst, src); break; 1.3700 + case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; 1.3701 + case 1: is_signed ? 
lb( dst, src) : lbu( dst, src); break; 1.3702 + default: ShouldNotReachHere(); 1.3703 + } 1.3704 +} 1.3705 + 1.3706 +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 1.3707 + switch (size_in_bytes) { 1.3708 +#ifndef _LP64 1.3709 + case 8: 1.3710 + assert(src2 != noreg, "second source register required"); 1.3711 + sw(src, dst); 1.3712 + sw(src2, dst.plus_disp(BytesPerInt)); 1.3713 + break; 1.3714 +#else 1.3715 + case 8: sd(src, dst); break; 1.3716 +#endif 1.3717 + case 4: sw(src, dst); break; 1.3718 + case 2: sh(src, dst); break; 1.3719 + case 1: sb(src, dst); break; 1.3720 + default: ShouldNotReachHere(); 1.3721 + } 1.3722 +} 1.3723 + 1.3724 +// Look up the method for a megamorphic invokeinterface call. 1.3725 +// The target method is determined by <intf_klass, itable_index>. 1.3726 +// The receiver klass is in recv_klass. 1.3727 +// On success, the result will be in method_result, and execution falls through. 1.3728 +// On failure, execution transfers to the given label. 
1.3729 +void MacroAssembler::lookup_interface_method(Register recv_klass, 1.3730 + Register intf_klass, 1.3731 + RegisterOrConstant itable_index, 1.3732 + Register method_result, 1.3733 + Register scan_temp, 1.3734 + Label& L_no_such_interface) { 1.3735 + assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); 1.3736 + assert(itable_index.is_constant() || itable_index.as_register() == method_result, 1.3737 + "caller must use same register for non-constant itable index as for method"); 1.3738 + 1.3739 + // Compute start of first itableOffsetEntry (which is at the end of the vtable) 1.3740 + int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; 1.3741 + int itentry_off = itableMethodEntry::method_offset_in_bytes(); 1.3742 + int scan_step = itableOffsetEntry::size() * wordSize; 1.3743 + int vte_size = vtableEntry::size() * wordSize; 1.3744 + Address::ScaleFactor times_vte_scale = Address::times_ptr; 1.3745 + assert(vte_size == wordSize, "else adjust times_vte_scale"); 1.3746 + 1.3747 + lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); 1.3748 + 1.3749 + // %%% Could store the aligned, prescaled offset in the klassoop. 1.3750 + dsll(scan_temp, scan_temp, times_vte_scale); 1.3751 + daddu(scan_temp, recv_klass, scan_temp); 1.3752 + daddiu(scan_temp, scan_temp, vtable_base); 1.3753 + if (HeapWordsPerLong > 1) { 1.3754 + // Round up to align_object_offset boundary 1.3755 + // see code for InstanceKlass::start_of_itable! 1.3756 + round_to(scan_temp, BytesPerLong); 1.3757 + } 1.3758 + 1.3759 + // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
1.3760 + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 1.3761 +// lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); 1.3762 + if (itable_index.is_constant()) { 1.3763 + set64(AT, (int)itable_index.is_constant()); 1.3764 + dsll(AT, AT, (int)Address::times_ptr); 1.3765 + } else { 1.3766 + dsll(AT, itable_index.as_register(), (int)Address::times_ptr); 1.3767 + } 1.3768 + daddu(AT, AT, recv_klass); 1.3769 + daddiu(recv_klass, AT, itentry_off); 1.3770 + 1.3771 + // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { 1.3772 + // if (scan->interface() == intf) { 1.3773 + // result = (klass + scan->offset() + itable_index); 1.3774 + // } 1.3775 + // } 1.3776 + Label search, found_method; 1.3777 + 1.3778 + for (int peel = 1; peel >= 0; peel--) { 1.3779 + ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); 1.3780 + 1.3781 + if (peel) { 1.3782 + beq(intf_klass, method_result, found_method); 1.3783 + nop(); 1.3784 + } else { 1.3785 + bne(intf_klass, method_result, search); 1.3786 + nop(); 1.3787 + // (invert the test to fall through to found_method...) 1.3788 + } 1.3789 + 1.3790 + if (!peel) break; 1.3791 + 1.3792 + bind(search); 1.3793 + 1.3794 + // Check that the previous entry is non-null. A null entry means that 1.3795 + // the receiver class doesn't implement the interface, and wasn't the 1.3796 + // same as when the caller was compiled. 1.3797 + beq(method_result, R0, L_no_such_interface); 1.3798 + nop(); 1.3799 + daddiu(scan_temp, scan_temp, scan_step); 1.3800 + } 1.3801 + 1.3802 + bind(found_method); 1.3803 + 1.3804 + // Got a hit. 
1.3805 + lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); 1.3806 + //ld(method_result, Address(recv_klass, scan_temp, Address::times_1)); 1.3807 + if(UseLoongsonISA) { 1.3808 + gsldx(method_result, recv_klass, scan_temp, 0); 1.3809 + } else { 1.3810 + daddu(AT, recv_klass, scan_temp); 1.3811 + ld(method_result, AT); 1.3812 + } 1.3813 +} 1.3814 + 1.3815 + 1.3816 +// virtual method calling 1.3817 +void MacroAssembler::lookup_virtual_method(Register recv_klass, 1.3818 + RegisterOrConstant vtable_index, 1.3819 + Register method_result) { 1.3820 + Register tmp = GP; 1.3821 + push(tmp); 1.3822 + 1.3823 + if (vtable_index.is_constant()) { 1.3824 + assert_different_registers(recv_klass, method_result, tmp); 1.3825 + } else { 1.3826 + assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); 1.3827 + } 1.3828 + const int base = InstanceKlass::vtable_start_offset() * wordSize; 1.3829 + assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); 1.3830 +/* 1.3831 + Address vtable_entry_addr(recv_klass, 1.3832 + vtable_index, Address::times_ptr, 1.3833 + base + vtableEntry::method_offset_in_bytes()); 1.3834 +*/ 1.3835 + if (vtable_index.is_constant()) { 1.3836 + set64(AT, vtable_index.as_constant()); 1.3837 + dsll(AT, AT, (int)Address::times_ptr); 1.3838 + } else { 1.3839 + dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); 1.3840 + } 1.3841 + set64(tmp, base + vtableEntry::method_offset_in_bytes()); 1.3842 + daddu(tmp, tmp, AT); 1.3843 + daddu(tmp, tmp, recv_klass); 1.3844 + ld(method_result, tmp, 0); 1.3845 + 1.3846 + pop(tmp); 1.3847 +}