/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
aoqi@6880: * aoqi@6880: */ aoqi@6880: aoqi@6880: #include "precompiled.hpp" aoqi@6880: #include "asm/assembler.hpp" aoqi@6880: #include "asm/assembler.inline.hpp" aoqi@6880: #include "asm/macroAssembler.inline.hpp" aoqi@6880: #include "compiler/disassembler.hpp" aoqi@6880: #include "gc_interface/collectedHeap.inline.hpp" aoqi@6880: #include "interpreter/interpreter.hpp" aoqi@6880: #include "memory/cardTableModRefBS.hpp" aoqi@6880: #include "memory/resourceArea.hpp" aoqi@6880: #include "memory/universe.hpp" aoqi@6880: #include "prims/methodHandles.hpp" aoqi@6880: #include "runtime/biasedLocking.hpp" aoqi@6880: #include "runtime/interfaceSupport.hpp" aoqi@6880: #include "runtime/objectMonitor.hpp" aoqi@6880: #include "runtime/os.hpp" aoqi@6880: #include "runtime/sharedRuntime.hpp" aoqi@6880: #include "runtime/stubRoutines.hpp" aoqi@6880: #include "utilities/macros.hpp" aoqi@6880: #if INCLUDE_ALL_GCS aoqi@6880: #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" aoqi@6880: #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" aoqi@6880: #include "gc_implementation/g1/heapRegion.hpp" aoqi@6880: #endif // INCLUDE_ALL_GCS aoqi@6880: aoqi@6880: // Implementation of MacroAssembler aoqi@6880: aoqi@6880: intptr_t MacroAssembler::i[32] = {0}; aoqi@6880: float MacroAssembler::f[32] = {0.0}; aoqi@6880: aoqi@6880: void MacroAssembler::print(outputStream *s) { aoqi@6880: unsigned int k; aoqi@6880: for(k=0; kprint_cr("i%d = 0x%.16lx", k, i[k]); aoqi@6880: } aoqi@6880: s->cr(); aoqi@6880: aoqi@6880: for(k=0; kprint_cr("f%d = %f", k, f[k]); aoqi@6880: } aoqi@6880: s->cr(); aoqi@6880: } aoqi@6880: aoqi@6880: int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } aoqi@6880: int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } aoqi@6880: aoqi@6880: void MacroAssembler::save_registers(MacroAssembler *masm) { aoqi@6880: #define __ masm-> aoqi@6880: for(int k=0; k<32; k++) { aoqi@6880: __ sw 
(as_Register(k), A0, i_offset(k)); aoqi@6880: } aoqi@6880: aoqi@6880: for(int k=0; k<32; k++) { aoqi@6880: __ swc1 (as_FloatRegister(k), A0, f_offset(k)); aoqi@6880: } aoqi@6880: #undef __ aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::restore_registers(MacroAssembler *masm) { aoqi@6880: #define __ masm-> aoqi@6880: for(int k=0; k<32; k++) { aoqi@6880: __ lw (as_Register(k), A0, i_offset(k)); aoqi@6880: } aoqi@6880: aoqi@6880: for(int k=0; k<32; k++) { aoqi@6880: __ lwc1 (as_FloatRegister(k), A0, f_offset(k)); aoqi@6880: } aoqi@6880: #undef __ aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: void MacroAssembler::pd_patch_instruction(address branch, address target) { aoqi@6880: jint& stub_inst = *(jint*) branch; aoqi@6880: aoqi@6880: /* * aoqi@6880: move(AT, RA); // dadd aoqi@6880: emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); aoqi@6880: nop(); aoqi@6880: lui(T9, 0); // to be patched aoqi@6880: ori(T9, 0); aoqi@6880: daddu(T9, T9, RA); aoqi@6880: move(RA, AT); aoqi@6880: jr(T9); aoqi@6880: */ aoqi@6880: if(special(stub_inst) == dadd_op) { aoqi@6880: jint *pc = (jint *)branch; aoqi@6880: aoqi@6880: assert(opcode(pc[3]) == lui_op aoqi@6880: && opcode(pc[4]) == ori_op aoqi@6880: && special(pc[5]) == daddu_op, "Not a branch label patch"); aoqi@6880: if(!(opcode(pc[3]) == lui_op aoqi@6880: && opcode(pc[4]) == ori_op aoqi@6880: && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } aoqi@6880: aoqi@6880: int offset = target - branch; aoqi@6880: if (!is_simm16(offset)) aoqi@6880: { aoqi@6880: pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); aoqi@6880: pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); aoqi@6880: } aoqi@6880: else aoqi@6880: { aoqi@6880: /* revert to "beq + nop" */ aoqi@6880: CodeBuffer cb(branch, 4 * 10); aoqi@6880: MacroAssembler masm(&cb); aoqi@6880: #define __ masm. 
aoqi@6880: __ b(target); aoqi@6880: __ nop(); aoqi@6880: __ nop(); aoqi@6880: __ nop(); aoqi@6880: __ nop(); aoqi@6880: __ nop(); aoqi@6880: __ nop(); aoqi@6880: __ nop(); aoqi@6880: } aoqi@6880: return; aoqi@6880: } aoqi@6880: aoqi@6880: #ifndef PRODUCT aoqi@6880: if (!is_simm16((target - branch - 4) >> 2)) aoqi@6880: { aoqi@6880: tty->print_cr("Illegal patching: target=0x%lx", target); aoqi@6880: int *p = (int *)branch; aoqi@6880: for (int i = -10; i < 10; i++) aoqi@6880: { aoqi@6880: tty->print("0x%lx, ", p[i]); aoqi@6880: } aoqi@6880: tty->print_cr(""); aoqi@6880: } aoqi@6880: #endif aoqi@6880: aoqi@6880: stub_inst = patched_branch(target - branch, stub_inst, 0); aoqi@6880: } aoqi@6880: aoqi@6880: static inline address first_cache_address() { aoqi@6880: return CodeCache::low_bound() + sizeof(HeapBlock::Header); aoqi@6880: } aoqi@6880: aoqi@6880: static inline address last_cache_address() { aoqi@6880: return CodeCache::high_bound() - Assembler::InstructionSize; aoqi@6880: } aoqi@6880: aoqi@6880: int MacroAssembler::call_size(address target, bool far, bool patchable) { aoqi@6880: if (patchable) return 6 << Assembler::LogInstructionSize; aoqi@6880: if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop aoqi@6880: return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; aoqi@6880: } aoqi@6880: aoqi@6880: // Can we reach target using jal/j from anywhere aoqi@6880: // in the code cache (because code can be relocated)? 
aoqi@6880: bool MacroAssembler::reachable_from_cache(address target) { aoqi@6880: address cl = first_cache_address(); aoqi@6880: address ch = last_cache_address(); aoqi@6880: aoqi@6880: return fit_in_jal(target, cl) && fit_in_jal(target, ch); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::general_jump(address target) { aoqi@6880: if (reachable_from_cache(target)) { aoqi@6880: j(target); aoqi@6880: nop(); aoqi@6880: } else { aoqi@6880: set64(T9, (long)target); aoqi@6880: jr(T9); aoqi@6880: nop(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: int MacroAssembler::insts_for_general_jump(address target) { aoqi@6880: if (reachable_from_cache(target)) { aoqi@6880: //j(target); aoqi@6880: //nop(); aoqi@6880: return 2; aoqi@6880: } else { aoqi@6880: //set64(T9, (long)target); aoqi@6880: //jr(T9); aoqi@6880: //nop(); aoqi@6880: return insts_for_set64((jlong)target) + 2; aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::patchable_jump(address target) { aoqi@6880: if (reachable_from_cache(target)) { aoqi@6880: nop(); aoqi@6880: nop(); aoqi@6880: nop(); aoqi@6880: nop(); aoqi@6880: j(target); aoqi@6880: nop(); aoqi@6880: } else { aoqi@6880: patchable_set48(T9, (long)target); aoqi@6880: jr(T9); aoqi@6880: nop(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: int MacroAssembler::insts_for_patchable_jump(address target) { aoqi@6880: return 6; aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::general_call(address target) { aoqi@6880: if (reachable_from_cache(target)) { aoqi@6880: jal(target); aoqi@6880: nop(); aoqi@6880: } else { aoqi@6880: set64(T9, (long)target); aoqi@6880: jalr(T9); aoqi@6880: nop(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: int MacroAssembler::insts_for_general_call(address target) { aoqi@6880: if (reachable_from_cache(target)) { aoqi@6880: //jal(target); aoqi@6880: //nop(); aoqi@6880: return 2; aoqi@6880: } else { aoqi@6880: //set64(T9, (long)target); aoqi@6880: //jalr(T9); aoqi@6880: //nop(); aoqi@6880: return 
insts_for_set64((jlong)target) + 2; aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::patchable_call(address target) { aoqi@6880: if (reachable_from_cache(target)) { aoqi@6880: nop(); aoqi@6880: nop(); aoqi@6880: nop(); aoqi@6880: nop(); aoqi@6880: jal(target); aoqi@6880: nop(); aoqi@6880: } else { aoqi@6880: patchable_set48(T9, (long)target); aoqi@6880: jalr(T9); aoqi@6880: nop(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: int MacroAssembler::insts_for_patchable_call(address target) { aoqi@6880: return 6; aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::beq_far(Register rs, Register rt, address entry) aoqi@6880: { aoqi@6880: u_char * cur_pc = pc(); aoqi@6880: aoqi@6880: /* Jin: Near/Far jump */ aoqi@6880: if(is_simm16((entry - pc() - 4) / 4)) aoqi@6880: { aoqi@6880: Assembler::beq(rs, rt, offset(entry)); aoqi@6880: } aoqi@6880: else aoqi@6880: { aoqi@6880: Label not_jump; aoqi@6880: bne(rs, rt, not_jump); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: b_far(entry); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: bind(not_jump); aoqi@6880: has_delay_slot(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::beq_far(Register rs, Register rt, Label& L) aoqi@6880: { aoqi@6880: if (L.is_bound()) { aoqi@6880: beq_far(rs, rt, target(L)); aoqi@6880: } else { aoqi@6880: u_char * cur_pc = pc(); aoqi@6880: Label not_jump; aoqi@6880: bne(rs, rt, not_jump); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: b_far(L); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: bind(not_jump); aoqi@6880: has_delay_slot(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::bne_far(Register rs, Register rt, address entry) aoqi@6880: { aoqi@6880: u_char * cur_pc = pc(); aoqi@6880: aoqi@6880: /* Jin: Near/Far jump */ aoqi@6880: if(is_simm16((entry - pc() - 4) / 4)) aoqi@6880: { aoqi@6880: Assembler::bne(rs, rt, offset(entry)); aoqi@6880: } aoqi@6880: else aoqi@6880: { aoqi@6880: Label not_jump; aoqi@6880: beq(rs, rt, 
not_jump); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: b_far(entry); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: bind(not_jump); aoqi@6880: has_delay_slot(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::bne_far(Register rs, Register rt, Label& L) aoqi@6880: { aoqi@6880: if (L.is_bound()) { aoqi@6880: bne_far(rs, rt, target(L)); aoqi@6880: } else { aoqi@6880: u_char * cur_pc = pc(); aoqi@6880: Label not_jump; aoqi@6880: beq(rs, rt, not_jump); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: b_far(L); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: bind(not_jump); aoqi@6880: has_delay_slot(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::b_far(Label& L) aoqi@6880: { aoqi@6880: if (L.is_bound()) { aoqi@6880: b_far(target(L)); aoqi@6880: } else { aoqi@6880: volatile address dest = target(L); aoqi@6880: /* aoqi@6880: MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 aoqi@6880: 0x00000055651ed514: dadd at, ra, zero aoqi@6880: 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 aoqi@6880: aoqi@6880: 0x00000055651ed51c: sll zero, zero, 0 aoqi@6880: 0x00000055651ed520: lui t9, 0x0 aoqi@6880: 0x00000055651ed524: ori t9, t9, 0x21b8 aoqi@6880: 0x00000055651ed528: daddu t9, t9, ra aoqi@6880: 0x00000055651ed52c: dadd ra, at, zero aoqi@6880: 0x00000055651ed530: jr t9 aoqi@6880: 0x00000055651ed534: sll zero, zero, 0 aoqi@6880: */ aoqi@6880: move(AT, RA); aoqi@6880: emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); aoqi@6880: nop(); aoqi@6880: lui(T9, 0); // to be patched aoqi@6880: ori(T9, T9, 0); aoqi@6880: daddu(T9, T9, RA); aoqi@6880: move(RA, AT); aoqi@6880: jr(T9); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::b_far(address entry) aoqi@6880: { aoqi@6880: u_char * cur_pc = pc(); aoqi@6880: aoqi@6880: /* Jin: Near/Far jump */ aoqi@6880: if(is_simm16((entry - pc() - 4) / 4)) aoqi@6880: { aoqi@6880: b(offset(entry)); aoqi@6880: } aoqi@6880: else aoqi@6880: { 
aoqi@6880: /* address must be bounded */ aoqi@6880: move(AT, RA); aoqi@6880: emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); aoqi@6880: nop(); aoqi@6880: li32(T9, entry - pc()); aoqi@6880: daddu(T9, T9, RA); aoqi@6880: move(RA, AT); aoqi@6880: jr(T9); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::ld_ptr(Register rt, Register offset, Register base) { aoqi@6880: addu_long(AT, base, offset); aoqi@6880: ld_ptr(rt, 0, AT); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::st_ptr(Register rt, Register offset, Register base) { aoqi@6880: addu_long(AT, base, offset); aoqi@6880: st_ptr(rt, 0, AT); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::ld_long(Register rt, Register offset, Register base) { aoqi@6880: addu_long(AT, base, offset); aoqi@6880: ld_long(rt, 0, AT); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::st_long(Register rt, Register offset, Register base) { aoqi@6880: addu_long(AT, base, offset); aoqi@6880: st_long(rt, 0, AT); aoqi@6880: } aoqi@6880: aoqi@6880: Address MacroAssembler::as_Address(AddressLiteral adr) { aoqi@6880: return Address(adr.target(), adr.rspec()); aoqi@6880: } aoqi@6880: aoqi@6880: Address MacroAssembler::as_Address(ArrayAddress adr) { aoqi@6880: return Address::make_array(adr); aoqi@6880: } aoqi@6880: aoqi@6880: // tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
aoqi@6880: void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { aoqi@6880: Label again; aoqi@6880: aoqi@6880: li(tmp_reg1, counter_addr); aoqi@6880: bind(again); aoqi@6880: if(!Use3A2000) sync(); aoqi@6880: ll(tmp_reg2, tmp_reg1, 0); aoqi@6880: addi(tmp_reg2, tmp_reg2, inc); aoqi@6880: sc(tmp_reg2, tmp_reg1, 0); aoqi@6880: beq(tmp_reg2, R0, again); aoqi@6880: delayed()->nop(); aoqi@6880: } aoqi@6880: aoqi@6880: int MacroAssembler::biased_locking_enter(Register lock_reg, aoqi@6880: Register obj_reg, aoqi@6880: Register swap_reg, aoqi@6880: Register tmp_reg, aoqi@6880: bool swap_reg_contains_mark, aoqi@6880: Label& done, aoqi@6880: Label* slow_case, aoqi@6880: BiasedLockingCounters* counters) { aoqi@6880: assert(UseBiasedLocking, "why call this otherwise?"); aoqi@6880: bool need_tmp_reg = false; aoqi@6880: if (tmp_reg == noreg) { aoqi@6880: need_tmp_reg = true; aoqi@6880: tmp_reg = T9; aoqi@6880: } aoqi@6880: assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); aoqi@6880: assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); aoqi@6880: Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); aoqi@6880: Address saved_mark_addr(lock_reg, 0); aoqi@6880: aoqi@6880: // Biased locking aoqi@6880: // See whether the lock is currently biased toward our thread and aoqi@6880: // whether the epoch is still valid aoqi@6880: // Note that the runtime guarantees sufficient alignment of JavaThread aoqi@6880: // pointers to allow age to be placed into low bits aoqi@6880: // First check to see whether biasing is even enabled for this object aoqi@6880: Label cas_label; aoqi@6880: int null_check_offset = -1; aoqi@6880: if (!swap_reg_contains_mark) { aoqi@6880: null_check_offset = offset(); aoqi@6880: ld_ptr(swap_reg, mark_addr); aoqi@6880: } aoqi@6880: aoqi@6880: if (need_tmp_reg) { aoqi@6880: push(tmp_reg); aoqi@6880: } 
aoqi@6880: move(tmp_reg, swap_reg); aoqi@6880: andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); aoqi@6880: #ifdef _LP64 aoqi@6880: daddi(AT, R0, markOopDesc::biased_lock_pattern); aoqi@6880: dsub(AT, AT, tmp_reg); aoqi@6880: #else aoqi@6880: addi(AT, R0, markOopDesc::biased_lock_pattern); aoqi@6880: sub(AT, AT, tmp_reg); aoqi@6880: #endif aoqi@6880: if (need_tmp_reg) { aoqi@6880: pop(tmp_reg); aoqi@6880: } aoqi@6880: aoqi@6880: bne(AT, R0, cas_label); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: aoqi@6880: // The bias pattern is present in the object's header. Need to check aoqi@6880: // whether the bias owner and the epoch are both still current. aoqi@6880: // Note that because there is no current thread register on MIPS we aoqi@6880: // need to store off the mark word we read out of the object to aoqi@6880: // avoid reloading it and needing to recheck invariants below. This aoqi@6880: // store is unfortunate but it makes the overall code shorter and aoqi@6880: // simpler. 
aoqi@6880: st_ptr(swap_reg, saved_mark_addr); aoqi@6880: if (need_tmp_reg) { aoqi@6880: push(tmp_reg); aoqi@6880: } aoqi@6880: if (swap_reg_contains_mark) { aoqi@6880: null_check_offset = offset(); aoqi@6880: } aoqi@6880: load_prototype_header(tmp_reg, obj_reg); aoqi@6880: xorr(tmp_reg, tmp_reg, swap_reg); aoqi@6880: get_thread(swap_reg); aoqi@6880: xorr(swap_reg, swap_reg, tmp_reg); aoqi@6880: aoqi@6880: move(AT, ~((int) markOopDesc::age_mask_in_place)); aoqi@6880: andr(swap_reg, swap_reg, AT); aoqi@6880: aoqi@6880: if (PrintBiasedLockingStatistics) { aoqi@6880: Label L; aoqi@6880: bne(swap_reg, R0, L); aoqi@6880: delayed()->nop(); aoqi@6880: push(tmp_reg); aoqi@6880: push(A0); aoqi@6880: atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); aoqi@6880: pop(A0); aoqi@6880: pop(tmp_reg); aoqi@6880: bind(L); aoqi@6880: } aoqi@6880: if (need_tmp_reg) { aoqi@6880: pop(tmp_reg); aoqi@6880: } aoqi@6880: beq(swap_reg, R0, done); aoqi@6880: delayed()->nop(); aoqi@6880: Label try_revoke_bias; aoqi@6880: Label try_rebias; aoqi@6880: aoqi@6880: // At this point we know that the header has the bias pattern and aoqi@6880: // that we are not the bias owner in the current epoch. We need to aoqi@6880: // figure out more details about the state of the header in order to aoqi@6880: // know what operations can be legally performed on the object's aoqi@6880: // header. aoqi@6880: aoqi@6880: // If the low three bits in the xor result aren't clear, that means aoqi@6880: // the prototype header is no longer biased and we have to revoke aoqi@6880: // the bias on this object. aoqi@6880: aoqi@6880: move(AT, markOopDesc::biased_lock_mask_in_place); aoqi@6880: andr(AT, swap_reg, AT); aoqi@6880: bne(AT, R0, try_revoke_bias); aoqi@6880: delayed()->nop(); aoqi@6880: // Biasing is still enabled for this data type. 
See whether the aoqi@6880: // epoch of the current bias is still valid, meaning that the epoch aoqi@6880: // bits of the mark word are equal to the epoch bits of the aoqi@6880: // prototype header. (Note that the prototype header's epoch bits aoqi@6880: // only change at a safepoint.) If not, attempt to rebias the object aoqi@6880: // toward the current thread. Note that we must be absolutely sure aoqi@6880: // that the current epoch is invalid in order to do this because aoqi@6880: // otherwise the manipulations it performs on the mark word are aoqi@6880: // illegal. aoqi@6880: aoqi@6880: move(AT, markOopDesc::epoch_mask_in_place); aoqi@6880: andr(AT,swap_reg, AT); aoqi@6880: bne(AT, R0, try_rebias); aoqi@6880: delayed()->nop(); aoqi@6880: // The epoch of the current bias is still valid but we know nothing aoqi@6880: // about the owner; it might be set or it might be clear. Try to aoqi@6880: // acquire the bias of the object using an atomic operation. If this aoqi@6880: // fails we will go in to the runtime to revoke the object's bias. aoqi@6880: // Note that we first construct the presumed unbiased header so we aoqi@6880: // don't accidentally blow away another thread's valid bias. aoqi@6880: aoqi@6880: ld_ptr(swap_reg, saved_mark_addr); aoqi@6880: aoqi@6880: move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); aoqi@6880: andr(swap_reg, swap_reg, AT); aoqi@6880: aoqi@6880: if (need_tmp_reg) { aoqi@6880: push(tmp_reg); aoqi@6880: } aoqi@6880: get_thread(tmp_reg); aoqi@6880: orr(tmp_reg, tmp_reg, swap_reg); aoqi@6880: //if (os::is_MP()) { aoqi@6880: // sync(); aoqi@6880: //} aoqi@6880: cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); aoqi@6880: if (need_tmp_reg) { aoqi@6880: pop(tmp_reg); aoqi@6880: } aoqi@6880: // If the biasing toward our thread failed, this means that aoqi@6880: // another thread succeeded in biasing it toward itself and we aoqi@6880: // need to revoke that bias. 
The revocation will occur in the aoqi@6880: // interpreter runtime in the slow case. aoqi@6880: if (PrintBiasedLockingStatistics) { aoqi@6880: Label L; aoqi@6880: bne(AT, R0, L); aoqi@6880: delayed()->nop(); aoqi@6880: push(tmp_reg); aoqi@6880: push(A0); aoqi@6880: atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); aoqi@6880: pop(A0); aoqi@6880: pop(tmp_reg); aoqi@6880: bind(L); aoqi@6880: } aoqi@6880: if (slow_case != NULL) { aoqi@6880: beq_far(AT, R0, *slow_case); aoqi@6880: delayed()->nop(); aoqi@6880: } aoqi@6880: b(done); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: bind(try_rebias); aoqi@6880: // At this point we know the epoch has expired, meaning that the aoqi@6880: // current "bias owner", if any, is actually invalid. Under these aoqi@6880: // circumstances _only_, we are allowed to use the current header's aoqi@6880: // value as the comparison value when doing the cas to acquire the aoqi@6880: // bias in the current epoch. In other words, we allow transfer of aoqi@6880: // the bias from one thread to another directly in this situation. aoqi@6880: // aoqi@6880: // FIXME: due to a lack of registers we currently blow away the age aoqi@6880: // bits in this situation. Should attempt to preserve them. aoqi@6880: if (need_tmp_reg) { aoqi@6880: push(tmp_reg); aoqi@6880: } aoqi@6880: load_prototype_header(tmp_reg, obj_reg); aoqi@6880: get_thread(swap_reg); aoqi@6880: orr(tmp_reg, tmp_reg, swap_reg); aoqi@6880: ld_ptr(swap_reg, saved_mark_addr); aoqi@6880: aoqi@6880: //if (os::is_MP()) { aoqi@6880: // sync(); aoqi@6880: //} aoqi@6880: cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); aoqi@6880: if (need_tmp_reg) { aoqi@6880: pop(tmp_reg); aoqi@6880: } aoqi@6880: // If the biasing toward our thread failed, then another thread aoqi@6880: // succeeded in biasing it toward itself and we need to revoke that aoqi@6880: // bias. The revocation will occur in the runtime in the slow case. 
aoqi@6880: if (PrintBiasedLockingStatistics) { aoqi@6880: Label L; aoqi@6880: bne(AT, R0, L); aoqi@6880: delayed()->nop(); aoqi@6880: push(AT); aoqi@6880: push(tmp_reg); aoqi@6880: atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); aoqi@6880: pop(tmp_reg); aoqi@6880: pop(AT); aoqi@6880: bind(L); aoqi@6880: } aoqi@6880: if (slow_case != NULL) { aoqi@6880: beq_far(AT, R0, *slow_case); aoqi@6880: delayed()->nop(); aoqi@6880: } aoqi@6880: aoqi@6880: b(done); aoqi@6880: delayed()->nop(); aoqi@6880: bind(try_revoke_bias); aoqi@6880: // The prototype mark in the klass doesn't have the bias bit set any aoqi@6880: // more, indicating that objects of this data type are not supposed aoqi@6880: // to be biased any more. We are going to try to reset the mark of aoqi@6880: // this object to the prototype value and fall through to the aoqi@6880: // CAS-based locking scheme. Note that if our CAS fails, it means aoqi@6880: // that another thread raced us for the privilege of revoking the aoqi@6880: // bias of this particular object, so it's okay to continue in the aoqi@6880: // normal locking code. aoqi@6880: // aoqi@6880: // FIXME: due to a lack of registers we currently blow away the age aoqi@6880: // bits in this situation. Should attempt to preserve them. aoqi@6880: ld_ptr(swap_reg, saved_mark_addr); aoqi@6880: aoqi@6880: if (need_tmp_reg) { aoqi@6880: push(tmp_reg); aoqi@6880: } aoqi@6880: load_prototype_header(tmp_reg, obj_reg); aoqi@6880: //if (os::is_MP()) { aoqi@6880: // lock(); aoqi@6880: //} aoqi@6880: cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); aoqi@6880: if (need_tmp_reg) { aoqi@6880: pop(tmp_reg); aoqi@6880: } aoqi@6880: // Fall through to the normal CAS-based lock, because no matter what aoqi@6880: // the result of the above CAS, some thread must have succeeded in aoqi@6880: // removing the bias bit from the object's header. 
aoqi@6880: if (PrintBiasedLockingStatistics) { aoqi@6880: Label L; aoqi@6880: bne(AT, R0, L); aoqi@6880: delayed()->nop(); aoqi@6880: push(AT); aoqi@6880: push(tmp_reg); aoqi@6880: atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); aoqi@6880: pop(tmp_reg); aoqi@6880: pop(AT); aoqi@6880: bind(L); aoqi@6880: } aoqi@6880: aoqi@6880: bind(cas_label); aoqi@6880: return null_check_offset; aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { aoqi@6880: assert(UseBiasedLocking, "why call this otherwise?"); aoqi@6880: aoqi@6880: // Check for biased locking unlock case, which is a no-op aoqi@6880: // Note: we do not have to check the thread ID for two reasons. aoqi@6880: // First, the interpreter checks for IllegalMonitorStateException at aoqi@6880: // a higher level. Second, if the bias was revoked while we held the aoqi@6880: // lock, the object could not be rebiased toward another thread, so aoqi@6880: // the bias bit would be clear. aoqi@6880: #ifdef _LP64 aoqi@6880: ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); aoqi@6880: andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); aoqi@6880: daddi(AT, R0, markOopDesc::biased_lock_pattern); aoqi@6880: #else aoqi@6880: lw(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); aoqi@6880: andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); aoqi@6880: addi(AT, R0, markOopDesc::biased_lock_pattern); aoqi@6880: #endif aoqi@6880: aoqi@6880: beq(AT, temp_reg, done); aoqi@6880: delayed()->nop(); aoqi@6880: } aoqi@6880: aoqi@6880: // NOTE: we dont increment the SP after call like the x86 version, maybe this is a problem, FIXME. aoqi@6880: // the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf aoqi@6880: // this method will handle the stack problem, you need not to preserve the stack space for the argument now aoqi@6880: void MacroAssembler::call_VM_leaf_base(address entry_point, aoqi@6880: int number_of_arguments) { aoqi@6880: //call(RuntimeAddress(entry_point)); aoqi@6880: //increment(rsp, number_of_arguments * wordSize); aoqi@6880: Label L, E; aoqi@6880: aoqi@6880: assert(number_of_arguments <= 4, "just check"); aoqi@6880: aoqi@6880: andi(AT, SP, 0xf); aoqi@6880: beq(AT, R0, L); aoqi@6880: delayed()->nop(); aoqi@6880: daddi(SP, SP, -8); aoqi@6880: call(entry_point, relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: daddi(SP, SP, 8); aoqi@6880: b(E); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: bind(L); aoqi@6880: call(entry_point, relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: bind(E); aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: void MacroAssembler::jmp(address entry) { aoqi@6880: patchable_set48(T9, (long)entry); aoqi@6880: jr(T9); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { aoqi@6880: switch (rtype) { aoqi@6880: case relocInfo::runtime_call_type: aoqi@6880: case relocInfo::none: aoqi@6880: jmp(entry); aoqi@6880: break; aoqi@6880: default: aoqi@6880: { aoqi@6880: InstructionMark im(this); aoqi@6880: relocate(rtype); aoqi@6880: patchable_set48(T9, (long)entry); aoqi@6880: jr(T9); aoqi@6880: } aoqi@6880: break; aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call(address entry) { aoqi@6880: // c/c++ code assume T9 is entry point, so we just always move entry to t9 aoqi@6880: // maybe there is some more graceful method to handle this. FIXME aoqi@6880: // For more info, see class NativeCall. 
aoqi@6880: #ifndef _LP64 aoqi@6880: move(T9, (int)entry); aoqi@6880: #else aoqi@6880: patchable_set48(T9, (long)entry); aoqi@6880: #endif aoqi@6880: jalr(T9); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call(address entry, relocInfo::relocType rtype) { aoqi@6880: switch (rtype) { aoqi@6880: case relocInfo::runtime_call_type: aoqi@6880: case relocInfo::none: aoqi@6880: call(entry); aoqi@6880: break; aoqi@6880: default: aoqi@6880: { aoqi@6880: InstructionMark im(this); aoqi@6880: relocate(rtype); aoqi@6880: call(entry); aoqi@6880: } aoqi@6880: break; aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call(address entry, RelocationHolder& rh) aoqi@6880: { aoqi@6880: switch (rh.type()) { aoqi@6880: case relocInfo::runtime_call_type: aoqi@6880: case relocInfo::none: aoqi@6880: call(entry); aoqi@6880: break; aoqi@6880: default: aoqi@6880: { aoqi@6880: InstructionMark im(this); aoqi@6880: relocate(rh); aoqi@6880: call(entry); aoqi@6880: } aoqi@6880: break; aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::ic_call(address entry) { aoqi@6880: RelocationHolder rh = virtual_call_Relocation::spec(pc()); aoqi@6880: patchable_set48(IC_Klass, (long)Universe::non_oop_word()); aoqi@6880: assert(entry != NULL, "call most probably wrong"); aoqi@6880: InstructionMark im(this); aoqi@6880: relocate(rh); aoqi@6880: patchable_call(entry); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::c2bool(Register r) { aoqi@6880: Label L; aoqi@6880: Assembler::beq(r, R0, L); aoqi@6880: delayed()->nop(); aoqi@6880: move(r, 1); aoqi@6880: bind(L); aoqi@6880: } aoqi@6880: aoqi@6880: #ifndef PRODUCT aoqi@6880: extern "C" void findpc(intptr_t x); aoqi@6880: #endif aoqi@6880: aoqi@6880: void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { aoqi@6880: // In order to get locks to work, we need to fake a in_VM state aoqi@6880: JavaThread* thread = JavaThread::current(); aoqi@6880: 
JavaThreadState saved_state = thread->thread_state(); aoqi@6880: thread->set_thread_state(_thread_in_vm); aoqi@6880: if (ShowMessageBoxOnError) { aoqi@6880: JavaThread* thread = JavaThread::current(); aoqi@6880: JavaThreadState saved_state = thread->thread_state(); aoqi@6880: thread->set_thread_state(_thread_in_vm); aoqi@6880: if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { aoqi@6880: ttyLocker ttyl; aoqi@6880: BytecodeCounter::print(); aoqi@6880: } aoqi@6880: // To see where a verify_oop failed, get $ebx+40/X for this frame. aoqi@6880: // This is the value of eip which points to where verify_oop will return. aoqi@6880: if (os::message_box(msg, "Execution stopped, print registers?")) { aoqi@6880: ttyLocker ttyl; aoqi@6880: tty->print_cr("eip = 0x%08x", eip); aoqi@6880: #ifndef PRODUCT aoqi@6880: tty->cr(); aoqi@6880: findpc(eip); aoqi@6880: tty->cr(); aoqi@6880: #endif aoqi@6880: tty->print_cr("rax, = 0x%08x", rax); aoqi@6880: tty->print_cr("rbx, = 0x%08x", rbx); aoqi@6880: tty->print_cr("rcx = 0x%08x", rcx); aoqi@6880: tty->print_cr("rdx = 0x%08x", rdx); aoqi@6880: tty->print_cr("rdi = 0x%08x", rdi); aoqi@6880: tty->print_cr("rsi = 0x%08x", rsi); aoqi@6880: tty->print_cr("rbp, = 0x%08x", rbp); aoqi@6880: tty->print_cr("rsp = 0x%08x", rsp); aoqi@6880: BREAKPOINT; aoqi@6880: } aoqi@6880: } else { aoqi@6880: ttyLocker ttyl; aoqi@6880: ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); aoqi@6880: assert(false, "DEBUG MESSAGE"); aoqi@6880: } aoqi@6880: ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { aoqi@6880: if ( ShowMessageBoxOnError ) { aoqi@6880: JavaThreadState saved_state = JavaThread::current()->thread_state(); aoqi@6880: JavaThread::current()->set_thread_state(_thread_in_vm); aoqi@6880: { aoqi@6880: // In order to get locks work, we need to fake a in_VM state aoqi@6880: ttyLocker ttyl; 
aoqi@6880: ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); aoqi@6880: if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { aoqi@6880: BytecodeCounter::print(); aoqi@6880: } aoqi@6880: aoqi@6880: // if (os::message_box(msg, "Execution stopped, print registers?")) aoqi@6880: // regs->print(::tty); aoqi@6880: } aoqi@6880: ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); aoqi@6880: } aoqi@6880: else aoqi@6880: ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: void MacroAssembler::stop(const char* msg) { aoqi@6880: li(A0, (long)msg); aoqi@6880: #ifndef _LP64 aoqi@6880: //reserver space for argument. added by yjl 7/10/2005 aoqi@6880: addiu(SP, SP, - 1 * wordSize); aoqi@6880: #endif aoqi@6880: call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: #ifndef _LP64 aoqi@6880: //restore space for argument aoqi@6880: addiu(SP, SP, 1 * wordSize); aoqi@6880: #endif aoqi@6880: brk(17); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::warn(const char* msg) { aoqi@6880: #ifdef _LP64 aoqi@6880: pushad(); aoqi@6880: li(A0, (long)msg); aoqi@6880: push(S2); aoqi@6880: move(AT, -(StackAlignmentInBytes)); aoqi@6880: move(S2, SP); // use S2 as a sender SP holder aoqi@6880: andr(SP, SP, AT); // align stack as required by ABI aoqi@6880: call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: move(SP, S2); // use S2 as a sender SP holder aoqi@6880: pop(S2); aoqi@6880: popad(); aoqi@6880: #else aoqi@6880: pushad(); aoqi@6880: addi(SP, SP, -4); aoqi@6880: sw(A0, SP, -1 * wordSize); aoqi@6880: li(A0, (long)msg); aoqi@6880: addi(SP, SP, -1 * wordSize); aoqi@6880: call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: addi(SP, SP, 1 * wordSize); aoqi@6880: lw(A0, 
SP, -1 * wordSize); aoqi@6880: addi(SP, SP, 4); aoqi@6880: popad(); aoqi@6880: #endif aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::print_reg(Register reg) { aoqi@6880: /* aoqi@6880: char *s = getenv("PRINT_REG"); aoqi@6880: if (s == NULL) aoqi@6880: return; aoqi@6880: if (strcmp(s, "1") != 0) aoqi@6880: return; aoqi@6880: */ aoqi@6880: void * cur_pc = pc(); aoqi@6880: pushad(); aoqi@6880: NOT_LP64(push(FP);) aoqi@6880: aoqi@6880: li(A0, (long)reg->name()); aoqi@6880: if (reg == SP) aoqi@6880: addiu(A1, SP, wordSize * 23); //23 registers saved in pushad() aoqi@6880: else if (reg == A0) aoqi@6880: ld(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code! aoqi@6880: else aoqi@6880: move(A1, reg); aoqi@6880: li(A2, (long)cur_pc); aoqi@6880: push(S2); aoqi@6880: move(AT, -(StackAlignmentInBytes)); aoqi@6880: move(S2, SP); // use S2 as a sender SP holder aoqi@6880: andr(SP, SP, AT); // align stack as required by ABI aoqi@6880: call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: move(SP, S2); // use S2 as a sender SP holder aoqi@6880: pop(S2); aoqi@6880: NOT_LP64(pop(FP);) aoqi@6880: popad(); aoqi@6880: aoqi@6880: /* aoqi@6880: pushad(); aoqi@6880: #ifdef _LP64 aoqi@6880: if (reg == SP) aoqi@6880: addiu(A0, SP, wordSize * 23); //23 registers saved in pushad() aoqi@6880: else aoqi@6880: move(A0, reg); aoqi@6880: call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: #else aoqi@6880: push(FP); aoqi@6880: move(A0, reg); aoqi@6880: dsrl32(A1, reg, 0); aoqi@6880: //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_int),relocInfo::runtime_call_type); aoqi@6880: call(CAST_FROM_FN_PTR(address, SharedRuntime::print_long),relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: pop(FP); aoqi@6880: #endif aoqi@6880: popad(); aoqi@6880: pushad(); aoqi@6880: 
NOT_LP64(push(FP);) aoqi@6880: char b[50]; aoqi@6880: sprintf((char *)b, " pc: %p\n",cur_pc); aoqi@6880: li(A0, (long)(char *)b); aoqi@6880: call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: NOT_LP64(pop(FP);) aoqi@6880: popad(); aoqi@6880: */ aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::print_reg(FloatRegister reg) { aoqi@6880: void * cur_pc = pc(); aoqi@6880: pushad(); aoqi@6880: NOT_LP64(push(FP);) aoqi@6880: li(A0, (long)reg->name()); aoqi@6880: push(S2); aoqi@6880: move(AT, -(StackAlignmentInBytes)); aoqi@6880: move(S2, SP); // use S2 as a sender SP holder aoqi@6880: andr(SP, SP, AT); // align stack as required by ABI aoqi@6880: call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: move(SP, S2); // use S2 as a sender SP holder aoqi@6880: pop(S2); aoqi@6880: NOT_LP64(pop(FP);) aoqi@6880: popad(); aoqi@6880: aoqi@6880: pushad(); aoqi@6880: NOT_LP64(push(FP);) aoqi@6880: #if 1 aoqi@6880: move(FP, SP); aoqi@6880: move(AT, -(StackAlignmentInBytes)); aoqi@6880: andr(SP , SP , AT); aoqi@6880: mov_d(F12, reg); aoqi@6880: call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: move(SP, FP); aoqi@6880: #else aoqi@6880: mov_s(F12, reg); aoqi@6880: //call(CAST_FROM_FN_PTR(address, SharedRuntime::print_float),relocInfo::runtime_call_type); aoqi@6880: //delayed()->nop(); aoqi@6880: #endif aoqi@6880: NOT_LP64(pop(FP);) aoqi@6880: popad(); aoqi@6880: aoqi@6880: #if 0 aoqi@6880: pushad(); aoqi@6880: NOT_LP64(push(FP);) aoqi@6880: char* b = new char[50]; aoqi@6880: sprintf(b, " pc: %p\n", cur_pc); aoqi@6880: li(A0, (long)b); aoqi@6880: call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: NOT_LP64(pop(FP);) aoqi@6880: popad(); aoqi@6880: #endif aoqi@6880: } aoqi@6880: 
aoqi@6880: void MacroAssembler::increment(Register reg, int imm) { aoqi@6880: if (!imm) return; aoqi@6880: if (is_simm16(imm)) { aoqi@6880: #ifdef _LP64 aoqi@6880: daddiu(reg, reg, imm); aoqi@6880: #else aoqi@6880: addiu(reg, reg, imm); aoqi@6880: #endif aoqi@6880: } else { aoqi@6880: move(AT, imm); aoqi@6880: #ifdef _LP64 aoqi@6880: daddu(reg, reg, AT); aoqi@6880: #else aoqi@6880: addu(reg, reg, AT); aoqi@6880: #endif aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::decrement(Register reg, int imm) { aoqi@6880: increment(reg, -imm); aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: void MacroAssembler::call_VM(Register oop_result, aoqi@6880: address entry_point, aoqi@6880: bool check_exceptions) { aoqi@6880: call_VM_helper(oop_result, entry_point, 0, check_exceptions); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM(Register oop_result, aoqi@6880: address entry_point, aoqi@6880: Register arg_1, aoqi@6880: bool check_exceptions) { aoqi@6880: if (arg_1!=A1) move(A1, arg_1); aoqi@6880: call_VM_helper(oop_result, entry_point, 1, check_exceptions); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM(Register oop_result, aoqi@6880: address entry_point, aoqi@6880: Register arg_1, aoqi@6880: Register arg_2, aoqi@6880: bool check_exceptions) { aoqi@6880: if (arg_1!=A1) move(A1, arg_1); aoqi@6880: if (arg_2!=A2) move(A2, arg_2); aoqi@6880: assert(arg_2 != A1, "smashed argument"); aoqi@6880: call_VM_helper(oop_result, entry_point, 2, check_exceptions); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM(Register oop_result, aoqi@6880: address entry_point, aoqi@6880: Register arg_1, aoqi@6880: Register arg_2, aoqi@6880: Register arg_3, aoqi@6880: bool check_exceptions) { aoqi@6880: if (arg_1!=A1) move(A1, arg_1); aoqi@6880: if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); aoqi@6880: if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); aoqi@6880: 
call_VM_helper(oop_result, entry_point, 3, check_exceptions); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM(Register oop_result, aoqi@6880: Register last_java_sp, aoqi@6880: address entry_point, aoqi@6880: int number_of_arguments, aoqi@6880: bool check_exceptions) { aoqi@6880: call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM(Register oop_result, aoqi@6880: Register last_java_sp, aoqi@6880: address entry_point, aoqi@6880: Register arg_1, aoqi@6880: bool check_exceptions) { aoqi@6880: if (arg_1 != A1) move(A1, arg_1); aoqi@6880: call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM(Register oop_result, aoqi@6880: Register last_java_sp, aoqi@6880: address entry_point, aoqi@6880: Register arg_1, aoqi@6880: Register arg_2, aoqi@6880: bool check_exceptions) { aoqi@6880: if (arg_1 != A1) move(A1, arg_1); aoqi@6880: if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); aoqi@6880: call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM(Register oop_result, aoqi@6880: Register last_java_sp, aoqi@6880: address entry_point, aoqi@6880: Register arg_1, aoqi@6880: Register arg_2, aoqi@6880: Register arg_3, aoqi@6880: bool check_exceptions) { aoqi@6880: if (arg_1 != A1) move(A1, arg_1); aoqi@6880: if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); aoqi@6880: if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); aoqi@6880: call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM_base(Register oop_result, aoqi@6880: Register java_thread, aoqi@6880: Register last_java_sp, aoqi@6880: address entry_point, aoqi@6880: int number_of_arguments, 
aoqi@6880: bool check_exceptions) { aoqi@6880: aoqi@6880: address before_call_pc; aoqi@6880: // determine java_thread register aoqi@6880: if (!java_thread->is_valid()) { aoqi@6880: #ifndef OPT_THREAD aoqi@6880: java_thread = T2; aoqi@6880: get_thread(java_thread); aoqi@6880: #else aoqi@6880: java_thread = TREG; aoqi@6880: #endif aoqi@6880: } aoqi@6880: // determine last_java_sp register aoqi@6880: if (!last_java_sp->is_valid()) { aoqi@6880: last_java_sp = SP; aoqi@6880: } aoqi@6880: // debugging support aoqi@6880: assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); aoqi@6880: assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); aoqi@6880: assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); aoqi@6880: assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); aoqi@6880: aoqi@6880: assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save ebp"); aoqi@6880: aoqi@6880: // set last Java frame before call aoqi@6880: before_call_pc = (address)pc(); aoqi@6880: set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); aoqi@6880: aoqi@6880: // do the call aoqi@6880: move(A0, java_thread); aoqi@6880: call(entry_point, relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: // restore the thread (cannot use the pushed argument since arguments aoqi@6880: // may be overwritten by C code generated by an optimizing compiler); aoqi@6880: // however can use the register value directly if it is callee saved. 
aoqi@6880: #ifndef OPT_THREAD aoqi@6880: if (java_thread >=S0 && java_thread <=S7) { aoqi@6880: #ifdef ASSERT aoqi@6880: { Label L; aoqi@6880: get_thread(AT); aoqi@6880: beq(java_thread, AT, L); aoqi@6880: delayed()->nop(); aoqi@6880: stop("MacroAssembler::call_VM_base: edi not callee saved?"); aoqi@6880: bind(L); aoqi@6880: } aoqi@6880: #endif aoqi@6880: } else { aoqi@6880: get_thread(java_thread); aoqi@6880: } aoqi@6880: #endif aoqi@6880: aoqi@6880: // discard thread and arguments aoqi@6880: ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); aoqi@6880: // reset last Java frame aoqi@6880: reset_last_Java_frame(java_thread, false, true); aoqi@6880: aoqi@6880: check_and_handle_popframe(java_thread); aoqi@6880: check_and_handle_earlyret(java_thread); aoqi@6880: if (check_exceptions) { aoqi@6880: // check for pending exceptions (java_thread is set upon return) aoqi@6880: Label L; aoqi@6880: #ifdef _LP64 aoqi@6880: ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); aoqi@6880: #else aoqi@6880: lw(AT, java_thread, in_bytes(Thread::pending_exception_offset())); aoqi@6880: #endif aoqi@6880: beq(AT, R0, L); aoqi@6880: delayed()->nop(); aoqi@6880: li(AT, before_call_pc); aoqi@6880: push(AT); aoqi@6880: jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: bind(L); aoqi@6880: } aoqi@6880: aoqi@6880: // get oop result if there is one and reset the value in the thread aoqi@6880: if (oop_result->is_valid()) { aoqi@6880: #ifdef _LP64 aoqi@6880: ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); aoqi@6880: sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); aoqi@6880: #else aoqi@6880: lw(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); aoqi@6880: sw(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); aoqi@6880: #endif aoqi@6880: verify_oop(oop_result); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void 
MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { aoqi@6880: aoqi@6880: move(V0, SP); aoqi@6880: //we also reserve space for java_thread here aoqi@6880: #ifndef _LP64 aoqi@6880: daddi(SP, SP, (1 + number_of_arguments) * (- wordSize)); aoqi@6880: #endif aoqi@6880: move(AT, -(StackAlignmentInBytes)); aoqi@6880: andr(SP, SP, AT); aoqi@6880: call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); aoqi@6880: aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { aoqi@6880: call_VM_leaf_base(entry_point, number_of_arguments); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { aoqi@6880: if (arg_0 != A0) move(A0, arg_0); aoqi@6880: call_VM_leaf(entry_point, 1); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { aoqi@6880: if (arg_0 != A0) move(A0, arg_0); aoqi@6880: if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); aoqi@6880: call_VM_leaf(entry_point, 2); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { aoqi@6880: if (arg_0 != A0) move(A0, arg_0); aoqi@6880: if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); aoqi@6880: if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); aoqi@6880: call_VM_leaf(entry_point, 3); aoqi@6880: } aoqi@6880: void MacroAssembler::super_call_VM_leaf(address entry_point) { aoqi@6880: MacroAssembler::call_VM_leaf_base(entry_point, 0); aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: void MacroAssembler::super_call_VM_leaf(address entry_point, aoqi@6880: Register arg_1) { aoqi@6880: if (arg_1 != A0) move(A0, arg_1); aoqi@6880: MacroAssembler::call_VM_leaf_base(entry_point, 1); aoqi@6880: 
} aoqi@6880: aoqi@6880: aoqi@6880: void MacroAssembler::super_call_VM_leaf(address entry_point, aoqi@6880: Register arg_1, aoqi@6880: Register arg_2) { aoqi@6880: if (arg_1 != A0) move(A0, arg_1); aoqi@6880: if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); aoqi@6880: MacroAssembler::call_VM_leaf_base(entry_point, 2); aoqi@6880: } aoqi@6880: void MacroAssembler::super_call_VM_leaf(address entry_point, aoqi@6880: Register arg_1, aoqi@6880: Register arg_2, aoqi@6880: Register arg_3) { aoqi@6880: if (arg_1 != A0) move(A0, arg_1); aoqi@6880: if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); aoqi@6880: if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); aoqi@6880: MacroAssembler::call_VM_leaf_base(entry_point, 3); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::check_and_handle_earlyret(Register java_thread) { aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::check_and_handle_popframe(Register java_thread) { aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::null_check(Register reg, int offset) { aoqi@6880: if (needs_explicit_null_check(offset)) { aoqi@6880: // provoke OS NULL exception if reg = NULL by aoqi@6880: // accessing M[reg] w/o changing any (non-CC) registers aoqi@6880: // NOTE: cmpl is plenty here to provoke a segv aoqi@6880: lw(AT, reg, 0); aoqi@6880: // Note: should probably use testl(rax, Address(reg, 0)); aoqi@6880: // may be shorter code (however, this version of aoqi@6880: // testl needs to be implemented first) aoqi@6880: } else { aoqi@6880: // nothing to do, (later) access of M[reg + offset] aoqi@6880: // will provoke OS NULL exception if reg = NULL aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::enter() { aoqi@6880: push2(RA, FP); aoqi@6880: move(FP, SP); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::leave() { aoqi@6880: #ifndef _LP64 aoqi@6880: //move(SP, FP); aoqi@6880: //pop2(FP, RA); aoqi@6880: addi(SP, FP, 2 * 
wordSize); aoqi@6880: lw(RA, SP, - 1 * wordSize); aoqi@6880: lw(FP, SP, - 2 * wordSize); aoqi@6880: #else aoqi@6880: daddi(SP, FP, 2 * wordSize); aoqi@6880: ld(RA, SP, - 1 * wordSize); aoqi@6880: ld(FP, SP, - 2 * wordSize); aoqi@6880: #endif aoqi@6880: } aoqi@6880: /* aoqi@6880: void MacroAssembler::os_breakpoint() { aoqi@6880: // instead of directly emitting a breakpoint, call os:breakpoint for better debugability aoqi@6880: // (e.g., MSVC can't call ps() otherwise) aoqi@6880: call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); aoqi@6880: } aoqi@6880: */ aoqi@6880: void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { aoqi@6880: // determine java_thread register aoqi@6880: if (!java_thread->is_valid()) { aoqi@6880: #ifndef OPT_THREAD aoqi@6880: java_thread = T1; aoqi@6880: get_thread(java_thread); aoqi@6880: #else aoqi@6880: java_thread = TREG; aoqi@6880: #endif aoqi@6880: } aoqi@6880: // we must set sp to zero to clear frame aoqi@6880: st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); aoqi@6880: // must clear fp, so that compiled frames are not confused; it is possible aoqi@6880: // that we need it only for debugging aoqi@6880: if(clear_fp) aoqi@6880: st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); aoqi@6880: aoqi@6880: if (clear_pc) aoqi@6880: st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::reset_last_Java_frame(bool clear_fp, aoqi@6880: bool clear_pc) { aoqi@6880: Register thread = TREG; aoqi@6880: #ifndef OPT_THREAD aoqi@6880: get_thread(thread); aoqi@6880: #endif aoqi@6880: // we must set sp to zero to clear frame aoqi@6880: sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); aoqi@6880: // must clear fp, so that compiled frames are not confused; it is aoqi@6880: // possible that we need it only for debugging aoqi@6880: if (clear_fp) { aoqi@6880: sd(R0, Address(thread, 
JavaThread::last_Java_fp_offset())); aoqi@6880: } aoqi@6880: aoqi@6880: if (clear_pc) { aoqi@6880: sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: // Write serialization page so VM thread can do a pseudo remote membar. aoqi@6880: // We use the current thread pointer to calculate a thread specific aoqi@6880: // offset to write to within the page. This minimizes bus traffic aoqi@6880: // due to cache line collision. aoqi@6880: void MacroAssembler::serialize_memory(Register thread, Register tmp) { aoqi@6880: move(tmp, thread); aoqi@6880: srl(tmp, tmp,os::get_serialize_page_shift_count()); aoqi@6880: move(AT, (os::vm_page_size() - sizeof(int))); aoqi@6880: andr(tmp, tmp,AT); aoqi@6880: sw(tmp,Address(tmp, (intptr_t)os::get_memory_serialize_page())); aoqi@6880: } aoqi@6880: aoqi@6880: // Calls to C land aoqi@6880: // aoqi@6880: // When entering C land, the rbp, & rsp of the last Java frame have to be recorded aoqi@6880: // in the (thread-local) JavaThread object. When leaving C land, the last Java fp aoqi@6880: // has to be reset to 0. This is required to allow proper stack traversal. 
// Record the last Java frame (sp, optionally fp and pc) in the given
// thread's anchor fields. sp is stored last, after fp and pc.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifndef OPT_THREAD
    java_thread = T2;
    get_thread(java_thread);
#else
    java_thread = TREG;
#endif
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    // internal_pc relocation so the stored pc is updated if the code moves
    relocate(relocInfo::internal_pc_type);
    patchable_set48(AT, (long)last_java_pc);
    st_ptr(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset()));
  }
  st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset()));
}

// Same as above, but obtains the thread register itself.
// NOTE(review): unlike the overload above, the pc store here uses li()
// without an internal_pc relocation — presumably callers pass a pc that
// never needs patching; confirm before relying on it.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = SP;
  }

  Register thread = TREG;
#ifndef OPT_THREAD
  get_thread(thread);
#endif
  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()));
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    li(AT, (intptr_t)(last_java_pc));
    sd(AT, java_pc);
  }

  sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()));
}

//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS

// G1 pre-write barrier: not implemented on this platform.
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
  Unimplemented();
}

// G1 post-write barrier: not implemented on this platform.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

  Unimplemented();
}

#endif // INCLUDE_ALL_GCS
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

// The Address variant ignores 'dst'; the card index is derived from obj.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled inbetween
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  // turn the oop address into a card-table index (destroys obj)
#ifdef _LP64
  dsrl(obj, obj, CardTableModRefBS::card_shift);
#else
  shr(obj, CardTableModRefBS::card_shift);
#endif
}

// Mark the card at byte_map_base[index] dirty (index was computed by part 1).
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  li(AT, (long)ct->byte_map_base);
#ifdef _LP64
  dadd(AT, AT, obj);
#else
  add(AT, AT, obj);
#endif
  sb(R0, AT, 0);
  sync();
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Allocate con_size_in_bytes (or var_size_in_bytes when that register is
// valid) from the current thread's TLAB; branches to slow_case on overflow.
// On success obj holds the new object start and the TLAB top is bumped.
void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, t2, AT);

  Register end = t2;
#ifndef OPT_THREAD
  Register thread = t1;
  get_thread(thread);
#else
  Register thread = TREG;
#endif
  verify_tlab(t1, t2);//blows t1&t2

  ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset()));

  if (var_size_in_bytes == NOREG) {
    // i dont think we need move con_size_in_bytes to a register first.
    // by yjl 8/17/2005
    assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
    addi(end, obj, con_size_in_bytes);
  } else {
    add(end, obj, var_size_in_bytes);
  }

  // slow path if the new end passes the TLAB end
  ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
  sltu(AT, AT, end);
  bne_far(AT, R0, slow_case);
  delayed()->nop();


  // update the tlab top pointer
  st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset()));

  // recover var_size_in_bytes if necessary
  /*if (var_size_in_bytes == end) {
    sub(var_size_in_bytes, end, obj);
  }*/

  verify_tlab(t1, t2);
}

// Defines obj, preserves var_size_in_bytes
// CAS retry loop against the shared eden top pointer; branches to
// slow_case when inline contiguous allocation is disabled, on address
// wrap-around, or when the allocation would pass the heap end.
void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes,
                                   Register t1, Register t2, Label& slow_case) {
  assert_different_registers(obj, var_size_in_bytes, t1, AT);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
    // No allocation in the shared eden.
    b_far(slow_case);
    delayed()->nop();
  } else {

#ifndef _LP64
    Address heap_top(t1, Assembler::split_low((intptr_t)Universe::heap()->top_addr()));
    lui(t1, split_high((intptr_t)Universe::heap()->top_addr()));
#else
    Address heap_top(t1);
    li(t1, (long)Universe::heap()->top_addr());
#endif
    ld_ptr(obj, heap_top);

    Register end = t2;
    Label retry;

    bind(retry);
    if (var_size_in_bytes == NOREG) {
      // i dont think we need move con_size_in_bytes to a register first.
      assert(is_simm16(con_size_in_bytes), "fixme by moving imm to a register first");
      addi(end, obj, con_size_in_bytes);
    } else {
      add(end, obj, var_size_in_bytes);
    }
    // if end < obj then we wrapped around => object too long => slow case
    sltu(AT, end, obj);
    bne_far(AT, R0, slow_case);
    delayed()->nop();

    li(AT, (long)Universe::heap()->end_addr());
    sltu(AT, AT, end);
    bne_far(AT, R0, slow_case);
    delayed()->nop();
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    //if (os::is_MP()) {
    //  sync();
    //}

    // if someone beat us on the allocation, try again, otherwise continue
    cmpxchg(end, heap_top, obj);
    beq_far(AT, R0, retry);    //by yyq
    delayed()->nop();

  }
}

// C2 doesn't invoke this one.
// Refill the current thread's TLAB from eden. If the remaining free space
// in the TLAB is above the refill waste limit, the TLAB is retained and
// control goes to try_eden instead; otherwise the old TLAB remainder is
// filled with a dummy int array and a new TLAB is allocated, after which
// control branches back to retry.
void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) {
  Register top = T0;
  Register t1  = T1;
/* Jin: tlab_refill() is called in

     [c1_Runtime1_mips.cpp] Runtime1::generate_code_for(new_type_array_id);

   In generate_code_for(), T2 has been assigned as a register(length), which is used
  after calling tlab_refill();
   Therefore, tlab_refill() should not use T2.

   Source:

Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException
        at java.lang.System.arraycopy(Native Method)
        at java.util.Arrays.copyOf(Arrays.java:2799)  <-- alloc_array
        at sun.misc.Resource.getBytes(Resource.java:117)
        at java.net.URLClassLoader.defineClass(URLClassLoader.java:273)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:205)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:321)
 */
  Register t2  = T9;
  Register t3  = T3;
  Register thread_reg = T8;
  Label do_refill, discard_tlab;
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { //by yyq
    // No allocation in the shared eden.
    b(slow_case);
    delayed()->nop();
  }

  get_thread(thread_reg);

  ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset()));

  // calculate amount of free space
  sub(t1, t1, top);
  shr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
  slt(AT, t2, t1);
  beq(AT, R0, discard_tlab);
  delayed()->nop();

  // Retain

#ifndef _LP64
  move(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
#else
  li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment());
#endif
  // bump the waste limit so a future refill becomes more likely
  add(t2, t2, AT);
  st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));

  if (TLABStats) {
    // increment number of slow_allocations
    lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
    addiu(AT, AT, 1);
    sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset()));
  }
  b(try_eden);
  delayed()->nop();

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
    addi(AT, AT, 1);
    sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset()));
    // accumulate wastage -- t1 is amount free in tlab
    lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
    add(AT, AT, t1);
    sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset()));
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  beq(top, R0, do_refill);
  delayed()->nop();

  // set up the mark word
  li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2));
  st_ptr(AT, top, oopDesc::mark_offset_in_bytes());

  // set the length to the remaining space
  addi(t1, t1, - typeArrayOopDesc::header_size(T_INT));
  addi(t1, t1, ThreadLocalAllocBuffer::alignment_reserve());
  // convert heap words to jint element count
  shl(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  sw(t1, top, arrayOopDesc::length_offset_in_bytes());

  // set klass to intArrayKlass
#ifndef _LP64
  lui(AT, split_high((intptr_t)Universe::intArrayKlassObj_addr()));
  lw(t1, AT, split_low((intptr_t)Universe::intArrayKlassObj_addr()));
#else
  li(AT, (intptr_t)Universe::intArrayKlassObj_addr());
  ld_ptr(t1, AT, 0);
#endif
  //st_ptr(t1, top, oopDesc::klass_offset_in_bytes());
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
  shl(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, t3, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    assert_different_registers(thread_reg, t1);
    ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
    shl(AT, LogHeapWordSize);
    beq(AT, t1, ok);
    delayed()->nop();
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
  }
#endif
  // install the new TLAB: start/top at the new block, end leaves room
  // for the alignment reserve
  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
  add(top, top, t1);
  addi(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
  verify_tlab(t1, t2);
  b(retry);
  delayed()->nop();
}

static const double pi_4 = 0.7853981633974483;

// the x86 version is to clumsy, i dont think we need that fuss. maybe it is the right way. FIXME
maybe i'm wrong, FIXME aoqi@6880: // must get argument(a double) in F12/F13 aoqi@6880: //void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { aoqi@6880: //We need to preseve the register which maybe modified during the Call @Jerome aoqi@6880: void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { aoqi@6880: //save all modified register here aoqi@6880: // if (preserve_cpu_regs) { aoqi@6880: // } aoqi@6880: //FIXME, in the disassembly of tirgfunc, only used V0,V1,T9, SP,RA,so we ony save V0,V1,T9 aoqi@6880: pushad(); aoqi@6880: //we should preserve the stack space before we call aoqi@6880: addi(SP, SP, -wordSize * 2); aoqi@6880: switch (trig){ aoqi@6880: case 's' : aoqi@6880: call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); aoqi@6880: delayed()->nop(); aoqi@6880: break; aoqi@6880: case 'c': aoqi@6880: call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); aoqi@6880: delayed()->nop(); aoqi@6880: break; aoqi@6880: case 't': aoqi@6880: call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); aoqi@6880: delayed()->nop(); aoqi@6880: break; aoqi@6880: default:assert (false, "bad intrinsic"); aoqi@6880: break; aoqi@6880: aoqi@6880: } aoqi@6880: aoqi@6880: addi(SP, SP, wordSize * 2); aoqi@6880: popad(); aoqi@6880: // if (preserve_cpu_regs) { aoqi@6880: // } aoqi@6880: } aoqi@6880: aoqi@6880: #ifdef _LP64 aoqi@6880: void MacroAssembler::li(Register rd, long imm) { aoqi@6880: if (imm <= max_jint && imm >= min_jint) { aoqi@6880: li32(rd, (int)imm); aoqi@6880: } else if (julong(imm) <= 0xFFFFFFFF) { aoqi@6880: assert_not_delayed(); aoqi@6880: // lui sign-extends, so we can't use that. 
aoqi@6880: ori(rd, R0, julong(imm) >> 16); aoqi@6880: dsll(rd, rd, 16); aoqi@6880: ori(rd, rd, split_low(imm)); aoqi@6880: //aoqi_test aoqi@6880: //} else if ((imm > 0) && ((imm >> 48) == 0)) { aoqi@6880: } else if ((imm > 0) && is_simm16(imm >> 32)) { aoqi@6880: /* A 48-bit address */ aoqi@6880: li48(rd, imm); aoqi@6880: } else { aoqi@6880: li64(rd, imm); aoqi@6880: } aoqi@6880: } aoqi@6880: #else aoqi@6880: void MacroAssembler::li(Register rd, long imm) { aoqi@6880: li32(rd, (int)imm); aoqi@6880: } aoqi@6880: #endif aoqi@6880: aoqi@6880: void MacroAssembler::li32(Register reg, int imm) { aoqi@6880: if (is_simm16(imm)) { aoqi@6880: /* Jin: for imm < 0, we should use addi instead of addiu. aoqi@6880: * aoqi@6880: * java.lang.StringCoding$StringDecoder.decode(jobject, jint, jint) aoqi@6880: * aoqi@6880: * 78 move [int:-1|I] [a0|I] aoqi@6880: * : daddi a0, zero, 0xffffffff (correct) aoqi@6880: * : daddiu a0, zero, 0xffffffff (incorrect) aoqi@6880: */ aoqi@6880: if (imm >= 0) aoqi@6880: addiu(reg, R0, imm); aoqi@6880: else aoqi@6880: addi(reg, R0, imm); aoqi@6880: } else { aoqi@6880: lui(reg, split_low(imm >> 16)); aoqi@6880: if (split_low(imm)) aoqi@6880: ori(reg, reg, split_low(imm)); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: #ifdef _LP64 aoqi@6880: void MacroAssembler::set64(Register d, jlong value) { aoqi@6880: assert_not_delayed(); aoqi@6880: aoqi@6880: int hi = (int)(value >> 32); aoqi@6880: int lo = (int)(value & ~0); aoqi@6880: aoqi@6880: if (value == lo) { // 32-bit integer aoqi@6880: if (is_simm16(value)) { aoqi@6880: daddiu(d, R0, value); aoqi@6880: } else { aoqi@6880: lui(d, split_low(value >> 16)); aoqi@6880: if (split_low(value)) { aoqi@6880: ori(d, d, split_low(value)); aoqi@6880: } aoqi@6880: } aoqi@6880: } else if (hi == 0) { // hardware zero-extends to upper 32 aoqi@6880: ori(d, R0, julong(value) >> 16); aoqi@6880: dsll(d, d, 16); aoqi@6880: if (split_low(value)) { aoqi@6880: ori(d, d, split_low(value)); aoqi@6880: } aoqi@6880: } else if 
((value> 0) && is_simm16(value >> 32)) { // li48 aoqi@6880: // 4 insts aoqi@6880: li48(d, value); aoqi@6880: } else { // li64 aoqi@6880: // 6 insts aoqi@6880: li64(d, value); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: int MacroAssembler::insts_for_set64(jlong value) { aoqi@6880: int hi = (int)(value >> 32); aoqi@6880: int lo = (int)(value & ~0); aoqi@6880: aoqi@6880: int count = 0; aoqi@6880: aoqi@6880: if (value == lo) { // 32-bit integer aoqi@6880: if (is_simm16(value)) { aoqi@6880: //daddiu(d, R0, value); aoqi@6880: count++; aoqi@6880: } else { aoqi@6880: //lui(d, split_low(value >> 16)); aoqi@6880: count++; aoqi@6880: if (split_low(value)) { aoqi@6880: //ori(d, d, split_low(value)); aoqi@6880: count++; aoqi@6880: } aoqi@6880: } aoqi@6880: } else if (hi == 0) { // hardware zero-extends to upper 32 aoqi@6880: //ori(d, R0, julong(value) >> 16); aoqi@6880: //dsll(d, d, 16); aoqi@6880: count += 2; aoqi@6880: if (split_low(value)) { aoqi@6880: //ori(d, d, split_low(value)); aoqi@6880: count++; aoqi@6880: } aoqi@6880: } else if ((value> 0) && is_simm16(value >> 32)) { // li48 aoqi@6880: // 4 insts aoqi@6880: //li48(d, value); aoqi@6880: count += 4; aoqi@6880: } else { // li64 aoqi@6880: // 6 insts aoqi@6880: //li64(d, value); aoqi@6880: count += 6; aoqi@6880: } aoqi@6880: aoqi@6880: return count; aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::patchable_set48(Register d, jlong value) { aoqi@6880: assert_not_delayed(); aoqi@6880: aoqi@6880: int hi = (int)(value >> 32); aoqi@6880: int lo = (int)(value & ~0); aoqi@6880: aoqi@6880: int count = 0; aoqi@6880: aoqi@6880: if (value == lo) { // 32-bit integer aoqi@6880: if (is_simm16(value)) { aoqi@6880: daddiu(d, R0, value); aoqi@6880: count += 1; aoqi@6880: } else { aoqi@6880: lui(d, split_low(value >> 16)); aoqi@6880: count += 1; aoqi@6880: if (split_low(value)) { aoqi@6880: ori(d, d, split_low(value)); aoqi@6880: count += 1; aoqi@6880: } aoqi@6880: } aoqi@6880: } else if (hi == 0) { // hardware 
zero-extends to upper 32 aoqi@6880: ori(d, R0, julong(value) >> 16); aoqi@6880: dsll(d, d, 16); aoqi@6880: count += 2; aoqi@6880: if (split_low(value)) { aoqi@6880: ori(d, d, split_low(value)); aoqi@6880: count += 1; aoqi@6880: } aoqi@6880: } else if ((value> 0) && is_simm16(value >> 32)) { // li48 aoqi@6880: // 4 insts aoqi@6880: li48(d, value); aoqi@6880: count += 4; aoqi@6880: } else { // li64 aoqi@6880: tty->print_cr("value = 0x%x", value); aoqi@6880: guarantee(false, "Not supported yet !"); aoqi@6880: } aoqi@6880: aoqi@6880: for (count; count < 4; count++) { aoqi@6880: nop(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::patchable_set32(Register d, jlong value) { aoqi@6880: assert_not_delayed(); aoqi@6880: aoqi@6880: int hi = (int)(value >> 32); aoqi@6880: int lo = (int)(value & ~0); aoqi@6880: aoqi@6880: int count = 0; aoqi@6880: aoqi@6880: if (value == lo) { // 32-bit integer aoqi@6880: if (is_simm16(value)) { aoqi@6880: daddiu(d, R0, value); aoqi@6880: count += 1; aoqi@6880: } else { aoqi@6880: lui(d, split_low(value >> 16)); aoqi@6880: count += 1; aoqi@6880: if (split_low(value)) { aoqi@6880: ori(d, d, split_low(value)); aoqi@6880: count += 1; aoqi@6880: } aoqi@6880: } aoqi@6880: } else if (hi == 0) { // hardware zero-extends to upper 32 aoqi@6880: ori(d, R0, julong(value) >> 16); aoqi@6880: dsll(d, d, 16); aoqi@6880: count += 2; aoqi@6880: if (split_low(value)) { aoqi@6880: ori(d, d, split_low(value)); aoqi@6880: count += 1; aoqi@6880: } aoqi@6880: } else { aoqi@6880: tty->print_cr("value = 0x%x", value); aoqi@6880: guarantee(false, "Not supported yet !"); aoqi@6880: } aoqi@6880: aoqi@6880: for (count; count < 3; count++) { aoqi@6880: nop(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::patchable_call32(Register d, jlong value) { aoqi@6880: assert_not_delayed(); aoqi@6880: aoqi@6880: int hi = (int)(value >> 32); aoqi@6880: int lo = (int)(value & ~0); aoqi@6880: aoqi@6880: int count = 0; aoqi@6880: aoqi@6880: 
if (value == lo) { // 32-bit integer aoqi@6880: if (is_simm16(value)) { aoqi@6880: daddiu(d, R0, value); aoqi@6880: count += 1; aoqi@6880: } else { aoqi@6880: lui(d, split_low(value >> 16)); aoqi@6880: count += 1; aoqi@6880: if (split_low(value)) { aoqi@6880: ori(d, d, split_low(value)); aoqi@6880: count += 1; aoqi@6880: } aoqi@6880: } aoqi@6880: } else { aoqi@6880: tty->print_cr("value = 0x%x", value); aoqi@6880: guarantee(false, "Not supported yet !"); aoqi@6880: } aoqi@6880: aoqi@6880: for (count; count < 2; count++) { aoqi@6880: nop(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { aoqi@6880: assert(UseCompressedClassPointers, "should only be used for compressed header"); aoqi@6880: assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); aoqi@6880: aoqi@6880: int klass_index = oop_recorder()->find_index(k); aoqi@6880: RelocationHolder rspec = metadata_Relocation::spec(klass_index); aoqi@6880: long narrowKlass = (long)Klass::encode_klass(k); aoqi@6880: aoqi@6880: relocate(rspec, Assembler::narrow_oop_operand); aoqi@6880: patchable_set48(dst, narrowKlass); aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { aoqi@6880: assert(UseCompressedOops, "should only be used for compressed header"); aoqi@6880: assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); aoqi@6880: aoqi@6880: int oop_index = oop_recorder()->find_index(obj); aoqi@6880: RelocationHolder rspec = oop_Relocation::spec(oop_index); aoqi@6880: aoqi@6880: relocate(rspec, Assembler::narrow_oop_operand); aoqi@6880: patchable_set48(dst, oop_index); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::li64(Register rd, long imm) { aoqi@6880: assert_not_delayed(); aoqi@6880: lui(rd, imm >> 48); aoqi@6880: ori(rd, rd, split_low(imm >> 32)); aoqi@6880: dsll(rd, rd, 16); aoqi@6880: ori(rd, rd, split_low(imm >> 16)); aoqi@6880: dsll(rd, rd, 16); aoqi@6880: 
ori(rd, rd, split_low(imm)); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::li48(Register rd, long imm) { aoqi@6880: assert_not_delayed(); aoqi@6880: assert(is_simm16(imm >> 32), "Not a 48-bit address"); aoqi@6880: lui(rd, imm >> 32); aoqi@6880: ori(rd, rd, split_low(imm >> 16)); aoqi@6880: dsll(rd, rd, 16); aoqi@6880: ori(rd, rd, split_low(imm)); aoqi@6880: } aoqi@6880: #endif aoqi@6880: // NOTE: i dont push eax as i486. aoqi@6880: // the x86 save eax for it use eax as the jump register aoqi@6880: void MacroAssembler::verify_oop(Register reg, const char* s) { aoqi@6880: /* aoqi@6880: if (!VerifyOops) return; aoqi@6880: aoqi@6880: // Pass register number to verify_oop_subroutine aoqi@6880: char* b = new char[strlen(s) + 50]; aoqi@6880: sprintf(b, "verify_oop: %s: %s", reg->name(), s); aoqi@6880: push(rax); // save rax, aoqi@6880: push(reg); // pass register argument aoqi@6880: ExternalAddress buffer((address) b); aoqi@6880: // avoid using pushptr, as it modifies scratch registers aoqi@6880: // and our contract is not to modify anything aoqi@6880: movptr(rax, buffer.addr()); aoqi@6880: push(rax); aoqi@6880: // call indirectly to solve generation ordering problem aoqi@6880: movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); aoqi@6880: call(rax); aoqi@6880: */ aoqi@6880: if (!VerifyOops) return; aoqi@6880: const char * b = NULL; aoqi@6880: stringStream ss; aoqi@6880: ss.print("verify_oop: %s: %s", reg->name(), s); aoqi@6880: b = code_string(ss.as_string()); aoqi@6880: #ifdef _LP64 aoqi@6880: pushad(); aoqi@6880: move(A1, reg); aoqi@6880: li(A0, (long)b); aoqi@6880: li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); aoqi@6880: ld(T9, AT, 0); aoqi@6880: jalr(T9); aoqi@6880: delayed()->nop(); aoqi@6880: popad(); aoqi@6880: #else aoqi@6880: // Pass register number to verify_oop_subroutine aoqi@6880: sw(T0, SP, - wordSize); aoqi@6880: sw(T1, SP, - 2*wordSize); aoqi@6880: sw(RA, SP, - 3*wordSize); aoqi@6880: sw(A0, SP 
,- 4*wordSize); aoqi@6880: sw(A1, SP ,- 5*wordSize); aoqi@6880: sw(AT, SP ,- 6*wordSize); aoqi@6880: sw(T9, SP ,- 7*wordSize); aoqi@6880: addiu(SP, SP, - 7 * wordSize); aoqi@6880: move(A1, reg); aoqi@6880: li(A0, (long)b); aoqi@6880: // call indirectly to solve generation ordering problem aoqi@6880: li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); aoqi@6880: lw(T9, AT, 0); aoqi@6880: jalr(T9); aoqi@6880: delayed()->nop(); aoqi@6880: lw(T0, SP, 6* wordSize); aoqi@6880: lw(T1, SP, 5* wordSize); aoqi@6880: lw(RA, SP, 4* wordSize); aoqi@6880: lw(A0, SP, 3* wordSize); aoqi@6880: lw(A1, SP, 2* wordSize); aoqi@6880: lw(AT, SP, 1* wordSize); aoqi@6880: lw(T9, SP, 0* wordSize); aoqi@6880: addiu(SP, SP, 7 * wordSize); aoqi@6880: #endif aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: void MacroAssembler::verify_oop_addr(Address addr, const char* s) { aoqi@6880: if (!VerifyOops) { aoqi@6880: nop(); aoqi@6880: return; aoqi@6880: } aoqi@6880: // Pass register number to verify_oop_subroutine aoqi@6880: const char * b = NULL; aoqi@6880: stringStream ss; aoqi@6880: ss.print("verify_oop_addr: %s", s); aoqi@6880: b = code_string(ss.as_string()); aoqi@6880: aoqi@6880: st_ptr(T0, SP, - wordSize); aoqi@6880: st_ptr(T1, SP, - 2*wordSize); aoqi@6880: st_ptr(RA, SP, - 3*wordSize); aoqi@6880: st_ptr(A0, SP, - 4*wordSize); aoqi@6880: st_ptr(A1, SP, - 5*wordSize); aoqi@6880: st_ptr(AT, SP, - 6*wordSize); aoqi@6880: st_ptr(T9, SP, - 7*wordSize); aoqi@6880: ld_ptr(A1, addr); // addr may use SP, so load from it before change SP aoqi@6880: addiu(SP, SP, - 7 * wordSize); aoqi@6880: aoqi@6880: li(A0, (long)b); aoqi@6880: // call indirectly to solve generation ordering problem aoqi@6880: li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); aoqi@6880: ld_ptr(T9, AT, 0); aoqi@6880: jalr(T9); aoqi@6880: delayed()->nop(); aoqi@6880: ld_ptr(T0, SP, 6* wordSize); aoqi@6880: ld_ptr(T1, SP, 5* wordSize); aoqi@6880: ld_ptr(RA, SP, 4* wordSize); aoqi@6880: ld_ptr(A0, SP, 3* 
wordSize); aoqi@6880: ld_ptr(A1, SP, 2* wordSize); aoqi@6880: ld_ptr(AT, SP, 1* wordSize); aoqi@6880: ld_ptr(T9, SP, 0* wordSize); aoqi@6880: addiu(SP, SP, 7 * wordSize); aoqi@6880: } aoqi@6880: aoqi@6880: // used registers : T0, T1 aoqi@6880: void MacroAssembler::verify_oop_subroutine() { aoqi@6880: // RA: ra aoqi@6880: // A0: char* error message aoqi@6880: // A1: oop object to verify aoqi@6880: aoqi@6880: Label exit, error; aoqi@6880: // increment counter aoqi@6880: li(T0, (long)StubRoutines::verify_oop_count_addr()); aoqi@6880: lw(AT, T0, 0); aoqi@6880: #ifdef _LP64 aoqi@6880: daddi(AT, AT, 1); aoqi@6880: #else aoqi@6880: addi(AT, AT, 1); aoqi@6880: #endif aoqi@6880: sw(AT, T0, 0); aoqi@6880: aoqi@6880: // make sure object is 'reasonable' aoqi@6880: beq(A1, R0, exit); // if obj is NULL it is ok aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: // Check if the oop is in the right area of memory aoqi@6880: //const int oop_mask = Universe::verify_oop_mask(); aoqi@6880: //const int oop_bits = Universe::verify_oop_bits(); aoqi@6880: const uintptr_t oop_mask = Universe::verify_oop_mask(); aoqi@6880: const uintptr_t oop_bits = Universe::verify_oop_bits(); aoqi@6880: li(AT, oop_mask); aoqi@6880: andr(T0, A1, AT); aoqi@6880: li(AT, oop_bits); aoqi@6880: bne(T0, AT, error); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: // make sure klass is 'reasonable' aoqi@6880: //add for compressedoops aoqi@6880: reinit_heapbase(); aoqi@6880: //add for compressedoops aoqi@6880: load_klass(T0, A1); aoqi@6880: beq(T0, R0, error); // if klass is NULL it is broken aoqi@6880: delayed()->nop(); aoqi@6880: #if 0 aoqi@6880: //FIXME:wuhui. 
aoqi@6880: // Check if the klass is in the right area of memory aoqi@6880: //const int klass_mask = Universe::verify_klass_mask(); aoqi@6880: //const int klass_bits = Universe::verify_klass_bits(); aoqi@6880: const uintptr_t klass_mask = Universe::verify_klass_mask(); aoqi@6880: const uintptr_t klass_bits = Universe::verify_klass_bits(); aoqi@6880: aoqi@6880: li(AT, klass_mask); aoqi@6880: andr(T1, T0, AT); aoqi@6880: li(AT, klass_bits); aoqi@6880: bne(T1, AT, error); aoqi@6880: delayed()->nop(); aoqi@6880: // make sure klass' klass is 'reasonable' aoqi@6880: //add for compressedoops aoqi@6880: load_klass(T0, T0); aoqi@6880: beq(T0, R0, error); // if klass' klass is NULL it is broken aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: li(AT, klass_mask); aoqi@6880: andr(T1, T0, AT); aoqi@6880: li(AT, klass_bits); aoqi@6880: bne(T1, AT, error); aoqi@6880: delayed()->nop(); // if klass not in right area of memory it is broken too. aoqi@6880: #endif aoqi@6880: // return if everything seems ok aoqi@6880: bind(exit); aoqi@6880: aoqi@6880: jr(RA); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: // handle errors aoqi@6880: bind(error); aoqi@6880: pushad(); aoqi@6880: #ifndef _LP64 aoqi@6880: addi(SP, SP, (-1) * wordSize); aoqi@6880: #endif aoqi@6880: call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); aoqi@6880: delayed()->nop(); aoqi@6880: #ifndef _LP64 aoqi@6880: addiu(SP, SP, 1 * wordSize); aoqi@6880: #endif aoqi@6880: popad(); aoqi@6880: jr(RA); aoqi@6880: delayed()->nop(); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::verify_tlab(Register t1, Register t2) { aoqi@6880: #ifdef ASSERT aoqi@6880: assert_different_registers(t1, t2, AT); aoqi@6880: if (UseTLAB && VerifyOops) { aoqi@6880: Label next, ok; aoqi@6880: aoqi@6880: get_thread(t1); aoqi@6880: aoqi@6880: ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); aoqi@6880: ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); aoqi@6880: sltu(AT, t2, AT); aoqi@6880: 
beq(AT, R0, next); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: stop("assert(top >= start)"); aoqi@6880: aoqi@6880: bind(next); aoqi@6880: ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); aoqi@6880: sltu(AT, AT, t2); aoqi@6880: beq(AT, R0, ok); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: stop("assert(top <= end)"); aoqi@6880: aoqi@6880: bind(ok); aoqi@6880: aoqi@6880: } aoqi@6880: #endif aoqi@6880: } aoqi@6880: RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, aoqi@6880: Register tmp, aoqi@6880: int offset) { aoqi@6880: intptr_t value = *delayed_value_addr; aoqi@6880: if (value != 0) aoqi@6880: return RegisterOrConstant(value + offset); aoqi@6880: AddressLiteral a(delayed_value_addr); aoqi@6880: // load indirectly to solve generation ordering problem aoqi@6880: //movptr(tmp, ExternalAddress((address) delayed_value_addr)); aoqi@6880: //ld(tmp, a); aoqi@6880: if (offset != 0) aoqi@6880: daddi(tmp,tmp, offset); aoqi@6880: aoqi@6880: return RegisterOrConstant(tmp); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::hswap(Register reg) { aoqi@6880: //short aoqi@6880: //andi(reg, reg, 0xffff); aoqi@6880: srl(AT, reg, 8); aoqi@6880: sll(reg, reg, 24); aoqi@6880: sra(reg, reg, 16); aoqi@6880: orr(reg, reg, AT); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::huswap(Register reg) { aoqi@6880: #ifdef _LP64 aoqi@6880: dsrl(AT, reg, 8); aoqi@6880: dsll(reg, reg, 24); aoqi@6880: dsrl(reg, reg, 16); aoqi@6880: orr(reg, reg, AT); aoqi@6880: andi(reg, reg, 0xffff); aoqi@6880: #else aoqi@6880: //andi(reg, reg, 0xffff); aoqi@6880: srl(AT, reg, 8); aoqi@6880: sll(reg, reg, 24); aoqi@6880: srl(reg, reg, 16); aoqi@6880: orr(reg, reg, AT); aoqi@6880: #endif aoqi@6880: } aoqi@6880: aoqi@6880: // something funny to do this will only one more register AT aoqi@6880: // 32 bits aoqi@6880: void MacroAssembler::swap(Register reg) { aoqi@6880: srl(AT, reg, 8); aoqi@6880: sll(reg, reg, 24); aoqi@6880: orr(reg, reg, AT); 
aoqi@6880: //reg : 4 1 2 3 aoqi@6880: srl(AT, AT, 16); aoqi@6880: xorr(AT, AT, reg); aoqi@6880: andi(AT, AT, 0xff); aoqi@6880: //AT : 0 0 0 1^3); aoqi@6880: xorr(reg, reg, AT); aoqi@6880: //reg : 4 1 2 1 aoqi@6880: sll(AT, AT, 16); aoqi@6880: xorr(reg, reg, AT); aoqi@6880: //reg : 4 3 2 1 aoqi@6880: } aoqi@6880: aoqi@6880: #ifdef _LP64 aoqi@6880: aoqi@6880: /* do 32-bit CAS using MIPS64 lld/scd aoqi@6880: aoqi@6880: Jin: cas_int should only compare 32-bits of the memory value. aoqi@6880: However, lld/scd will do 64-bit operation, which violates the intention of cas_int. aoqi@6880: To simulate a 32-bit atomic operation, the value loaded with LLD should be split into aoqi@6880: tow halves, and only the low-32 bits is compared. If equals, the low-32 bits of newval, aoqi@6880: plus the high-32 bits or memory value, are stored togethor with SCD. aoqi@6880: aoqi@6880: Example: aoqi@6880: aoqi@6880: double d = 3.1415926; aoqi@6880: System.err.println("hello" + d); aoqi@6880: aoqi@6880: sun.misc.FloatingDecimal$1.() aoqi@6880: | aoqi@6880: `- java.util.concurrent.atomic.AtomicInteger::compareAndSet() aoqi@6880: aoqi@6880: 38 cas_int [a7a7|J] [a0|I] [a6|I] aoqi@6880: // a0: 0xffffffffe8ea9f63 pc: 0x55647f3354 aoqi@6880: // a6: 0x4ab325aa aoqi@6880: aoqi@6880: again: aoqi@6880: 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63" aoqi@6880: aoqi@6880: 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended) aoqi@6880: 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits aoqi@6880: 0x00000055647f3c68: dsll32 t8, t8, 0 aoqi@6880: 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal aoqi@6880: 0x00000055647f3c70: sll zero, zero, 0 aoqi@6880: aoqi@6880: 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended) aoqi@6880: 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF; aoqi@6880: 0x00000055647f3c7c: ori v1, v1, 0xffffffff aoqi@6880: 0x00000055647f3c80: and v1, a6, v1 aoqi@6880: 0x00000055647f3c84: 
or at, t8, v1 aoqi@6880: 0x00000055647f3c88: scd at, 0x0(a7) aoqi@6880: 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again aoqi@6880: 0x00000055647f3c90: sll zero, zero, 0 aoqi@6880: 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done aoqi@6880: 0x00000055647f3c98: sll zero, zero, 0 aoqi@6880: nequal: aoqi@6880: 0x00000055647f45a4: dadd a0, t9, zero aoqi@6880: 0x00000055647f45a8: dadd at, zero, zero aoqi@6880: done: aoqi@6880: */ aoqi@6880: aoqi@6880: void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) { aoqi@6880: /* 2012/11/11 Jin: MIPS64 can use ll/sc for 32-bit atomic memory access */ aoqi@6880: Label done, again, nequal; aoqi@6880: aoqi@6880: bind(again); aoqi@6880: aoqi@6880: if(!Use3A2000) sync(); aoqi@6880: ll(AT, dest); aoqi@6880: bne(AT, c_reg, nequal); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: move(AT, x_reg); aoqi@6880: sc(AT, dest); aoqi@6880: beq(AT, R0, again); aoqi@6880: delayed()->nop(); aoqi@6880: b(done); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: // not xchged aoqi@6880: bind(nequal); aoqi@6880: sync(); aoqi@6880: move(c_reg, AT); aoqi@6880: move(AT, R0); aoqi@6880: aoqi@6880: bind(done); aoqi@6880: } aoqi@6880: #endif // cmpxchg32 aoqi@6880: aoqi@6880: void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) { aoqi@6880: Label done, again, nequal; aoqi@6880: aoqi@6880: bind(again); aoqi@6880: #ifdef _LP64 aoqi@6880: if(!Use3A2000) sync(); aoqi@6880: lld(AT, dest); aoqi@6880: #else aoqi@6880: if(!Use3A2000) sync(); aoqi@6880: ll(AT, dest); aoqi@6880: #endif aoqi@6880: bne(AT, c_reg, nequal); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: move(AT, x_reg); aoqi@6880: #ifdef _LP64 aoqi@6880: scd(AT, dest); aoqi@6880: #else aoqi@6880: sc(AT, dest); aoqi@6880: #endif aoqi@6880: beq(AT, R0, again); aoqi@6880: delayed()->nop(); aoqi@6880: b(done); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: // not xchged aoqi@6880: bind(nequal); aoqi@6880: sync(); 
aoqi@6880: move(c_reg, AT); aoqi@6880: move(AT, R0); aoqi@6880: aoqi@6880: bind(done); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { aoqi@6880: Label done, again, nequal; aoqi@6880: aoqi@6880: Register x_reg = x_regLo; aoqi@6880: dsll32(x_regHi, x_regHi, 0); aoqi@6880: dsll32(x_regLo, x_regLo, 0); aoqi@6880: dsrl32(x_regLo, x_regLo, 0); aoqi@6880: orr(x_reg, x_regLo, x_regHi); aoqi@6880: aoqi@6880: Register c_reg = c_regLo; aoqi@6880: dsll32(c_regHi, c_regHi, 0); aoqi@6880: dsll32(c_regLo, c_regLo, 0); aoqi@6880: dsrl32(c_regLo, c_regLo, 0); aoqi@6880: orr(c_reg, c_regLo, c_regHi); aoqi@6880: aoqi@6880: bind(again); aoqi@6880: aoqi@6880: if(!Use3A2000) sync(); aoqi@6880: lld(AT, dest); aoqi@6880: bne(AT, c_reg, nequal); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: //move(AT, x_reg); aoqi@6880: dadd(AT, x_reg, R0); aoqi@6880: scd(AT, dest); aoqi@6880: beq(AT, R0, again); aoqi@6880: delayed()->nop(); aoqi@6880: b(done); aoqi@6880: delayed()->nop(); aoqi@6880: aoqi@6880: // not xchged aoqi@6880: bind(nequal); aoqi@6880: sync(); aoqi@6880: //move(c_reg, AT); aoqi@6880: //move(AT, R0); aoqi@6880: dadd(c_reg, AT, R0); aoqi@6880: dadd(AT, R0, R0); aoqi@6880: bind(done); aoqi@6880: } aoqi@6880: aoqi@6880: // be sure the three register is different aoqi@6880: void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { aoqi@6880: assert_different_registers(tmp, fs, ft); aoqi@6880: div_s(tmp, fs, ft); aoqi@6880: trunc_l_s(tmp, tmp); aoqi@6880: cvt_s_l(tmp, tmp); aoqi@6880: mul_s(tmp, tmp, ft); aoqi@6880: sub_s(fd, fs, tmp); aoqi@6880: } aoqi@6880: aoqi@6880: // be sure the three register is different aoqi@6880: void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { aoqi@6880: assert_different_registers(tmp, fs, ft); aoqi@6880: div_d(tmp, fs, ft); aoqi@6880: trunc_l_d(tmp, tmp); 
aoqi@6880: cvt_d_l(tmp, tmp); aoqi@6880: mul_d(tmp, tmp, ft); aoqi@6880: sub_d(fd, fs, tmp); aoqi@6880: } aoqi@6880: aoqi@6880: // Fast_Lock and Fast_Unlock used by C2 aoqi@6880: aoqi@6880: // Because the transitions from emitted code to the runtime aoqi@6880: // monitorenter/exit helper stubs are so slow it's critical that aoqi@6880: // we inline both the stack-locking fast-path and the inflated fast path. aoqi@6880: // aoqi@6880: // See also: cmpFastLock and cmpFastUnlock. aoqi@6880: // aoqi@6880: // What follows is a specialized inline transliteration of the code aoqi@6880: // in slow_enter() and slow_exit(). If we're concerned about I$ bloat aoqi@6880: // another option would be to emit TrySlowEnter and TrySlowExit methods aoqi@6880: // at startup-time. These methods would accept arguments as aoqi@6880: // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure aoqi@6880: // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply aoqi@6880: // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. aoqi@6880: // In practice, however, the # of lock sites is bounded and is usually small. aoqi@6880: // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer aoqi@6880: // if the processor uses simple bimodal branch predictors keyed by EIP aoqi@6880: // Since the helper routines would be called from multiple synchronization aoqi@6880: // sites. aoqi@6880: // aoqi@6880: // An even better approach would be write "MonitorEnter()" and "MonitorExit()" aoqi@6880: // in java - using j.u.c and unsafe - and just bind the lock and unlock sites aoqi@6880: // to those specialized methods. That'd give us a mostly platform-independent aoqi@6880: // implementation that the JITs could optimize and inline at their pleasure. aoqi@6880: // Done correctly, the only time we'd need to cross to native could would be aoqi@6880: // to park() or unpark() threads. 
We'd also need a few more unsafe operators aoqi@6880: // to (a) prevent compiler-JIT reordering of non-volatile accesses, and aoqi@6880: // (b) explicit barriers or fence operations. aoqi@6880: // aoqi@6880: // TODO: aoqi@6880: // aoqi@6880: // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). aoqi@6880: // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. aoqi@6880: // Given TLAB allocation, Self is usually manifested in a register, so passing it into aoqi@6880: // the lock operators would typically be faster than reifying Self. aoqi@6880: // aoqi@6880: // * Ideally I'd define the primitives as: aoqi@6880: // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. aoqi@6880: // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED aoqi@6880: // Unfortunately ADLC bugs prevent us from expressing the ideal form. aoqi@6880: // Instead, we're stuck with a rather awkward and brittle register assignments below. aoqi@6880: // Furthermore the register assignments are overconstrained, possibly resulting in aoqi@6880: // sub-optimal code near the synchronization site. aoqi@6880: // aoqi@6880: // * Eliminate the sp-proximity tests and just use "== Self" tests instead. aoqi@6880: // Alternately, use a better sp-proximity test. aoqi@6880: // aoqi@6880: // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. aoqi@6880: // Either one is sufficient to uniquely identify a thread. aoqi@6880: // TODO: eliminate use of sp in _owner and use get_thread(tr) instead. aoqi@6880: // aoqi@6880: // * Intrinsify notify() and notifyAll() for the common cases where the aoqi@6880: // object is locked by the calling thread but the waitlist is empty. aoqi@6880: // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). aoqi@6880: // aoqi@6880: // * use jccb and jmpb instead of jcc and jmp to improve code density. 
//    But beware of excessive branch density on AMD Opterons.
//
// *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
//    or failure of the fast-path.  If the fast-path fails then we pass
//    control to the slow-path, typically in C.  In Fast_Lock and
//    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
//    will emit a conditional branch immediately after the node.
//    So we have branches to branches and lots of ICC.ZF games.
//    Instead, it might be better to have C2 pass a "FailureLabel"
//    into Fast_Lock and Fast_Unlock.  In the case of success, control
//    will drop through the node.  ICC.ZF is undefined at exit.
//    In the case of failure, the node will branch directly to the
//    FailureLabel


// Emit the fast-path monitor-enter sequence for a provably balanced lock
// site.  Protocol (see the comment at DONE_LABEL below): on exit AT == 1
// means the lock was acquired on the fast path, AT == 0 forces control
// through the slow path.
//
// obj: object to lock
// box: on-stack box address (displaced header location) - KILLED
// rax,: tmp -- KILLED
// scr: tmp -- KILLED
void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {

  // Ensure the register assignments are disjoint
  guarantee (objReg != boxReg, "") ;
  guarantee (objReg != tmpReg, "") ;
  guarantee (objReg != scrReg, "") ;
  guarantee (boxReg != tmpReg, "") ;
  guarantee (boxReg != scrReg, "") ;


  block_comment("FastLock");
  /*
     move(AT, 0x0);
     return;
   */
  if (PrintBiasedLockingStatistics) {
    // atomic_inc32 clobbers its two scratch registers (AT, tmpReg);
    // tmpReg is preserved across the call.
    push(tmpReg);
    atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg);
    pop(tmpReg);
  }

  if (EmitSync & 1) {
    // Diagnostic mode: inhibit the fast path entirely; AT == 0 forces slow path.
    move(AT, 0x0);
    return;
  } else
    if (EmitSync & 2) {
      // Reduced fast path: stack-locking only, no inflated-monitor handling.
      Label DONE_LABEL ;
      if (UseBiasedLocking) {
        // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
        biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
      }

      ld(tmpReg, Address(objReg, 0)) ;   // fetch markword
      ori(tmpReg, tmpReg, 0x1);          // set the "unlocked" bit in the displaced header
      sd(tmpReg, Address(boxReg, 0));    // Anticipate successful CAS

      cmpxchg(boxReg, Address(objReg, 0), tmpReg);   // Updates tmpReg
      bne(AT, R0, DONE_LABEL);           // AT != 0: CAS succeeded
      delayed()->nop();

      // Recursive locking: mark word holds a stack address inside this frame.
      dsubu(tmpReg, tmpReg, SP);
      li(AT, (7 - os::vm_page_size() ));
      andr(tmpReg, tmpReg, AT);
      sd(tmpReg, Address(boxReg, 0));    // 0 in the box flags a recursive stack-lock
      bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL);
      }

      ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object.
      andi(AT, tmpReg, markOopDesc::monitor_value);
      bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias
      delayed()->nop();

      // Attempt stack-locking ...
      ori (tmpReg, tmpReg, markOopDesc::unlocked_value);
      sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS
      //if (os::is_MP()) {
      //  sync();
      //}

      cmpxchg(boxReg, Address(objReg, 0), tmpReg);  // Updates tmpReg
      //AT == 1: unlocked

      if (PrintBiasedLockingStatistics) {
        Label L;
        beq(AT, R0, L);                 // CAS failed - skip the counter bump
        delayed()->nop();
        push(T0);
        push(T1);
        atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
        pop(T1);
        pop(T0);
        bind(L);
      }
      bne(AT, R0, DONE_LABEL);          // stack-lock acquired
      delayed()->nop();

      // Recursive locking
      // The object is stack-locked: markword contains stack pointer to BasicLock.
      // Locked by current thread if difference with current SP is less than one page.
      dsubu(tmpReg, tmpReg, SP);
      li(AT, 7 - os::vm_page_size() );
      andr(tmpReg, tmpReg, AT);
      sd(tmpReg, Address(boxReg, 0));
      if (PrintBiasedLockingStatistics) {
        Label L;
        // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++
        bne(tmpReg, R0, L);
        delayed()->nop();
        push(T0);
        push(T1);
        atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1);
        pop(T1);
        pop(T0);
        bind(L);
      }
      sltiu(AT, tmpReg, 1); /* AT = (tmpReg == 0) ? 1 : 0 */

      b(DONE_LABEL) ;
      delayed()->nop();

      bind(IsInflated) ;
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.

      // TODO: someday avoid the ST-before-CAS penalty by
      // relocating (deferring) the following ST.
      // We should also think about trying a CAS without having
      // fetched _owner.  If the CAS is successful we may
      // avoid an RTO->RTS upgrade on the $line.
      // Without cast to int32_t a movptr will destroy r10 which is typically obj
      li(AT, (int32_t)intptr_t(markOopDesc::unused_mark()));
      sd(AT, Address(boxReg, 0));

      move(boxReg, tmpReg) ;
      ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      // if (m->owner != 0) => AT = 0, goto slow path.
      move(AT, R0);
      bne(tmpReg, R0, DONE_LABEL);
      delayed()->nop();

#ifndef OPT_THREAD
      get_thread (TREG) ;
#endif
      // It's inflated and appears unlocked
      //if (os::is_MP()) {
      //  sync();
      //}
      // Try to CAS the current thread into the monitor's owner field.
      cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ;
      // Intentional fall-through into DONE_LABEL ...


      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      bind(DONE_LABEL);

      // At DONE_LABEL the AT is set as follows ...
      // Fast_Unlock uses the same protocol.
      // AT == 1 -> Success
      // AT == 0 -> Failure - force control through the slow-path

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) nop() ;

    }
}

// obj: object to unlock
// box: box address (displaced header location), killed.  Must be EAX.
// rbx,: killed tmp; cannot be obj nor box.
//
// Some commentary on balanced locking:
//
// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
// Methods that don't have provably balanced locking are forced to run in the
// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
// The interpreter provides two properties:
// I1:  At return-time the interpreter automatically and quietly unlocks any
//      objects acquired in the current activation (frame).
//      Recall that the interpreter maintains an on-stack list of locks
//      currently held by a frame.
// I2:  If a method attempts to unlock an object that is not held by the
//      frame the interpreter throws IMSX.
//
// Let's say A(), which has provably balanced locking, acquires O and then calls B().
// B() doesn't have provably balanced locking so it runs in the interpreter.
// Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
// is still locked by A().
//
// The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
// should not be unlocked by "normal" java-level locking and vice-versa.  The specification
// doesn't specify what will occur if a program engages in such mixed-mode locking, however.

// Emit the fast-path monitor-exit sequence.  Uses the same protocol as
// fast_lock above: on exit AT == 1 means the lock was released on the fast
// path, AT == 0 forces control through the slow path.
void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {

  guarantee (objReg != boxReg, "") ;
  guarantee (objReg != tmpReg, "") ;
  guarantee (boxReg != tmpReg, "") ;



  block_comment("FastUnlock");


  if (EmitSync & 4) {
    // Disable - inhibit all inlining.  Force control through the slow-path
    move(AT, 0x0);
    return;
  } else
    if (EmitSync & 8) {
      // Reduced fast path: stack-locking only.
      Label DONE_LABEL ;
      if (UseBiasedLocking) {
        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
      }
      // classic stack-locking code ...
      ld(tmpReg, Address(boxReg, 0)) ;
      beq(tmpReg, R0, DONE_LABEL) ;  // 0 in the box: recursive unlock, nothing to do
      move(AT, 0x1);  // delay slot

      cmpxchg(tmpReg, Address(objReg, 0), boxReg);  // Uses EAX which is box
      bind(DONE_LABEL);
    } else {
      Label DONE_LABEL, Stacked, CheckSucc, Inflated ;

      // Critically, the biased locking test must have precedence over
      // and appear before the (box->dhw == 0) recursive stack-lock test.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
      }

      ld(AT, Address(boxReg, 0)) ;      // Examine the displaced header
      beq(AT, R0, DONE_LABEL) ;         // 0 indicates recursive stack-lock
      delayed()->daddiu(AT, R0, 0x1);   // delay slot: AT = 1 (success)

      ld(tmpReg, Address(objReg, 0)) ;  // Examine the object's markword
      andi(AT, tmpReg, markOopDesc::monitor_value) ;  // Inflated?
      beq(AT, R0, Stacked) ;            // Inflated?
      delayed()->nop();

      bind(Inflated) ;
      // It's inflated.
      // Despite our balanced locking property we still check that m->_owner == Self
      // as java routines or native JNI code called by this thread might
      // have released the lock.
      // Refer to the comments in synchronizer.cpp for how we might encode extra
      // state in _succ so we can avoid fetching EntryList|cxq.
      //
      // I'd like to add more cases in fast_lock() and fast_unlock() --
      // such as recursive enter and exit -- but we have to be wary of
      // I$ bloat, T$ effects and BP$ effects.
      //
      // If there's no contention try a 1-0 exit.  That is, exit without
      // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
      // we detect and recover from the race that the 1-0 exit admits.
      //
      // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
      // before it STs null into _owner, releasing the lock.  Updates
      // to data protected by the critical section must be visible before
      // we drop the lock (and thus before any other thread could acquire
      // the lock and observe the fields protected by the lock).
      // IA32's memory-model is SPO, so STs are ordered with respect to
      // each other and there's no need for an explicit barrier (fence).
      // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
#ifndef OPT_THREAD
      get_thread (TREG) ;
#endif

      // It's inflated
      // boxReg accumulates reasons we cannot take the 1-0 exit:
      // owner != Self, or recursions != 0 ...
      ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      xorr(boxReg, boxReg, TREG);

      ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
      orr(boxReg, boxReg, AT);

      move(AT, R0);
      bne(boxReg, R0, DONE_LABEL);   // AT == 0: slow path
      delayed()->nop();

      // ... or a non-empty cxq/EntryList (someone is waiting).
      ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
      ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
      orr(boxReg, boxReg, AT);

      move(AT, R0);
      bne(boxReg, R0, DONE_LABEL);   // AT == 0: slow path
      delayed()->nop();

      // 1-0 exit: full barrier, then store NULL into _owner.
      sync();
      sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      move(AT, 0x1);
      b(DONE_LABEL);
      delayed()->nop();

      bind (Stacked);
      // Stack-locked: restore the displaced header with a CAS.
      ld(tmpReg, Address(boxReg, 0)) ;
      //if (os::is_MP()) { sync(); }
      cmpxchg(tmpReg, Address(objReg, 0), boxReg);

      if (EmitSync & 65536) {
        bind (CheckSucc);
      }

      bind(DONE_LABEL);

      // Avoid branch to branch on AMD processors
      if (EmitSync & 32768) { nop() ; }
    }
}

// Pad the instruction stream with nops until the current code offset is a
// multiple of modulus.
void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) nop();
}


// FPU state verification is a no-op on this port.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  //Unimplemented();
}

// Register sets saved/restored by pushad/popad below.
#ifdef _LP64
Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP};

/* FIXME: Jin: In MIPS64, F0~23 are all caller-saved registers */
FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13};
#else
Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, T4, T5, T6, T7, T0, T1, T2, T3, T8, T9, GP, RA, FP};

// NOTE(review): declared as Register[] (not FloatRegister[]) in the 32-bit
// configuration, yet pushad/popad store these with swc1/lwc1 -- confirm the
// 32-bit build still compiles.
Register caller_saved_fpu_registers[] = {};
#endif

//We preserve all caller-saved register
void MacroAssembler::pushad(){
  int i;

  /* Fixed-point registers */
  int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]);
  daddi(SP, SP, -1 * len * wordSize);
  for (i = 0; i < len; i++)
  {
#ifdef _LP64
    sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#else
    sw(caller_saved_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }

  /* Floating-point registers */
  len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]);
  daddi(SP, SP, -1 * len * wordSize);
  for (i = 0; i < len; i++)
  {
#ifdef _LP64
    sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#else
    swc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize);
#endif
  }
};

// Restore the registers saved by pushad, in reverse order (FPU set first).
void MacroAssembler::popad(){
aoqi@6880: int i; aoqi@6880: aoqi@6880: /* Floating-point registers */ aoqi@6880: int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); aoqi@6880: for (i = 0; i < len; i++) aoqi@6880: { aoqi@6880: #ifdef _LP64 aoqi@6880: ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); aoqi@6880: #else aoqi@6880: lwc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); aoqi@6880: #endif aoqi@6880: } aoqi@6880: daddi(SP, SP, len * wordSize); aoqi@6880: aoqi@6880: /* Fixed-point registers */ aoqi@6880: len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); aoqi@6880: for (i = 0; i < len; i++) aoqi@6880: { aoqi@6880: #ifdef _LP64 aoqi@6880: ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); aoqi@6880: #else aoqi@6880: lw(caller_saved_registers[i], SP, (len - i - 1) * wordSize); aoqi@6880: #endif aoqi@6880: } aoqi@6880: daddi(SP, SP, len * wordSize); aoqi@6880: }; aoqi@6880: aoqi@6880: void MacroAssembler::push2(Register reg1, Register reg2) { aoqi@6880: #ifdef _LP64 aoqi@6880: daddi(SP, SP, -16); aoqi@6880: sd(reg2, SP, 0); aoqi@6880: sd(reg1, SP, 8); aoqi@6880: #else aoqi@6880: addi(SP, SP, -8); aoqi@6880: sw(reg2, SP, 0); aoqi@6880: sw(reg1, SP, 4); aoqi@6880: #endif aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::pop2(Register reg1, Register reg2) { aoqi@6880: #ifdef _LP64 aoqi@6880: ld(reg1, SP, 0); aoqi@6880: ld(reg2, SP, 8); aoqi@6880: daddi(SP, SP, 16); aoqi@6880: #else aoqi@6880: lw(reg1, SP, 0); aoqi@6880: lw(reg2, SP, 4); aoqi@6880: addi(SP, SP, 8); aoqi@6880: #endif aoqi@6880: } aoqi@6880: aoqi@6880: //for UseCompressedOops Option aoqi@6880: void MacroAssembler::load_klass(Register dst, Register src) { aoqi@6880: #ifdef _LP64 aoqi@6880: if(UseCompressedClassPointers){ aoqi@6880: lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); aoqi@6880: decode_klass_not_null(dst); aoqi@6880: } else aoqi@6880: #endif aoqi@6880: ld(dst, src, oopDesc::klass_offset_in_bytes()); aoqi@6880: 
} aoqi@6880: aoqi@6880: void MacroAssembler::store_klass(Register dst, Register src) { aoqi@6880: #ifdef _LP64 aoqi@6880: if(UseCompressedClassPointers){ aoqi@6880: encode_klass_not_null(src); aoqi@6880: sw(src, dst, oopDesc::klass_offset_in_bytes()); aoqi@6880: } else { aoqi@6880: #endif aoqi@6880: sd(src, dst, oopDesc::klass_offset_in_bytes()); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::load_prototype_header(Register dst, Register src) { aoqi@6880: load_klass(dst, src); aoqi@6880: ld(dst, Address(dst, Klass::prototype_header_offset())); aoqi@6880: } aoqi@6880: aoqi@6880: #ifdef _LP64 aoqi@6880: void MacroAssembler::store_klass_gap(Register dst, Register src) { aoqi@6880: if (UseCompressedClassPointers) { aoqi@6880: sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::load_heap_oop(Register dst, Address src) { aoqi@6880: if(UseCompressedOops){ aoqi@6880: lwu(dst, src); aoqi@6880: decode_heap_oop(dst); aoqi@6880: } else{ aoqi@6880: ld(dst, src); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::store_heap_oop(Address dst, Register src){ aoqi@6880: if(UseCompressedOops){ aoqi@6880: assert(!dst.uses(src), "not enough registers"); aoqi@6880: encode_heap_oop(src); aoqi@6880: sw(src, dst); aoqi@6880: } else{ aoqi@6880: sd(src, dst); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: #ifdef ASSERT aoqi@6880: void MacroAssembler::verify_heapbase(const char* msg) { aoqi@6880: assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); aoqi@6880: assert (Universe::heap() != NULL, "java heap should be initialized"); aoqi@6880: } aoqi@6880: #endif aoqi@6880: aoqi@6880: aoqi@6880: // Algorithm must match oop.inline.hpp encode_heap_oop. 
// Compress the oop in r in place.  A NULL oop encodes to 0.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: only the shift (if any) is needed.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(r, LogMinObjAlignmentInBytes);
    }
    return;
  }

  // Heap-based: map a NULL oop to the heap base first, so the subtraction
  // below yields a 0 encoding for NULL.
  movz(r, S5_heapbase, r);
  dsub(r, r, S5_heapbase);
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }
}

// Compress the oop in src into dst.  A NULL oop encodes to 0.
void MacroAssembler::encode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?");
#endif
  verify_oop(src, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      // In-place: same sequence as the single-register overload above.
      movz(dst, S5_heapbase, dst);
      dsub(dst, dst, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
    } else {
      dsub(dst, src, S5_heapbase);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shr(dst, LogMinObjAlignmentInBytes);
      }
      // src is still intact here, so a NULL src can be patched to encoding 0.
      movz(dst, R0, src);
    }
  }
}

// Compress the (known non-NULL) oop in r in place.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(r, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    dsub(r, r, S5_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shr(r, LogMinObjAlignmentInBytes);
  }

}

// Compress the (known non-NULL) oop in src into dst.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should be compressed");
#ifdef ASSERT
  if (CheckCompressedOops) {
    Label ok;
    bne(src, R0, ok);
    delayed()->nop();
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");

  if (Universe::narrow_oop_base() != NULL) {
    dsub(dst, src, S5_heapbase);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shr(dst, LogMinObjAlignmentInBytes);
    }
  } else {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      dsrl(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  }
}

// Decompress the narrow oop in r in place.  Encoding 0 decodes to NULL.
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
  } else {
    // Save the original encoding so a 0 input can be restored to NULL after
    // the heap base has been added.
    move(AT, r);
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shl(r, LogMinObjAlignmentInBytes);
    }
    dadd(r, r, S5_heapbase);
    movz(r, R0, AT);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}

// Decompress the narrow oop in src into dst.  Encoding 0 decodes to NULL.
void MacroAssembler::decode_heap_oop(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (dst != src) nop(); // DON'T DELETE THIS GUY.
      dsll(dst, src, LogMinObjAlignmentInBytes);
    } else {
      if (dst != src) move(dst, src);
    }
  } else {
    if (dst == src) {
      // In-place: stash the encoding in AT so 0 can be restored to NULL.
      move(AT, dst);
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        shl(dst, LogMinObjAlignmentInBytes);
      }
      dadd(dst, dst, S5_heapbase);
      movz(dst, R0, AT);
    } else {
      if (Universe::narrow_oop_shift() != 0) {
        assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
        dsll(dst, src, LogMinObjAlignmentInBytes);
        daddu(dst, dst, S5_heapbase);
      } else {
        daddu(dst, src, S5_heapbase);
      }
      // src is still intact: restore a 0 encoding to NULL.
      movz(dst, R0, src);
    }
  }
  verify_oop(dst, "broken oop in decode_heap_oop");
}

// Decompress the (known non-zero) narrow oop in r in place.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shl(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      daddu(r, r, S5_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}

// Decompress the (known non-zero) narrow oop in src into dst.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");

  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  //lea(dst, Address(S5_heapbase, src, Address::times_8, 0));
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      dsll(dst, src, LogMinObjAlignmentInBytes);
      daddu(dst, dst, S5_heapbase);
    } else {
      dsll(dst, src, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        daddu(dst, dst, S5_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      move(dst, src);
    }
  }
}

// Compress the (known non-NULL) klass pointer in r in place.
// Clobbers AT when a klass base is in use.
void MacroAssembler::encode_klass_not_null(Register r) {
  if (Universe::narrow_klass_base() != NULL) {
    assert(r != AT, "Encoding a klass in AT");
    set64(AT, (int64_t)Universe::narrow_klass_base());
    dsub(r, r, AT);
  }
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shr(r, LogKlassAlignmentInBytes);
  }
  // Not neccessary for MIPS at all.
  //if (Universe::narrow_klass_base() != NULL) {
  //  reinit_heapbase();
  //}
}

// Compress the (known non-NULL) klass pointer in src into dst.
void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  if (dst == src) {
    encode_klass_not_null(src);
  } else {
    if (Universe::narrow_klass_base() != NULL) {
      // dst is used as the scratch register for the base constant.
      set64(dst, (int64_t)Universe::narrow_klass_base());
      dsub(dst, src, dst);
      if (Universe::narrow_klass_shift() != 0) {
        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        shr(dst, LogKlassAlignmentInBytes);
      }
    } else {
      if (Universe::narrow_klass_shift() != 0) {
        assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
        dsrl(dst, src, LogKlassAlignmentInBytes);
      } else {
        move(dst, src);
      }
    }
  }
}

// Function instr_size_for_decode_klass_not_null() counts the instructions
// generated by decode_klass_not_null(register r) and reinit_heapbase(),
// when (Universe::heap() != NULL).  Hence, if the instructions they
// generate change, then this method needs to be updated.
int MacroAssembler::instr_size_for_decode_klass_not_null() {
  assert (UseCompressedClassPointers, "only for compressed klass ptrs");
  if (Universe::narrow_klass_base() != NULL) {
    // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
    return (Universe::narrow_klass_shift() == 0 ?
            4 * 9 : 4 * 10);
  } else {
    // longest load decode klass function, mov64, leaq
    return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1);
  }
}

// Decompress the (known non-NULL) narrow klass pointer in r in place.
// Clobbers AT when a klass base is in use.
void MacroAssembler::decode_klass_not_null(Register r) {
  assert (UseCompressedClassPointers, "should only be used for compressed headers");
  assert(r != AT, "Decoding a klass in AT");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shl(r, LogKlassAlignmentInBytes);
  }
  if (Universe::narrow_klass_base() != NULL) {
    set64(AT, (int64_t)Universe::narrow_klass_base());
    daddu(r, r, AT);
    //Not neccessary for MIPS at all.
    //reinit_heapbase();
  }
}

// Decompress the (known non-NULL) narrow klass pointer in src into dst.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  assert (UseCompressedClassPointers, "should only be used for compressed headers");

  if (dst == src) {
    decode_klass_not_null(dst);
  } else {
    // Cannot assert, unverified entry point counts instructions (see .ad file)
    // vtableStubs also counts instructions in pd_code_size_limit.
    // Also do not verify_oop as this is called by verify_oop.
    // Note: a NULL klass base makes set64 load 0, so both branches below
    // remain correct in the zero-based case.
    set64(dst, (int64_t)Universe::narrow_klass_base());
    if (Universe::narrow_klass_shift() != 0) {
      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
      dsll(AT, src, Address::times_8);
      daddu(dst, dst, AT);
    } else {
      daddu(dst, src, dst);
    }
  }
}

// Add the 32-bit constant value to reg (32-bit arithmetic on LP64 via addu32).
// Clobbers AT on LP64 and for non-simm16 values.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {
    // -min_jint overflows, so this case cannot be delegated to decrementl.
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
    return;
  }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }

  if(Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, value));
    LP64_ONLY(move(AT, value); addu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(addu32(reg, reg, AT)) NOT_LP64(addu(reg, reg, AT));
  }
}

// Subtract the 32-bit constant value from reg (32-bit arithmetic on LP64 via
// subu32).  Clobbers AT on LP64 and for non-simm16 values.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {
    // -min_jint overflows, so this case cannot be delegated to incrementl.
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
    return;
  }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }

  if(Assembler::is_simm16(value)) {
    NOT_LP64(addiu(reg, reg, -value));
    LP64_ONLY(move(AT, value); subu32(reg, reg, AT));
  } else {
    move(AT, value);
    LP64_ONLY(subu32(reg, reg, AT)) NOT_LP64(subu(reg, reg, AT));
  }
}

// Reload S5_heapbase with the compressed oops/klass base pointer.
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      if
(Universe::narrow_oop_base() == NULL) { aoqi@6880: move(S5_heapbase, R0); aoqi@6880: } else { aoqi@6880: set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); aoqi@6880: } aoqi@6880: } else { aoqi@6880: set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); aoqi@6880: ld(S5_heapbase, S5_heapbase, 0); aoqi@6880: } aoqi@6880: } aoqi@6880: } aoqi@6880: #endif // _LP64 aoqi@6880: aoqi@6880: void MacroAssembler::check_klass_subtype(Register sub_klass, aoqi@6880: Register super_klass, aoqi@6880: Register temp_reg, aoqi@6880: Label& L_success) { aoqi@6880: //implement ind gen_subtype_check aoqi@6880: Label L_failure; aoqi@6880: check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); aoqi@6880: check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); aoqi@6880: bind(L_failure); aoqi@6880: } aoqi@6880: aoqi@6880: SkipIfEqual::SkipIfEqual( aoqi@6880: MacroAssembler* masm, const bool* flag_addr, bool value) { aoqi@6880: _masm = masm; aoqi@6880: _masm->li(AT, (address)flag_addr); aoqi@6880: _masm->lb(AT,AT,0); aoqi@6880: _masm->addi(AT,AT,-value); aoqi@6880: _masm->beq(AT,R0,_label); aoqi@6880: _masm->delayed()->nop(); aoqi@6880: } aoqi@6880: void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, aoqi@6880: Register super_klass, aoqi@6880: Register temp_reg, aoqi@6880: Label* L_success, aoqi@6880: Label* L_failure, aoqi@6880: Label* L_slow_path, aoqi@6880: RegisterOrConstant super_check_offset) { aoqi@6880: assert_different_registers(sub_klass, super_klass, temp_reg); aoqi@6880: bool must_load_sco = (super_check_offset.constant_or_zero() == -1); aoqi@6880: if (super_check_offset.is_register()) { aoqi@6880: assert_different_registers(sub_klass, super_klass, aoqi@6880: super_check_offset.as_register()); aoqi@6880: } else if (must_load_sco) { aoqi@6880: assert(temp_reg != noreg, "supply either a temp or a register offset"); aoqi@6880: } aoqi@6880: aoqi@6880: Label L_fallthrough; 
aoqi@6880: int label_nulls = 0; aoqi@6880: if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } aoqi@6880: if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } aoqi@6880: if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } aoqi@6880: assert(label_nulls <= 1, "at most one NULL in the batch"); aoqi@6880: aoqi@6880: int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); aoqi@6880: int sco_offset = in_bytes(Klass::super_check_offset_offset()); aoqi@6880: // If the pointers are equal, we are done (e.g., String[] elements). aoqi@6880: // This self-check enables sharing of secondary supertype arrays among aoqi@6880: // non-primary types such as array-of-interface. Otherwise, each such aoqi@6880: // type would need its own customized SSA. aoqi@6880: // We move this check to the front of the fast path because many aoqi@6880: // type checks are in fact trivially successful in this manner, aoqi@6880: // so we get a nicely predicted branch right at the start of the check. aoqi@6880: //cmpptr(sub_klass, super_klass); aoqi@6880: //local_jcc(Assembler::equal, *L_success); aoqi@6880: beq(sub_klass, super_klass, *L_success); aoqi@6880: delayed()->nop(); aoqi@6880: // Check the supertype display: aoqi@6880: if (must_load_sco) { aoqi@6880: // Positive movl does right thing on LP64. aoqi@6880: lwu(temp_reg, super_klass, sco_offset); aoqi@6880: super_check_offset = RegisterOrConstant(temp_reg); aoqi@6880: } aoqi@6880: dsll(AT, super_check_offset.register_or_noreg(), Address::times_1); aoqi@6880: daddu(AT, sub_klass, AT); aoqi@6880: ld(AT, AT, super_check_offset.constant_or_zero()*Address::times_1); aoqi@6880: aoqi@6880: // This check has worked decisively for primary supers. aoqi@6880: // Secondary supers are sought in the super_cache ('super_cache_addr'). aoqi@6880: // (Secondary supers are interfaces and very deeply nested subtypes.) 
aoqi@6880: // This works in the same check above because of a tricky aliasing aoqi@6880: // between the super_cache and the primary super display elements. aoqi@6880: // (The 'super_check_addr' can address either, as the case requires.) aoqi@6880: // Note that the cache is updated below if it does not help us find aoqi@6880: // what we need immediately. aoqi@6880: // So if it was a primary super, we can just fail immediately. aoqi@6880: // Otherwise, it's the slow path for us (no success at this point). aoqi@6880: aoqi@6880: if (super_check_offset.is_register()) { aoqi@6880: beq(super_klass, AT, *L_success); aoqi@6880: delayed()->nop(); aoqi@6880: addi(AT, super_check_offset.as_register(), -sc_offset); aoqi@6880: if (L_failure == &L_fallthrough) { aoqi@6880: beq(AT, R0, *L_slow_path); aoqi@6880: delayed()->nop(); aoqi@6880: } else { aoqi@6880: bne(AT, R0, *L_failure); aoqi@6880: delayed()->nop(); aoqi@6880: b(*L_slow_path); aoqi@6880: delayed()->nop(); aoqi@6880: } aoqi@6880: } else if (super_check_offset.as_constant() == sc_offset) { aoqi@6880: // Need a slow path; fast failure is impossible. aoqi@6880: if (L_slow_path == &L_fallthrough) { aoqi@6880: beq(super_klass, AT, *L_success); aoqi@6880: delayed()->nop(); aoqi@6880: } else { aoqi@6880: bne(super_klass, AT, *L_slow_path); aoqi@6880: delayed()->nop(); aoqi@6880: b(*L_success); aoqi@6880: delayed()->nop(); aoqi@6880: } aoqi@6880: } else { aoqi@6880: // No slow path; it's a fast decision. 
aoqi@6880: if (L_failure == &L_fallthrough) { aoqi@6880: beq(super_klass, AT, *L_success); aoqi@6880: delayed()->nop(); aoqi@6880: } else { aoqi@6880: bne(super_klass, AT, *L_failure); aoqi@6880: delayed()->nop(); aoqi@6880: b(*L_success); aoqi@6880: delayed()->nop(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: bind(L_fallthrough); aoqi@6880: aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, aoqi@6880: Register super_klass, aoqi@6880: Register temp_reg, aoqi@6880: Register temp2_reg, aoqi@6880: Label* L_success, aoqi@6880: Label* L_failure, aoqi@6880: bool set_cond_codes) { aoqi@6880: assert_different_registers(sub_klass, super_klass, temp_reg); aoqi@6880: if (temp2_reg != noreg) aoqi@6880: assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); aoqi@6880: else aoqi@6880: temp2_reg = T9; aoqi@6880: #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) aoqi@6880: aoqi@6880: Label L_fallthrough; aoqi@6880: int label_nulls = 0; aoqi@6880: if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } aoqi@6880: if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } aoqi@6880: assert(label_nulls <= 1, "at most one NULL in the batch"); aoqi@6880: aoqi@6880: // a couple of useful fields in sub_klass: aoqi@6880: int ss_offset = in_bytes(Klass::secondary_supers_offset()); aoqi@6880: int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); aoqi@6880: Address secondary_supers_addr(sub_klass, ss_offset); aoqi@6880: Address super_cache_addr( sub_klass, sc_offset); aoqi@6880: aoqi@6880: // Do a linear scan of the secondary super-klass chain. aoqi@6880: // This code is rarely used, so simplicity is a virtue here. aoqi@6880: // The repne_scan instruction uses fixed registers, which we must spill. aoqi@6880: // Don't worry too much about pre-existing connections with the input regs. 
aoqi@6880: aoqi@6880: #if 0 aoqi@6880: assert(sub_klass != T9, "killed reg"); // killed by mov(rax, super) aoqi@6880: assert(sub_klass != T1, "killed reg"); // killed by lea(rcx, &pst_counter) aoqi@6880: #endif aoqi@6880: aoqi@6880: // Get super_klass value into rax (even if it was in rdi or rcx). aoqi@6880: #ifndef PRODUCT aoqi@6880: int* pst_counter = &SharedRuntime::_partial_subtype_ctr; aoqi@6880: ExternalAddress pst_counter_addr((address) pst_counter); aoqi@6880: NOT_LP64( incrementl(pst_counter_addr) ); aoqi@6880: //LP64_ONLY( lea(rcx, pst_counter_addr) ); aoqi@6880: //LP64_ONLY( incrementl(Address(rcx, 0)) ); aoqi@6880: #endif //PRODUCT aoqi@6880: aoqi@6880: // We will consult the secondary-super array. aoqi@6880: ld(temp_reg, secondary_supers_addr); aoqi@6880: // Load the array length. (Positive movl does right thing on LP64.) aoqi@6880: lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); aoqi@6880: // Skip to start of data. aoqi@6880: daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); aoqi@6880: aoqi@6880: // Scan RCX words at [RDI] for an occurrence of RAX. aoqi@6880: // Set NZ/Z based on last compare. aoqi@6880: // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does aoqi@6880: // not change flags (only scas instruction which is repeated sets flags). aoqi@6880: // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. aoqi@6880: aoqi@6880: /* 2013/4/3 Jin: OpenJDK8 never compresses klass pointers in secondary-super array. 
*/ aoqi@6880: Label Loop, subtype; aoqi@6880: bind(Loop); aoqi@6880: beq(temp2_reg, R0, *L_failure); aoqi@6880: delayed()->nop(); aoqi@6880: ld(AT, temp_reg, 0); aoqi@6880: beq(AT, super_klass, subtype); aoqi@6880: delayed()->daddi(temp_reg, temp_reg, 1 * wordSize); aoqi@6880: b(Loop); aoqi@6880: delayed()->daddi(temp2_reg, temp2_reg, -1); aoqi@6880: aoqi@6880: bind(subtype); aoqi@6880: sd(super_klass, super_cache_addr); aoqi@6880: if (L_success != &L_fallthrough) { aoqi@6880: b(*L_success); aoqi@6880: delayed()->nop(); aoqi@6880: } aoqi@6880: aoqi@6880: // Success. Cache the super we found and proceed in triumph. aoqi@6880: #undef IS_A_TEMP aoqi@6880: aoqi@6880: bind(L_fallthrough); aoqi@6880: } aoqi@6880: void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { aoqi@6880: ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); aoqi@6880: sd(R0, Address(java_thread, JavaThread::vm_result_offset())); aoqi@6880: verify_oop(oop_result, "broken oop in call_VM_base"); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { aoqi@6880: ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); aoqi@6880: sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); aoqi@6880: } aoqi@6880: aoqi@6880: Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, aoqi@6880: int extra_slot_offset) { aoqi@6880: // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
aoqi@6880: int stackElementSize = Interpreter::stackElementSize; aoqi@6880: int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); aoqi@6880: #ifdef ASSERT aoqi@6880: int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); aoqi@6880: assert(offset1 - offset == stackElementSize, "correct arithmetic"); aoqi@6880: #endif aoqi@6880: Register scale_reg = NOREG; aoqi@6880: Address::ScaleFactor scale_factor = Address::no_scale; aoqi@6880: if (arg_slot.is_constant()) { aoqi@6880: offset += arg_slot.as_constant() * stackElementSize; aoqi@6880: } else { aoqi@6880: scale_reg = arg_slot.as_register(); aoqi@6880: scale_factor = Address::times_8; aoqi@6880: } aoqi@6880: // 2014/07/31 Fu: We don't push RA on stack in prepare_invoke. aoqi@6880: // offset += wordSize; // return PC is on stack aoqi@6880: if(scale_reg==NOREG) return Address(SP, offset); aoqi@6880: else { aoqi@6880: dsll(scale_reg, scale_reg, scale_factor); aoqi@6880: daddu(scale_reg, SP, scale_reg); aoqi@6880: return Address(scale_reg, offset); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: SkipIfEqual::~SkipIfEqual() { aoqi@6880: _masm->bind(_label); aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { aoqi@6880: switch (size_in_bytes) { aoqi@6880: #ifndef _LP64 aoqi@6880: case 8: aoqi@6880: assert(dst2 != noreg, "second dest register required"); aoqi@6880: lw(dst, src); aoqi@6880: lw(dst2, src.plus_disp(BytesPerInt)); aoqi@6880: break; aoqi@6880: #else aoqi@6880: case 8: ld(dst, src); break; aoqi@6880: #endif aoqi@6880: case 4: lw(dst, src); break; aoqi@6880: case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; aoqi@6880: case 1: is_signed ? 
lb( dst, src) : lbu( dst, src); break; aoqi@6880: default: ShouldNotReachHere(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { aoqi@6880: switch (size_in_bytes) { aoqi@6880: #ifndef _LP64 aoqi@6880: case 8: aoqi@6880: assert(src2 != noreg, "second source register required"); aoqi@6880: sw(src, dst); aoqi@6880: sw(src2, dst.plus_disp(BytesPerInt)); aoqi@6880: break; aoqi@6880: #else aoqi@6880: case 8: sd(src, dst); break; aoqi@6880: #endif aoqi@6880: case 4: sw(src, dst); break; aoqi@6880: case 2: sh(src, dst); break; aoqi@6880: case 1: sb(src, dst); break; aoqi@6880: default: ShouldNotReachHere(); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: // Look up the method for a megamorphic invokeinterface call. aoqi@6880: // The target method is determined by . aoqi@6880: // The receiver klass is in recv_klass. aoqi@6880: // On success, the result will be in method_result, and execution falls through. aoqi@6880: // On failure, execution transfers to the given label. 
aoqi@6880: void MacroAssembler::lookup_interface_method(Register recv_klass, aoqi@6880: Register intf_klass, aoqi@6880: RegisterOrConstant itable_index, aoqi@6880: Register method_result, aoqi@6880: Register scan_temp, aoqi@6880: Label& L_no_such_interface) { aoqi@6880: assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); aoqi@6880: assert(itable_index.is_constant() || itable_index.as_register() == method_result, aoqi@6880: "caller must use same register for non-constant itable index as for method"); aoqi@6880: aoqi@6880: // Compute start of first itableOffsetEntry (which is at the end of the vtable) aoqi@6880: int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; aoqi@6880: int itentry_off = itableMethodEntry::method_offset_in_bytes(); aoqi@6880: int scan_step = itableOffsetEntry::size() * wordSize; aoqi@6880: int vte_size = vtableEntry::size() * wordSize; aoqi@6880: Address::ScaleFactor times_vte_scale = Address::times_ptr; aoqi@6880: assert(vte_size == wordSize, "else adjust times_vte_scale"); aoqi@6880: aoqi@6880: lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); aoqi@6880: aoqi@6880: // %%% Could store the aligned, prescaled offset in the klassoop. aoqi@6880: dsll(scan_temp, scan_temp, times_vte_scale); aoqi@6880: daddu(scan_temp, recv_klass, scan_temp); aoqi@6880: daddiu(scan_temp, scan_temp, vtable_base); aoqi@6880: if (HeapWordsPerLong > 1) { aoqi@6880: // Round up to align_object_offset boundary aoqi@6880: // see code for InstanceKlass::start_of_itable! aoqi@6880: round_to(scan_temp, BytesPerLong); aoqi@6880: } aoqi@6880: aoqi@6880: // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
aoqi@6880: assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); aoqi@6880: // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); aoqi@6880: if (itable_index.is_constant()) { aoqi@6880: set64(AT, (int)itable_index.is_constant()); aoqi@6880: dsll(AT, AT, (int)Address::times_ptr); aoqi@6880: } else { aoqi@6880: dsll(AT, itable_index.as_register(), (int)Address::times_ptr); aoqi@6880: } aoqi@6880: daddu(AT, AT, recv_klass); aoqi@6880: daddiu(recv_klass, AT, itentry_off); aoqi@6880: aoqi@6880: // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { aoqi@6880: // if (scan->interface() == intf) { aoqi@6880: // result = (klass + scan->offset() + itable_index); aoqi@6880: // } aoqi@6880: // } aoqi@6880: Label search, found_method; aoqi@6880: aoqi@6880: for (int peel = 1; peel >= 0; peel--) { aoqi@6880: ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); aoqi@6880: aoqi@6880: if (peel) { aoqi@6880: beq(intf_klass, method_result, found_method); aoqi@6880: nop(); aoqi@6880: } else { aoqi@6880: bne(intf_klass, method_result, search); aoqi@6880: nop(); aoqi@6880: // (invert the test to fall through to found_method...) aoqi@6880: } aoqi@6880: aoqi@6880: if (!peel) break; aoqi@6880: aoqi@6880: bind(search); aoqi@6880: aoqi@6880: // Check that the previous entry is non-null. A null entry means that aoqi@6880: // the receiver class doesn't implement the interface, and wasn't the aoqi@6880: // same as when the caller was compiled. aoqi@6880: beq(method_result, R0, L_no_such_interface); aoqi@6880: nop(); aoqi@6880: daddiu(scan_temp, scan_temp, scan_step); aoqi@6880: } aoqi@6880: aoqi@6880: bind(found_method); aoqi@6880: aoqi@6880: // Got a hit. 
aoqi@6880: lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); aoqi@6880: //ld(method_result, Address(recv_klass, scan_temp, Address::times_1)); aoqi@6880: if(UseLoongsonISA) { aoqi@6880: gsldx(method_result, recv_klass, scan_temp, 0); aoqi@6880: } else { aoqi@6880: daddu(AT, recv_klass, scan_temp); aoqi@6880: ld(method_result, AT); aoqi@6880: } aoqi@6880: } aoqi@6880: aoqi@6880: aoqi@6880: // virtual method calling aoqi@6880: void MacroAssembler::lookup_virtual_method(Register recv_klass, aoqi@6880: RegisterOrConstant vtable_index, aoqi@6880: Register method_result) { aoqi@6880: Register tmp = GP; aoqi@6880: push(tmp); aoqi@6880: aoqi@6880: if (vtable_index.is_constant()) { aoqi@6880: assert_different_registers(recv_klass, method_result, tmp); aoqi@6880: } else { aoqi@6880: assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); aoqi@6880: } aoqi@6880: const int base = InstanceKlass::vtable_start_offset() * wordSize; aoqi@6880: assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); aoqi@6880: /* aoqi@6880: Address vtable_entry_addr(recv_klass, aoqi@6880: vtable_index, Address::times_ptr, aoqi@6880: base + vtableEntry::method_offset_in_bytes()); aoqi@6880: */ aoqi@6880: if (vtable_index.is_constant()) { aoqi@6880: set64(AT, vtable_index.as_constant()); aoqi@6880: dsll(AT, AT, (int)Address::times_ptr); aoqi@6880: } else { aoqi@6880: dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); aoqi@6880: } aoqi@6880: set64(tmp, base + vtableEntry::method_offset_in_bytes()); aoqi@6880: daddu(tmp, tmp, AT); aoqi@6880: daddu(tmp, tmp, recv_klass); aoqi@6880: ld(method_result, tmp, 0); aoqi@6880: aoqi@6880: pop(tmp); aoqi@6880: }