duke@435: /* never@3500: * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. duke@435: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. duke@435: * duke@435: * This code is free software; you can redistribute it and/or modify it duke@435: * under the terms of the GNU General Public License version 2 only, as duke@435: * published by the Free Software Foundation. duke@435: * duke@435: * This code is distributed in the hope that it will be useful, but WITHOUT duke@435: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or duke@435: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License duke@435: * version 2 for more details (a copy is included in the LICENSE file that duke@435: * accompanied this code). duke@435: * duke@435: * You should have received a copy of the GNU General Public License version duke@435: * 2 along with this work; if not, write to the Free Software Foundation, duke@435: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. duke@435: * trims@1907: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA trims@1907: * or visit www.oracle.com if you need additional information or have any trims@1907: * questions. 
duke@435: * duke@435: */ duke@435: stefank@2314: #include "precompiled.hpp" stefank@2314: #include "asm/assembler.hpp" stefank@2314: #include "assembler_sparc.inline.hpp" stefank@2314: #include "code/debugInfoRec.hpp" stefank@2314: #include "code/icBuffer.hpp" stefank@2314: #include "code/vtableStubs.hpp" stefank@2314: #include "interpreter/interpreter.hpp" stefank@2314: #include "oops/compiledICHolderOop.hpp" stefank@2314: #include "prims/jvmtiRedefineClassesTrace.hpp" stefank@2314: #include "runtime/sharedRuntime.hpp" stefank@2314: #include "runtime/vframeArray.hpp" stefank@2314: #include "vmreg_sparc.inline.hpp" stefank@2314: #ifdef COMPILER1 stefank@2314: #include "c1/c1_Runtime1.hpp" stefank@2314: #endif stefank@2314: #ifdef COMPILER2 stefank@2314: #include "opto/runtime.hpp" stefank@2314: #endif stefank@2314: #ifdef SHARK stefank@2314: #include "compiler/compileBroker.hpp" stefank@2314: #include "shark/sharkCompiler.hpp" stefank@2314: #endif duke@435: duke@435: #define __ masm-> duke@435: duke@435: duke@435: class RegisterSaver { duke@435: duke@435: // Used for saving volatile registers. This is Gregs, Fregs, I/L/O. duke@435: // The Oregs are problematic. In the 32bit build the compiler can duke@435: // have O registers live with 64 bit quantities. A window save will duke@435: // cut the heads off of the registers. We have to do a very extensive duke@435: // stack dance to save and restore these properly. duke@435: duke@435: // Note that the Oregs problem only exists if we block at either a polling duke@435: // page exception a compiled code safepoint that was not originally a call duke@435: // or deoptimize following one of these kinds of safepoints. duke@435: duke@435: // Lots of registers to save. For all builds, a window save will preserve duke@435: // the %i and %l registers. For the 32-bit longs-in-two entries and 64-bit duke@435: // builds a window-save will preserve the %o registers. 
In the LION build duke@435: // we need to save the 64-bit %o registers which requires we save them duke@435: // before the window-save (as then they become %i registers and get their duke@435: // heads chopped off on interrupt). We have to save some %g registers here duke@435: // as well. duke@435: enum { duke@435: // This frame's save area. Includes extra space for the native call: duke@435: // vararg's layout space and the like. Briefly holds the caller's duke@435: // register save area. duke@435: call_args_area = frame::register_save_words_sp_offset + duke@435: frame::memory_parameter_word_sp_offset*wordSize, duke@435: // Make sure save locations are always 8 byte aligned. duke@435: // can't use round_to because it doesn't produce compile time constant duke@435: start_of_extra_save_area = ((call_args_area + 7) & ~7), duke@435: g1_offset = start_of_extra_save_area, // g-regs needing saving duke@435: g3_offset = g1_offset+8, duke@435: g4_offset = g3_offset+8, duke@435: g5_offset = g4_offset+8, duke@435: o0_offset = g5_offset+8, duke@435: o1_offset = o0_offset+8, duke@435: o2_offset = o1_offset+8, duke@435: o3_offset = o2_offset+8, duke@435: o4_offset = o3_offset+8, duke@435: o5_offset = o4_offset+8, duke@435: start_of_flags_save_area = o5_offset+8, duke@435: ccr_offset = start_of_flags_save_area, duke@435: fsr_offset = ccr_offset + 8, duke@435: d00_offset = fsr_offset+8, // Start of float save area duke@435: register_save_size = d00_offset+8*32 duke@435: }; duke@435: duke@435: duke@435: public: duke@435: duke@435: static int Oexception_offset() { return o0_offset; }; duke@435: static int G3_offset() { return g3_offset; }; duke@435: static int G5_offset() { return g5_offset; }; duke@435: static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); duke@435: static void restore_live_registers(MacroAssembler* masm); duke@435: duke@435: // During deoptimization only the result register need to be restored duke@435: // 
all the other values have already been extracted. duke@435: duke@435: static void restore_result_registers(MacroAssembler* masm); duke@435: }; duke@435: duke@435: OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { duke@435: // Record volatile registers as callee-save values in an OopMap so their save locations will be duke@435: // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for duke@435: // deoptimization; see compiledVFrame::create_stack_value). The caller's I, L and O registers duke@435: // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame duke@435: // (as the stub's I's) when the runtime routine called by the stub creates its frame. duke@435: int i; kvn@1442: // Always make the frame size 16 byte aligned. duke@435: int frame_size = round_to(additional_frame_words + register_save_size, 16); duke@435: // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words duke@435: int frame_size_in_slots = frame_size / sizeof(jint); duke@435: // CodeBlob frame size is in words. duke@435: *total_frame_words = frame_size / wordSize; duke@435: // OopMap* map = new OopMap(*total_frame_words, 0); duke@435: OopMap* map = new OopMap(frame_size_in_slots, 0); duke@435: duke@435: #if !defined(_LP64) duke@435: duke@435: // Save 64-bit O registers; they will get their heads chopped off on a 'save'. 
duke@435: __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8); duke@435: __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8); duke@435: __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8); duke@435: __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8); duke@435: __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8); duke@435: __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8); duke@435: #endif /* _LP64 */ duke@435: duke@435: __ save(SP, -frame_size, SP); duke@435: duke@435: #ifndef _LP64 duke@435: // Reload the 64 bit Oregs. Although they are now Iregs we load them duke@435: // to Oregs here to avoid interrupts cutting off their heads duke@435: duke@435: __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0); duke@435: __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1); duke@435: __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2); duke@435: __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3); duke@435: __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4); duke@435: __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5); duke@435: duke@435: __ stx(O0, SP, o0_offset+STACK_BIAS); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg()); duke@435: duke@435: __ stx(O1, SP, o1_offset+STACK_BIAS); duke@435: duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg()); duke@435: duke@435: __ stx(O2, SP, o2_offset+STACK_BIAS); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg()); duke@435: duke@435: __ stx(O3, SP, o3_offset+STACK_BIAS); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg()); duke@435: duke@435: __ stx(O4, SP, o4_offset+STACK_BIAS); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg()); duke@435: 
duke@435: __ stx(O5, SP, o5_offset+STACK_BIAS); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg()); duke@435: #endif /* _LP64 */ duke@435: coleenp@548: coleenp@548: #ifdef _LP64 coleenp@548: int debug_offset = 0; coleenp@548: #else coleenp@548: int debug_offset = 4; coleenp@548: #endif duke@435: // Save the G's duke@435: __ stx(G1, SP, g1_offset+STACK_BIAS); coleenp@548: map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg()); duke@435: duke@435: __ stx(G3, SP, g3_offset+STACK_BIAS); coleenp@548: map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg()); duke@435: duke@435: __ stx(G4, SP, g4_offset+STACK_BIAS); coleenp@548: map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg()); duke@435: duke@435: __ stx(G5, SP, g5_offset+STACK_BIAS); coleenp@548: map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg()); duke@435: duke@435: // This is really a waste but we'll keep things as they were for now duke@435: if (true) { duke@435: #ifndef _LP64 duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next()); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next()); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next()); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next()); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next()); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next()); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next()); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next()); duke@435: map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next()); duke@435: 
map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next()); coleenp@548: #endif /* _LP64 */ duke@435: } duke@435: duke@435: duke@435: // Save the flags duke@435: __ rdccr( G5 ); duke@435: __ stx(G5, SP, ccr_offset+STACK_BIAS); duke@435: __ stxfsr(SP, fsr_offset+STACK_BIAS); duke@435: kvn@1442: // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles) duke@435: int offset = d00_offset; kvn@1442: for( int i=0; iset_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg()); kvn@1442: map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next()); duke@435: offset += sizeof(double); duke@435: } duke@435: duke@435: // And we're done. duke@435: duke@435: return map; duke@435: } duke@435: duke@435: duke@435: // Pop the current frame and restore all the registers that we duke@435: // saved. duke@435: void RegisterSaver::restore_live_registers(MacroAssembler* masm) { duke@435: duke@435: // Restore all the FP registers kvn@1442: for( int i=0; ireg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; duke@435: } duke@435: never@3500: static VMRegPair reg64_to_VMRegPair(Register r) { never@3500: VMRegPair ret; never@3500: if (wordSize == 8) { never@3500: ret.set2(r->as_VMReg()); never@3500: } else { never@3500: ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); never@3500: } never@3500: return ret; never@3500: } never@3500: duke@435: // --------------------------------------------------------------------------- duke@435: // Read the array of BasicTypes from a signature, and compute where the duke@435: // arguments should go. Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size) duke@435: // quantities. Values less than VMRegImpl::stack0 are registers, those above duke@435: // refer to 4-byte stack slots. All stack slots are based off of the window duke@435: // top. 
VMRegImpl::stack0 refers to the first slot past the 16-word window, duke@435: // and VMRegImpl::stack0+1 refers to the memory word 4-bytes higher. Register duke@435: // values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit duke@435: // integer registers. Values 64-95 are the (32-bit only) float registers. duke@435: // Each 32-bit quantity is given its own number, so the integer registers duke@435: // (in either 32- or 64-bit builds) use 2 numbers. For example, there is duke@435: // an O0-low and an O0-high. Essentially, all int register numbers are doubled. duke@435: duke@435: // Register results are passed in O0-O5, for outgoing call arguments. To duke@435: // convert to incoming arguments, convert all O's to I's. The regs array duke@435: // refers to the low and hi 32-bit words of 64-bit registers or stack slots. duke@435: // If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a duke@435: // 32-bit value was passed). If both are VMRegImpl::Bad(), it means no value was duke@435: // passed (used as a placeholder for the other half of longs and doubles in duke@435: // the 64-bit build). regs[].second() is either VMRegImpl::Bad() or regs[].second() is duke@435: // regs[].first()+1 (regs[].first() may be misaligned in the C calling convention). duke@435: // Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first() duke@435: // == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the duke@435: // same VMRegPair. duke@435: duke@435: // Note: the INPUTS in sig_bt are in units of Java argument words, which are duke@435: // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit duke@435: // units regardless of build. duke@435: duke@435: duke@435: // --------------------------------------------------------------------------- duke@435: // The compiled Java calling convention. 
The Java convention always passes duke@435: // 64-bit values in adjacent aligned locations (either registers or stack), duke@435: // floats in float registers and doubles in aligned float pairs. Values are duke@435: // packed in the registers. There is no backing varargs store for values in duke@435: // registers. In the 32-bit build, longs are passed in G1 and G4 (cannot be duke@435: // passed in I's, because longs in I's get their heads chopped off at duke@435: // interrupt). duke@435: int SharedRuntime::java_calling_convention(const BasicType *sig_bt, duke@435: VMRegPair *regs, duke@435: int total_args_passed, duke@435: int is_outgoing) { duke@435: assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers"); duke@435: duke@435: // Convention is to pack the first 6 int/oop args into the first 6 registers duke@435: // (I0-I5), extras spill to the stack. Then pack the first 8 float args duke@435: // into F0-F7, extras spill to the stack. Then pad all register sets to duke@435: // align. Then put longs and doubles into the same registers as they fit, duke@435: // else spill to the stack. duke@435: const int int_reg_max = SPARC_ARGS_IN_REGS_NUM; duke@435: const int flt_reg_max = 8; duke@435: // duke@435: // Where 32-bit 1-reg longs start being passed duke@435: // In tiered we must pass on stack because c1 can't use a "pair" in a single reg. duke@435: // So make it look like we've filled all the G regs that c2 wants to use. duke@435: Register g_reg = TieredCompilation ? noreg : G1; duke@435: duke@435: // Count int/oop and float args. See how many stack slots we'll need and duke@435: // where the longs & doubles will go. 
duke@435: int int_reg_cnt = 0; duke@435: int flt_reg_cnt = 0; duke@435: // int stk_reg_pairs = frame::register_save_words*(wordSize>>2); duke@435: // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots(); duke@435: int stk_reg_pairs = 0; duke@435: for (int i = 0; i < total_args_passed; i++) { duke@435: switch (sig_bt[i]) { duke@435: case T_LONG: // LP64, longs compete with int args duke@435: assert(sig_bt[i+1] == T_VOID, ""); duke@435: #ifdef _LP64 twisti@3969: if (int_reg_cnt < int_reg_max) int_reg_cnt++; duke@435: #endif duke@435: break; duke@435: case T_OBJECT: duke@435: case T_ARRAY: duke@435: case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address twisti@3969: if (int_reg_cnt < int_reg_max) int_reg_cnt++; duke@435: #ifndef _LP64 duke@435: else stk_reg_pairs++; duke@435: #endif duke@435: break; duke@435: case T_INT: duke@435: case T_SHORT: duke@435: case T_CHAR: duke@435: case T_BYTE: duke@435: case T_BOOLEAN: twisti@3969: if (int_reg_cnt < int_reg_max) int_reg_cnt++; duke@435: else stk_reg_pairs++; duke@435: break; duke@435: case T_FLOAT: twisti@3969: if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++; duke@435: else stk_reg_pairs++; duke@435: break; duke@435: case T_DOUBLE: duke@435: assert(sig_bt[i+1] == T_VOID, ""); duke@435: break; duke@435: case T_VOID: duke@435: break; duke@435: default: duke@435: ShouldNotReachHere(); duke@435: } duke@435: } duke@435: duke@435: // This is where the longs/doubles start on the stack. 
duke@435: stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round duke@435: duke@435: int flt_reg_pairs = (flt_reg_cnt+1) & ~1; duke@435: duke@435: // int stk_reg = frame::register_save_words*(wordSize>>2); duke@435: // int stk_reg = SharedRuntime::out_preserve_stack_slots(); duke@435: int stk_reg = 0; duke@435: int int_reg = 0; duke@435: int flt_reg = 0; duke@435: duke@435: // Now do the signature layout duke@435: for (int i = 0; i < total_args_passed; i++) { duke@435: switch (sig_bt[i]) { duke@435: case T_INT: duke@435: case T_SHORT: duke@435: case T_CHAR: duke@435: case T_BYTE: duke@435: case T_BOOLEAN: duke@435: #ifndef _LP64 duke@435: case T_OBJECT: duke@435: case T_ARRAY: duke@435: case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address duke@435: #endif // _LP64 duke@435: if (int_reg < int_reg_max) { duke@435: Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); duke@435: regs[i].set1(r->as_VMReg()); duke@435: } else { duke@435: regs[i].set1(VMRegImpl::stack2reg(stk_reg++)); duke@435: } duke@435: break; duke@435: duke@435: #ifdef _LP64 duke@435: case T_OBJECT: duke@435: case T_ARRAY: duke@435: case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address duke@435: if (int_reg < int_reg_max) { duke@435: Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); duke@435: regs[i].set2(r->as_VMReg()); duke@435: } else { duke@435: regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); duke@435: stk_reg_pairs += 2; duke@435: } duke@435: break; duke@435: #endif // _LP64 duke@435: duke@435: case T_LONG: duke@435: assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half"); duke@435: #ifdef _LP64 duke@435: if (int_reg < int_reg_max) { duke@435: Register r = is_outgoing ? 
as_oRegister(int_reg++) : as_iRegister(int_reg++); duke@435: regs[i].set2(r->as_VMReg()); duke@435: } else { duke@435: regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); duke@435: stk_reg_pairs += 2; duke@435: } duke@435: #else never@739: #ifdef COMPILER2 duke@435: // For 32-bit build, can't pass longs in O-regs because they become duke@435: // I-regs and get trashed. Use G-regs instead. G1 and G4 are almost duke@435: // spare and available. This convention isn't used by the Sparc ABI or duke@435: // anywhere else. If we're tiered then we don't use G-regs because c1 never@739: // can't deal with them as a "pair". (Tiered makes this code think g's are filled) duke@435: // G0: zero duke@435: // G1: 1st Long arg duke@435: // G2: global allocated to TLS duke@435: // G3: used in inline cache check duke@435: // G4: 2nd Long arg duke@435: // G5: used in inline cache check duke@435: // G6: used by OS duke@435: // G7: used by OS duke@435: duke@435: if (g_reg == G1) { duke@435: regs[i].set2(G1->as_VMReg()); // This long arg in G1 duke@435: g_reg = G4; // Where the next arg goes duke@435: } else if (g_reg == G4) { duke@435: regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4 duke@435: g_reg = noreg; // No more longs in registers duke@435: } else { duke@435: regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); duke@435: stk_reg_pairs += 2; duke@435: } duke@435: #else // COMPILER2 duke@435: regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); duke@435: stk_reg_pairs += 2; duke@435: #endif // COMPILER2 never@739: #endif // _LP64 duke@435: break; duke@435: duke@435: case T_FLOAT: duke@435: if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg()); twisti@3969: else regs[i].set1(VMRegImpl::stack2reg(stk_reg++)); duke@435: break; duke@435: case T_DOUBLE: duke@435: assert(sig_bt[i+1] == T_VOID, "expecting half"); duke@435: if (flt_reg_pairs + 1 < flt_reg_max) { duke@435: regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg()); duke@435: flt_reg_pairs += 2; 
duke@435: } else { duke@435: regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); duke@435: stk_reg_pairs += 2; duke@435: } duke@435: break; duke@435: case T_VOID: regs[i].set_bad(); break; // Halves of longs & doubles duke@435: default: duke@435: ShouldNotReachHere(); duke@435: } duke@435: } duke@435: duke@435: // retun the amount of stack space these arguments will need. duke@435: return stk_reg_pairs; duke@435: duke@435: } duke@435: twisti@1441: // Helper class mostly to avoid passing masm everywhere, and handle twisti@1441: // store displacement overflow logic. duke@435: class AdapterGenerator { duke@435: MacroAssembler *masm; duke@435: Register Rdisp; duke@435: void set_Rdisp(Register r) { Rdisp = r; } duke@435: duke@435: void patch_callers_callsite(); duke@435: duke@435: // base+st_off points to top of argument twisti@1861: int arg_offset(const int st_off) { return st_off; } duke@435: int next_arg_offset(const int st_off) { twisti@1861: return st_off - Interpreter::stackElementSize; twisti@1441: } twisti@1441: twisti@1441: // Argument slot values may be loaded first into a register because twisti@1441: // they might not fit into displacement. 
twisti@1441: RegisterOrConstant arg_slot(const int st_off); twisti@1441: RegisterOrConstant next_arg_slot(const int st_off); twisti@1441: duke@435: // Stores long into offset pointed to by base duke@435: void store_c2i_long(Register r, Register base, duke@435: const int st_off, bool is_stack); duke@435: void store_c2i_object(Register r, Register base, duke@435: const int st_off); duke@435: void store_c2i_int(Register r, Register base, duke@435: const int st_off); duke@435: void store_c2i_double(VMReg r_2, duke@435: VMReg r_1, Register base, const int st_off); duke@435: void store_c2i_float(FloatRegister f, Register base, duke@435: const int st_off); duke@435: duke@435: public: duke@435: void gen_c2i_adapter(int total_args_passed, duke@435: // VMReg max_arg, duke@435: int comp_args_on_stack, // VMRegStackSlots duke@435: const BasicType *sig_bt, duke@435: const VMRegPair *regs, duke@435: Label& skip_fixup); duke@435: void gen_i2c_adapter(int total_args_passed, duke@435: // VMReg max_arg, duke@435: int comp_args_on_stack, // VMRegStackSlots duke@435: const BasicType *sig_bt, duke@435: const VMRegPair *regs); duke@435: duke@435: AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {} duke@435: }; duke@435: duke@435: duke@435: // Patch the callers callsite with entry to compiled code if it exists. duke@435: void AdapterGenerator::patch_callers_callsite() { duke@435: Label L; duke@435: __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch); kvn@3037: __ br_null(G3_scratch, false, Assembler::pt, L); duke@435: // Schedule the branch target address early. 
duke@435: __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch); duke@435: // Call into the VM to patch the caller, then jump to compiled callee duke@435: __ save_frame(4); // Args in compiled layout; do not blow them duke@435: duke@435: // Must save all the live Gregs the list is: duke@435: // G1: 1st Long arg (32bit build) duke@435: // G2: global allocated to TLS duke@435: // G3: used in inline cache check (scratch) duke@435: // G4: 2nd Long arg (32bit build); duke@435: // G5: used in inline cache check (methodOop) duke@435: duke@435: // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops. duke@435: duke@435: #ifdef _LP64 duke@435: // mov(s,d) duke@435: __ mov(G1, L1); duke@435: __ mov(G4, L4); duke@435: __ mov(G5_method, L5); duke@435: __ mov(G5_method, O0); // VM needs target method duke@435: __ mov(I7, O1); // VM needs caller's callsite duke@435: // Must be a leaf call... duke@435: // can be very far once the blob has been relocated twisti@1162: AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)); duke@435: __ relocate(relocInfo::runtime_call_type); twisti@1162: __ jumpl_to(dest, O7, O7); duke@435: __ delayed()->mov(G2_thread, L7_thread_cache); duke@435: __ mov(L7_thread_cache, G2_thread); duke@435: __ mov(L1, G1); duke@435: __ mov(L4, G4); duke@435: __ mov(L5, G5_method); duke@435: #else duke@435: __ stx(G1, FP, -8 + STACK_BIAS); duke@435: __ stx(G4, FP, -16 + STACK_BIAS); duke@435: __ mov(G5_method, L5); duke@435: __ mov(G5_method, O0); // VM needs target method duke@435: __ mov(I7, O1); // VM needs caller's callsite duke@435: // Must be a leaf call... 
duke@435: __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type); duke@435: __ delayed()->mov(G2_thread, L7_thread_cache); duke@435: __ mov(L7_thread_cache, G2_thread); duke@435: __ ldx(FP, -8 + STACK_BIAS, G1); duke@435: __ ldx(FP, -16 + STACK_BIAS, G4); duke@435: __ mov(L5, G5_method); duke@435: __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch); duke@435: #endif /* _LP64 */ duke@435: duke@435: __ restore(); // Restore args duke@435: __ bind(L); duke@435: } duke@435: twisti@1441: twisti@1441: RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) { twisti@1441: RegisterOrConstant roc(arg_offset(st_off)); twisti@1441: return __ ensure_simm13_or_reg(roc, Rdisp); duke@435: } duke@435: twisti@1441: RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) { twisti@1441: RegisterOrConstant roc(next_arg_offset(st_off)); twisti@1441: return __ ensure_simm13_or_reg(roc, Rdisp); duke@435: } twisti@1441: twisti@1441: duke@435: // Stores long into offset pointed to by base duke@435: void AdapterGenerator::store_c2i_long(Register r, Register base, duke@435: const int st_off, bool is_stack) { duke@435: #ifdef _LP64 duke@435: // In V9, longs are given 2 64-bit slots in the interpreter, but the duke@435: // data is passed in only 1 slot. 
duke@435: __ stx(r, base, next_arg_slot(st_off)); duke@435: #else ysr@777: #ifdef COMPILER2 duke@435: // Misaligned store of 64-bit data duke@435: __ stw(r, base, arg_slot(st_off)); // lo bits duke@435: __ srlx(r, 32, r); duke@435: __ stw(r, base, next_arg_slot(st_off)); // hi bits duke@435: #else duke@435: if (is_stack) { duke@435: // Misaligned store of 64-bit data duke@435: __ stw(r, base, arg_slot(st_off)); // lo bits duke@435: __ srlx(r, 32, r); duke@435: __ stw(r, base, next_arg_slot(st_off)); // hi bits duke@435: } else { duke@435: __ stw(r->successor(), base, arg_slot(st_off) ); // lo bits duke@435: __ stw(r , base, next_arg_slot(st_off)); // hi bits duke@435: } duke@435: #endif // COMPILER2 ysr@777: #endif // _LP64 duke@435: } duke@435: duke@435: void AdapterGenerator::store_c2i_object(Register r, Register base, duke@435: const int st_off) { duke@435: __ st_ptr (r, base, arg_slot(st_off)); duke@435: } duke@435: duke@435: void AdapterGenerator::store_c2i_int(Register r, Register base, duke@435: const int st_off) { duke@435: __ st (r, base, arg_slot(st_off)); duke@435: } duke@435: duke@435: // Stores into offset pointed to by base duke@435: void AdapterGenerator::store_c2i_double(VMReg r_2, duke@435: VMReg r_1, Register base, const int st_off) { duke@435: #ifdef _LP64 duke@435: // In V9, doubles are given 2 64-bit slots in the interpreter, but the duke@435: // data is passed in only 1 slot. 
duke@435: __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off)); duke@435: #else duke@435: // Need to marshal 64-bit value from misaligned Lesp loads duke@435: __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off)); duke@435: __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) ); duke@435: #endif duke@435: } duke@435: duke@435: void AdapterGenerator::store_c2i_float(FloatRegister f, Register base, duke@435: const int st_off) { duke@435: __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off)); duke@435: } duke@435: duke@435: void AdapterGenerator::gen_c2i_adapter( duke@435: int total_args_passed, duke@435: // VMReg max_arg, duke@435: int comp_args_on_stack, // VMRegStackSlots duke@435: const BasicType *sig_bt, duke@435: const VMRegPair *regs, duke@435: Label& skip_fixup) { duke@435: duke@435: // Before we get into the guts of the C2I adapter, see if we should be here duke@435: // at all. We've come from compiled code and are attempting to jump to the duke@435: // interpreter, which means the caller made a static call to get here duke@435: // (vcalls always get a compiled target if there is one). Check for a duke@435: // compiled target. If there is one, we need to patch the caller's call. duke@435: // However we will run interpreted if we come thru here. The next pass duke@435: // thru the call site will run compiled. If we ran compiled here then duke@435: // we can (theorectically) do endless i2c->c2i->i2c transitions during duke@435: // deopt/uncommon trap cycles. If we always go interpreted here then duke@435: // we can have at most one and don't need to play any tricks to keep duke@435: // from endlessly growing the stack. duke@435: // duke@435: // Actually if we detected that we had an i2c->c2i transition here we duke@435: // ought to be able to reset the world back to the state of the interpreted duke@435: // call and not bother building another interpreter arg area. 
We don't duke@435: // do that at this point. duke@435: duke@435: patch_callers_callsite(); duke@435: duke@435: __ bind(skip_fixup); duke@435: duke@435: // Since all args are passed on the stack, total_args_passed*wordSize is the duke@435: // space we need. Add in varargs area needed by the interpreter. Round up duke@435: // to stack alignment. twisti@1861: const int arg_size = total_args_passed * Interpreter::stackElementSize; duke@435: const int varargs_area = duke@435: (frame::varargs_offset - frame::register_save_words)*wordSize; duke@435: const int extraspace = round_to(arg_size + varargs_area, 2*wordSize); duke@435: duke@435: int bias = STACK_BIAS; duke@435: const int interp_arg_offset = frame::varargs_offset*wordSize + twisti@1861: (total_args_passed-1)*Interpreter::stackElementSize; duke@435: duke@435: Register base = SP; duke@435: duke@435: #ifdef _LP64 duke@435: // In the 64bit build because of wider slots and STACKBIAS we can run duke@435: // out of bits in the displacement to do loads and stores. Use g3 as duke@435: // temporary displacement. 
twisti@3310: if (!Assembler::is_simm13(extraspace)) { duke@435: __ set(extraspace, G3_scratch); duke@435: __ sub(SP, G3_scratch, SP); duke@435: } else { duke@435: __ sub(SP, extraspace, SP); duke@435: } duke@435: set_Rdisp(G3_scratch); duke@435: #else duke@435: __ sub(SP, extraspace, SP); duke@435: #endif // _LP64 duke@435: duke@435: // First write G1 (if used) to where ever it must go duke@435: for (int i=0; ias_VMReg()) { duke@435: if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { duke@435: store_c2i_object(G1_scratch, base, st_off); duke@435: } else if (sig_bt[i] == T_LONG) { duke@435: assert(!TieredCompilation, "should not use register args for longs"); duke@435: store_c2i_long(G1_scratch, base, st_off, false); duke@435: } else { duke@435: store_c2i_int(G1_scratch, base, st_off); duke@435: } duke@435: } duke@435: } duke@435: duke@435: // Now write the args into the outgoing interpreter space duke@435: for (int i=0; iis_valid()) { duke@435: assert(!r_2->is_valid(), ""); duke@435: continue; duke@435: } duke@435: // Skip G1 if found as we did it first in order to free it up duke@435: if (r_1 == G1_scratch->as_VMReg()) { duke@435: continue; duke@435: } duke@435: #ifdef ASSERT duke@435: bool G1_forced = false; duke@435: #endif // ASSERT duke@435: if (r_1->is_stack()) { // Pretend stack targets are loaded into G1 duke@435: #ifdef _LP64 duke@435: Register ld_off = Rdisp; duke@435: __ set(reg2offset(r_1) + extraspace + bias, ld_off); duke@435: #else duke@435: int ld_off = reg2offset(r_1) + extraspace + bias; kvn@1686: #endif // _LP64 duke@435: #ifdef ASSERT duke@435: G1_forced = true; duke@435: #endif // ASSERT duke@435: r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle duke@435: if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch); duke@435: else __ ldx(base, ld_off, G1_scratch); duke@435: } duke@435: duke@435: if (r_1->is_Register()) { duke@435: Register r = r_1->as_Register()->after_restore(); duke@435: if (sig_bt[i] == T_OBJECT || sig_bt[i] == 
T_ARRAY) { duke@435: store_c2i_object(r, base, st_off); duke@435: } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { kvn@1686: #ifndef _LP64 duke@435: if (TieredCompilation) { duke@435: assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs"); duke@435: } kvn@1686: #endif // _LP64 duke@435: store_c2i_long(r, base, st_off, r_2->is_stack()); duke@435: } else { duke@435: store_c2i_int(r, base, st_off); duke@435: } duke@435: } else { duke@435: assert(r_1->is_FloatRegister(), ""); duke@435: if (sig_bt[i] == T_FLOAT) { duke@435: store_c2i_float(r_1->as_FloatRegister(), base, st_off); duke@435: } else { duke@435: assert(sig_bt[i] == T_DOUBLE, "wrong type"); duke@435: store_c2i_double(r_2, r_1, base, st_off); duke@435: } duke@435: } duke@435: } duke@435: duke@435: #ifdef _LP64 duke@435: // Need to reload G3_scratch, used for temporary displacements. duke@435: __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch); duke@435: duke@435: // Pass O5_savedSP as an argument to the interpreter. duke@435: // The interpreter will restore SP to this value before returning. duke@435: __ set(extraspace, G1); duke@435: __ add(SP, G1, O5_savedSP); duke@435: #else duke@435: // Pass O5_savedSP as an argument to the interpreter. duke@435: // The interpreter will restore SP to this value before returning. duke@435: __ add(SP, extraspace, O5_savedSP); duke@435: #endif // _LP64 duke@435: duke@435: __ mov((frame::varargs_offset)*wordSize - twisti@1861: 1*Interpreter::stackElementSize+bias+BytesPerWord, G1); duke@435: // Jump to the interpreter just as if interpreter was doing it. duke@435: __ jmpl(G3_scratch, 0, G0); duke@435: // Setup Lesp for the call. Cannot actually set Lesp as the current Lesp duke@435: // (really L0) is in use by the compiled frame as a generic temp. However, duke@435: // the interpreter does not know where its args are without some kind of duke@435: // arg pointer being passed in. Pass it in Gargs. 
duke@435: __ delayed()->add(SP, G1, Gargs); duke@435: } duke@435: twisti@3969: static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg, twisti@3969: address code_start, address code_end, twisti@3969: Label& L_ok) { twisti@3969: Label L_fail; twisti@3969: __ set(ExternalAddress(code_start), temp_reg); twisti@3969: __ set(pointer_delta(code_end, code_start, 1), temp2_reg); twisti@3969: __ cmp(pc_reg, temp_reg); twisti@3969: __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail); twisti@3969: __ delayed()->add(temp_reg, temp2_reg, temp_reg); twisti@3969: __ cmp(pc_reg, temp_reg); twisti@3969: __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok); twisti@3969: __ bind(L_fail); twisti@3969: } twisti@3969: duke@435: void AdapterGenerator::gen_i2c_adapter( duke@435: int total_args_passed, duke@435: // VMReg max_arg, duke@435: int comp_args_on_stack, // VMRegStackSlots duke@435: const BasicType *sig_bt, duke@435: const VMRegPair *regs) { duke@435: duke@435: // Generate an I2C adapter: adjust the I-frame to make space for the C-frame duke@435: // layout. Lesp was saved by the calling I-frame and will be restored on duke@435: // return. Meanwhile, outgoing arg space is all owned by the callee duke@435: // C-frame, so we can mangle it at will. After adjusting the frame size, duke@435: // hoist register arguments and repack other args according to the compiled duke@435: // code convention. Finally, end in a jump to the compiled code. The entry duke@435: // point address is the start of the buffer. duke@435: duke@435: // We will only enter here from an interpreted frame and never from after duke@435: // passing thru a c2i. Azul allowed this but we do not. If we lose the duke@435: // race and use a c2i we will remain interpreted for the race loser(s). 
duke@435: // This removes all sorts of headaches on the x86 side and also eliminates duke@435: // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. duke@435: twisti@3969: // More detail: twisti@3969: // Adapters can be frameless because they do not require the caller twisti@3969: // to perform additional cleanup work, such as correcting the stack pointer. twisti@3969: // An i2c adapter is frameless because the *caller* frame, which is interpreted, twisti@3969: // routinely repairs its own stack pointer (from interpreter_frame_last_sp), twisti@3969: // even if a callee has modified the stack pointer. twisti@3969: // A c2i adapter is frameless because the *callee* frame, which is interpreted, twisti@3969: // routinely repairs its caller's stack pointer (from sender_sp, which is set twisti@3969: // up via the senderSP register). twisti@3969: // In other words, if *either* the caller or callee is interpreted, we can twisti@3969: // get the stack pointer repaired after a call. twisti@3969: // This is why c2i and i2c adapters cannot be indefinitely composed. twisti@3969: // In particular, if a c2i adapter were to somehow call an i2c adapter, twisti@3969: // both caller and callee would be compiled methods, and neither would twisti@3969: // clean up the stack pointer changes performed by the two adapters. twisti@3969: // If this happens, control eventually transfers back to the compiled twisti@3969: // caller, but with an uncorrected stack, causing delayed havoc. twisti@3969: twisti@3969: if (VerifyAdapterCalls && twisti@3969: (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) { twisti@3969: // So, let's test for cascading c2i/i2c adapters right now. 
twisti@3969: // assert(Interpreter::contains($return_addr) || twisti@3969: // StubRoutines::contains($return_addr), twisti@3969: // "i2c adapter must return to an interpreter frame"); twisti@3969: __ block_comment("verify_i2c { "); twisti@3969: Label L_ok; twisti@3969: if (Interpreter::code() != NULL) twisti@3969: range_check(masm, O7, O0, O1, twisti@3969: Interpreter::code()->code_start(), Interpreter::code()->code_end(), twisti@3969: L_ok); twisti@3969: if (StubRoutines::code1() != NULL) twisti@3969: range_check(masm, O7, O0, O1, twisti@3969: StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(), twisti@3969: L_ok); twisti@3969: if (StubRoutines::code2() != NULL) twisti@3969: range_check(masm, O7, O0, O1, twisti@3969: StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), twisti@3969: L_ok); twisti@3969: const char* msg = "i2c adapter must return to an interpreter frame"; twisti@3969: __ block_comment(msg); twisti@3969: __ stop(msg); twisti@3969: __ bind(L_ok); twisti@3969: __ block_comment("} verify_i2ce "); twisti@3969: } twisti@3969: duke@435: // As you can see from the list of inputs & outputs there are not a lot duke@435: // of temp registers to work with: mostly G1, G3 & G4. duke@435: duke@435: // Inputs: duke@435: // G2_thread - TLS duke@435: // G5_method - Method oop jrose@1145: // G4 (Gargs) - Pointer to interpreter's args jrose@1145: // O0..O4 - free for scratch jrose@1145: // O5_savedSP - Caller's saved SP, to be restored if needed duke@435: // O6 - Current SP! 
duke@435: // O7 - Valid return address jrose@1145: // L0-L7, I0-I7 - Caller's temps (no frame pushed yet) duke@435: duke@435: // Outputs: duke@435: // G2_thread - TLS duke@435: // G1, G4 - Outgoing long args in 32-bit build duke@435: // O0-O5 - Outgoing args in compiled layout duke@435: // O6 - Adjusted or restored SP duke@435: // O7 - Valid return address twisti@1919: // L0-L7, I0-I7 - Caller's temps (no frame pushed yet) duke@435: // F0-F7 - more outgoing args duke@435: duke@435: jrose@1145: // Gargs is the incoming argument base, and also an outgoing argument. duke@435: __ sub(Gargs, BytesPerWord, Gargs); duke@435: duke@435: // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME duke@435: // WITH O7 HOLDING A VALID RETURN PC duke@435: // duke@435: // | | duke@435: // : java stack : duke@435: // | | duke@435: // +--------------+ <--- start of outgoing args duke@435: // | receiver | | duke@435: // : rest of args : |---size is java-arg-words duke@435: // | | | duke@435: // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I duke@435: // | | | duke@435: // : unused : |---Space for max Java stack, plus stack alignment duke@435: // | | | duke@435: // +--------------+ <--- SP + 16*wordsize duke@435: // | | duke@435: // : window : duke@435: // | | duke@435: // +--------------+ <--- SP duke@435: duke@435: // WE REPACK THE STACK. We use the common calling convention layout as duke@435: // discovered by calling SharedRuntime::calling_convention. We assume it duke@435: // causes an arbitrary shuffle of memory, which may require some register duke@435: // temps to do the shuffle. We hope for (and optimize for) the case where duke@435: // temps are not needed. We may have to resize the stack slightly, in case duke@435: // we need alignment padding (32-bit interpreter can pass longs & doubles duke@435: // misaligned, but the compilers expect them aligned). 
duke@435: // duke@435: // | | duke@435: // : java stack : duke@435: // | | duke@435: // +--------------+ <--- start of outgoing args duke@435: // | pad, align | | duke@435: // +--------------+ | duke@435: // | ints, floats | |---Outgoing stack args, packed low. duke@435: // +--------------+ | First few args in registers. duke@435: // : doubles : | duke@435: // | longs | | duke@435: // +--------------+ <--- SP' + 16*wordsize duke@435: // | | duke@435: // : window : duke@435: // | | duke@435: // +--------------+ <--- SP' duke@435: duke@435: // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME duke@435: // WITH O7 HOLDING A VALID RETURN PC - ITS JUST THAT THE ARGS ARE NOW SETUP duke@435: // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN. duke@435: duke@435: // Cut-out for having no stack args. Since up to 6 args are passed duke@435: // in registers, we will commonly have no stack args. duke@435: if (comp_args_on_stack > 0) { duke@435: duke@435: // Convert VMReg stack slots to words. duke@435: int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; duke@435: // Round up to miminum stack alignment, in wordSize duke@435: comp_words_on_stack = round_to(comp_words_on_stack, 2); duke@435: // Now compute the distance from Lesp to SP. This calculation does not duke@435: // include the space for total_args_passed because Lesp has not yet popped duke@435: // the arguments. duke@435: __ sub(SP, (comp_words_on_stack)*wordSize, SP); duke@435: } duke@435: duke@435: // Will jump to the compiled code just as if compiled code was doing it. duke@435: // Pre-load the register-jump target early, to schedule it better. duke@435: __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3); duke@435: duke@435: // Now generate the shuffle code. Pick up all register args and move the duke@435: // rest through G1_scratch. 
duke@435: for (int i=0; i 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); duke@435: continue; duke@435: } duke@435: duke@435: // Pick up 0, 1 or 2 words from Lesp+offset. Assume mis-aligned in the duke@435: // 32-bit build and aligned in the 64-bit build. Look for the obvious duke@435: // ldx/lddf optimizations. duke@435: duke@435: // Load in argument order going down. twisti@1861: const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize; duke@435: set_Rdisp(G1_scratch); duke@435: duke@435: VMReg r_1 = regs[i].first(); duke@435: VMReg r_2 = regs[i].second(); duke@435: if (!r_1->is_valid()) { duke@435: assert(!r_2->is_valid(), ""); duke@435: continue; duke@435: } duke@435: if (r_1->is_stack()) { // Pretend stack targets are loaded into F8/F9 duke@435: r_1 = F8->as_VMReg(); // as part of the load/store shuffle duke@435: if (r_2->is_valid()) r_2 = r_1->next(); duke@435: } duke@435: if (r_1->is_Register()) { // Register argument duke@435: Register r = r_1->as_Register()->after_restore(); duke@435: if (!r_2->is_valid()) { duke@435: __ ld(Gargs, arg_slot(ld_off), r); duke@435: } else { duke@435: #ifdef _LP64 duke@435: // In V9, longs are given 2 64-bit slots in the interpreter, but the duke@435: // data is passed in only 1 slot. twisti@1441: RegisterOrConstant slot = (sig_bt[i] == T_LONG) ? duke@435: next_arg_slot(ld_off) : arg_slot(ld_off); duke@435: __ ldx(Gargs, slot, r); duke@435: #else duke@435: // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the duke@435: // stack shuffle. Load the first 2 longs into G1/G4 later. duke@435: #endif duke@435: } duke@435: } else { duke@435: assert(r_1->is_FloatRegister(), ""); duke@435: if (!r_2->is_valid()) { duke@435: __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister()); duke@435: } else { duke@435: #ifdef _LP64 duke@435: // In V9, doubles are given 2 64-bit slots in the interpreter, but the duke@435: // data is passed in only 1 slot. 
This code also handles longs that duke@435: // are passed on the stack, but need a stack-to-stack move through a duke@435: // spare float register. twisti@1441: RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? duke@435: next_arg_slot(ld_off) : arg_slot(ld_off); duke@435: __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister()); duke@435: #else duke@435: // Need to marshal 64-bit value from misaligned Lesp loads duke@435: __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister()); duke@435: __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister()); duke@435: #endif duke@435: } duke@435: } duke@435: // Was the argument really intended to be on the stack, but was loaded duke@435: // into F8/F9? duke@435: if (regs[i].first()->is_stack()) { duke@435: assert(r_1->as_FloatRegister() == F8, "fix this code"); duke@435: // Convert stack slot to an SP offset duke@435: int st_off = reg2offset(regs[i].first()) + STACK_BIAS; duke@435: // Store down the shuffled stack word. Target address _is_ aligned. 
twisti@1441: RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp); twisti@1441: if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot); twisti@1441: else __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot); duke@435: } duke@435: } duke@435: bool made_space = false; duke@435: #ifndef _LP64 duke@435: // May need to pick up a few long args in G1/G4 duke@435: bool g4_crushed = false; duke@435: bool g3_crushed = false; duke@435: for (int i=0; iis_Register() && regs[i].second()->is_valid()) { duke@435: // Load in argument order going down twisti@1861: int ld_off = (total_args_passed-i)*Interpreter::stackElementSize; duke@435: // Need to marshal 64-bit value from misaligned Lesp loads duke@435: Register r = regs[i].first()->as_Register()->after_restore(); duke@435: if (r == G1 || r == G4) { duke@435: assert(!g4_crushed, "ordering problem"); duke@435: if (r == G4){ duke@435: g4_crushed = true; duke@435: __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits duke@435: __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits duke@435: } else { duke@435: // better schedule this way duke@435: __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits duke@435: __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits duke@435: } duke@435: g3_crushed = true; duke@435: __ sllx(r, 32, r); duke@435: __ or3(G3_scratch, r, r); duke@435: } else { duke@435: assert(r->is_out(), "longs passed in two O registers"); duke@435: __ ld (Gargs, arg_slot(ld_off) , r->successor()); // Load lo bits duke@435: __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits duke@435: } duke@435: } duke@435: } duke@435: #endif duke@435: duke@435: // Jump to the compiled code just as if compiled code was doing it. duke@435: // duke@435: #ifndef _LP64 duke@435: if (g3_crushed) { duke@435: // Rats load was wasted, at least it is in cache... 
twisti@1162: __ ld_ptr(G5_method, methodOopDesc::from_compiled_offset(), G3); duke@435: } duke@435: #endif /* _LP64 */ duke@435: duke@435: // 6243940 We might end up in handle_wrong_method if duke@435: // the callee is deoptimized as we race thru here. If that duke@435: // happens we don't want to take a safepoint because the duke@435: // caller frame will look interpreted and arguments are now duke@435: // "compiled" so it is much better to make this transition duke@435: // invisible to the stack walking code. Unfortunately if duke@435: // we try and find the callee by normal means a safepoint duke@435: // is possible. So we stash the desired callee in the thread duke@435: // and the vm will find there should this case occur. twisti@1162: Address callee_target_addr(G2_thread, JavaThread::callee_target_offset()); duke@435: __ st_ptr(G5_method, callee_target_addr); duke@435: duke@435: if (StressNonEntrant) { duke@435: // Open a big window for deopt failure duke@435: __ save_frame(0); duke@435: __ mov(G0, L0); duke@435: Label loop; duke@435: __ bind(loop); duke@435: __ sub(L0, 1, L0); kvn@3037: __ br_null_short(L0, Assembler::pt, loop); duke@435: duke@435: __ restore(); duke@435: } duke@435: duke@435: duke@435: __ jmpl(G3, 0, G0); duke@435: __ delayed()->nop(); duke@435: } duke@435: duke@435: // --------------------------------------------------------------- duke@435: AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, duke@435: int total_args_passed, duke@435: // VMReg max_arg, duke@435: int comp_args_on_stack, // VMRegStackSlots duke@435: const BasicType *sig_bt, never@1622: const VMRegPair *regs, never@1622: AdapterFingerPrint* fingerprint) { duke@435: address i2c_entry = __ pc(); duke@435: duke@435: AdapterGenerator agen(masm); duke@435: duke@435: agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs); duke@435: duke@435: duke@435: // ------------------------------------------------------------------------- 
duke@435: // Generate a C2I adapter. On entry we know G5 holds the methodOop. The duke@435: // args start out packed in the compiled layout. They need to be unpacked duke@435: // into the interpreter layout. This will almost always require some stack duke@435: // space. We grow the current (compiled) stack, then repack the args. We duke@435: // finally end in a jump to the generic interpreter entry point. On exit duke@435: // from the interpreter, the interpreter will restore our SP (lest the duke@435: // compiled code, which relys solely on SP and not FP, get sick). duke@435: duke@435: address c2i_unverified_entry = __ pc(); duke@435: Label skip_fixup; duke@435: { duke@435: #if !defined(_LP64) && defined(COMPILER2) duke@435: Register R_temp = L0; // another scratch register duke@435: #else duke@435: Register R_temp = G1; // another scratch register duke@435: #endif duke@435: twisti@1162: AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); duke@435: duke@435: __ verify_oop(O0); duke@435: __ verify_oop(G5_method); coleenp@548: __ load_klass(O0, G3_scratch); duke@435: __ verify_oop(G3_scratch); duke@435: duke@435: #if !defined(_LP64) && defined(COMPILER2) duke@435: __ save(SP, -frame::register_save_words*wordSize, SP); duke@435: __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp); duke@435: __ verify_oop(R_temp); duke@435: __ cmp(G3_scratch, R_temp); duke@435: __ restore(); duke@435: #else duke@435: __ ld_ptr(G5_method, compiledICHolderOopDesc::holder_klass_offset(), R_temp); duke@435: __ verify_oop(R_temp); duke@435: __ cmp(G3_scratch, R_temp); duke@435: #endif duke@435: duke@435: Label ok, ok2; duke@435: __ brx(Assembler::equal, false, Assembler::pt, ok); duke@435: __ delayed()->ld_ptr(G5_method, compiledICHolderOopDesc::holder_method_offset(), G5_method); twisti@1162: __ jump_to(ic_miss, G3_scratch); duke@435: __ delayed()->nop(); duke@435: duke@435: __ bind(ok); duke@435: // Method might have been compiled since the call site was 
patched to duke@435: // interpreted if that is the case treat it as a miss so we can get duke@435: // the call site corrected. duke@435: __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch); duke@435: __ bind(ok2); kvn@3037: __ br_null(G3_scratch, false, Assembler::pt, skip_fixup); duke@435: __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch); twisti@1162: __ jump_to(ic_miss, G3_scratch); duke@435: __ delayed()->nop(); duke@435: duke@435: } duke@435: duke@435: address c2i_entry = __ pc(); duke@435: duke@435: agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); duke@435: duke@435: __ flush(); never@1622: return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); duke@435: duke@435: } duke@435: duke@435: // Helper function for native calling conventions duke@435: static VMReg int_stk_helper( int i ) { duke@435: // Bias any stack based VMReg we get by ignoring the window area duke@435: // but not the register parameter save area. duke@435: // duke@435: // This is strange for the following reasons. We'd normally expect duke@435: // the calling convention to return an VMReg for a stack slot duke@435: // completely ignoring any abi reserved area. C2 thinks of that duke@435: // abi area as only out_preserve_stack_slots. This does not include duke@435: // the area allocated by the C abi to store down integer arguments duke@435: // because the java calling convention does not use it. So duke@435: // since c2 assumes that there are only out_preserve_stack_slots duke@435: // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack duke@435: // location the c calling convention must add in this bias amount duke@435: // to make up for the fact that the out_preserve_stack_slots is duke@435: // insufficient for C calls. What a mess. I sure hope those 6 duke@435: // stack words were worth it on every java call! 
duke@435: duke@435: // Another way of cleaning this up would be for out_preserve_stack_slots duke@435: // to take a parameter to say whether it was C or java calling conventions. duke@435: // Then things might look a little better (but not much). duke@435: duke@435: int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM; duke@435: if( mem_parm_offset < 0 ) { duke@435: return as_oRegister(i)->as_VMReg(); duke@435: } else { duke@435: int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word; duke@435: // Now return a biased offset that will be correct when out_preserve_slots is added back in duke@435: return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots()); duke@435: } duke@435: } duke@435: duke@435: duke@435: int SharedRuntime::c_calling_convention(const BasicType *sig_bt, duke@435: VMRegPair *regs, duke@435: int total_args_passed) { duke@435: duke@435: // Return the number of VMReg stack_slots needed for the args. duke@435: // This value does not include an abi space (like register window duke@435: // save area). duke@435: duke@435: // The native convention is V8 if !LP64 duke@435: // The LP64 convention is the V9 convention which is slightly more sane. duke@435: duke@435: // We return the amount of VMReg stack slots we need to reserve for all duke@435: // the arguments NOT counting out_preserve_stack_slots. Since we always duke@435: // have space for storing at least 6 registers to memory we start with that. duke@435: // See int_stk_helper for a further discussion. duke@435: int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots(); duke@435: duke@435: #ifdef _LP64 duke@435: // V9 convention: All things "as-if" on double-wide stack slots. duke@435: // Hoist any int/ptr/long's in the first 6 to int regs. duke@435: // Hoist any flt/dbl's in the first 16 dbl regs. 
duke@435: int j = 0; // Count of actual args, not HALVES duke@435: for( int i=0; ias_VMReg()); duke@435: } else { duke@435: // V9ism: floats go in ODD stack slot duke@435: regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1))); duke@435: } duke@435: break; duke@435: case T_DOUBLE: duke@435: assert( sig_bt[i+1] == T_VOID, "expecting half" ); duke@435: if ( j < 16 ) { duke@435: // V9ism: doubles go in EVEN/ODD regs duke@435: regs[i].set2(as_FloatRegister(j<<1)->as_VMReg()); duke@435: } else { duke@435: // V9ism: doubles go in EVEN/ODD stack slots duke@435: regs[i].set2(VMRegImpl::stack2reg(j<<1)); duke@435: } duke@435: break; duke@435: case T_VOID: regs[i].set_bad(); j--; break; // Do not count HALVES duke@435: default: duke@435: ShouldNotReachHere(); duke@435: } duke@435: if (regs[i].first()->is_stack()) { duke@435: int off = regs[i].first()->reg2stack(); duke@435: if (off > max_stack_slots) max_stack_slots = off; duke@435: } duke@435: if (regs[i].second()->is_stack()) { duke@435: int off = regs[i].second()->reg2stack(); duke@435: if (off > max_stack_slots) max_stack_slots = off; duke@435: } duke@435: } duke@435: duke@435: #else // _LP64 duke@435: // V8 convention: first 6 things in O-regs, rest on stack. duke@435: // Alignment is willy-nilly. 
duke@435: for( int i=0; iis_stack()) { duke@435: int off = regs[i].first()->reg2stack(); duke@435: if (off > max_stack_slots) max_stack_slots = off; duke@435: } duke@435: if (regs[i].second()->is_stack()) { duke@435: int off = regs[i].second()->reg2stack(); duke@435: if (off > max_stack_slots) max_stack_slots = off; duke@435: } duke@435: } duke@435: #endif // _LP64 duke@435: duke@435: return round_to(max_stack_slots + 1, 2); duke@435: duke@435: } duke@435: duke@435: duke@435: // --------------------------------------------------------------------------- duke@435: void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { duke@435: switch (ret_type) { duke@435: case T_FLOAT: duke@435: __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS); duke@435: break; duke@435: case T_DOUBLE: duke@435: __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS); duke@435: break; duke@435: } duke@435: } duke@435: duke@435: void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { duke@435: switch (ret_type) { duke@435: case T_FLOAT: duke@435: __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0); duke@435: break; duke@435: case T_DOUBLE: duke@435: __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0); duke@435: break; duke@435: } duke@435: } duke@435: duke@435: // Check and forward and pending exception. Thread is stored in duke@435: // L7_thread_cache and possibly NOT in G2_thread. Since this is a native call, there duke@435: // is no exception handler. We merely pop this frame off and throw the duke@435: // exception in the caller's frame. 
duke@435: static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) { duke@435: Label L; duke@435: __ br_null(Rex_oop, false, Assembler::pt, L); duke@435: __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception duke@435: // Since this is a native call, we *know* the proper exception handler duke@435: // without calling into the VM: it's the empty function. Just pop this duke@435: // frame and then jump to forward_exception_entry; O7 will contain the duke@435: // native caller's return PC. twisti@1162: AddressLiteral exception_entry(StubRoutines::forward_exception_entry()); twisti@1162: __ jump_to(exception_entry, G3_scratch); duke@435: __ delayed()->restore(); // Pop this frame off. duke@435: __ bind(L); duke@435: } duke@435: duke@435: // A simple move of integer like type duke@435: static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { duke@435: if (src.first()->is_stack()) { duke@435: if (dst.first()->is_stack()) { duke@435: // stack to stack duke@435: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); duke@435: __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } else { duke@435: // stack to reg duke@435: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); duke@435: } duke@435: } else if (dst.first()->is_stack()) { duke@435: // reg to stack duke@435: __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } else { duke@435: __ mov(src.first()->as_Register(), dst.first()->as_Register()); duke@435: } duke@435: } duke@435: duke@435: // On 64 bit we will store integer like items to the stack as duke@435: // 64 bits items (sparc abi) even though java would only store duke@435: // 32bits for a parameter. 
On 32bit it will simply be 32 bits duke@435: // So this routine will do 32->32 on 32bit and 32->64 on 64bit duke@435: static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { duke@435: if (src.first()->is_stack()) { duke@435: if (dst.first()->is_stack()) { duke@435: // stack to stack duke@435: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); duke@435: __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } else { duke@435: // stack to reg duke@435: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); duke@435: } duke@435: } else if (dst.first()->is_stack()) { duke@435: // reg to stack duke@435: __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } else { duke@435: __ mov(src.first()->as_Register(), dst.first()->as_Register()); duke@435: } duke@435: } duke@435: duke@435: never@3500: static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { never@3500: if (src.first()->is_stack()) { never@3500: if (dst.first()->is_stack()) { never@3500: // stack to stack never@3500: __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, L5); never@3500: __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); never@3500: } else { never@3500: // stack to reg never@3500: __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); never@3500: } never@3500: } else if (dst.first()->is_stack()) { never@3500: // reg to stack never@3500: __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); never@3500: } else { never@3500: __ mov(src.first()->as_Register(), dst.first()->as_Register()); never@3500: } never@3500: } never@3500: never@3500: duke@435: // An oop arg. 
Must pass a handle not the oop itself duke@435: static void object_move(MacroAssembler* masm, duke@435: OopMap* map, duke@435: int oop_handle_offset, duke@435: int framesize_in_slots, duke@435: VMRegPair src, duke@435: VMRegPair dst, duke@435: bool is_receiver, duke@435: int* receiver_offset) { duke@435: duke@435: // must pass a handle. First figure out the location we use as a handle duke@435: duke@435: if (src.first()->is_stack()) { duke@435: // Oop is already on the stack duke@435: Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register(); duke@435: __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle); duke@435: __ ld_ptr(rHandle, 0, L4); duke@435: #ifdef _LP64 duke@435: __ movr( Assembler::rc_z, L4, G0, rHandle ); duke@435: #else duke@435: __ tst( L4 ); duke@435: __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle ); duke@435: #endif duke@435: if (dst.first()->is_stack()) { duke@435: __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } duke@435: int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); duke@435: if (is_receiver) { duke@435: *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; duke@435: } duke@435: map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); duke@435: } else { duke@435: // Oop is in an input register pass we must flush it to the stack duke@435: const Register rOop = src.first()->as_Register(); duke@435: const Register rHandle = L5; duke@435: int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset; duke@435: int offset = oop_slot*VMRegImpl::stack_slot_size; duke@435: Label skip; duke@435: __ st_ptr(rOop, SP, offset + STACK_BIAS); duke@435: if (is_receiver) { duke@435: *receiver_offset = oop_slot * VMRegImpl::stack_slot_size; duke@435: } duke@435: map->set_oop(VMRegImpl::stack2reg(oop_slot)); duke@435: __ add(SP, offset + STACK_BIAS, rHandle); 
duke@435: #ifdef _LP64 duke@435: __ movr( Assembler::rc_z, rOop, G0, rHandle ); duke@435: #else duke@435: __ tst( rOop ); duke@435: __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle ); duke@435: #endif duke@435: duke@435: if (dst.first()->is_stack()) { duke@435: __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } else { duke@435: __ mov(rHandle, dst.first()->as_Register()); duke@435: } duke@435: } duke@435: } duke@435: duke@435: // A float arg may have to do float reg int reg conversion duke@435: static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { duke@435: assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); duke@435: duke@435: if (src.first()->is_stack()) { duke@435: if (dst.first()->is_stack()) { duke@435: // stack to stack the easiest of the bunch duke@435: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); duke@435: __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } else { duke@435: // stack to reg duke@435: if (dst.first()->is_Register()) { duke@435: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); duke@435: } else { duke@435: __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); duke@435: } duke@435: } duke@435: } else if (dst.first()->is_stack()) { duke@435: // reg to stack duke@435: if (src.first()->is_Register()) { duke@435: __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } else { duke@435: __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } duke@435: } else { duke@435: // reg to reg duke@435: if (src.first()->is_Register()) { duke@435: if (dst.first()->is_Register()) { duke@435: // gpr -> gpr duke@435: __ mov(src.first()->as_Register(), dst.first()->as_Register()); duke@435: } else { duke@435: // gpr -> fpr duke@435: __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS); 
duke@435: __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister()); duke@435: } duke@435: } else if (dst.first()->is_Register()) { duke@435: // fpr -> gpr duke@435: __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS); duke@435: __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register()); duke@435: } else { duke@435: // fpr -> fpr duke@435: // In theory these overlap but the ordering is such that this is likely a nop duke@435: if ( src.first() != dst.first()) { duke@435: __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); duke@435: } duke@435: } duke@435: } duke@435: } duke@435: duke@435: static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { duke@435: VMRegPair src_lo(src.first()); duke@435: VMRegPair src_hi(src.second()); duke@435: VMRegPair dst_lo(dst.first()); duke@435: VMRegPair dst_hi(dst.second()); duke@435: simple_move32(masm, src_lo, dst_lo); duke@435: simple_move32(masm, src_hi, dst_hi); duke@435: } duke@435: duke@435: // A long move duke@435: static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { duke@435: duke@435: // Do the simple ones here else do two int moves duke@435: if (src.is_single_phys_reg() ) { duke@435: if (dst.is_single_phys_reg()) { duke@435: __ mov(src.first()->as_Register(), dst.first()->as_Register()); duke@435: } else { duke@435: // split src into two separate registers duke@435: // Remember hi means hi address or lsw on sparc duke@435: // Move msw to lsw duke@435: if (dst.second()->is_reg()) { duke@435: // MSW -> MSW duke@435: __ srax(src.first()->as_Register(), 32, dst.first()->as_Register()); duke@435: // Now LSW -> LSW duke@435: // this will only move lo -> lo and ignore hi duke@435: VMRegPair split(dst.second()); duke@435: simple_move32(masm, src, split); duke@435: } else { duke@435: VMRegPair split(src.first(), L4->as_VMReg()); duke@435: // MSW -> MSW (lo ie. 
first word) duke@435: __ srax(src.first()->as_Register(), 32, L4); duke@435: split_long_move(masm, split, dst); duke@435: } duke@435: } duke@435: } else if (dst.is_single_phys_reg()) { duke@435: if (src.is_adjacent_aligned_on_stack(2)) { never@739: __ ldx(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); duke@435: } else { duke@435: // dst is a single reg. duke@435: // Remember lo is low address not msb for stack slots duke@435: // and lo is the "real" register for registers duke@435: // src is duke@435: duke@435: VMRegPair split; duke@435: duke@435: if (src.first()->is_reg()) { duke@435: // src.lo (msw) is a reg, src.hi is stk/reg duke@435: // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg] duke@435: split.set_pair(dst.first(), src.first()); duke@435: } else { duke@435: // msw is stack move to L5 duke@435: // lsw is stack move to dst.lo (real reg) duke@435: // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5 duke@435: split.set_pair(dst.first(), L5->as_VMReg()); duke@435: } duke@435: duke@435: // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg) duke@435: // msw -> src.lo/L5, lsw -> dst.lo duke@435: split_long_move(masm, src, split); duke@435: duke@435: // So dst now has the low order correct position the duke@435: // msw half duke@435: __ sllx(split.first()->as_Register(), 32, L5); duke@435: duke@435: const Register d = dst.first()->as_Register(); duke@435: __ or3(L5, d, d); duke@435: } duke@435: } else { duke@435: // For LP64 we can probably do better. 
duke@435: split_long_move(masm, src, dst); duke@435: } duke@435: } duke@435: duke@435: // A double move duke@435: static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { duke@435: duke@435: // The painful thing here is that like long_move a VMRegPair might be duke@435: // 1: a single physical register duke@435: // 2: two physical registers (v8) duke@435: // 3: a physical reg [lo] and a stack slot [hi] (v8) duke@435: // 4: two stack slots duke@435: duke@435: // Since src is always a java calling convention we know that the src pair duke@435: // is always either all registers or all stack (and aligned?) duke@435: duke@435: // in a register [lo] and a stack slot [hi] duke@435: if (src.first()->is_stack()) { duke@435: if (dst.first()->is_stack()) { duke@435: // stack to stack the easiest of the bunch duke@435: // ought to be a way to do this where if alignment is ok we use ldd/std when possible duke@435: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); duke@435: __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); duke@435: __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); duke@435: } else { duke@435: // stack to reg duke@435: if (dst.second()->is_stack()) { duke@435: // stack -> reg, stack -> stack duke@435: __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); duke@435: if (dst.first()->is_Register()) { duke@435: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); duke@435: } else { duke@435: __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); duke@435: } duke@435: // This was missing. 
(very rare case) duke@435: __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); duke@435: } else { duke@435: // stack -> reg duke@435: // Eventually optimize for alignment QQQ duke@435: if (dst.first()->is_Register()) { duke@435: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); duke@435: __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register()); duke@435: } else { duke@435: __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); duke@435: __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister()); duke@435: } duke@435: } duke@435: } duke@435: } else if (dst.first()->is_stack()) { duke@435: // reg to stack duke@435: if (src.first()->is_Register()) { duke@435: // Eventually optimize for alignment QQQ duke@435: __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: if (src.second()->is_stack()) { duke@435: __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); duke@435: __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); duke@435: } else { duke@435: __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS); duke@435: } duke@435: } else { duke@435: // fpr to stack duke@435: if (src.second()->is_stack()) { duke@435: ShouldNotReachHere(); duke@435: } else { duke@435: // Is the stack aligned? 
duke@435: if (reg2offset(dst.first()) & 0x7) { duke@435: // No do as pairs duke@435: __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS); duke@435: } else { duke@435: __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); duke@435: } duke@435: } duke@435: } duke@435: } else { duke@435: // reg to reg duke@435: if (src.first()->is_Register()) { duke@435: if (dst.first()->is_Register()) { duke@435: // gpr -> gpr duke@435: __ mov(src.first()->as_Register(), dst.first()->as_Register()); duke@435: __ mov(src.second()->as_Register(), dst.second()->as_Register()); duke@435: } else { duke@435: // gpr -> fpr duke@435: // ought to be able to do a single store duke@435: __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS); duke@435: __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS); duke@435: // ought to be able to do a single load duke@435: __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister()); duke@435: __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister()); duke@435: } duke@435: } else if (dst.first()->is_Register()) { duke@435: // fpr -> gpr duke@435: // ought to be able to do a single store duke@435: __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS); duke@435: // ought to be able to do a single load duke@435: // REMEMBER first() is low address not LSB duke@435: __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register()); duke@435: if (dst.second()->is_Register()) { duke@435: __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register()); duke@435: } else { duke@435: __ ld(FP, -4 + STACK_BIAS, L4); duke@435: __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); duke@435: } duke@435: } else { duke@435: // fpr -> fpr duke@435: // In theory these overlap but the ordering is 
such that this is likely a nop duke@435: if ( src.first() != dst.first()) { duke@435: __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); duke@435: } duke@435: } duke@435: } duke@435: } duke@435: duke@435: // Creates an inner frame if one hasn't already been created, and duke@435: // saves a copy of the thread in L7_thread_cache duke@435: static void create_inner_frame(MacroAssembler* masm, bool* already_created) { duke@435: if (!*already_created) { duke@435: __ save_frame(0); duke@435: // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below duke@435: // Don't use save_thread because it smashes G2 and we merely want to save a duke@435: // copy duke@435: __ mov(G2_thread, L7_thread_cache); duke@435: *already_created = true; duke@435: } duke@435: } duke@435: never@3500: never@3500: static void save_or_restore_arguments(MacroAssembler* masm, never@3500: const int stack_slots, never@3500: const int total_in_args, never@3500: const int arg_save_area, never@3500: OopMap* map, never@3500: VMRegPair* in_regs, never@3500: BasicType* in_sig_bt) { never@3500: // if map is non-NULL then the code should store the values, never@3500: // otherwise it should load them. 
never@3500: if (map != NULL) { never@3500: // Fill in the map never@3500: for (int i = 0; i < total_in_args; i++) { never@3500: if (in_sig_bt[i] == T_ARRAY) { never@3500: if (in_regs[i].first()->is_stack()) { never@3500: int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); never@3500: map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); never@3500: } else if (in_regs[i].first()->is_Register()) { never@3500: map->set_oop(in_regs[i].first()); never@3500: } else { never@3500: ShouldNotReachHere(); never@3500: } never@3500: } never@3500: } never@3500: } never@3500: never@3500: // Save or restore double word values never@3500: int handle_index = 0; never@3500: for (int i = 0; i < total_in_args; i++) { never@3500: int slot = handle_index + arg_save_area; never@3500: int offset = slot * VMRegImpl::stack_slot_size; never@3500: if (in_sig_bt[i] == T_LONG && in_regs[i].first()->is_Register()) { never@3500: const Register reg = in_regs[i].first()->as_Register(); never@3500: if (reg->is_global()) { never@3500: handle_index += 2; never@3500: assert(handle_index <= stack_slots, "overflow"); never@3500: if (map != NULL) { never@3500: __ stx(reg, SP, offset + STACK_BIAS); never@3500: } else { never@3500: __ ldx(SP, offset + STACK_BIAS, reg); never@3500: } never@3500: } never@3500: } else if (in_sig_bt[i] == T_DOUBLE && in_regs[i].first()->is_FloatRegister()) { never@3500: handle_index += 2; never@3500: assert(handle_index <= stack_slots, "overflow"); never@3500: if (map != NULL) { never@3500: __ stf(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS); never@3500: } else { never@3500: __ ldf(FloatRegisterImpl::D, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister()); never@3500: } never@3500: } never@3500: } never@3500: // Save floats never@3500: for (int i = 0; i < total_in_args; i++) { never@3500: int slot = handle_index + arg_save_area; never@3500: int offset = 
slot * VMRegImpl::stack_slot_size; never@3500: if (in_sig_bt[i] == T_FLOAT && in_regs[i].first()->is_FloatRegister()) { never@3500: handle_index++; never@3500: assert(handle_index <= stack_slots, "overflow"); never@3500: if (map != NULL) { never@3500: __ stf(FloatRegisterImpl::S, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS); never@3500: } else { never@3500: __ ldf(FloatRegisterImpl::S, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister()); never@3500: } never@3500: } never@3500: } never@3500: never@3500: } never@3500: never@3500: never@3500: // Check GC_locker::needs_gc and enter the runtime if it's true. This never@3500: // keeps a new JNI critical region from starting until a GC has been never@3500: // forced. Save down any oops in registers and describe them in an never@3500: // OopMap. never@3500: static void check_needs_gc_for_critical_native(MacroAssembler* masm, never@3500: const int stack_slots, never@3500: const int total_in_args, never@3500: const int arg_save_area, never@3500: OopMapSet* oop_maps, never@3500: VMRegPair* in_regs, never@3500: BasicType* in_sig_bt) { never@3500: __ block_comment("check GC_locker::needs_gc"); never@3500: Label cont; never@3500: AddressLiteral sync_state(GC_locker::needs_gc_address()); never@3500: __ load_bool_contents(sync_state, G3_scratch); never@3500: __ cmp_zero_and_br(Assembler::equal, G3_scratch, cont); never@3500: __ delayed()->nop(); never@3500: never@3500: // Save down any values that are live in registers and call into the never@3500: // runtime to halt for a GC never@3500: OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); never@3500: save_or_restore_arguments(masm, stack_slots, total_in_args, never@3500: arg_save_area, map, in_regs, in_sig_bt); never@3500: never@3500: __ mov(G2_thread, L7_thread_cache); never@3500: never@3500: __ set_last_Java_frame(SP, noreg); never@3500: never@3500: __ block_comment("block_for_jni_critical"); never@3500: __ call(CAST_FROM_FN_PTR(address, 
SharedRuntime::block_for_jni_critical), relocInfo::runtime_call_type); never@3500: __ delayed()->mov(L7_thread_cache, O0); never@3500: oop_maps->add_gc_map( __ offset(), map); never@3500: never@3500: __ restore_thread(L7_thread_cache); // restore G2_thread never@3500: __ reset_last_Java_frame(); never@3500: never@3500: // Reload all the register arguments never@3500: save_or_restore_arguments(masm, stack_slots, total_in_args, never@3500: arg_save_area, NULL, in_regs, in_sig_bt); never@3500: never@3500: __ bind(cont); never@3500: #ifdef ASSERT never@3500: if (StressCriticalJNINatives) { never@3500: // Stress register saving never@3500: OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); never@3500: save_or_restore_arguments(masm, stack_slots, total_in_args, never@3500: arg_save_area, map, in_regs, in_sig_bt); never@3500: // Destroy argument registers never@3500: for (int i = 0; i < total_in_args; i++) { never@3500: if (in_regs[i].first()->is_Register()) { never@3500: const Register reg = in_regs[i].first()->as_Register(); never@3500: if (reg->is_global()) { never@3500: __ mov(G0, reg); never@3500: } never@3500: } else if (in_regs[i].first()->is_FloatRegister()) { never@3500: __ fneg(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister()); never@3500: } never@3500: } never@3500: never@3500: save_or_restore_arguments(masm, stack_slots, total_in_args, never@3500: arg_save_area, NULL, in_regs, in_sig_bt); never@3500: } never@3500: #endif never@3500: } never@3500: never@3500: // Unpack an array argument into a pointer to the body and the length never@3500: // if the array is non-null, otherwise pass 0 for both. 
never@3500: static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { never@3500: // Pass the length, ptr pair never@3500: Label is_null, done; never@3500: if (reg.first()->is_stack()) { never@3500: VMRegPair tmp = reg64_to_VMRegPair(L2); never@3500: // Load the arg up from the stack never@3500: move_ptr(masm, reg, tmp); never@3500: reg = tmp; never@3500: } never@3500: __ cmp(reg.first()->as_Register(), G0); never@3500: __ brx(Assembler::equal, false, Assembler::pt, is_null); never@3500: __ delayed()->add(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type), L4); never@3500: move_ptr(masm, reg64_to_VMRegPair(L4), body_arg); never@3500: __ ld(reg.first()->as_Register(), arrayOopDesc::length_offset_in_bytes(), L4); never@3500: move32_64(masm, reg64_to_VMRegPair(L4), length_arg); never@3500: __ ba_short(done); never@3500: __ bind(is_null); never@3500: // Pass zeros never@3500: move_ptr(masm, reg64_to_VMRegPair(G0), body_arg); never@3500: move32_64(masm, reg64_to_VMRegPair(G0), length_arg); never@3500: __ bind(done); never@3500: } never@3500: twisti@3969: static void verify_oop_args(MacroAssembler* masm, twisti@3969: int total_args_passed, twisti@3969: const BasicType* sig_bt, twisti@3969: const VMRegPair* regs) { twisti@3969: Register temp_reg = G5_method; // not part of any compiled calling seq twisti@3969: if (VerifyOops) { twisti@3969: for (int i = 0; i < total_args_passed; i++) { twisti@3969: if (sig_bt[i] == T_OBJECT || twisti@3969: sig_bt[i] == T_ARRAY) { twisti@3969: VMReg r = regs[i].first(); twisti@3969: assert(r->is_valid(), "bad oop arg"); twisti@3969: if (r->is_stack()) { twisti@3969: RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; twisti@3969: ld_off = __ ensure_simm13_or_reg(ld_off, temp_reg); twisti@3969: __ ld_ptr(SP, ld_off, temp_reg); twisti@3969: __ verify_oop(temp_reg); twisti@3969: } else { twisti@3969: __ verify_oop(r->as_Register()); 
twisti@3969: } twisti@3969: } twisti@3969: } twisti@3969: } twisti@3969: } twisti@3969: twisti@3969: static void gen_special_dispatch(MacroAssembler* masm, twisti@3969: int total_args_passed, twisti@3969: int comp_args_on_stack, twisti@3969: vmIntrinsics::ID special_dispatch, twisti@3969: const BasicType* sig_bt, twisti@3969: const VMRegPair* regs) { twisti@3969: verify_oop_args(masm, total_args_passed, sig_bt, regs); twisti@3969: twisti@3969: // Now write the args into the outgoing interpreter space twisti@3969: bool has_receiver = false; twisti@3969: Register receiver_reg = noreg; twisti@3969: int member_arg_pos = -1; twisti@3969: Register member_reg = noreg; twisti@3969: int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch); twisti@3969: if (ref_kind != 0) { twisti@3969: member_arg_pos = total_args_passed - 1; // trailing MemberName argument twisti@3969: member_reg = G5_method; // known to be free at this point twisti@3969: has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); twisti@3969: } else if (special_dispatch == vmIntrinsics::_invokeBasic) { twisti@3969: has_receiver = true; twisti@3969: } else { twisti@3969: fatal(err_msg("special_dispatch=%d", special_dispatch)); twisti@3969: } twisti@3969: twisti@3969: if (member_reg != noreg) { twisti@3969: // Load the member_arg into register, if necessary. 
twisti@3969: assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); twisti@3969: assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); twisti@3969: VMReg r = regs[member_arg_pos].first(); twisti@3969: assert(r->is_valid(), "bad member arg"); twisti@3969: if (r->is_stack()) { twisti@3969: RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; twisti@3969: ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); twisti@3969: __ ld_ptr(SP, ld_off, member_reg); twisti@3969: } else { twisti@3969: // no data motion is needed twisti@3969: member_reg = r->as_Register(); twisti@3969: } twisti@3969: } twisti@3969: twisti@3969: if (has_receiver) { twisti@3969: // Make sure the receiver is loaded into a register. twisti@3969: assert(total_args_passed > 0, "oob"); twisti@3969: assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); twisti@3969: VMReg r = regs[0].first(); twisti@3969: assert(r->is_valid(), "bad receiver arg"); twisti@3969: if (r->is_stack()) { twisti@3969: // Porting note: This assumes that compiled calling conventions always twisti@3969: // pass the receiver oop in a register. If this is not true on some twisti@3969: // platform, pick a temp and load the receiver from stack. 
twisti@3969: assert(false, "receiver always in a register"); twisti@3969: receiver_reg = G3_scratch; // known to be free at this point twisti@3969: RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; twisti@3969: ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); twisti@3969: __ ld_ptr(SP, ld_off, receiver_reg); twisti@3969: } else { twisti@3969: // no data motion is needed twisti@3969: receiver_reg = r->as_Register(); twisti@3969: } twisti@3969: } twisti@3969: twisti@3969: // Figure out which address we are really jumping to: twisti@3969: MethodHandles::generate_method_handle_dispatch(masm, special_dispatch, twisti@3969: receiver_reg, member_reg, /*for_compiler_entry:*/ true); twisti@3969: } twisti@3969: duke@435: // --------------------------------------------------------------------------- duke@435: // Generate a native wrapper for a given method. The method takes arguments duke@435: // in the Java compiled code convention, marshals them to the native duke@435: // convention (handlizes oops, etc), transitions to native, makes the call, duke@435: // returns to java state (possibly blocking), unhandlizes any result and duke@435: // returns. twisti@3969: // twisti@3969: // Critical native functions are a shorthand for the use of twisti@3969: // GetPrimtiveArrayCritical and disallow the use of any other JNI twisti@3969: // functions. The wrapper is expected to unpack the arguments before twisti@3969: // passing them to the callee and perform checks before and after the twisti@3969: // native call to ensure that they GC_locker twisti@3969: // lock_critical/unlock_critical semantics are followed. Some other twisti@3969: // parts of JNI setup are skipped like the tear down of the JNI handle twisti@3969: // block and the check for pending exceptions it's impossible for them twisti@3969: // to be thrown. 
twisti@3969: // twisti@3969: // They are roughly structured like this: twisti@3969: // if (GC_locker::needs_gc()) twisti@3969: // SharedRuntime::block_for_jni_critical(); twisti@3969: // tranistion to thread_in_native twisti@3969: // unpack arrray arguments and call native entry point twisti@3969: // check for safepoint in progress twisti@3969: // check if any thread suspend flags are set twisti@3969: // call into JVM and possible unlock the JNI critical twisti@3969: // if a GC was suppressed while in the critical native. twisti@3969: // transition back to thread_in_Java twisti@3969: // return to caller twisti@3969: // duke@435: nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, duke@435: methodHandle method, twisti@2687: int compile_id, duke@435: int total_in_args, duke@435: int comp_args_on_stack, // in VMRegStackSlots twisti@3969: BasicType* in_sig_bt, twisti@3969: VMRegPair* in_regs, duke@435: BasicType ret_type) { twisti@3969: if (method->is_method_handle_intrinsic()) { twisti@3969: vmIntrinsics::ID iid = method->intrinsic_id(); twisti@3969: intptr_t start = (intptr_t)__ pc(); twisti@3969: int vep_offset = ((intptr_t)__ pc()) - start; twisti@3969: gen_special_dispatch(masm, twisti@3969: total_in_args, twisti@3969: comp_args_on_stack, twisti@3969: method->intrinsic_id(), twisti@3969: in_sig_bt, twisti@3969: in_regs); twisti@3969: int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period twisti@3969: __ flush(); twisti@3969: int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually twisti@3969: return nmethod::new_native_nmethod(method, twisti@3969: compile_id, twisti@3969: masm->code(), twisti@3969: vep_offset, twisti@3969: frame_complete, twisti@3969: stack_slots / VMRegImpl::slots_per_word, twisti@3969: in_ByteSize(-1), twisti@3969: in_ByteSize(-1), twisti@3969: (OopMapSet*)NULL); twisti@3969: } never@3500: bool is_critical_native = true; never@3500: address native_func = 
method->critical_native_function(); never@3500: if (native_func == NULL) { never@3500: native_func = method->native_function(); never@3500: is_critical_native = false; never@3500: } never@3500: assert(native_func != NULL, "must have function"); duke@435: duke@435: // Native nmethod wrappers never take possesion of the oop arguments. duke@435: // So the caller will gc the arguments. The only thing we need an duke@435: // oopMap for is if the call is static duke@435: // duke@435: // An OopMap for lock (and class if static), and one for the VM call itself duke@435: OopMapSet *oop_maps = new OopMapSet(); duke@435: intptr_t start = (intptr_t)__ pc(); duke@435: duke@435: // First thing make an ic check to see if we should even be here duke@435: { duke@435: Label L; duke@435: const Register temp_reg = G3_scratch; twisti@1162: AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); duke@435: __ verify_oop(O0); coleenp@548: __ load_klass(O0, temp_reg); kvn@3037: __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L); duke@435: twisti@1162: __ jump_to(ic_miss, temp_reg); duke@435: __ delayed()->nop(); duke@435: __ align(CodeEntryAlignment); duke@435: __ bind(L); duke@435: } duke@435: duke@435: int vep_offset = ((intptr_t)__ pc()) - start; duke@435: duke@435: #ifdef COMPILER1 duke@435: if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { duke@435: // Object.hashCode can pull the hashCode from the header word duke@435: // instead of doing a full VM transition once it's been computed. duke@435: // Since hashCode is usually polymorphic at call sites we can't do duke@435: // this optimization at the call site without a lot of work. 
duke@435: Label slowCase; duke@435: Register receiver = O0; duke@435: Register result = O0; duke@435: Register header = G3_scratch; duke@435: Register hash = G3_scratch; // overwrite header value with hash value duke@435: Register mask = G1; // to get hash field from header duke@435: duke@435: // Read the header and build a mask to get its hash field. Give up if the object is not unlocked. duke@435: // We depend on hash_mask being at most 32 bits and avoid the use of duke@435: // hash_mask_in_place because it could be larger than 32 bits in a 64-bit duke@435: // vm: see markOop.hpp. duke@435: __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header); duke@435: __ sethi(markOopDesc::hash_mask, mask); duke@435: __ btst(markOopDesc::unlocked_value, header); duke@435: __ br(Assembler::zero, false, Assembler::pn, slowCase); duke@435: if (UseBiasedLocking) { duke@435: // Check if biased and fall through to runtime if so duke@435: __ delayed()->nop(); duke@435: __ btst(markOopDesc::biased_lock_bit_in_place, header); duke@435: __ br(Assembler::notZero, false, Assembler::pn, slowCase); duke@435: } duke@435: __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask); duke@435: duke@435: // Check for a valid (non-zero) hash code and get its value. duke@435: #ifdef _LP64 duke@435: __ srlx(header, markOopDesc::hash_shift, hash); duke@435: #else duke@435: __ srl(header, markOopDesc::hash_shift, hash); duke@435: #endif duke@435: __ andcc(hash, mask, hash); duke@435: __ br(Assembler::equal, false, Assembler::pn, slowCase); duke@435: __ delayed()->nop(); duke@435: duke@435: // leaf return. duke@435: __ retl(); duke@435: __ delayed()->mov(hash, result); duke@435: __ bind(slowCase); duke@435: } duke@435: #endif // COMPILER1 duke@435: duke@435: duke@435: // We have received a description of where all the java arg are located duke@435: // on entry to the wrapper. We need to convert these args to where duke@435: // the jni function will expect them. 
To figure out where they go duke@435: // we convert the java signature to a C signature by inserting duke@435: // the hidden arguments as arg[0] and possibly arg[1] (static method) duke@435: never@3500: int total_c_args = total_in_args; never@3500: int total_save_slots = 6 * VMRegImpl::slots_per_word; never@3500: if (!is_critical_native) { never@3500: total_c_args += 1; never@3500: if (method->is_static()) { never@3500: total_c_args++; never@3500: } never@3500: } else { never@3500: for (int i = 0; i < total_in_args; i++) { never@3500: if (in_sig_bt[i] == T_ARRAY) { never@3500: // These have to be saved and restored across the safepoint never@3500: total_c_args++; never@3500: } never@3500: } duke@435: } duke@435: duke@435: BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); never@3500: VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); never@3500: BasicType* in_elem_bt = NULL; duke@435: duke@435: int argc = 0; never@3500: if (!is_critical_native) { never@3500: out_sig_bt[argc++] = T_ADDRESS; never@3500: if (method->is_static()) { never@3500: out_sig_bt[argc++] = T_OBJECT; never@3500: } never@3500: never@3500: for (int i = 0; i < total_in_args ; i++ ) { never@3500: out_sig_bt[argc++] = in_sig_bt[i]; never@3500: } never@3500: } else { never@3500: Thread* THREAD = Thread::current(); never@3500: in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); never@3500: SignatureStream ss(method->signature()); never@3500: for (int i = 0; i < total_in_args ; i++ ) { never@3500: if (in_sig_bt[i] == T_ARRAY) { never@3500: // Arrays are passed as int, elem* pair never@3500: out_sig_bt[argc++] = T_INT; never@3500: out_sig_bt[argc++] = T_ADDRESS; never@3500: Symbol* atype = ss.as_symbol(CHECK_NULL); never@3500: const char* at = atype->as_C_string(); never@3500: if (strlen(at) == 2) { never@3500: assert(at[0] == '[', "must be"); never@3500: switch (at[1]) { never@3500: case 'B': in_elem_bt[i] = T_BYTE; break; never@3500: case 'C': in_elem_bt[i] = 
T_CHAR; break; never@3500: case 'D': in_elem_bt[i] = T_DOUBLE; break; never@3500: case 'F': in_elem_bt[i] = T_FLOAT; break; never@3500: case 'I': in_elem_bt[i] = T_INT; break; never@3500: case 'J': in_elem_bt[i] = T_LONG; break; never@3500: case 'S': in_elem_bt[i] = T_SHORT; break; never@3500: case 'Z': in_elem_bt[i] = T_BOOLEAN; break; never@3500: default: ShouldNotReachHere(); never@3500: } never@3500: } never@3500: } else { never@3500: out_sig_bt[argc++] = in_sig_bt[i]; never@3500: in_elem_bt[i] = T_VOID; never@3500: } never@3500: if (in_sig_bt[i] != T_VOID) { never@3500: assert(in_sig_bt[i] == ss.type(), "must match"); never@3500: ss.next(); never@3500: } never@3500: } duke@435: } duke@435: duke@435: // Now figure out where the args must be stored and how much stack space duke@435: // they require (neglecting out_preserve_stack_slots but space for storing duke@435: // the 1st six register arguments). It's weird see int_stk_helper. duke@435: // duke@435: int out_arg_slots; duke@435: out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); duke@435: never@3500: if (is_critical_native) { never@3500: // Critical natives may have to call out so they need a save area never@3500: // for register arguments. 
never@3500: int double_slots = 0; never@3500: int single_slots = 0; never@3500: for ( int i = 0; i < total_in_args; i++) { never@3500: if (in_regs[i].first()->is_Register()) { never@3500: const Register reg = in_regs[i].first()->as_Register(); never@3500: switch (in_sig_bt[i]) { never@3500: case T_ARRAY: never@3500: case T_BOOLEAN: never@3500: case T_BYTE: never@3500: case T_SHORT: never@3500: case T_CHAR: never@3500: case T_INT: assert(reg->is_in(), "don't need to save these"); break; never@3500: case T_LONG: if (reg->is_global()) double_slots++; break; never@3500: default: ShouldNotReachHere(); never@3500: } never@3500: } else if (in_regs[i].first()->is_FloatRegister()) { never@3500: switch (in_sig_bt[i]) { never@3500: case T_FLOAT: single_slots++; break; never@3500: case T_DOUBLE: double_slots++; break; never@3500: default: ShouldNotReachHere(); never@3500: } never@3500: } never@3500: } never@3500: total_save_slots = double_slots * 2 + single_slots; never@3500: } never@3500: duke@435: // Compute framesize for the wrapper. We need to handlize all oops in duke@435: // registers. We must create space for them here that is disjoint from duke@435: // the windowed save area because we have no control over when we might duke@435: // flush the window again and overwrite values that gc has since modified. duke@435: // (The live window race) duke@435: // duke@435: // We always just allocate 6 word for storing down these object. This allow duke@435: // us to simply record the base and use the Ireg number to decide which duke@435: // slot to use. (Note that the reg number is the inbound number not the duke@435: // outbound number). duke@435: // We must shuffle args to match the native convention, and include var-args space. duke@435: duke@435: // Calculate the total number of stack slots we will need. 
duke@435: duke@435: // First count the abi requirement plus all of the outgoing args duke@435: int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; duke@435: duke@435: // Now the space for the inbound oop handle area duke@435: never@3500: int oop_handle_offset = round_to(stack_slots, 2); never@3500: stack_slots += total_save_slots; duke@435: duke@435: // Now any space we need for handlizing a klass if static method duke@435: duke@435: int klass_slot_offset = 0; duke@435: int klass_offset = -1; duke@435: int lock_slot_offset = 0; duke@435: bool is_static = false; duke@435: duke@435: if (method->is_static()) { duke@435: klass_slot_offset = stack_slots; duke@435: stack_slots += VMRegImpl::slots_per_word; duke@435: klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; duke@435: is_static = true; duke@435: } duke@435: duke@435: // Plus a lock if needed duke@435: duke@435: if (method->is_synchronized()) { duke@435: lock_slot_offset = stack_slots; duke@435: stack_slots += VMRegImpl::slots_per_word; duke@435: } duke@435: duke@435: // Now a place to save return value or as a temporary for any gpr -> fpr moves duke@435: stack_slots += 2; duke@435: duke@435: // Ok The space we have allocated will look like: duke@435: // duke@435: // duke@435: // FP-> | | duke@435: // |---------------------| duke@435: // | 2 slots for moves | duke@435: // |---------------------| duke@435: // | lock box (if sync) | duke@435: // |---------------------| <- lock_slot_offset duke@435: // | klass (if static) | duke@435: // |---------------------| <- klass_slot_offset duke@435: // | oopHandle area | duke@435: // |---------------------| <- oop_handle_offset duke@435: // | outbound memory | duke@435: // | based arguments | duke@435: // | | duke@435: // |---------------------| duke@435: // | vararg area | duke@435: // |---------------------| duke@435: // | | duke@435: // SP-> | out_preserved_slots | duke@435: // duke@435: // duke@435: duke@435: duke@435: // Now compute 
actual number of stack words we need rounding to make duke@435: // stack properly aligned. duke@435: stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word); duke@435: duke@435: int stack_size = stack_slots * VMRegImpl::stack_slot_size; duke@435: duke@435: // Generate stack overflow check before creating frame duke@435: __ generate_stack_overflow_check(stack_size); duke@435: duke@435: // Generate a new frame for the wrapper. duke@435: __ save(SP, -stack_size, SP); duke@435: duke@435: int frame_complete = ((intptr_t)__ pc()) - start; duke@435: duke@435: __ verify_thread(); duke@435: never@3500: if (is_critical_native) { never@3500: check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, never@3500: oop_handle_offset, oop_maps, in_regs, in_sig_bt); never@3500: } duke@435: duke@435: // duke@435: // We immediately shuffle the arguments so that any vm call we have to duke@435: // make from here on out (sync slow path, jvmti, etc.) we will have duke@435: // captured the oops from our caller and have a valid oopMap for duke@435: // them. duke@435: duke@435: // ----------------- duke@435: // The Grand Shuffle duke@435: // duke@435: // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* duke@435: // (derived from JavaThread* which is in L7_thread_cache) and, if static, duke@435: // the class mirror instead of a receiver. This pretty much guarantees that duke@435: // register layout will not match. We ignore these extra arguments during duke@435: // the shuffle. The shuffle is described by the two calling convention duke@435: // vectors we have in our possession. We simply walk the java vector to duke@435: // get the source locations and the c vector to get the destinations. duke@435: // Because we have a new window and the argument registers are completely duke@435: // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about duke@435: // here. duke@435: duke@435: // This is a trick. 
We double the stack slots so we can claim duke@435: // the oops in the caller's frame. Since we are sure to have duke@435: // more args than the caller doubling is enough to make duke@435: // sure we can capture all the incoming oop args from the duke@435: // caller. duke@435: // duke@435: OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); duke@435: // Record sp-based slot for receiver on stack for non-static methods duke@435: int receiver_offset = -1; duke@435: duke@435: // We move the arguments backward because the floating point registers duke@435: // destination will always be to a register with a greater or equal register duke@435: // number or the stack. duke@435: duke@435: #ifdef ASSERT duke@435: bool reg_destroyed[RegisterImpl::number_of_registers]; duke@435: bool freg_destroyed[FloatRegisterImpl::number_of_registers]; duke@435: for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { duke@435: reg_destroyed[r] = false; duke@435: } duke@435: for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { duke@435: freg_destroyed[f] = false; duke@435: } duke@435: duke@435: #endif /* ASSERT */ duke@435: never@3500: for ( int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0 ; i--, c_arg-- ) { duke@435: duke@435: #ifdef ASSERT duke@435: if (in_regs[i].first()->is_Register()) { duke@435: assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!"); duke@435: } else if (in_regs[i].first()->is_FloatRegister()) { duke@435: assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!"); duke@435: } duke@435: if (out_regs[c_arg].first()->is_Register()) { duke@435: reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; duke@435: } else if (out_regs[c_arg].first()->is_FloatRegister()) { duke@435: freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true; duke@435: } duke@435: #endif /* ASSERT */ duke@435: duke@435: 
switch (in_sig_bt[i]) { duke@435: case T_ARRAY: never@3500: if (is_critical_native) { never@3500: unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg], out_regs[c_arg - 1]); never@3500: c_arg--; never@3500: break; never@3500: } duke@435: case T_OBJECT: never@3500: assert(!is_critical_native, "no oop arguments"); duke@435: object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], duke@435: ((i == 0) && (!is_static)), duke@435: &receiver_offset); duke@435: break; duke@435: case T_VOID: duke@435: break; duke@435: duke@435: case T_FLOAT: duke@435: float_move(masm, in_regs[i], out_regs[c_arg]); never@3500: break; duke@435: duke@435: case T_DOUBLE: duke@435: assert( i + 1 < total_in_args && duke@435: in_sig_bt[i + 1] == T_VOID && duke@435: out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); duke@435: double_move(masm, in_regs[i], out_regs[c_arg]); duke@435: break; duke@435: duke@435: case T_LONG : duke@435: long_move(masm, in_regs[i], out_regs[c_arg]); duke@435: break; duke@435: duke@435: case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); duke@435: duke@435: default: duke@435: move32_64(masm, in_regs[i], out_regs[c_arg]); duke@435: } duke@435: } duke@435: duke@435: // Pre-load a static method's oop into O1. Used both by locking code and duke@435: // the normal JNI call code. never@3500: if (method->is_static() && !is_critical_native) { duke@435: __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1); duke@435: duke@435: // Now handlize the static class mirror in O1. It's known not-null. 
duke@435: __ st_ptr(O1, SP, klass_offset + STACK_BIAS); duke@435: map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); duke@435: __ add(SP, klass_offset + STACK_BIAS, O1); duke@435: } duke@435: duke@435: duke@435: const Register L6_handle = L6; duke@435: duke@435: if (method->is_synchronized()) { never@3500: assert(!is_critical_native, "unhandled"); duke@435: __ mov(O1, L6_handle); duke@435: } duke@435: duke@435: // We have all of the arguments setup at this point. We MUST NOT touch any Oregs duke@435: // except O6/O7. So if we must call out we must push a new frame. We immediately duke@435: // push a new frame and flush the windows. duke@435: #ifdef _LP64 duke@435: intptr_t thepc = (intptr_t) __ pc(); duke@435: { duke@435: address here = __ pc(); duke@435: // Call the next instruction duke@435: __ call(here + 8, relocInfo::none); duke@435: __ delayed()->nop(); duke@435: } duke@435: #else duke@435: intptr_t thepc = __ load_pc_address(O7, 0); duke@435: #endif /* _LP64 */ duke@435: duke@435: // We use the same pc/oopMap repeatedly when we call out duke@435: oop_maps->add_gc_map(thepc - start, map); duke@435: duke@435: // O7 now has the pc loaded that we will use when we finally call to native. 
duke@435: duke@435: // Save thread in L7; it crosses a bunch of VM calls below duke@435: // Don't use save_thread because it smashes G2 and we merely duke@435: // want to save a copy duke@435: __ mov(G2_thread, L7_thread_cache); duke@435: duke@435: duke@435: // If we create an inner frame once is plenty duke@435: // when we create it we must also save G2_thread duke@435: bool inner_frame_created = false; duke@435: duke@435: // dtrace method entry support duke@435: { duke@435: SkipIfEqual skip_if( duke@435: masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); duke@435: // create inner frame duke@435: __ save_frame(0); duke@435: __ mov(G2_thread, L7_thread_cache); duke@435: __ set_oop_constant(JNIHandles::make_local(method()), O1); duke@435: __ call_VM_leaf(L7_thread_cache, duke@435: CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), duke@435: G2_thread, O1); duke@435: __ restore(); duke@435: } duke@435: dcubed@1045: // RedefineClasses() tracing support for obsolete method entry dcubed@1045: if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { dcubed@1045: // create inner frame dcubed@1045: __ save_frame(0); dcubed@1045: __ mov(G2_thread, L7_thread_cache); dcubed@1045: __ set_oop_constant(JNIHandles::make_local(method()), O1); dcubed@1045: __ call_VM_leaf(L7_thread_cache, dcubed@1045: CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), dcubed@1045: G2_thread, O1); dcubed@1045: __ restore(); dcubed@1045: } dcubed@1045: duke@435: // We are in the jni frame unless saved_frame is true in which case duke@435: // we are in one frame deeper (the "inner" frame). If we are in the duke@435: // "inner" frames the args are in the Iregs and if the jni frame then duke@435: // they are in the Oregs. duke@435: // If we ever need to go to the VM (for locking, jvmti) then duke@435: // we will always be in the "inner" frame. 
duke@435: duke@435: // Lock a synchronized method duke@435: int lock_offset = -1; // Set if locked duke@435: if (method->is_synchronized()) { duke@435: Register Roop = O1; duke@435: const Register L3_box = L3; duke@435: duke@435: create_inner_frame(masm, &inner_frame_created); duke@435: duke@435: __ ld_ptr(I1, 0, O1); duke@435: Label done; duke@435: duke@435: lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size); duke@435: __ add(FP, lock_offset+STACK_BIAS, L3_box); duke@435: #ifdef ASSERT duke@435: if (UseBiasedLocking) { duke@435: // making the box point to itself will make it clear it went unused duke@435: // but also be obviously invalid duke@435: __ st_ptr(L3_box, L3_box, 0); duke@435: } duke@435: #endif // ASSERT duke@435: // duke@435: // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch duke@435: // duke@435: __ compiler_lock_object(Roop, L1, L3_box, L2); duke@435: __ br(Assembler::equal, false, Assembler::pt, done); duke@435: __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box); duke@435: duke@435: duke@435: // None of the above fast optimizations worked so we have to get into the duke@435: // slow case of monitor enter. Inline a special case of call_VM that duke@435: // disallows any pending_exception. duke@435: __ mov(Roop, O0); // Need oop in O0 duke@435: __ mov(L3_box, O1); duke@435: duke@435: // Record last_Java_sp, in case the VM code releases the JVM lock. 
duke@435: duke@435: __ set_last_Java_frame(FP, I7); duke@435: duke@435: // do the call duke@435: __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); duke@435: __ delayed()->mov(L7_thread_cache, O2); duke@435: duke@435: __ restore_thread(L7_thread_cache); // restore G2_thread duke@435: __ reset_last_Java_frame(); duke@435: duke@435: #ifdef ASSERT duke@435: { Label L; duke@435: __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); kvn@3037: __ br_null_short(O0, Assembler::pt, L); duke@435: __ stop("no pending exception allowed on exit from IR::monitorenter"); duke@435: __ bind(L); duke@435: } duke@435: #endif duke@435: __ bind(done); duke@435: } duke@435: duke@435: duke@435: // Finally just about ready to make the JNI call duke@435: duke@435: __ flush_windows(); duke@435: if (inner_frame_created) { duke@435: __ restore(); duke@435: } else { duke@435: // Store only what we need from this frame duke@435: // QQQ I think that non-v9 (like we care) we don't need these saves duke@435: // either as the flush traps and the current window goes too. duke@435: __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS); duke@435: __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS); duke@435: } duke@435: duke@435: // get JNIEnv* which is first argument to native never@3500: if (!is_critical_native) { never@3500: __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0); never@3500: } duke@435: duke@435: // Use that pc we placed in O7 a while back as the current frame anchor duke@435: __ set_last_Java_frame(SP, O7); duke@435: never@3500: // We flushed the windows ages ago now mark them as flushed before transitioning. never@3500: __ set(JavaFrameAnchor::flushed, G3_scratch); never@3500: __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset()); never@3500: duke@435: // Transition from _thread_in_Java to _thread_in_native. 
duke@435: __ set(_thread_in_native, G3_scratch); duke@435: duke@435: #ifdef _LP64 never@3500: AddressLiteral dest(native_func); duke@435: __ relocate(relocInfo::runtime_call_type); twisti@1162: __ jumpl_to(dest, O7, O7); duke@435: #else never@3500: __ call(native_func, relocInfo::runtime_call_type); duke@435: #endif never@3500: __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); duke@435: duke@435: __ restore_thread(L7_thread_cache); // restore G2_thread duke@435: duke@435: // Unpack native results. For int-types, we do any needed sign-extension duke@435: // and move things into I0. The return value there will survive any VM duke@435: // calls for blocking or unlocking. An FP or OOP result (handle) is done duke@435: // specially in the slow-path code. duke@435: switch (ret_type) { duke@435: case T_VOID: break; // Nothing to do! duke@435: case T_FLOAT: break; // Got it where we want it (unless slow-path) duke@435: case T_DOUBLE: break; // Got it where we want it (unless slow-path) duke@435: // In 64 bits build result is in O0, in O0, O1 in 32bit build duke@435: case T_LONG: duke@435: #ifndef _LP64 duke@435: __ mov(O1, I1); duke@435: #endif duke@435: // Fall thru duke@435: case T_OBJECT: // Really a handle duke@435: case T_ARRAY: duke@435: case T_INT: duke@435: __ mov(O0, I0); duke@435: break; duke@435: case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false duke@435: case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, I0); break; duke@435: case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, I0); break; // cannot use and3, 0xFFFF too big as immediate value! duke@435: case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, I0); break; duke@435: break; // Cannot de-handlize until after reclaiming jvm_lock duke@435: default: duke@435: ShouldNotReachHere(); duke@435: } duke@435: never@3500: Label after_transition; duke@435: // must we block? 
duke@435: duke@435: // Block, if necessary, before resuming in _thread_in_Java state. duke@435: // In order for GC to work, don't clear the last_Java_sp until after blocking. duke@435: { Label no_block; twisti@1162: AddressLiteral sync_state(SafepointSynchronize::address_of_state()); duke@435: duke@435: // Switch thread to "native transition" state before reading the synchronization state. duke@435: // This additional state is necessary because reading and testing the synchronization duke@435: // state is not atomic w.r.t. GC, as this scenario demonstrates: duke@435: // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. duke@435: // VM thread changes sync state to synchronizing and suspends threads for GC. duke@435: // Thread A is resumed to finish this native method, but doesn't block here since it duke@435: // didn't see any synchronization is progress, and escapes. duke@435: __ set(_thread_in_native_trans, G3_scratch); twisti@1162: __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); duke@435: if(os::is_MP()) { duke@435: if (UseMembar) { duke@435: // Force this write out before the read below duke@435: __ membar(Assembler::StoreLoad); duke@435: } else { duke@435: // Write serialization page so VM thread can do a pseudo remote membar. duke@435: // We use the current thread pointer to calculate a thread specific duke@435: // offset to write to within the page. This minimizes bus traffic duke@435: // due to cache line collision. 
duke@435: __ serialize_memory(G2_thread, G1_scratch, G3_scratch); duke@435: } duke@435: } duke@435: __ load_contents(sync_state, G3_scratch); duke@435: __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized); duke@435: duke@435: Label L; twisti@1162: Address suspend_state(G2_thread, JavaThread::suspend_flags_offset()); duke@435: __ br(Assembler::notEqual, false, Assembler::pn, L); twisti@1162: __ delayed()->ld(suspend_state, G3_scratch); kvn@3037: __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block); duke@435: __ bind(L); duke@435: duke@435: // Block. Save any potential method result value before the operation and duke@435: // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this duke@435: // lets us share the oopMap we used when we went native rather the create duke@435: // a distinct one for this pc duke@435: // duke@435: save_native_result(masm, ret_type, stack_slots); never@3500: if (!is_critical_native) { never@3500: __ call_VM_leaf(L7_thread_cache, never@3500: CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), never@3500: G2_thread); never@3500: } else { never@3500: __ call_VM_leaf(L7_thread_cache, never@3500: CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), never@3500: G2_thread); never@3500: } duke@435: duke@435: // Restore any method result value duke@435: restore_native_result(masm, ret_type, stack_slots); never@3500: never@3500: if (is_critical_native) { never@3500: // The call above performed the transition to thread_in_Java so never@3500: // skip the transition logic below. never@3500: __ ba(after_transition); never@3500: __ delayed()->nop(); never@3500: } never@3500: duke@435: __ bind(no_block); duke@435: } duke@435: duke@435: // thread state is thread_in_native_trans. Any safepoint blocking has already duke@435: // happened so we can now change state to _thread_in_Java. 
duke@435: __ set(_thread_in_Java, G3_scratch); twisti@1162: __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); never@3500: __ bind(after_transition); duke@435: duke@435: Label no_reguard; twisti@1162: __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch); kvn@3037: __ cmp_and_br_short(G3_scratch, JavaThread::stack_guard_yellow_disabled, Assembler::notEqual, Assembler::pt, no_reguard); duke@435: duke@435: save_native_result(masm, ret_type, stack_slots); duke@435: __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); duke@435: __ delayed()->nop(); duke@435: duke@435: __ restore_thread(L7_thread_cache); // restore G2_thread duke@435: restore_native_result(masm, ret_type, stack_slots); duke@435: duke@435: __ bind(no_reguard); duke@435: duke@435: // Handle possible exception (will unlock if necessary) duke@435: duke@435: // native result if any is live in freg or I0 (and I1 if long and 32bit vm) duke@435: duke@435: // Unlock duke@435: if (method->is_synchronized()) { duke@435: Label done; duke@435: Register I2_ex_oop = I2; duke@435: const Register L3_box = L3; duke@435: // Get locked oop from the handle we passed to jni duke@435: __ ld_ptr(L6_handle, 0, L4); duke@435: __ add(SP, lock_offset+STACK_BIAS, L3_box); duke@435: // Must save pending exception around the slow-path VM call. Since it's a duke@435: // leaf call, the pending exception (if any) can be kept in a register. duke@435: __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop); duke@435: // Now unlock duke@435: // (Roop, Rmark, Rbox, Rscratch) duke@435: __ compiler_unlock_object(L4, L1, L3_box, L2); duke@435: __ br(Assembler::equal, false, Assembler::pt, done); duke@435: __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box); duke@435: duke@435: // save and restore any potential method result value around the unlocking duke@435: // operation. Will save in I0 (or stack for FP returns). 
duke@435: save_native_result(masm, ret_type, stack_slots); duke@435: duke@435: // Must clear pending-exception before re-entering the VM. Since this is duke@435: // a leaf call, pending-exception-oop can be safely kept in a register. duke@435: __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset())); duke@435: duke@435: // slow case of monitor enter. Inline a special case of call_VM that duke@435: // disallows any pending_exception. duke@435: __ mov(L3_box, O1); duke@435: duke@435: __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type); duke@435: __ delayed()->mov(L4, O0); // Need oop in O0 duke@435: duke@435: __ restore_thread(L7_thread_cache); // restore G2_thread duke@435: duke@435: #ifdef ASSERT duke@435: { Label L; duke@435: __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); kvn@3037: __ br_null_short(O0, Assembler::pt, L); duke@435: __ stop("no pending exception allowed on exit from IR::monitorexit"); duke@435: __ bind(L); duke@435: } duke@435: #endif duke@435: restore_native_result(masm, ret_type, stack_slots); duke@435: // check_forward_pending_exception jump to forward_exception if any pending duke@435: // exception is set. The forward_exception routine expects to see the duke@435: // exception in pending_exception and not in a register. Kind of clumsy, duke@435: // since all folks who branch to forward_exception must have tested duke@435: // pending_exception first and hence have it in a register already. 
duke@435: __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset())); duke@435: __ bind(done); duke@435: } duke@435: duke@435: // Tell dtrace about this method exit duke@435: { duke@435: SkipIfEqual skip_if( duke@435: masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); duke@435: save_native_result(masm, ret_type, stack_slots); duke@435: __ set_oop_constant(JNIHandles::make_local(method()), O1); duke@435: __ call_VM_leaf(L7_thread_cache, duke@435: CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), duke@435: G2_thread, O1); duke@435: restore_native_result(masm, ret_type, stack_slots); duke@435: } duke@435: duke@435: // Clear "last Java frame" SP and PC. duke@435: __ verify_thread(); // G2_thread must be correct duke@435: __ reset_last_Java_frame(); duke@435: duke@435: // Unpack oop result duke@435: if (ret_type == T_OBJECT || ret_type == T_ARRAY) { duke@435: Label L; duke@435: __ addcc(G0, I0, G0); duke@435: __ brx(Assembler::notZero, true, Assembler::pt, L); duke@435: __ delayed()->ld_ptr(I0, 0, I0); duke@435: __ mov(G0, I0); duke@435: __ bind(L); duke@435: __ verify_oop(I0); duke@435: } duke@435: never@3500: if (!is_critical_native) { never@3500: // reset handle block never@3500: __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5); never@3500: __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes()); never@3500: never@3500: __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch); never@3500: check_forward_pending_exception(masm, G3_scratch); never@3500: } duke@435: duke@435: duke@435: // Return duke@435: duke@435: #ifndef _LP64 duke@435: if (ret_type == T_LONG) { duke@435: duke@435: // Must leave proper result in O0,O1 and G1 (c2/tiered only) duke@435: __ sllx(I0, 32, G1); // Shift bits into high G1 duke@435: __ srl (I1, 0, I1); // Zero extend O1 (harmless?) 
duke@435: __ or3 (I1, G1, G1); // OR 64 bits into G1 duke@435: } duke@435: #endif duke@435: duke@435: __ ret(); duke@435: __ delayed()->restore(); duke@435: duke@435: __ flush(); duke@435: duke@435: nmethod *nm = nmethod::new_native_nmethod(method, twisti@2687: compile_id, duke@435: masm->code(), duke@435: vep_offset, duke@435: frame_complete, duke@435: stack_slots / VMRegImpl::slots_per_word, duke@435: (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), duke@435: in_ByteSize(lock_offset), duke@435: oop_maps); never@3500: never@3500: if (is_critical_native) { never@3500: nm->set_lazy_critical_native(true); never@3500: } duke@435: return nm; duke@435: duke@435: } duke@435: kamg@551: #ifdef HAVE_DTRACE_H kamg@551: // --------------------------------------------------------------------------- kamg@551: // Generate a dtrace nmethod for a given signature. The method takes arguments kamg@551: // in the Java compiled code convention, marshals them to the native kamg@551: // abi and then leaves nops at the position you would expect to call a native kamg@551: // function. When the probe is enabled the nops are replaced with a trap kamg@551: // instruction that dtrace inserts and the trace will cause a notification kamg@551: // to dtrace. kamg@551: // kamg@551: // The probes are only able to take primitive types and java/lang/String as kamg@551: // arguments. No other java types are allowed. Strings are converted to utf8 kamg@551: // strings so that from dtrace point of view java strings are converted to C kamg@551: // strings. There is an arbitrary fixed limit on the total space that a method kamg@551: // can use for converting the strings. (256 chars per string in the signature). kamg@551: // So any java string larger then this is truncated. 
kamg@551: kamg@551: static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; kamg@551: static bool offsets_initialized = false; kamg@551: kamg@551: nmethod *SharedRuntime::generate_dtrace_nmethod( kamg@551: MacroAssembler *masm, methodHandle method) { kamg@551: kamg@551: kamg@551: // generate_dtrace_nmethod is guarded by a mutex so we are sure to kamg@551: // be single threaded in this method. kamg@551: assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); kamg@551: kamg@551: // Fill in the signature array, for the calling-convention call. kamg@551: int total_args_passed = method->size_of_parameters(); kamg@551: kamg@551: BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); kamg@551: VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); kamg@551: kamg@551: // The signature we are going to use for the trap that dtrace will see kamg@551: // java/lang/String is converted. We drop "this" and any other object kamg@551: // is converted to NULL. (A one-slot java/lang/Long object reference kamg@551: // is converted to a two-slot long, which is why we double the allocation). 
kamg@551: BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); kamg@551: VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); kamg@551: kamg@551: int i=0; kamg@551: int total_strings = 0; kamg@551: int first_arg_to_pass = 0; kamg@551: int total_c_args = 0; kamg@551: kamg@551: // Skip the receiver as dtrace doesn't want to see it kamg@551: if( !method->is_static() ) { kamg@551: in_sig_bt[i++] = T_OBJECT; kamg@551: first_arg_to_pass = 1; kamg@551: } kamg@551: kamg@551: SignatureStream ss(method->signature()); kamg@551: for ( ; !ss.at_return_type(); ss.next()) { kamg@551: BasicType bt = ss.type(); kamg@551: in_sig_bt[i++] = bt; // Collect remaining bits of signature kamg@551: out_sig_bt[total_c_args++] = bt; kamg@551: if( bt == T_OBJECT) { coleenp@2497: Symbol* s = ss.as_symbol_or_null(); kamg@551: if (s == vmSymbols::java_lang_String()) { kamg@551: total_strings++; kamg@551: out_sig_bt[total_c_args-1] = T_ADDRESS; kamg@551: } else if (s == vmSymbols::java_lang_Boolean() || kamg@551: s == vmSymbols::java_lang_Byte()) { kamg@551: out_sig_bt[total_c_args-1] = T_BYTE; kamg@551: } else if (s == vmSymbols::java_lang_Character() || kamg@551: s == vmSymbols::java_lang_Short()) { kamg@551: out_sig_bt[total_c_args-1] = T_SHORT; kamg@551: } else if (s == vmSymbols::java_lang_Integer() || kamg@551: s == vmSymbols::java_lang_Float()) { kamg@551: out_sig_bt[total_c_args-1] = T_INT; kamg@551: } else if (s == vmSymbols::java_lang_Long() || kamg@551: s == vmSymbols::java_lang_Double()) { kamg@551: out_sig_bt[total_c_args-1] = T_LONG; kamg@551: out_sig_bt[total_c_args++] = T_VOID; kamg@551: } kamg@551: } else if ( bt == T_LONG || bt == T_DOUBLE ) { kamg@551: in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots kamg@551: // We convert double to long kamg@551: out_sig_bt[total_c_args-1] = T_LONG; kamg@551: out_sig_bt[total_c_args++] = T_VOID; kamg@551: } else if ( bt == T_FLOAT) { kamg@551: // We convert float to int kamg@551: 
out_sig_bt[total_c_args-1] = T_INT; kamg@551: } kamg@551: } kamg@551: kamg@551: assert(i==total_args_passed, "validly parsed signature"); kamg@551: kamg@551: // Now get the compiled-Java layout as input arguments kamg@551: int comp_args_on_stack; kamg@551: comp_args_on_stack = SharedRuntime::java_calling_convention( kamg@551: in_sig_bt, in_regs, total_args_passed, false); kamg@551: kamg@551: // We have received a description of where all the java arg are located kamg@551: // on entry to the wrapper. We need to convert these args to where kamg@551: // the a native (non-jni) function would expect them. To figure out kamg@551: // where they go we convert the java signature to a C signature and remove kamg@551: // T_VOID for any long/double we might have received. kamg@551: kamg@551: kamg@551: // Now figure out where the args must be stored and how much stack space kamg@551: // they require (neglecting out_preserve_stack_slots but space for storing kamg@551: // the 1st six register arguments). It's weird see int_stk_helper. kamg@551: // kamg@551: int out_arg_slots; kamg@551: out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); kamg@551: kamg@551: // Calculate the total number of stack slots we will need. 
kamg@551: kamg@551: // First count the abi requirement plus all of the outgoing args kamg@551: int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; kamg@551: kamg@551: // Plus a temp for possible converion of float/double/long register args kamg@551: kamg@551: int conversion_temp = stack_slots; kamg@551: stack_slots += 2; kamg@551: kamg@551: kamg@551: // Now space for the string(s) we must convert kamg@551: kamg@551: int string_locs = stack_slots; kamg@551: stack_slots += total_strings * kamg@551: (max_dtrace_string_size / VMRegImpl::stack_slot_size); kamg@551: kamg@551: // Ok The space we have allocated will look like: kamg@551: // kamg@551: // kamg@551: // FP-> | | kamg@551: // |---------------------| kamg@551: // | string[n] | kamg@551: // |---------------------| <- string_locs[n] kamg@551: // | string[n-1] | kamg@551: // |---------------------| <- string_locs[n-1] kamg@551: // | ... | kamg@551: // | ... | kamg@551: // |---------------------| <- string_locs[1] kamg@551: // | string[0] | kamg@551: // |---------------------| <- string_locs[0] kamg@551: // | temp | kamg@551: // |---------------------| <- conversion_temp kamg@551: // | outbound memory | kamg@551: // | based arguments | kamg@551: // | | kamg@551: // |---------------------| kamg@551: // | | kamg@551: // SP-> | out_preserved_slots | kamg@551: // kamg@551: // kamg@551: kamg@551: // Now compute actual number of stack words we need rounding to make kamg@551: // stack properly aligned. 
kamg@551: stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); kamg@551: kamg@551: int stack_size = stack_slots * VMRegImpl::stack_slot_size; kamg@551: kamg@551: intptr_t start = (intptr_t)__ pc(); kamg@551: kamg@551: // First thing make an ic check to see if we should even be here kamg@551: kamg@551: { kamg@551: Label L; kamg@551: const Register temp_reg = G3_scratch; twisti@1162: AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); kamg@551: __ verify_oop(O0); kamg@551: __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); kvn@3037: __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L); kamg@551: twisti@1162: __ jump_to(ic_miss, temp_reg); kamg@551: __ delayed()->nop(); kamg@551: __ align(CodeEntryAlignment); kamg@551: __ bind(L); kamg@551: } kamg@551: kamg@551: int vep_offset = ((intptr_t)__ pc()) - start; kamg@551: kamg@551: kamg@551: // The instruction at the verified entry point must be 5 bytes or longer kamg@551: // because it can be patched on the fly by make_non_entrant. The stack bang kamg@551: // instruction fits that requirement. kamg@551: kamg@551: // Generate stack overflow check before creating frame kamg@551: __ generate_stack_overflow_check(stack_size); kamg@551: kamg@551: assert(((intptr_t)__ pc() - start - vep_offset) >= 5, kamg@551: "valid size for make_non_entrant"); kamg@551: kamg@551: // Generate a new frame for the wrapper. kamg@551: __ save(SP, -stack_size, SP); kamg@551: kamg@551: // Frame is now completed as far a size and linkage. 
kamg@551: kamg@551: int frame_complete = ((intptr_t)__ pc()) - start; kamg@551: kamg@551: #ifdef ASSERT kamg@551: bool reg_destroyed[RegisterImpl::number_of_registers]; kamg@551: bool freg_destroyed[FloatRegisterImpl::number_of_registers]; kamg@551: for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { kamg@551: reg_destroyed[r] = false; kamg@551: } kamg@551: for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { kamg@551: freg_destroyed[f] = false; kamg@551: } kamg@551: kamg@551: #endif /* ASSERT */ kamg@551: kamg@551: VMRegPair zero; kamg@611: const Register g0 = G0; // without this we get a compiler warning (why??) kamg@611: zero.set2(g0->as_VMReg()); kamg@551: kamg@551: int c_arg, j_arg; kamg@551: kamg@551: Register conversion_off = noreg; kamg@551: kamg@551: for (j_arg = first_arg_to_pass, c_arg = 0 ; kamg@551: j_arg < total_args_passed ; j_arg++, c_arg++ ) { kamg@551: kamg@551: VMRegPair src = in_regs[j_arg]; kamg@551: VMRegPair dst = out_regs[c_arg]; kamg@551: kamg@551: #ifdef ASSERT kamg@551: if (src.first()->is_Register()) { kamg@551: assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); kamg@551: } else if (src.first()->is_FloatRegister()) { kamg@551: assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( kamg@551: FloatRegisterImpl::S)], "ack!"); kamg@551: } kamg@551: if (dst.first()->is_Register()) { kamg@551: reg_destroyed[dst.first()->as_Register()->encoding()] = true; kamg@551: } else if (dst.first()->is_FloatRegister()) { kamg@551: freg_destroyed[dst.first()->as_FloatRegister()->encoding( kamg@551: FloatRegisterImpl::S)] = true; kamg@551: } kamg@551: #endif /* ASSERT */ kamg@551: kamg@551: switch (in_sig_bt[j_arg]) { kamg@551: case T_ARRAY: kamg@551: case T_OBJECT: kamg@551: { kamg@551: if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || kamg@551: out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { kamg@551: // need to unbox a one-slot value kamg@551: Register in_reg = 
L0; kamg@551: Register tmp = L2; kamg@551: if ( src.first()->is_reg() ) { kamg@551: in_reg = src.first()->as_Register(); kamg@551: } else { kamg@551: assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), kamg@551: "must be"); kamg@551: __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); kamg@551: } kamg@551: // If the final destination is an acceptable register kamg@551: if ( dst.first()->is_reg() ) { kamg@551: if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { kamg@551: tmp = dst.first()->as_Register(); kamg@551: } kamg@551: } kamg@551: kamg@551: Label skipUnbox; kamg@551: if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { kamg@551: __ mov(G0, tmp->successor()); kamg@551: } kamg@551: __ br_null(in_reg, true, Assembler::pn, skipUnbox); kamg@551: __ delayed()->mov(G0, tmp); kamg@551: kvn@600: BasicType bt = out_sig_bt[c_arg]; kvn@600: int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); kvn@600: switch (bt) { kamg@551: case T_BYTE: kamg@551: __ ldub(in_reg, box_offset, tmp); break; kamg@551: case T_SHORT: kamg@551: __ lduh(in_reg, box_offset, tmp); break; kamg@551: case T_INT: kamg@551: __ ld(in_reg, box_offset, tmp); break; kamg@551: case T_LONG: kamg@551: __ ld_long(in_reg, box_offset, tmp); break; kamg@551: default: ShouldNotReachHere(); kamg@551: } kamg@551: kamg@551: __ bind(skipUnbox); kamg@551: // If tmp wasn't final destination copy to final destination kamg@551: if (tmp == L2) { kamg@551: VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); kamg@551: if (out_sig_bt[c_arg] == T_LONG) { kamg@551: long_move(masm, tmp_as_VM, dst); kamg@551: } else { kamg@551: move32_64(masm, tmp_as_VM, out_regs[c_arg]); kamg@551: } kamg@551: } kamg@551: if (out_sig_bt[c_arg] == T_LONG) { kamg@551: assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); kamg@551: ++c_arg; // move over the T_VOID to keep the loop indices in sync kamg@551: } kamg@551: } else if (out_sig_bt[c_arg] == T_ADDRESS) { kamg@551: Register s = kamg@551: 
src.first()->is_reg() ? src.first()->as_Register() : L2; kamg@551: Register d = kamg@551: dst.first()->is_reg() ? dst.first()->as_Register() : L2; kamg@551: kamg@551: // We store the oop now so that the conversion pass can reach kamg@551: // while in the inner frame. This will be the only store if kamg@551: // the oop is NULL. kamg@551: if (s != L2) { kamg@551: // src is register kamg@551: if (d != L2) { kamg@551: // dst is register kamg@551: __ mov(s, d); kamg@551: } else { kamg@551: assert(Assembler::is_simm13(reg2offset(dst.first()) + kamg@551: STACK_BIAS), "must be"); kamg@551: __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); kamg@551: } kamg@551: } else { kamg@551: // src not a register kamg@551: assert(Assembler::is_simm13(reg2offset(src.first()) + kamg@551: STACK_BIAS), "must be"); kamg@551: __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); kamg@551: if (d == L2) { kamg@551: assert(Assembler::is_simm13(reg2offset(dst.first()) + kamg@551: STACK_BIAS), "must be"); kamg@551: __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); kamg@551: } kamg@551: } kamg@551: } else if (out_sig_bt[c_arg] != T_VOID) { kamg@551: // Convert the arg to NULL kamg@551: if (dst.first()->is_reg()) { kamg@551: __ mov(G0, dst.first()->as_Register()); kamg@551: } else { kamg@551: assert(Assembler::is_simm13(reg2offset(dst.first()) + kamg@551: STACK_BIAS), "must be"); kamg@551: __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); kamg@551: } kamg@551: } kamg@551: } kamg@551: break; kamg@551: case T_VOID: kamg@551: break; kamg@551: kamg@551: case T_FLOAT: kamg@551: if (src.first()->is_stack()) { kamg@551: // Stack to stack/reg is simple kamg@551: move32_64(masm, src, dst); kamg@551: } else { kamg@551: if (dst.first()->is_reg()) { kamg@551: // freg -> reg kamg@551: int off = kamg@551: STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; kamg@551: Register d = dst.first()->as_Register(); kamg@551: if (Assembler::is_simm13(off)) { kamg@551: __ 
stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), kamg@551: SP, off); kamg@551: __ ld(SP, off, d); kamg@551: } else { kamg@551: if (conversion_off == noreg) { kamg@551: __ set(off, L6); kamg@551: conversion_off = L6; kamg@551: } kamg@551: __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), kamg@551: SP, conversion_off); kamg@551: __ ld(SP, conversion_off , d); kamg@551: } kamg@551: } else { kamg@551: // freg -> mem kamg@551: int off = STACK_BIAS + reg2offset(dst.first()); kamg@551: if (Assembler::is_simm13(off)) { kamg@551: __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), kamg@551: SP, off); kamg@551: } else { kamg@551: if (conversion_off == noreg) { kamg@551: __ set(off, L6); kamg@551: conversion_off = L6; kamg@551: } kamg@551: __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), kamg@551: SP, conversion_off); kamg@551: } kamg@551: } kamg@551: } kamg@551: break; kamg@551: kamg@551: case T_DOUBLE: kamg@551: assert( j_arg + 1 < total_args_passed && kamg@551: in_sig_bt[j_arg + 1] == T_VOID && kamg@551: out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); kamg@551: if (src.first()->is_stack()) { kamg@551: // Stack to stack/reg is simple kamg@551: long_move(masm, src, dst); kamg@551: } else { kamg@551: Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; kamg@551: kamg@551: // Destination could be an odd reg on 32bit in which case kamg@551: // we can't load direct to the destination. 
kamg@551: kamg@551: if (!d->is_even() && wordSize == 4) { kamg@551: d = L2; kamg@551: } kamg@551: int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; kamg@551: if (Assembler::is_simm13(off)) { kamg@551: __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), kamg@551: SP, off); kamg@551: __ ld_long(SP, off, d); kamg@551: } else { kamg@551: if (conversion_off == noreg) { kamg@551: __ set(off, L6); kamg@551: conversion_off = L6; kamg@551: } kamg@551: __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), kamg@551: SP, conversion_off); kamg@551: __ ld_long(SP, conversion_off, d); kamg@551: } kamg@551: if (d == L2) { kamg@551: long_move(masm, reg64_to_VMRegPair(L2), dst); kamg@551: } kamg@551: } kamg@551: break; kamg@551: kamg@551: case T_LONG : kamg@551: // 32bit can't do a split move of something like g1 -> O0, O1 kamg@551: // so use a memory temp kamg@551: if (src.is_single_phys_reg() && wordSize == 4) { kamg@551: Register tmp = L2; kamg@551: if (dst.first()->is_reg() && kamg@551: (wordSize == 8 || dst.first()->as_Register()->is_even())) { kamg@551: tmp = dst.first()->as_Register(); kamg@551: } kamg@551: kamg@551: int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; kamg@551: if (Assembler::is_simm13(off)) { kamg@551: __ stx(src.first()->as_Register(), SP, off); kamg@551: __ ld_long(SP, off, tmp); kamg@551: } else { kamg@551: if (conversion_off == noreg) { kamg@551: __ set(off, L6); kamg@551: conversion_off = L6; kamg@551: } kamg@551: __ stx(src.first()->as_Register(), SP, conversion_off); kamg@551: __ ld_long(SP, conversion_off, tmp); kamg@551: } kamg@551: kamg@551: if (tmp == L2) { kamg@551: long_move(masm, reg64_to_VMRegPair(L2), dst); kamg@551: } kamg@551: } else { kamg@551: long_move(masm, src, dst); kamg@551: } kamg@551: break; kamg@551: kamg@551: case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); kamg@551: kamg@551: default: kamg@551: move32_64(masm, src, dst); kamg@551: } kamg@551: } kamg@551: 
kamg@551: kamg@551: // If we have any strings we must store any register based arg to the stack kamg@551: // This includes any still live xmm registers too. kamg@551: kamg@551: if (total_strings > 0 ) { kamg@551: kamg@551: // protect all the arg registers kamg@551: __ save_frame(0); kamg@551: __ mov(G2_thread, L7_thread_cache); kamg@551: const Register L2_string_off = L2; kamg@551: kamg@551: // Get first string offset kamg@551: __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); kamg@551: kamg@551: for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { kamg@551: if (out_sig_bt[c_arg] == T_ADDRESS) { kamg@551: kamg@551: VMRegPair dst = out_regs[c_arg]; kamg@551: const Register d = dst.first()->is_reg() ? kamg@551: dst.first()->as_Register()->after_save() : noreg; kamg@551: kamg@551: // It's a string the oop and it was already copied to the out arg kamg@551: // position kamg@551: if (d != noreg) { kamg@551: __ mov(d, O0); kamg@551: } else { kamg@551: assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), kamg@551: "must be"); kamg@551: __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); kamg@551: } kamg@551: Label skip; kamg@551: kamg@551: __ br_null(O0, false, Assembler::pn, skip); kamg@551: __ delayed()->add(FP, L2_string_off, O1); kamg@551: kamg@551: if (d != noreg) { kamg@551: __ mov(O1, d); kamg@551: } else { kamg@551: assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), kamg@551: "must be"); kamg@551: __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); kamg@551: } kamg@551: kamg@551: __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), kamg@551: relocInfo::runtime_call_type); kamg@551: __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off); kamg@551: kamg@551: __ bind(skip); kamg@551: kamg@551: } kamg@551: kamg@551: } kamg@551: __ mov(L7_thread_cache, G2_thread); kamg@551: __ restore(); kamg@551: kamg@551: } kamg@551: kamg@551: kamg@551: // Ok now we are done. 
// Need to place the nop that dtrace wants in order to
  // patch in the trap

  int patch_offset = ((intptr_t)__ pc()) - start;

  __ nop();


  // Return

  __ ret();
  __ delayed()->restore();

  __ flush();

  nmethod *nm = nmethod::new_dtrace_nmethod(
      method, masm->code(), vep_offset, patch_offset, frame_complete,
      stack_slots / VMRegImpl::slots_per_word);
  return nm;

}

#endif // HAVE_DTRACE_H

// this function returns the adjust size (in number of words) to a c2i adapter
// activation for use during deoptimization
//
// callee_parameters - number of parameters of the callee
// callee_locals     - number of locals of the callee; expected to be
//                     >= callee_parameters (asserted below)
//
// The result is (callee_locals - callee_parameters) scaled by
// Interpreter::stackElementWords and then passed through round_to() with
// WordsPerLong (presumably rounding up to a multiple of WordsPerLong --
// confirm against round_to's definition).
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  assert(callee_locals >= callee_parameters,
          "test and remove; got more parms than locals");
  // Same condition as the assert above, handled explicitly so that a
  // negative difference is never scaled and returned.
  if (callee_locals < callee_parameters)
    return 0;                   // No adjustment for negative locals
  int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
  return round_to(diff, WordsPerLong);
}

// "Top of Stack" slots that may be unused by the calling convention but must
// otherwise be preserved.
// On Intel these are not necessary and the value can be zero.
// On Sparc this describes the words reserved for storing a register window
// when an interrupt occurs.
uint SharedRuntime::out_preserve_stack_slots() {
  // frame::register_save_words is a word count; multiplying by
  // VMRegImpl::slots_per_word expresses it in VMReg stack slots.
  return frame::register_save_words * VMRegImpl::slots_per_word;
}

// Emits the code that pushes ONE new frame; make_new_frames() calls this
// inside its loop.
//
// masm  - assembler the instructions are emitted into (used via the __ macro)
// deopt - when true, O0/O1 (the return values) are also carried from the
//         previous register window into the new one
//
// Registers read by the emitted code: G3pcs (current entry of the pc array),
// O3array (current entry of the frame size array), O4array_size (remaining
// frame count), O2UnrollBlock.
// Effects of the emitted code, as far as visible here:
//   - a new register window/frame is created with `save`, sized by *O3array
//   - I7 of the new frame is loaded from *G3pcs
//   - O4array_size is re-established in the new window and decremented
//   - O3array and O2UnrollBlock are re-established in the new window
//     (O3array is NOT advanced here; make_new_frames advances it)
//   - G3pcs is advanced by wordSize
//   - O5_savedSP receives the new SP
static void gen_new_frame(MacroAssembler* masm, bool deopt) {
//
// Common out the new frame generation for deopt and uncommon trap
//
  Register        G3pcs              = G3_scratch; // Array of new pcs (input)
  Register        Oreturn0           = O0;
  Register        Oreturn1           = O1;
  Register        O2UnrollBlock      = O2;
  Register        O3array            = O3;         // Array of frame sizes (input)
  Register        O4array_size       = O4;         // number of frames (input)
  Register        O7frame_size       = O7;         // size of the frame to push (loaded from *O3array below)

  // Load the size of this frame, negate it and use it as the SP displacement
  // of the save that creates the frame.
  __ ld_ptr(O3array, 0, O7frame_size);
  __ sub(G0, O7frame_size, O7frame_size);
  __ save(SP, O7frame_size, SP);
  __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc

#ifdef ASSERT
  // make sure that the frames are aligned properly
#ifndef _LP64
  __ btst(wordSize*2-1, SP);
  __ breakpoint_trap(Assembler::notZero, Assembler::ptr_cc);
#endif
#endif

  // Deopt needs to pass some extra live values from frame to frame

  // NOTE(review): after_save() presumably names the register through which
  // the pre-save O register is visible in the new window (the matching %i
  // register) -- confirm against RegisterImpl::after_save().
  if (deopt) {
    __ mov(Oreturn0->after_save(), Oreturn0);
    __ mov(Oreturn1->after_save(), Oreturn1);
  }

  __ mov(O4array_size->after_save(), O4array_size);
  __ sub(O4array_size, 1, O4array_size);        // one fewer frame left to push
  __ mov(O3array->after_save(), O3array);
  __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  __ add(G3pcs, wordSize, G3pcs);               // point to next pc value

#ifdef ASSERT
  // trash registers to show a clear pattern in backtraces
  __ set(0xDEAD0000, I0);
  __ add(I0,  2, I1);
  __ add(I0,  4, I2);
  __ add(I0,  6, I3);
  __ add(I0,  8, I4);
  // Don't touch I5 could have valuable savedSP
  __ set(0xDEADBEEF, L0);
  __ mov(L0, L1);
  __ mov(L0, L2);
  __ mov(L0, L3);
  __ mov(L0, L4);
  __ mov(L0, L5);

  // trash the return value as there is nothing to return yet
  __ set(0xDEAD0001, O7);
#endif

  __ mov(SP, O5_savedSP);
}


// Emits the code that walks the UnrollBlock (pointer expected in
// O2UnrollBlock) and pushes one frame per entry by repeatedly invoking
// gen_new_frame().
//
// masm  - assembler the instructions are emitted into (used via the __ macro)
// deopt - forwarded unchanged to gen_new_frame()
//
// After the loop, O7 is loaded from *G3pcs (the "final frame new pc").
static void make_new_frames(MacroAssembler* masm, bool deopt) {
  //
  // loop through the UnrollBlock info and create new frames
  //
  Register        G3pcs              = G3_scratch;
  Register        Oreturn0           = O0;
  Register        Oreturn1           = O1;
  Register        O2UnrollBlock      = O2;
  Register        O3array            = O3;
  Register        O4array_size       = O4;
  Label           loop;

  // Before we make new frames, check to see if stack is available.
  // Do this after the caller's return address is on top of stack
  if (UseStackBanging) {
    // Get total frame size for interpreted frames
    __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
    __ bang_stack_size(O4, O3, G3_scratch);
  }

  // Fetch the frame count and the two parallel arrays (pcs, sizes) from the
  // UnrollBlock.
  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), O3array);

  // Adjust old interpreter frame to make space for new frame's extra java locals
  //
  // We capture the original sp for the transition frame only because it is needed in
  // order to properly calculate interpreter_sp_adjustment. Even though in real life
  // every interpreter frame captures a savedSP it is only needed at the transition
  // (fortunately). If we had to have it correct everywhere then we would need to
  // be told the sp_adjustment for each frame we create. If the frame size array
  // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
  // for each frame we create and keep up the illusion everywhere.
  //

  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), O7);
  __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
  __ sub(SP, O7, SP);

#ifdef ASSERT
  // make sure that there is at least one entry in the array
  __ tst(O4array_size);
  __ breakpoint_trap(Assembler::zero, Assembler::icc);
#endif

  // Now push the new interpreter frames
  __ bind(loop);

  // allocate a new frame, filling the registers

  gen_new_frame(masm, deopt);        // allocate an interpreter frame

  // gen_new_frame decremented O4array_size; keep looping while it is
  // non-zero. The instruction emitted via delayed() advances O3array to the
  // next frame size entry (gen_new_frame itself only advances G3pcs).
  __ cmp_zero_and_br(Assembler::notZero, O4array_size, loop);
  __ delayed()->add(O3array, wordSize, O3array);
  __ ld_ptr(G3pcs, 0, O7);                      // load final frame new pc

}

//------------------------------generate_deopt_blob----------------------------
// Ought to generate an ideal graph & compile, but here's some SPARC ASM
// instead.
//
// Builds the deoptimization blob and stores it in _deopt_blob. The emitted
// code has several entry points, all measured relative to `start`:
//   - the beginning of the blob:    plain deoptimization (Unpack_deopt)
//   - exception_offset:             exception oop arrives in Oexception and
//                                   is stored into the thread's exception_oop
//                                   field, then falls through to ...
//   - exception_in_tls_offset:      exception already in exception_oop
//                                   (Unpack_exception)
//   - reexecute_offset:             re-execute entry (Unpack_reexecute)
// Every entry saves the live registers, records its mode in L0deopt_mode and
// continues at the common label `cont`.
void SharedRuntime::generate_deopt_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  int pad = VerifyThread ? 512 : 0;// Extra slop space for more verify code
  if (UseStackBanging) {
    pad += StackShadowPages*16 + 32;
  }
#ifdef _LP64
  CodeBuffer buffer("deopt_blob", 2100+pad, 512);
#else
  // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
  CodeBuffer buffer("deopt_blob", 1600+pad, 512);
#endif /* _LP64 */
  MacroAssembler* masm               = new MacroAssembler(&buffer);
  FloatRegister   Freturn0           = F0;
  Register        Greturn1           = G1;
  Register        Oreturn0           = O0;
  Register        Oreturn1           = O1;
  Register        O2UnrollBlock      = O2;
  Register        L0deopt_mode       = L0;
  Register        G4deopt_mode       = G4_scratch;
  int             frame_size_words;
  // Slots in the dummy unpack frame used to park the float return value
  // (and, for 32-bit COMPILER2, G1) across the unpack_frames call.
  Address         saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
#if !defined(_LP64) && defined(COMPILER2)
  Address         saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
#endif
  Label           cont;

  OopMapSet *oop_maps = new OopMapSet();

  //
  // This is the entry point for code which is returning to a de-optimized
  // frame.
  // The steps taken by this frame are as follows:
  //   - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
  //     and all potentially live registers (at a pollpoint many registers can be live).
  //
  //   - call the C routine: Deoptimization::fetch_unroll_info (this function
  //     returns information about the number and size of interpreter frames
  //     which are equivalent to the frame which is being deoptimized)
  //   - deallocate the unpack frame, restoring only results values. Other
  //     volatile registers will now be captured in the vframeArray as needed.
  //   - deallocate the deoptimization frame
  //   - in a loop using the information returned in the previous step
  //     push new interpreter frames (take care to propagate the return
  //     values through each new frame pushed)
  //   - create a dummy "unpack_frame" and save the return values (O0, O1, F0)
  //   - call the C routine: Deoptimization::unpack_frames (this function
  //     lays out values on the interpreter frame which was just created)
  //   - deallocate the dummy unpack_frame
  //   - ensure that all the return values are correctly set and then do
  //     a return to the interpreter entry point
  //
  // Refer to the following methods for more information:
  //   - Deoptimization::fetch_unroll_info
  //   - Deoptimization::unpack_frames

  OopMap* map = NULL;

  int start = __ offset();

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been called by the deoptimized nmethod with a call that
  // replaced the original call (or safepoint polling location) so the deoptimizing
  // pc is now in O7. Return values are still in the expected places

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);

  int exception_offset = __ offset() - start;

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been jumped to by the exception handler (or exception_blob
  // for server).  O0 contains the exception oop and O7 contains the original
  // exception pc.  So if we push a frame here it will look to the
  // stack walking code (fetch_unroll_info) just like a normal call so
  // state will be extracted normally.

  // save exception oop in JavaThread and fall through into the
  // exception_in_tls case since they are handled in same way except
  // for where the pending exception is kept.
  __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());

  //
  // Vanilla deoptimization with an exception pending in exception_oop
  //
  int exception_in_tls_offset = __ offset() - start;

  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  // Restore G2_thread
  __ get_thread();

#ifdef ASSERT
  {
    // verify that there is really an exception oop in exception_oop
    Label has_exception;
    __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
    __ br_notnull_short(Oexception, Assembler::pt, has_exception);
    __ stop("no exception in thread");
    __ bind(has_exception);

    // verify that there is no pending exception
    Label no_pending_exception;
    Address exception_addr(G2_thread, Thread::pending_exception_offset());
    __ ld_ptr(exception_addr, Oexception);
    __ br_null_short(Oexception, Assembler::pt, no_pending_exception);
    __ stop("must not have pending exception here");
    __ bind(no_pending_exception);
  }
#endif

  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);;

  //
  // Reexecute entry, similar to c2 uncommon trap
  //
  int reexecute_offset = __ offset() - start;

  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);

  // Common continuation for all entry points; L0deopt_mode holds the mode.
  __ bind(cont);

  __ set_last_Java_frame(SP, noreg);

  // do the call by hand so we can get the oopmap

  __ mov(G2_thread, L7_thread_cache);
  __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  __ delayed()->mov(G2_thread, O0);

  // Set an oopmap for the call site this describes all our saved volatile registers

  oop_maps->add_gc_map( __ offset()-start, map);

  __ mov(L7_thread_cache, G2_thread);

  __ reset_last_Java_frame();

  // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
  // so this move will survive

  __ mov(L0deopt_mode, G4deopt_mode);

  // O0 holds the value returned by fetch_unroll_info; stash it where it will
  // appear as O2UnrollBlock once this frame is popped.
  __ mov(O0, O2UnrollBlock->after_save());

  RegisterSaver::restore_result_registers(masm);

  Label noException;
  __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);

  // Move the pending exception from exception_oop to Oexception so
  // the pending exception will be picked up by the interpreter.
  __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
  __ bind(noException);

  // deallocate the deoptimization frame taking care to preserve the return values
  __ mov(Oreturn0,     Oreturn0->after_save());
  __ mov(Oreturn1,     Oreturn1->after_save());
  __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  __ restore();

  // Allocate new interpreter frame(s) and possible c2i adapter frame

  make_new_frames(masm, true);

  // push a dummy "unpack_frame" taking care of float return values and
  // call Deoptimization::unpack_frames to have the unpacker layout
  // information in the interpreter frames just created and then return
  // to the interpreter entry point
  __ save(SP, -frame_size_words*wordSize, SP);
  __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
#if !defined(_LP64)
#if defined(COMPILER2)
  // 32-bit 1-register longs return longs in G1
  __ stx(Greturn1, saved_Greturn1_addr);
#endif
  __ set_last_Java_frame(SP, noreg);
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode);
#else
  // LP64 uses g4 in set_last_Java_frame
  __ mov(G4deopt_mode, O1);
  __ set_last_Java_frame(SP, G0);
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
#endif
  __ reset_last_Java_frame();
  __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);

#if !defined(_LP64) && defined(COMPILER2)
  // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
  // I0/I1 if the return value is long.
  // O0 is compared against T_LONG here, i.e. it is taken to be the value
  // returned by the unpack_frames call above.
  Label not_long;
  __ cmp_and_br_short(O0,T_LONG, Assembler::notEqual, Assembler::pt, not_long);
  __ ldd(saved_Greturn1_addr,I0);
  __ bind(not_long);
#endif
  __ ret();
  __ delayed()->restore();

  masm->flush();
  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}

#ifdef COMPILER2

//------------------------------generate_uncommon_trap_blob--------------------
// Ought to generate an ideal graph & compile, but here's some SPARC ASM
// instead.
//
// Builds the uncommon trap blob and stores it in _uncommon_trap_blob.
// Only compiled when COMPILER2 is defined.
void SharedRuntime::generate_uncommon_trap_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  int pad = VerifyThread ? 512 : 0;
  if (UseStackBanging) {
    pad += StackShadowPages*16 + 32;
  }
#ifdef _LP64
  CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
#else
  // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
  CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
#endif
  MacroAssembler* masm               = new MacroAssembler(&buffer);
  // Both names alias O2: it first carries the class index argument and later
  // the UnrollBlock pointer.
  Register        O2UnrollBlock      = O2;
  Register        O2klass_index      = O2;

  //
  // This is the entry point for all traps the compiler takes when it thinks
  // it cannot handle further execution of compilation code. The frame is
  // deoptimized in these cases and converted into interpreter frames for
  // execution
  // The steps taken by this frame are as follows:
  //   - push a fake "unpack_frame"
  //   - call the C routine Deoptimization::uncommon_trap (this function
  //     packs the current compiled frame into vframe arrays and returns
  //     information about the number and size of interpreter frames which
  //     are equivalent to the frame which is being deoptimized)
  //   - deallocate the "unpack_frame"
  //   - deallocate the deoptimization frame
  //   - in a loop using the information returned in the previous step
  //     push interpreter frames;
  //   - create a dummy "unpack_frame"
  //   - call the C routine: Deoptimization::unpack_frames (this function
  //     lays out values on the interpreter frame which was just created)
  //   - deallocate the dummy unpack_frame
  //   - return to the interpreter entry point
  //
  //  Refer to the following methods for more information:
  //   - Deoptimization::uncommon_trap
  //   - Deoptimization::unpack_frames

  // the unloaded class index is in O0 (first parameter to this blob)

  // push a dummy "unpack_frame"
  // and call Deoptimization::uncommon_trap to pack the compiled frame into
  // vframe array and return the UnrollBlock information
  __ save_frame(0);
  __ set_last_Java_frame(SP, noreg);
  __ mov(I0, O2klass_index);
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
  __ reset_last_Java_frame();
  __ mov(O0, O2UnrollBlock->after_save());
  __ restore();

  // deallocate the deoptimized frame taking care to preserve the return values
  __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  __ restore();

  // Allocate new interpreter frame(s) and possible c2i adapter frame

  make_new_frames(masm, false);

  // push a dummy "unpack_frame" taking care of float return values and
  // call Deoptimization::unpack_frames to have the unpacker layout
  // information in the interpreter frames just created and then return
  // to the interpreter entry point
  __ save_frame(0);
  __ set_last_Java_frame(SP, noreg);
  __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3);
  __ reset_last_Java_frame();
  __ ret();
  __ delayed()->restore();

  masm->flush();
  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize);
}

#endif // COMPILER2

//------------------------------generate_handler_blob-------------------
//
// Generate a special Compile2Runtime blob that saves all registers, and sets
// up an OopMap.
//
// This blob is jumped to (via a breakpoint and the signal handler) from a
// safepoint in compiled code.  On entry to this blob, O7 contains the
// address in the original nmethod at which we should resume normal execution.
// Thus, this blob looks like a subroutine which must preserve lots of
// registers and return normally.  Note that O7 is never register-allocated,
// so it is guaranteed to be free here.
//

// The hardest part of what this blob must do is to save the 64-bit %o
// registers in the 32-bit build.
A simple 'save' turn the %o's to %i's and duke@435: // an interrupt will chop off their heads. Making space in the caller's frame duke@435: // first will let us save the 64-bit %o's before save'ing, but we cannot hand duke@435: // the adjusted FP off to the GC stack-crawler: this will modify the caller's duke@435: // SP and mess up HIS OopMaps. So we first adjust the caller's SP, then save duke@435: // the 64-bit %o's, then do a save, then fixup the caller's SP (our FP). duke@435: // Tricky, tricky, tricky... duke@435: never@2950: SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) { duke@435: assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); duke@435: duke@435: // allocate space for the code duke@435: ResourceMark rm; duke@435: // setup code generation tools duke@435: // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread) duke@435: // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread) duke@435: // even larger with TraceJumps duke@435: int pad = TraceJumps ? 
512 : 0; duke@435: CodeBuffer buffer("handler_blob", 1600 + pad, 512); duke@435: MacroAssembler* masm = new MacroAssembler(&buffer); duke@435: int frame_size_words; duke@435: OopMapSet *oop_maps = new OopMapSet(); duke@435: OopMap* map = NULL; duke@435: duke@435: int start = __ offset(); duke@435: duke@435: // If this causes a return before the processing, then do a "restore" duke@435: if (cause_return) { duke@435: __ restore(); duke@435: } else { duke@435: // Make it look like we were called via the poll duke@435: // so that frame constructor always sees a valid return address duke@435: __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7); duke@435: __ sub(O7, frame::pc_return_offset, O7); duke@435: } duke@435: duke@435: map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); duke@435: duke@435: // setup last_Java_sp (blows G4) duke@435: __ set_last_Java_frame(SP, noreg); duke@435: duke@435: // call into the runtime to handle illegal instructions exception duke@435: // Do not use call_VM_leaf, because we need to make a GC map at this call site. duke@435: __ mov(G2_thread, O0); duke@435: __ save_thread(L7_thread_cache); duke@435: __ call(call_ptr); duke@435: __ delayed()->nop(); duke@435: duke@435: // Set an oopmap for the call site. duke@435: // We need this not only for callee-saved registers, but also for volatile duke@435: // registers that the compiler might be keeping live across a safepoint. 
duke@435: duke@435: oop_maps->add_gc_map( __ offset() - start, map); duke@435: duke@435: __ restore_thread(L7_thread_cache); duke@435: // clear last_Java_sp duke@435: __ reset_last_Java_frame(); duke@435: duke@435: // Check for exceptions duke@435: Label pending; duke@435: duke@435: __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1); kvn@3037: __ br_notnull_short(O1, Assembler::pn, pending); duke@435: duke@435: RegisterSaver::restore_live_registers(masm); duke@435: duke@435: // We are back the the original state on entry and ready to go. duke@435: duke@435: __ retl(); duke@435: __ delayed()->nop(); duke@435: duke@435: // Pending exception after the safepoint duke@435: duke@435: __ bind(pending); duke@435: duke@435: RegisterSaver::restore_live_registers(masm); duke@435: duke@435: // We are back the the original state on entry. duke@435: duke@435: // Tail-call forward_exception_entry, with the issuing PC in O7, duke@435: // so it looks like the original nmethod called forward_exception_entry. duke@435: __ set((intptr_t)StubRoutines::forward_exception_entry(), O0); duke@435: __ JMP(O0, 0); duke@435: __ delayed()->nop(); duke@435: duke@435: // ------------- duke@435: // make sure all code is generated duke@435: masm->flush(); duke@435: duke@435: // return exception blob duke@435: return SafepointBlob::create(&buffer, oop_maps, frame_size_words); duke@435: } duke@435: duke@435: // duke@435: // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss duke@435: // duke@435: // Generate a stub that calls into vm to find out the proper destination duke@435: // of a java call. All the argument registers are live at this point duke@435: // but since this is generic code we don't know what they are and the caller duke@435: // must do any gc of the args. 
duke@435: // never@2950: RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { duke@435: assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); duke@435: duke@435: // allocate space for the code duke@435: ResourceMark rm; duke@435: // setup code generation tools duke@435: // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread) duke@435: // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread) duke@435: // even larger with TraceJumps duke@435: int pad = TraceJumps ? 512 : 0; duke@435: CodeBuffer buffer(name, 1600 + pad, 512); duke@435: MacroAssembler* masm = new MacroAssembler(&buffer); duke@435: int frame_size_words; duke@435: OopMapSet *oop_maps = new OopMapSet(); duke@435: OopMap* map = NULL; duke@435: duke@435: int start = __ offset(); duke@435: duke@435: map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); duke@435: duke@435: int frame_complete = __ offset(); duke@435: duke@435: // setup last_Java_sp (blows G4) duke@435: __ set_last_Java_frame(SP, noreg); duke@435: duke@435: // call into the runtime to handle illegal instructions exception duke@435: // Do not use call_VM_leaf, because we need to make a GC map at this call site. duke@435: __ mov(G2_thread, O0); duke@435: __ save_thread(L7_thread_cache); duke@435: __ call(destination, relocInfo::runtime_call_type); duke@435: __ delayed()->nop(); duke@435: duke@435: // O0 contains the address we are going to jump to assuming no exception got installed duke@435: duke@435: // Set an oopmap for the call site. duke@435: // We need this not only for callee-saved registers, but also for volatile duke@435: // registers that the compiler might be keeping live across a safepoint. 
duke@435: duke@435: oop_maps->add_gc_map( __ offset() - start, map); duke@435: duke@435: __ restore_thread(L7_thread_cache); duke@435: // clear last_Java_sp duke@435: __ reset_last_Java_frame(); duke@435: duke@435: // Check for exceptions duke@435: Label pending; duke@435: duke@435: __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1); kvn@3037: __ br_notnull_short(O1, Assembler::pn, pending); duke@435: duke@435: // get the returned methodOop duke@435: duke@435: __ get_vm_result(G5_method); duke@435: __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS); duke@435: duke@435: // O0 is where we want to jump, overwrite G3 which is saved and scratch duke@435: duke@435: __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS); duke@435: duke@435: RegisterSaver::restore_live_registers(masm); duke@435: duke@435: // We are back the the original state on entry and ready to go. duke@435: duke@435: __ JMP(G3, 0); duke@435: __ delayed()->nop(); duke@435: duke@435: // Pending exception after the safepoint duke@435: duke@435: __ bind(pending); duke@435: duke@435: RegisterSaver::restore_live_registers(masm); duke@435: duke@435: // We are back the the original state on entry. duke@435: duke@435: // Tail-call forward_exception_entry, with the issuing PC in O7, duke@435: // so it looks like the original nmethod called forward_exception_entry. duke@435: __ set((intptr_t)StubRoutines::forward_exception_entry(), O0); duke@435: __ JMP(O0, 0); duke@435: __ delayed()->nop(); duke@435: duke@435: // ------------- duke@435: // make sure all code is generated duke@435: masm->flush(); duke@435: duke@435: // return the blob duke@435: // frame_size_words or bytes?? duke@435: return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); duke@435: }