/*
 * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "oops/compiledICHolder.hpp"
#include "prims/jvmtiRedefineClassesTrace.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_mips.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif

#define __ masm->

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;

class RegisterSaver {
  enum { FPU_regs_live = 32 };
  // Capture info about frame layout
  enum layout {
#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off,
    DEF_LAYOUT_OFFS(for_16_bytes_aligned)
    DEF_LAYOUT_OFFS(fpr0)
    DEF_LAYOUT_OFFS(fpr1)
    DEF_LAYOUT_OFFS(fpr2)
    DEF_LAYOUT_OFFS(fpr3)
    DEF_LAYOUT_OFFS(fpr4)
    DEF_LAYOUT_OFFS(fpr5)
    DEF_LAYOUT_OFFS(fpr6)
    DEF_LAYOUT_OFFS(fpr7)
    DEF_LAYOUT_OFFS(fpr8)
    DEF_LAYOUT_OFFS(fpr9)
    DEF_LAYOUT_OFFS(fpr10)
    DEF_LAYOUT_OFFS(fpr11)
    DEF_LAYOUT_OFFS(fpr12)
    DEF_LAYOUT_OFFS(fpr13)
    DEF_LAYOUT_OFFS(fpr14)
    DEF_LAYOUT_OFFS(fpr15)
    DEF_LAYOUT_OFFS(fpr16)
    DEF_LAYOUT_OFFS(fpr17)
    DEF_LAYOUT_OFFS(fpr18)
    DEF_LAYOUT_OFFS(fpr19)
    DEF_LAYOUT_OFFS(fpr20)
    DEF_LAYOUT_OFFS(fpr21)
    DEF_LAYOUT_OFFS(fpr22)
    DEF_LAYOUT_OFFS(fpr23)
    DEF_LAYOUT_OFFS(fpr24)
    DEF_LAYOUT_OFFS(fpr25)
    DEF_LAYOUT_OFFS(fpr26)
    DEF_LAYOUT_OFFS(fpr27)
    DEF_LAYOUT_OFFS(fpr28)
    DEF_LAYOUT_OFFS(fpr29)
    DEF_LAYOUT_OFFS(fpr30)
    DEF_LAYOUT_OFFS(fpr31)

    DEF_LAYOUT_OFFS(v0)
    DEF_LAYOUT_OFFS(v1)
    DEF_LAYOUT_OFFS(a0)
    DEF_LAYOUT_OFFS(a1)
    DEF_LAYOUT_OFFS(a2)
    DEF_LAYOUT_OFFS(a3)
    DEF_LAYOUT_OFFS(a4)
    DEF_LAYOUT_OFFS(a5)
    DEF_LAYOUT_OFFS(a6)
    DEF_LAYOUT_OFFS(a7)
    DEF_LAYOUT_OFFS(t0)
    DEF_LAYOUT_OFFS(t1)
    DEF_LAYOUT_OFFS(t2)
    DEF_LAYOUT_OFFS(t3)
    DEF_LAYOUT_OFFS(s0)
    DEF_LAYOUT_OFFS(s1)
    DEF_LAYOUT_OFFS(s2)
    DEF_LAYOUT_OFFS(s3)
    DEF_LAYOUT_OFFS(s4)
    DEF_LAYOUT_OFFS(s5)
    DEF_LAYOUT_OFFS(s6)
    DEF_LAYOUT_OFFS(s7)
    DEF_LAYOUT_OFFS(t8)
    DEF_LAYOUT_OFFS(t9)

    DEF_LAYOUT_OFFS(gp)
    DEF_LAYOUT_OFFS(fp)
    DEF_LAYOUT_OFFS(return)
    reg_save_size
  };

 public:

  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
  // FIXME: I have no idea which register to use
  static int raOffset(void) { return return_off / 2; }
  // Rmethod
  static int methodOffset(void) { return s3_off / 2; }

  static int v0Offset(void) { return v0_off / 2; }
  static int v1Offset(void) { return v1_off / 2; }

  static int fpResultOffset(void) { return fpr0_off / 2; }

  // During deoptimization only the result registers need to be restored;
  // all the other values have already been extracted.

  static void restore_result_registers(MacroAssembler* masm);
};

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {

  // Always make the frame size 16-byte aligned
  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                     reg_save_size*BytesPerInt, 16);
  // OopMap frame size is in compiler stack slots (jints), not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // The caller will allocate additional_frame_words
  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
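  // Illustration of the slot arithmetic (a reader's note, not load-bearing):
  // each DEF_LAYOUT_OFFS(reg) in the layout enum defines two 32-bit jint
  // slots (reg_off and regH_off), i.e. one 64-bit register.  With the 60
  // entries above, reg_save_size is 120 slots = 480 bytes, which already
  // satisfies the 16-byte alignment requested by round_to().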
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save the registers and the FPU state.  There is no push-multiple
  // instruction on MIPS, so every register (including RA, in the
  // return_off slot) is stored by hand.

  __ daddiu(SP, SP, -reg_save_size * jintSize);

  __ sdc1(F0, SP, fpr0_off * jintSize);   __ sdc1(F1, SP, fpr1_off * jintSize);
  __ sdc1(F2, SP, fpr2_off * jintSize);   __ sdc1(F3, SP, fpr3_off * jintSize);
  __ sdc1(F4, SP, fpr4_off * jintSize);   __ sdc1(F5, SP, fpr5_off * jintSize);
  __ sdc1(F6, SP, fpr6_off * jintSize);   __ sdc1(F7, SP, fpr7_off * jintSize);
  __ sdc1(F8, SP, fpr8_off * jintSize);   __ sdc1(F9, SP, fpr9_off * jintSize);
  __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize);
  __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize);
  __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize);
  __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize);
  __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize);
  __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize);
  __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize);
  __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize);
  __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize);
  __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize);
  __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize);
  __ sd(V0, SP, v0_off * jintSize);       __ sd(V1, SP, v1_off * jintSize);
  __ sd(A0, SP, a0_off * jintSize);       __ sd(A1, SP, a1_off * jintSize);
  __ sd(A2, SP, a2_off * jintSize);       __ sd(A3, SP, a3_off * jintSize);
  __ sd(A4, SP, a4_off * jintSize);       __ sd(A5, SP, a5_off * jintSize);
  __ sd(A6, SP, a6_off * jintSize);       __ sd(A7, SP, a7_off * jintSize);
  __ sd(T0, SP, t0_off * jintSize);
  __ sd(T1, SP, t1_off * jintSize);
  __ sd(T2, SP, t2_off * jintSize);
  __ sd(T3, SP, t3_off * jintSize);
  __ sd(S0, SP, s0_off * jintSize);
  __ sd(S1, SP, s1_off * jintSize);
  __ sd(S2, SP, s2_off * jintSize);
  __ sd(S3, SP, s3_off * jintSize);
  __ sd(S4, SP, s4_off * jintSize);
  __ sd(S5, SP, s5_off * jintSize);
  __ sd(S6, SP, s6_off * jintSize);
  __ sd(S7, SP, s7_off * jintSize);

  __ sd(T8, SP, t8_off * jintSize);
  __ sd(T9, SP, t9_off * jintSize);

  __ sd(GP, SP, gp_off * jintSize);
  __ sd(FP, SP, fp_off * jintSize);
  __ sd(RA, SP, return_off * jintSize);
  __ daddi(FP, SP, fp_off * jintSize);

  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(frame_size_in_slots, 0);

#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
  map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg());

  map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg());
  map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg());
#undef STACK_OFFSET
  return map;
}


// Pop the current frame and restore all the registers that we
// saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
  __ ldc1(F0, SP, fpr0_off * jintSize);   __ ldc1(F1, SP, fpr1_off * jintSize);
  __ ldc1(F2, SP, fpr2_off * jintSize);   __ ldc1(F3, SP, fpr3_off * jintSize);
  __ ldc1(F4, SP, fpr4_off * jintSize);   __ ldc1(F5, SP, fpr5_off * jintSize);
  __ ldc1(F6, SP, fpr6_off * jintSize);   __ ldc1(F7, SP, fpr7_off * jintSize);
  __ ldc1(F8, SP, fpr8_off * jintSize);   __ ldc1(F9, SP, fpr9_off * jintSize);
  __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize);
  __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize);
  __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize);
  __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize);
  __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize);
  __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize);
  __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize);
  __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize);
  __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize);
  __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize);
  __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize);

  __ ld(V0, SP, v0_off * jintSize);       __ ld(V1, SP, v1_off * jintSize);
  __ ld(A0, SP, a0_off * jintSize);       __ ld(A1, SP, a1_off * jintSize);
  __ ld(A2, SP, a2_off * jintSize);       __ ld(A3, SP, a3_off * jintSize);
  __ ld(A4, SP, a4_off * jintSize);       __ ld(A5, SP, a5_off * jintSize);
  __ ld(A6, SP, a6_off * jintSize);       __ ld(A7, SP, a7_off * jintSize);
  __ ld(T0, SP, t0_off * jintSize);
  __ ld(T1, SP, t1_off * jintSize);
  __ ld(T2, SP, t2_off * jintSize);
  __ ld(T3, SP, t3_off * jintSize);
  __ ld(S0, SP, s0_off * jintSize);
  __ ld(S1, SP, s1_off * jintSize);
  __ ld(S2, SP, s2_off * jintSize);
  __ ld(S3, SP, s3_off * jintSize);
  __ ld(S4, SP, s4_off * jintSize);
  __ ld(S5, SP, s5_off * jintSize);
  __ ld(S6, SP, s6_off * jintSize);
  __ ld(S7, SP, s7_off * jintSize);

  __ ld(T8, SP, t8_off * jintSize);
  __ ld(T9, SP, t9_off * jintSize);

  __ ld(GP, SP, gp_off * jintSize);
  __ ld(FP, SP, fp_off * jintSize);
  __ ld(RA, SP, return_off * jintSize);

  __ addiu(SP, SP, reg_save_size * jintSize);
}

// Pop the current frame and restore the registers that might be holding
// a result.
// FIXME: what if the result is a float?
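// Note on the FIXME above: a float/double result would live in F0, whose
// save slot is fpr0_off (see fpResultOffset() in RegisterSaver), but this
// routine reloads only the integer result registers V0/V1.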
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  // Just restore the result registers.  Only used by deoptimization.  By
  // now any callee-save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration, so only the result registers need to be restored here.
  __ ld(V0, SP, v0_off * jintSize);
  __ ld(V1, SP, v1_off * jintSize);
  __ addiu(SP, SP, return_off * jintSize);
}

// Is the vector's size (in bytes) bigger than the size saved by default?
// (On x86, 16-byte XMM registers are saved by default with fxsave/fxrstor;
// this port keeps the same 16-byte threshold.)
bool SharedRuntime::is_wide_vector(int size) {
  return size > 16;
}

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions.  Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.

static int reg2offset_in(VMReg r) {
  // Account for the saved FP and return address.
  // This should really be in_preserve_stack_slots.
  return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
// quantities.  Values less than SharedInfo::stack0 are registers, those above
// refer to 4-byte stack slots.  All stack slots are based off of the stack
// pointer, as frame sizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp); VMRegImpl::stack0+1
// refers to the memory word 4 bytes higher.  Register values less than
// RegisterImpl::number_of_registers are the integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
// units regardless of build.

// ---------------------------------------------------------------------------
// The compiled Java calling convention:
// pass oop/int args in registers T0, A0 - A7 (up to
// Argument::n_register_parameters of them) and float/double args in
// F12 - F19 (up to Argument::n_float_register_parameters); the remaining
// args go to the stack, with longs and doubles taking two slots each.
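// Illustrative mapping under the loop below (an example, not normative):
// for a signature (int, long, float) the assignments are
//   int   -> INT_ArgReg[0] = T0
//   long  -> INT_ArgReg[1] = A0   (its trailing T_VOID half is set_bad())
//   float -> FP_ArgReg[2]  = F14
// because the single 'args' counter indexes both register arrays.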
aoqi@1: aoqi@1: int SharedRuntime::java_calling_convention(const BasicType *sig_bt, aoqi@1: VMRegPair *regs, aoqi@1: int total_args_passed, aoqi@1: int is_outgoing) { aoqi@1: //#define aoqi_test aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" SharedRuntime::%s :%d, total_args_passed: %d", __func__, __LINE__, total_args_passed); aoqi@1: #endif aoqi@1: aoqi@1: // Create the mapping between argument positions and aoqi@1: // registers. aoqi@1: //static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { aoqi@1: static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { aoqi@1: T0, A0, A1, A2, A3, A4, A5, A6, A7 aoqi@1: }; aoqi@1: //static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { aoqi@1: static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { aoqi@1: F12, F13, F14, F15, F16, F17, F18, F19 aoqi@1: }; aoqi@1: aoqi@1: aoqi@1: uint args = 0; aoqi@1: uint stk_args = 0; // inc by 2 each time aoqi@1: aoqi@1: for (int i = 0; i < total_args_passed; i++) { aoqi@1: switch (sig_bt[i]) { aoqi@1: case T_VOID: aoqi@1: // halves of T_LONG or T_DOUBLE aoqi@1: assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); aoqi@1: regs[i].set_bad(); aoqi@1: break; aoqi@1: case T_BOOLEAN: aoqi@1: case T_CHAR: aoqi@1: case T_BYTE: aoqi@1: case T_SHORT: aoqi@1: case T_INT: aoqi@1: if (args < Argument::n_register_parameters) { aoqi@1: regs[i].set1(INT_ArgReg[args++]->as_VMReg()); aoqi@1: } else { aoqi@1: regs[i].set1(VMRegImpl::stack2reg(stk_args)); aoqi@1: stk_args += 2; aoqi@1: } aoqi@1: break; aoqi@1: case T_LONG: aoqi@1: assert(sig_bt[i + 1] == T_VOID, "expecting half"); aoqi@1: // fall through aoqi@1: case T_OBJECT: aoqi@1: case T_ARRAY: aoqi@1: case T_ADDRESS: aoqi@1: if (args < Argument::n_register_parameters) { aoqi@1: regs[i].set2(INT_ArgReg[args++]->as_VMReg()); aoqi@1: } else { aoqi@1: regs[i].set2(VMRegImpl::stack2reg(stk_args)); aoqi@1: stk_args += 2; aoqi@1: } aoqi@1: break; aoqi@1: case T_FLOAT: aoqi@1: if (args < Argument::n_float_register_parameters) { aoqi@1: regs[i].set1(FP_ArgReg[args++]->as_VMReg()); aoqi@1: } else { aoqi@1: regs[i].set1(VMRegImpl::stack2reg(stk_args)); aoqi@1: stk_args += 2; aoqi@1: } aoqi@1: break; aoqi@1: case T_DOUBLE: aoqi@1: assert(sig_bt[i + 1] == T_VOID, "expecting half"); aoqi@1: if (args < Argument::n_float_register_parameters) { aoqi@1: regs[i].set2(FP_ArgReg[args++]->as_VMReg()); aoqi@1: } else { aoqi@1: regs[i].set2(VMRegImpl::stack2reg(stk_args)); aoqi@1: stk_args += 2; aoqi@1: } aoqi@1: break; aoqi@1: default: aoqi@1: ShouldNotReachHere(); aoqi@1: break; aoqi@1: } aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" SharedRuntime::%s :%d, sig_bt[%d]: %d, reg[%d]:%d|%d, stk_args:%d", __func__, __LINE__, i, sig_bt[i], i, regs[i].first(), regs[i].second(), stk_args); aoqi@1: #endif aoqi@1: } aoqi@1: aoqi@1: return round_to(stk_args, 2); aoqi@1: /* aoqi@1: // Starting stack position for args on stack aoqi@1: uint stack = 0; aoqi@1: aoqi@1: // Pass first five oop/int args in registers T0, A0 - A3. aoqi@1: uint reg_arg0 = 9999; aoqi@1: uint reg_arg1 = 9999; aoqi@1: uint reg_arg2 = 9999; aoqi@1: uint reg_arg3 = 9999; aoqi@1: uint reg_arg4 = 9999; aoqi@1: aoqi@1: aoqi@1: // Pass doubles & longs &float ligned on the stack. 
}

// Helper class, mostly to avoid passing masm everywhere and to handle the
// store displacement overflow logic for LP64.
class AdapterGenerator {
  MacroAssembler *masm;
#ifdef _LP64
  Register Rdisp;
  void set_Rdisp(Register r) { Rdisp = r; }
#endif // _LP64

  void patch_callers_callsite();

  // base+st_off points to top of argument
  int arg_offset(const int st_off) { return st_off; }
  int next_arg_offset(const int st_off) {
    return st_off - Interpreter::stackElementSize;
  }

#ifdef _LP64
  // On LP64, argument slot values are loaded first into a register
  // because they might not fit into the displacement.
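  // (MIPS load/store instructions encode a signed 16-bit displacement, so
  // offsets beyond roughly +/-32KB must first be materialized in a register.)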
  Register arg_slot(const int st_off);
  Register next_arg_slot(const int st_off);
#else
  int arg_slot(const int st_off)      { return arg_offset(st_off); }
  int next_arg_slot(const int st_off) { return next_arg_offset(st_off); }
#endif // _LP64

  // Stores long into offset pointed to by base
  void store_c2i_long(Register r, Register base,
                      const int st_off, bool is_stack);
  void store_c2i_object(Register r, Register base,
                        const int st_off);
  void store_c2i_int(Register r, Register base,
                     const int st_off);
  void store_c2i_double(VMReg r_2,
                        VMReg r_1, Register base, const int st_off);
  void store_c2i_float(FloatRegister f, Register base,
                       const int st_off);

 public:
  void gen_c2i_adapter(int total_args_passed,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs,
                       Label& skip_fixup);
  void gen_i2c_adapter(int total_args_passed,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs);

  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};


// Patch the caller's callsite with entry to compiled code if it exists.
void AdapterGenerator::patch_callers_callsite() {
  Label L;
  // FIXME (from the x86 original): what is stored in eax?
  __ verify_oop(Rmethod);
  __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset()));
  __ beq(AT, R0, L);
  __ delayed()->nop();
  // Schedule the branch target address early.
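  // What follows (as sketched by the code below): save state with pushad(),
  // pass the target Method* (A0) and the captured return address (A1) to
  // SharedRuntime::fixup_callers_callsite() so the VM can patch the call
  // site, then restore state and fall through to the bind(L) exit.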
  // Call into the VM to patch the caller, then jump to compiled callee.
  // V0 isn't live, so capture the return address while we easily can.
  __ move(V0, RA);

  __ pushad();
#ifdef COMPILER2
  // C2 may leave the FPU stack dirty
  __ empty_FPU_stack();
#endif /* COMPILER2 */

  // The VM needs the caller's callsite and the target method.
  __ move(A0, Rmethod);
  __ move(A1, V0);
  // We should preserve the return address.
  __ verify_oop(Rmethod);
  __ move(S0, SP);
  __ move(AT, -(StackAlignmentInBytes)); // align the stack
  __ andr(SP, SP, AT);
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite),
          relocInfo::runtime_call_type);

  __ delayed()->nop();
  __ move(SP, S0);
  __ popad();
  __ bind(L);
}

#ifdef _LP64
Register AdapterGenerator::arg_slot(const int st_off) {
  Unimplemented();
}

Register AdapterGenerator::next_arg_slot(const int st_off) {
  Unimplemented();
}
#endif // _LP64

// Stores long into offset pointed to by base
void AdapterGenerator::store_c2i_long(Register r, Register base,
                                      const int st_off, bool is_stack) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_object(Register r, Register base,
                                        const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_int(Register r, Register base,
                                     const int st_off) {
  Unimplemented();
}

// Stores into offset pointed to by base
void AdapterGenerator::store_c2i_double(VMReg r_2,
                                        VMReg r_1, Register base, const int st_off) {
  Unimplemented();
}

void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
                                       const int st_off) {
  Unimplemented();
}

void AdapterGenerator::gen_c2i_adapter(
                            int total_args_passed,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  // However we will run interpreted if we come thru here.  The next pass
  // thru the call site will run compiled.  If we ran compiled here then
  // we can (theoretically) do endless i2c->c2i->i2c transitions during
  // deopt/uncommon trap cycles.  If we always go interpreted here then
  // we can have at most one and don't need to play any tricks to keep
  // from endlessly growing the stack.
  //
  // Actually if we detected that we had an i2c->c2i transition here we
  // ought to be able to reset the world back to the state of the interpreted
  // call and not bother building another interpreter arg area.  We don't
  // do that at this point.

  patch_callers_callsite();

  __ bind(skip_fixup);

#ifdef COMPILER2
  __ empty_FPU_stack();
#endif /* COMPILER2 */
  // Since all args are passed on the stack,
  // total_args_passed * Interpreter::stackElementSize is the space we need.
  int extraspace = total_args_passed * Interpreter::stackElementSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Get return address
  __ move(V0, RA);
  // Set senderSP value; refer to interpreter_mips.cpp: generate_asm_entry.
  __ move(Rsender, SP);
  __ addi(SP, SP, -extraspace);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE),
             "missing half");
      continue;
    }

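    // (On this 64-bit port Interpreter::stackElementSize should be one
    // machine word, 8 bytes, so each Java argument word below occupies a
    // full interpreter stack slot.)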
aoqi@1: int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off); aoqi@1: #endif aoqi@1: // Say 4 args: aoqi@1: // i st_off aoqi@1: // 0 12 T_LONG aoqi@1: // 1 8 T_VOID aoqi@1: // 2 4 T_OBJECT aoqi@1: // 3 0 T_BOOL aoqi@1: VMReg r_1 = regs[i].first(); aoqi@1: VMReg r_2 = regs[i].second(); aoqi@1: if (!r_1->is_valid()) { aoqi@1: assert(!r_2->is_valid(), ""); aoqi@1: continue; aoqi@1: } aoqi@1: aoqi@1: if (r_1->is_stack()) { aoqi@1: // memory to memory use fpu stack top aoqi@1: int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_stack, ld_off:%x", __func__, __LINE__, ld_off); aoqi@1: #endif aoqi@1: aoqi@1: if (!r_2->is_valid()) { aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" AdapterGenerator::%s :%d, !r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off); aoqi@1: #endif aoqi@1: __ ld_ptr(AT, SP, ld_off); aoqi@1: __ st_ptr(AT, SP, st_off); aoqi@1: //tag_stack(sig_bt[i], st_off); aoqi@1: } else { aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" AdapterGenerator::%s :%d, r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off); aoqi@1: #endif aoqi@1: aoqi@1: // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW aoqi@1: // st_off == MSW, st_off-wordSize == LSW aoqi@1: aoqi@1: int next_off = st_off - Interpreter::stackElementSize; aoqi@1: /* aoqi@1: __ lw(AT, SP, ld_off); aoqi@1: __ sw(AT, SP, next_off); aoqi@1: __ lw(AT, SP, ld_off + wordSize); aoqi@1: __ sw(AT, SP, st_off); aoqi@1: */ aoqi@1: __ ld_ptr(AT, SP, ld_off); aoqi@1: __ st_ptr(AT, SP, st_off); aoqi@1: aoqi@1: /* Ref to is_Register condition */ aoqi@1: if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) aoqi@1: __ st_ptr(AT,SP,st_off - 8); aoqi@1: //tag_stack(sig_bt[i], next_off); aoqi@1: } aoqi@1: } else if (r_1->is_Register()) { aoqi@1: Register r = r_1->as_Register(); aoqi@1: if (!r_2->is_valid()) { aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, !r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off); aoqi@1: #endif aoqi@1: // __ movl(Address(esp, st_off), r); aoqi@1: __ sd(r,SP, st_off); //aoqi_test FIXME aoqi@1: //tag_stack(sig_bt[i], st_off); aoqi@1: } else { aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off); aoqi@1: #endif aoqi@1: //FIXME, mips will not enter here aoqi@1: // long/double in gpr aoqi@1: __ sd(r,SP, st_off); //aoqi_test FIXME aoqi@1: /* Jin: In [java/util/zip/ZipFile.java] aoqi@1: aoqi@1: private static native long open(String name, int mode, long lastModified); aoqi@1: private static native int getTotal(long jzfile); aoqi@1: * aoqi@1: * We need to transfer T_LONG paramenters from a compiled method to a native method. 
         * It's a complex process:
         *
         *   Caller -> lir_static_call -> gen_resolve_stub
         *          -> -- resolve_static_call_C
         *             `- gen_c2i_adapter()  [*]
         *                |
         *                `- AdapterHandlerLibrary::get_create_adapter_index
         *          -> generate_native_entry
         *          -> InterpreterRuntime::SignatureHandlerGenerator::pass_long  [**]
         *
         * In [**], the T_LONG parameter is stored on the stack as:
         *
         *     (high)
         *      |         |
         *      -----------
         *      | 8 bytes |
         *      | (void)  |
         *      -----------
         *      | 8 bytes |
         *      | (long)  |
         *      -----------
         *      |         |
         *     (low)
         *
         * However, the sequence is reversed here:
         *
         *     (high)
         *      |         |
         *      -----------
         *      | 8 bytes |
         *      | (long)  |
         *      -----------
         *      | 8 bytes |
         *      | (void)  |
         *      -----------
         *      |         |
         *     (low)
         *
         * So I stored another 8 bytes in the T_VOID slot.  It can then be
         * accessed from generate_native_entry().
         */
        if (sig_bt[i] == T_LONG)
          __ sd(r, SP, st_off - 8);
      }
    } else if (r_1->is_FloatRegister()) {
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ swc1(fr, SP, st_off);
      else {
        __ sdc1(fr, SP, st_off);
        __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots
      }
    }
  }

  // Schedule the branch target address early.
  __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()));
  // And repush original return address
  __ move(RA, V0);
  __ jr(AT);
  __ delayed()->nop();
}

void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout.  Lesp was saved by the calling I-frame and will be restored on
  // return.  Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will.  After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention.  Finally, end in a jump to the compiled code.  The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i.  Azul allowed this but we do not.  If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the mips side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.


  __ move(T9, SP);

  // Cut-out for having no stack args.  Since oop/int args may all fit in
  // registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater than VMRegImpl::stack0.
    // Those in registers are below.  By subtracting stack0, we either get a
    // negative number (all values in registers) or the maximum stack slot
    // accessed.
    // Convert 4-byte stack slots to words.
    // Did MIPS need the round? FIXME (aoqi)
    comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    __ daddi(SP, SP, -comp_words_on_stack * wordSize);
  }

  // Align the outgoing SP
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  // Put the saved SP in another register
  const Register saved_sp = V0;
  __ move(saved_sp, T9);


  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset()));

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the stack.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    // FIXME (aoqi): just delete the assert
    //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
#ifdef aoqi_test
    tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, ld_off:%d, next_off: %d", __func__, __LINE__, i, sig_bt[i], total_args_passed, ld_off, next_off);
#endif
    if (r_1->is_stack()) {
      // Convert the stack slot to an SP offset.
      // NOTICE: wordSize is deliberately NOT added here to account for the
      // return address.
      int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;

      // We can use V0 as a temp here because compiled code doesn't
      // need V0 as an input, and if we end up going thru a c2i
      // because of a miss a reasonable value of V0 will be generated.
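      // Direction of the copies below: load from the interpreter's argument
      // area (saved_sp, the incoming I-frame) and store into the outgoing
      // compiled-convention slot at SP + st_off.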
      if (!r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() !r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        __ ld(AT, saved_sp, ld_off);
        __ sd(AT, SP, st_off);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off);
#endif
        // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
        // are accessed as negative, so the LSW is at the LOW address.

        // ld_off is MSW so get LSW
        // st_off is LSW (i.e. reg.first())

        /* 2012/4/9 Jin: In [./org/eclipse/swt/graphics/GC.java]

             void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight,
                 int destX, int destY, int destWidth, int destHeight,
                 boolean simple,
                 int imgWidth, int imgHeight,
                 long maskPixmap,    <-- Pass T_LONG in stack
                 int maskType);

         * Before this modification, Eclipse displayed icons with a solid
         * black background.
         */
        __ ld(AT, saved_sp, ld_off);
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE)
          __ ld(AT, saved_sp, ld_off - 8);
        __ sd(AT, SP, st_off);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      if (r_2->is_valid()) {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        // Remember: r_1 is the low address (and LSB on mips), so r_2 gets
        // loaded from the high address regardless of the platform.
        assert(r_2->as_Register() == r_1->as_Register(), "");
        __ ld(r, saved_sp, ld_off);

        /* Jin: For the T_LONG type, the real layout is as below:

             (high)
              |         |
              -----------
              | 8 bytes |
              | (void)  |
              -----------
              | 8 bytes |
              | (long)  |
              -----------
              |         |
             (low)
         *
         * We should load the low 8 bytes.
         */
        if (sig_bt[i] == T_LONG)
          __ ld(r, saved_sp, ld_off - 8);
      } else {
#ifdef aoqi_test
        tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() !r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed);
#endif
        __ lw(r, saved_sp, ld_off);
      }
    } else if (r_1->is_FloatRegister()) {  // Float Register
      assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register");

      FloatRegister fr = r_1->as_FloatRegister();
      if (sig_bt[i] == T_FLOAT)
        __ lwc1(fr, saved_sp, ld_off);
      else {
        __ ldc1(fr, saved_sp, ld_off);
        __ ldc1(fr, saved_sp, ld_off - 8);
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here.  If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code.  Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible.  So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.
  __ get_thread(T8);
  __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset()));

  // Move the methodOop to V0 in case we end up in a c2i adapter.
  // The c2i adapters expect the methodOop in V0 (c2) because c2's
  // resolve stubs return the result (the method) in V0.
  // I'd love to fix this.
  __ move(V0, Rmethod);
  __ jr(T9);
  __ delayed()->nop();
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  AdapterGenerator agen(masm);

  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);


  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know Rmethod holds the methodOop.
  // The args start out packed in the compiled layout.  They need to be
  // unpacked into the interpreter layout.  This will almost always require
  // some stack space.  We grow the current (compiled) stack, then repack
  // the args.  We finally end in a jump to the generic interpreter entry
  // point.  On exit from the interpreter, the interpreter will restore our
  // SP (lest the compiled code, which relies solely on SP and not FP, get
  // sick).
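  // The unverified entry below implements the inline-cache check: load the
  // receiver's klass, compare it against the klass cached in the
  // CompiledICHolder, and jump to the ic-miss stub on mismatch.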
aoqi@1: aoqi@1: address c2i_unverified_entry = __ pc(); aoqi@1: Label skip_fixup; aoqi@1: { aoqi@1: Register holder = T1; aoqi@1: Register receiver = T0; aoqi@1: Register temp = T8; aoqi@1: address ic_miss = SharedRuntime::get_ic_miss_stub(); aoqi@1: aoqi@1: Label missed; aoqi@1: aoqi@1: __ verify_oop(holder); aoqi@1: // __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes())); aoqi@1: //__ ld_ptr(temp, receiver, oopDesc::klass_offset_in_bytes()); aoqi@1: //add for compressedoops aoqi@1: __ load_klass(temp, receiver); aoqi@1: __ verify_oop(temp); aoqi@1: aoqi@1: // __ cmpl(temp, Address(holder, CompiledICHolder::holder_klass_offset())); aoqi@1: __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); aoqi@1: //__ movl(ebx, Address(holder, CompiledICHolder::holder_method_offset())); aoqi@1: __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_method_offset()); aoqi@1: //__ jcc(Assembler::notEqual, missed); aoqi@1: __ bne(AT, temp, missed); aoqi@1: __ delayed()->nop(); aoqi@1: // Method might have been compiled since the call site was patched to aoqi@1: // interpreted if that is the case treat it as a miss so we can get aoqi@1: // the call site corrected. aoqi@1: //__ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD); aoqi@1: //__ jcc(Assembler::equal, skip_fixup); aoqi@1: __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); aoqi@1: __ beq(AT, R0, skip_fixup); aoqi@1: __ delayed()->nop(); aoqi@1: __ bind(missed); aoqi@1: // __ move(AT, (int)&jerome7); aoqi@1: // __ sw(RA, AT, 0); aoqi@1: aoqi@1: __ jmp(ic_miss, relocInfo::runtime_call_type); aoqi@1: __ delayed()->nop(); aoqi@1: } aoqi@1: aoqi@1: address c2i_entry = __ pc(); aoqi@1: aoqi@1: agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); aoqi@1: aoqi@1: __ flush(); aoqi@1: return AdapterHandlerLibrary::new_entry(fingerprint,i2c_entry, c2i_entry, c2i_unverified_entry); aoqi@1: aoqi@1: } aoqi@1: /* aoqi@1: // Helper function for native calling conventions aoqi@1: static VMReg int_stk_helper( int i ) { aoqi@1: // Bias any stack based VMReg we get by ignoring the window area aoqi@1: // but not the register parameter save area. aoqi@1: // aoqi@1: // This is strange for the following reasons. We'd normally expect aoqi@1: // the calling convention to return an VMReg for a stack slot aoqi@1: // completely ignoring any abi reserved area. C2 thinks of that aoqi@1: // abi area as only out_preserve_stack_slots. This does not include aoqi@1: // the area allocated by the C abi to store down integer arguments aoqi@1: // because the java calling convention does not use it. So aoqi@1: // since c2 assumes that there are only out_preserve_stack_slots aoqi@1: // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack aoqi@1: // location the c calling convention must add in this bias amount aoqi@1: // to make up for the fact that the out_preserve_stack_slots is aoqi@1: // insufficient for C calls. What a mess. I sure hope those 6 aoqi@1: // stack words were worth it on every java call! aoqi@1: aoqi@1: // Another way of cleaning this up would be for out_preserve_stack_slots aoqi@1: // to take a parameter to say whether it was C or java calling conventions. aoqi@1: // Then things might look a little better (but not much). 
aoqi@1: aoqi@1: int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM; aoqi@1: if( mem_parm_offset < 0 ) { aoqi@1: return as_oRegister(i)->as_VMReg(); aoqi@1: } else { aoqi@1: int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word; aoqi@1: // Now return a biased offset that will be correct when out_preserve_slots is added back in aoqi@1: return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots()); aoqi@1: } aoqi@1: } aoqi@1: */ aoqi@1: aoqi@1: aoqi@1: int SharedRuntime::c_calling_convention(const BasicType *sig_bt, aoqi@1: VMRegPair *regs, aoqi@1: VMRegPair *regs2, aoqi@1: int total_args_passed) { aoqi@1: assert(regs2 == NULL, "not needed on MIPS"); aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed); aoqi@1: #endif aoqi@1: // Return the number of VMReg stack_slots needed for the args. aoqi@1: // This value does not include an abi space (like register window aoqi@1: // save area). aoqi@1: aoqi@1: // The native convention is V8 if !LP64 aoqi@1: // The LP64 convention is the V9 convention which is slightly more sane. aoqi@1: aoqi@1: // We return the amount of VMReg stack slots we need to reserve for all aoqi@1: // the arguments NOT counting out_preserve_stack_slots. Since we always aoqi@1: // have space for storing at least 6 registers to memory we start with that. aoqi@1: // See int_stk_helper for a further discussion. aoqi@1: // We return the amount of VMRegImpl stack slots we need to reserve for all aoqi@1: // the arguments NOT counting out_preserve_stack_slots. aoqi@1: static const Register INT_ArgReg[Argument::n_register_parameters] = { aoqi@1: A0, A1, A2, A3, A4, A5, A6, A7 aoqi@1: }; aoqi@1: static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { aoqi@1: F12, F13, F14, F15, F16, F17, F18, F19 aoqi@1: }; aoqi@1: uint args = 0; aoqi@1: uint stk_args = 0; // inc by 2 each time aoqi@1: aoqi@1: /* Example: aoqi@1: --- n java.lang.UNIXProcess::forkAndExec aoqi@1: private native int forkAndExec(byte[] prog, aoqi@1: byte[] argBlock, int argc, aoqi@1: byte[] envBlock, int envc, aoqi@1: byte[] dir, aoqi@1: boolean redirectErrorStream, aoqi@1: FileDescriptor stdin_fd, aoqi@1: FileDescriptor stdout_fd, aoqi@1: FileDescriptor stderr_fd) aoqi@1: JNIEXPORT jint JNICALL aoqi@1: Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, aoqi@1: jobject process, aoqi@1: jbyteArray prog, aoqi@1: jbyteArray argBlock, jint argc, aoqi@1: jbyteArray envBlock, jint envc, aoqi@1: jbyteArray dir, aoqi@1: jboolean redirectErrorStream, aoqi@1: jobject stdin_fd, aoqi@1: jobject stdout_fd, aoqi@1: jobject stderr_fd) aoqi@1: aoqi@1: ::c_calling_convention aoqi@1: 0: // env <-- a0 aoqi@1: 1: L // klass/obj <-- t0 => a1 aoqi@1: 2: [ // prog[] <-- a0 => a2 aoqi@1: 3: [ // argBlock[] <-- a1 => a3 aoqi@1: 4: I // argc aoqi@1: 5: [ // envBlock[] <-- a3 => a5 aoqi@1: 6: I // envc aoqi@1: 7: [ // dir[] <-- a5 => a7 aoqi@1: 8: Z // redirectErrorStream a6 => sp[0] aoqi@1: 9: L // stdin a7 => sp[8] aoqi@1: 10: L // stdout fp[16] => sp[16] aoqi@1: 11: L // stderr fp[24] => sp[24] aoqi@1: */ aoqi@1: for (int i = 0; i < total_args_passed; i++) { aoqi@1: switch (sig_bt[i]) { aoqi@1: case T_VOID: // Halves of longs and doubles aoqi@1: assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); aoqi@1: regs[i].set_bad(); aoqi@1: break; aoqi@1: case T_BOOLEAN: aoqi@1: case T_CHAR: aoqi@1: case T_BYTE: aoqi@1: case T_SHORT: aoqi@1: 
case T_INT: aoqi@1: if (args < Argument::n_register_parameters) { aoqi@1: regs[i].set1(INT_ArgReg[args++]->as_VMReg()); aoqi@1: } else { aoqi@1: regs[i].set1(VMRegImpl::stack2reg(stk_args)); aoqi@1: stk_args += 2; aoqi@1: } aoqi@1: break; aoqi@1: case T_LONG: aoqi@1: assert(sig_bt[i + 1] == T_VOID, "expecting half"); aoqi@1: // fall through aoqi@1: case T_OBJECT: aoqi@1: case T_ARRAY: aoqi@1: case T_ADDRESS: aoqi@1: case T_METADATA: aoqi@1: if (args < Argument::n_register_parameters) { aoqi@1: regs[i].set2(INT_ArgReg[args++]->as_VMReg()); aoqi@1: } else { aoqi@1: regs[i].set2(VMRegImpl::stack2reg(stk_args)); aoqi@1: stk_args += 2; aoqi@1: } aoqi@1: break; aoqi@1: case T_FLOAT: aoqi@1: if (args < Argument::n_float_register_parameters) { aoqi@1: regs[i].set1(FP_ArgReg[args++]->as_VMReg()); aoqi@1: } else { aoqi@1: regs[i].set1(VMRegImpl::stack2reg(stk_args)); aoqi@1: stk_args += 2; aoqi@1: } aoqi@1: break; aoqi@1: case T_DOUBLE: aoqi@1: assert(sig_bt[i + 1] == T_VOID, "expecting half"); aoqi@1: if (args < Argument::n_float_register_parameters) { aoqi@1: regs[i].set2(FP_ArgReg[args++]->as_VMReg()); aoqi@1: } else { aoqi@1: regs[i].set2(VMRegImpl::stack2reg(stk_args)); aoqi@1: stk_args += 2; aoqi@1: } aoqi@1: break; aoqi@1: default: aoqi@1: ShouldNotReachHere(); aoqi@1: break; aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: return round_to(stk_args, 2); aoqi@1: } aoqi@1: /* aoqi@1: int SharedRuntime::c_calling_convention_jni(const BasicType *sig_bt, aoqi@1: VMRegPair *regs, aoqi@1: int total_args_passed) { aoqi@1: // We return the amount of VMRegImpl stack slots we need to reserve for all aoqi@1: // the arguments NOT counting out_preserve_stack_slots. aoqi@1: bool unalign = 0; aoqi@1: uint stack = 0; // All arguments on stack aoqi@1: #ifdef aoqi_test aoqi@1: tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed); aoqi@1: #endif aoqi@1: aoqi@1: for( int i = 0; i < total_args_passed; i++) { aoqi@1: // From the type and the argument number (count) compute the location aoqi@1: switch( sig_bt[i] ) { aoqi@1: case T_BOOLEAN: aoqi@1: case T_CHAR: aoqi@1: case T_FLOAT: aoqi@1: case T_BYTE: aoqi@1: case T_SHORT: aoqi@1: case T_INT: aoqi@1: case T_OBJECT: aoqi@1: case T_ARRAY: aoqi@1: case T_ADDRESS: aoqi@1: regs[i].set1(VMRegImpl::stack2reg(stack++)); aoqi@1: unalign = !unalign; aoqi@1: break; aoqi@1: case T_LONG: aoqi@1: case T_DOUBLE: // The stack numbering is reversed from Java aoqi@1: // Since C arguments do not get reversed, the ordering for aoqi@1: // doubles on the stack must be opposite the Java convention aoqi@1: assert(sig_bt[i+1] == T_VOID, "missing Half" ); aoqi@1: if(unalign){ aoqi@1: stack += 1; aoqi@1: unalign = ! 
unalign; aoqi@1: } aoqi@1: regs[i].set2(VMRegImpl::stack2reg(stack)); aoqi@1: stack += 2; aoqi@1: break; aoqi@1: case T_VOID: regs[i].set_bad(); break; aoqi@1: default: aoqi@1: ShouldNotReachHere(); aoqi@1: break; aoqi@1: } aoqi@1: } aoqi@1: return stack; aoqi@1: } aoqi@1: */ aoqi@1: aoqi@1: // --------------------------------------------------------------------------- aoqi@1: void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { aoqi@1: // We always ignore the frame_slots arg and just use the space just below frame pointer aoqi@1: // which by this time is free to use aoqi@1: switch (ret_type) { aoqi@1: case T_FLOAT: aoqi@1: __ swc1(FSF, FP, -wordSize); aoqi@1: break; aoqi@1: case T_DOUBLE: aoqi@1: __ sdc1(FSF, FP, -wordSize ); aoqi@1: break; aoqi@1: case T_VOID: break; aoqi@1: case T_LONG: aoqi@1: __ sd(V0, FP, -wordSize); aoqi@1: break; aoqi@1: case T_OBJECT: aoqi@1: case T_ARRAY: aoqi@1: __ sd(V0, FP, -wordSize); aoqi@1: break; aoqi@1: default: { aoqi@1: __ sw(V0, FP, -wordSize); aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { aoqi@1: // We always ignore the frame_slots arg and just use the space just below frame pointer aoqi@1: // which by this time is free to use aoqi@1: switch (ret_type) { aoqi@1: case T_FLOAT: aoqi@1: __ lwc1(FSF, FP, -wordSize); aoqi@1: break; aoqi@1: case T_DOUBLE: aoqi@1: __ ldc1(FSF, FP, -wordSize ); aoqi@1: break; aoqi@1: case T_LONG: aoqi@1: __ ld(V0, FP, -wordSize); aoqi@1: break; aoqi@1: case T_VOID: break; aoqi@1: case T_OBJECT: aoqi@1: case T_ARRAY: aoqi@1: __ ld(V0, FP, -wordSize); aoqi@1: break; aoqi@1: default: { aoqi@1: __ lw(V0, FP, -wordSize); aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { aoqi@1: for ( int i = first_arg ; i < arg_count ; i++ ) { aoqi@1: if (args[i].first()->is_Register()) { aoqi@1: __ push(args[i].first()->as_Register()); aoqi@1: } else if (args[i].first()->is_FloatRegister()) { aoqi@1: __ push(args[i].first()->as_FloatRegister()); aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { aoqi@1: for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { aoqi@1: if (args[i].first()->is_Register()) { aoqi@1: __ pop(args[i].first()->as_Register()); aoqi@1: } else if (args[i].first()->is_FloatRegister()) { aoqi@1: __ pop(args[i].first()->as_FloatRegister()); aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: // A simple move of integer like type aoqi@1: static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { aoqi@1: if (src.first()->is_stack()) { aoqi@1: if (dst.first()->is_stack()) { aoqi@1: // stack to stack aoqi@1: __ lw(AT, FP, reg2offset_in(src.first())); aoqi@1: __ sd(AT,SP, reg2offset_out(dst.first())); aoqi@1: } else { aoqi@1: // stack to reg aoqi@1: //__ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); aoqi@1: __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); aoqi@1: } aoqi@1: } else if (dst.first()->is_stack()) { aoqi@1: // reg to stack aoqi@1: __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); aoqi@1: } else { aoqi@1: //__ mov(src.first()->as_Register(), dst.first()->as_Register()); aoqi@1: if (dst.first() != src.first()){ aoqi@1: __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() aoqi@1: } 
aoqi@1: } aoqi@1: } aoqi@1: /* aoqi@1: // On 64 bit we will store integer like items to the stack as aoqi@1: // 64 bits items (sparc abi) even though java would only store aoqi@1: // 32bits for a parameter. On 32bit it will simply be 32 bits aoqi@1: // So this routine will do 32->32 on 32bit and 32->64 on 64bit aoqi@1: static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { aoqi@1: if (src.first()->is_stack()) { aoqi@1: if (dst.first()->is_stack()) { aoqi@1: // stack to stack aoqi@1: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); aoqi@1: __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); aoqi@1: } else { aoqi@1: // stack to reg aoqi@1: __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); aoqi@1: } aoqi@1: } else if (dst.first()->is_stack()) { aoqi@1: // reg to stack aoqi@1: __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); aoqi@1: } else { aoqi@1: __ mov(src.first()->as_Register(), dst.first()->as_Register()); aoqi@1: } aoqi@1: } aoqi@1: */ aoqi@1: aoqi@1: // An oop arg. Must pass a handle not the oop itself aoqi@1: static void object_move(MacroAssembler* masm, aoqi@1: OopMap* map, aoqi@1: int oop_handle_offset, aoqi@1: int framesize_in_slots, aoqi@1: VMRegPair src, aoqi@1: VMRegPair dst, aoqi@1: bool is_receiver, aoqi@1: int* receiver_offset) { aoqi@1: aoqi@1: // must pass a handle. First figure out the location we use as a handle aoqi@1: aoqi@1: //FIXME, for mips, dst can be register aoqi@1: if (src.first()->is_stack()) { aoqi@1: // Oop is already on the stack as an argument aoqi@1: Register rHandle = V0; aoqi@1: Label nil; aoqi@1: //__ xorl(rHandle, rHandle); aoqi@1: __ xorr(rHandle, rHandle, rHandle); aoqi@1: //__ cmpl(Address(ebp, reg2offset_in(src.first())), NULL_WORD); aoqi@1: __ ld(AT, FP, reg2offset_in(src.first())); aoqi@1: //__ jcc(Assembler::equal, nil); aoqi@1: __ beq(AT,R0, nil); aoqi@1: __ delayed()->nop(); aoqi@1: // __ leal(rHandle, Address(ebp, reg2offset_in(src.first()))); aoqi@1: __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); aoqi@1: __ bind(nil); aoqi@1: //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle); aoqi@1: if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); aoqi@1: else __ move( (dst.first())->as_Register(),rHandle); aoqi@1: //if dst is register aoqi@1: //FIXME, do mips need out preserve stack slots? 
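aoqi@1:     // What the handle trick above builds, as a sketch (this is the standard
aoqi@1:     // HotSpot scheme, assumed unchanged in this port):
aoqi@1:     //   oop == NULL  ->  the native code receives a NULL jobject
aoqi@1:     //   oop != NULL  ->  the native code receives the address of the stack
aoqi@1:     //                    slot holding the oop, so GC can update the oop
aoqi@1:     //                    through the oop map entry recorded below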
aoqi@1: int offset_in_older_frame = src.first()->reg2stack() aoqi@1: + SharedRuntime::out_preserve_stack_slots(); aoqi@1: map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); aoqi@1: if (is_receiver) { aoqi@1: *receiver_offset = (offset_in_older_frame aoqi@1: + framesize_in_slots) * VMRegImpl::stack_slot_size; aoqi@1: } aoqi@1: } else { aoqi@1: // Oop is in an a register we must store it to the space we reserve aoqi@1: // on the stack for oop_handles aoqi@1: const Register rOop = src.first()->as_Register(); aoqi@1: assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); aoqi@1: // const Register rHandle = eax; aoqi@1: const Register rHandle = V0; aoqi@1: //Important: refer to java_calling_convertion aoqi@1: int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; aoqi@1: int offset = oop_slot*VMRegImpl::stack_slot_size; aoqi@1: Label skip; aoqi@1: // __ movl(Address(esp, offset), rOop); aoqi@1: __ sd( rOop , SP, offset ); aoqi@1: map->set_oop(VMRegImpl::stack2reg(oop_slot)); aoqi@1: // __ xorl(rHandle, rHandle); aoqi@1: __ xorr( rHandle, rHandle, rHandle); aoqi@1: //__ cmpl(rOop, NULL_WORD); aoqi@1: // __ jcc(Assembler::equal, skip); aoqi@1: __ beq(rOop, R0, skip); aoqi@1: __ delayed()->nop(); aoqi@1: // __ leal(rHandle, Address(esp, offset)); aoqi@1: __ lea(rHandle, Address(SP, offset)); aoqi@1: __ bind(skip); aoqi@1: // Store the handle parameter aoqi@1: //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle); aoqi@1: if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); aoqi@1: else __ move((dst.first())->as_Register(), rHandle); aoqi@1: //if dst is register aoqi@1: aoqi@1: if (is_receiver) { aoqi@1: *receiver_offset = offset; aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: // A float arg may have to do float reg int reg conversion aoqi@1: static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { aoqi@1: assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); aoqi@1: aoqi@1: if (src.first()->is_stack()) { aoqi@1: if(dst.first()->is_stack()){ aoqi@1: // __ movl(eax, Address(ebp, reg2offset_in(src.first()))); aoqi@1: __ lwc1(F12 , FP, reg2offset_in(src.first())); aoqi@1: // __ movl(Address(esp, reg2offset_out(dst.first())), eax); aoqi@1: __ swc1(F12 ,SP, reg2offset_out(dst.first())); aoqi@1: } aoqi@1: else aoqi@1: __ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); aoqi@1: } else { aoqi@1: // reg to stack aoqi@1: // __ movss(Address(esp, reg2offset_out(dst.first())), aoqi@1: // src.first()->as_XMMRegister()); aoqi@1: // __ movl(Address(esp, reg2offset_out(dst.first())), eax); aoqi@1: if(dst.first()->is_stack()) aoqi@1: __ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); aoqi@1: else aoqi@1: __ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); aoqi@1: } aoqi@1: } aoqi@1: /* aoqi@1: static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { aoqi@1: VMRegPair src_lo(src.first()); aoqi@1: VMRegPair src_hi(src.second()); aoqi@1: VMRegPair dst_lo(dst.first()); aoqi@1: VMRegPair dst_hi(dst.second()); aoqi@1: simple_move32(masm, src_lo, dst_lo); aoqi@1: simple_move32(masm, src_hi, dst_hi); aoqi@1: } aoqi@1: */ aoqi@1: // A long move aoqi@1: static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { aoqi@1: aoqi@1: // The only legal possibility for a long_move VMRegPair is: aoqi@1: // 1: two stack slots 
(possibly unaligned)
aoqi@1:   //  or 2: a single 64-bit integer register -- on this LP64 port the C
aoqi@1:   //  calling convention can put a long in one register (see
aoqi@1:   //  c_calling_convention above), and the register branches below handle
aoqi@1:   //  that case; the commented-out lw/sw pairs are 32-bit leftovers that
aoqi@1:   //  moved the two halves separately.
aoqi@1:
aoqi@1:   if (src.first()->is_stack()) {
aoqi@1:     assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
aoqi@1:     // __ movl(eax, Address(ebp, reg2offset_in(src.first())));
aoqi@1:     if (dst.first()->is_stack()) {
aoqi@1:       __ ld(AT, FP, reg2offset_in(src.first()));
aoqi@1:       // __ movl(ebx, address(ebp, reg2offset_in(src.second())));
aoqi@1:       //__ lw(V0, FP, reg2offset_in(src.second()));
aoqi@1:       // __ movl(address(esp, reg2offset_out(dst.first())), eax);
aoqi@1:       __ sd(AT, SP, reg2offset_out(dst.first()));
aoqi@1:       // __ movl(address(esp, reg2offset_out(dst.second())), ebx);
aoqi@1:       //__ sw(V0, SP, reg2offset_out(dst.second()));
aoqi@1:     } else {
aoqi@1:       __ ld((dst.first())->as_Register(), FP, reg2offset_in(src.first()));
aoqi@1:       //__ lw( (dst.second())->as_Register(), FP, reg2offset_in(src.second()));
aoqi@1:     }
aoqi@1:   } else {
aoqi@1:     if (dst.first()->is_stack()) {
aoqi@1:       __ sd((src.first())->as_Register(), SP, reg2offset_out(dst.first()));
aoqi@1:       //__ sw( (src.second())->as_Register(), SP, reg2offset_out(dst.second()));
aoqi@1:     } else {
aoqi@1:       __ move((dst.first())->as_Register(), (src.first())->as_Register());
aoqi@1:       //__ move( (dst.second())->as_Register(), (src.second())->as_Register());
aoqi@1:     }
aoqi@1:   }
aoqi@1: }
aoqi@1:
aoqi@1: // A double move
aoqi@1: static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
aoqi@1:
aoqi@1:   // The painful thing here is that, like long_move, a VMRegPair might be
aoqi@1:   // split between kinds of location.  Because of the calling convention we
aoqi@1:   // know that src is either
aoqi@1:   // 1: a single physical register (an FPU register on this port; the
aoqi@1:   //    inherited comment said "xmm registers only")
aoqi@1:   // 2: two stack slots (possibly unaligned)
aoqi@1:   // and dst can be an FPU register or a pair of stack slots -- both cases
aoqi@1:   // are handled below, despite the inherited "dst can only be a pair of
aoqi@1:   // stack slots" note.
aoqi@1: aoqi@1: // assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || aoqi@1: // src.first()->is_stack()), "bad args"); aoqi@1: // assert(dst.first()->is_stack() || src.first()->is_stack()), "bad args"); aoqi@1: aoqi@1: if (src.first()->is_stack()) { aoqi@1: // source is all stack aoqi@1: // __ movl(eax, Address(ebp, reg2offset_in(src.first()))); aoqi@1: if( dst.first()->is_stack()){ aoqi@1: __ ldc1(F12, FP, reg2offset_in(src.first())); aoqi@1: //__ movl(ebx, Address(ebp, reg2offset_in(src.second()))); aoqi@1: //__ lwc1(F14, FP, reg2offset_in(src.second())); aoqi@1: aoqi@1: // __ movl(Address(esp, reg2offset_out(dst.first())), eax); aoqi@1: __ sdc1(F12, SP, reg2offset_out(dst.first())); aoqi@1: // __ movl(Address(esp, reg2offset_out(dst.second())), ebx); aoqi@1: //__ swc1(F14, SP, reg2offset_out(dst.second())); aoqi@1: } else{ aoqi@1: __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); aoqi@1: //__ lwc1( (dst.second())->as_FloatRegister(), FP, reg2offset_in(src.second())); aoqi@1: } aoqi@1: aoqi@1: } else { aoqi@1: // reg to stack aoqi@1: // No worries about stack alignment aoqi@1: // __ movsd(Address(esp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); aoqi@1: if( dst.first()->is_stack()){ aoqi@1: __ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); aoqi@1: //__ swc1( src.second()->as_FloatRegister(),SP, reg2offset_out(dst.second())); aoqi@1: } aoqi@1: else aoqi@1: __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); aoqi@1: //__ mov_s( dst.second()->as_FloatRegister(), src.second()->as_FloatRegister()); aoqi@1: aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: static void verify_oop_args(MacroAssembler* masm, aoqi@1: methodHandle method, aoqi@1: const BasicType* sig_bt, aoqi@1: const VMRegPair* regs) { aoqi@1: Register temp_reg = T9; // not part of any compiled calling seq aoqi@1: if (VerifyOops) { aoqi@1: for (int i = 0; i < method->size_of_parameters(); i++) { aoqi@1: if (sig_bt[i] == T_OBJECT || aoqi@1: sig_bt[i] == T_ARRAY) { aoqi@1: VMReg r = regs[i].first(); aoqi@1: assert(r->is_valid(), "bad oop arg"); aoqi@1: if (r->is_stack()) { aoqi@1: // __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); aoqi@1: __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); aoqi@1: __ verify_oop(temp_reg); aoqi@1: } else { aoqi@1: __ verify_oop(r->as_Register()); aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: static void gen_special_dispatch(MacroAssembler* masm, aoqi@1: methodHandle method, aoqi@1: const BasicType* sig_bt, aoqi@1: const VMRegPair* regs) { aoqi@1: verify_oop_args(masm, method, sig_bt, regs); aoqi@1: vmIntrinsics::ID iid = method->intrinsic_id(); aoqi@1: aoqi@1: // Now write the args into the outgoing interpreter space aoqi@1: bool has_receiver = false; aoqi@1: Register receiver_reg = noreg; aoqi@1: int member_arg_pos = -1; aoqi@1: Register member_reg = noreg; aoqi@1: int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); aoqi@1: if (ref_kind != 0) { aoqi@1: member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument aoqi@1: // member_reg = rbx; // known to be free at this point aoqi@1: member_reg = S3; // known to be free at this point aoqi@1: has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); aoqi@1: } else if (iid == vmIntrinsics::_invokeBasic) { aoqi@1: has_receiver = true; aoqi@1: } else { aoqi@1: fatal(err_msg_res("unexpected intrinsic id %d", 
iid)); aoqi@1: } aoqi@1: aoqi@1: if (member_reg != noreg) { aoqi@1: // Load the member_arg into register, if necessary. aoqi@1: SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); aoqi@1: VMReg r = regs[member_arg_pos].first(); aoqi@1: if (r->is_stack()) { aoqi@1: // __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); aoqi@1: __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); aoqi@1: } else { aoqi@1: // no data motion is needed aoqi@1: member_reg = r->as_Register(); aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: if (has_receiver) { aoqi@1: // Make sure the receiver is loaded into a register. aoqi@1: assert(method->size_of_parameters() > 0, "oob"); aoqi@1: assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); aoqi@1: VMReg r = regs[0].first(); aoqi@1: assert(r->is_valid(), "bad receiver arg"); aoqi@1: if (r->is_stack()) { aoqi@1: // Porting note: This assumes that compiled calling conventions always aoqi@1: // pass the receiver oop in a register. If this is not true on some aoqi@1: // platform, pick a temp and load the receiver from stack. aoqi@1: fatal("receiver always in a register"); aoqi@1: // receiver_reg = j_rarg0; // known to be free at this point aoqi@1: receiver_reg = SSR; // known to be free at this point aoqi@1: // __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); aoqi@1: __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); aoqi@1: } else { aoqi@1: // no data motion is needed aoqi@1: receiver_reg = r->as_Register(); aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: // Figure out which address we are really jumping to: aoqi@1: MethodHandles::generate_method_handle_dispatch(masm, iid, aoqi@1: receiver_reg, member_reg, /*for_compiler_entry:*/ true); aoqi@1: } aoqi@1: aoqi@1: // --------------------------------------------------------------------------- aoqi@1: // Generate a native wrapper for a given method. The method takes arguments aoqi@1: // in the Java compiled code convention, marshals them to the native aoqi@1: // convention (handlizes oops, etc), transitions to native, makes the call, aoqi@1: // returns to java state (possibly blocking), unhandlizes any result and aoqi@1: // returns. 
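aoqi@1: //
aoqi@1: // As an illustration (hypothetical method, not taken from this file): for an
aoqi@1: // instance method  int f(byte[] a, int b)  the wrapper turns the Java-side
aoqi@1: // argument list (receiver, a, b) into the C call
aoqi@1: //   f(JNIEnv*, jobject receiver_handle, jbyteArray a, jint b)
aoqi@1: // and for a static method the receiver handle is replaced by a handle to the
aoqi@1: // class mirror.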
aoqi@1: nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, aoqi@1: methodHandle method, aoqi@1: int compile_id, aoqi@1: BasicType *in_sig_bt, aoqi@1: VMRegPair *in_regs, aoqi@1: BasicType ret_type) { aoqi@1: aoqi@1: if (method->is_method_handle_intrinsic()) { aoqi@1: vmIntrinsics::ID iid = method->intrinsic_id(); aoqi@1: intptr_t start = (intptr_t)__ pc(); aoqi@1: int vep_offset = ((intptr_t)__ pc()) - start; aoqi@1: aoqi@1: gen_special_dispatch(masm, aoqi@1: method, aoqi@1: in_sig_bt, aoqi@1: in_regs); aoqi@1: aoqi@1: int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period aoqi@1: __ flush(); aoqi@1: int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually aoqi@1: return nmethod::new_native_nmethod(method, aoqi@1: compile_id, aoqi@1: masm->code(), aoqi@1: vep_offset, aoqi@1: frame_complete, aoqi@1: stack_slots / VMRegImpl::slots_per_word, aoqi@1: in_ByteSize(-1), aoqi@1: in_ByteSize(-1), aoqi@1: (OopMapSet*)NULL); aoqi@1: } aoqi@1: bool is_critical_native = true; aoqi@1: address native_func = method->critical_native_function(); aoqi@1: if (native_func == NULL) { aoqi@1: native_func = method->native_function(); aoqi@1: is_critical_native = false; aoqi@1: } aoqi@1: assert(native_func != NULL, "must have function"); aoqi@1: aoqi@1: // Native nmethod wrappers never take possesion of the oop arguments. aoqi@1: // So the caller will gc the arguments. The only thing we need an aoqi@1: // oopMap for is if the call is static aoqi@1: // aoqi@1: // An OopMap for lock (and class if static), and one for the VM call itself aoqi@1: OopMapSet *oop_maps = new OopMapSet(); aoqi@1: aoqi@1: // We have received a description of where all the java arg are located aoqi@1: // on entry to the wrapper. We need to convert these args to where aoqi@1: // the jni function will expect them. 
To figure out where they go aoqi@1: // we convert the java signature to a C signature by inserting aoqi@1: // the hidden arguments as arg[0] and possibly arg[1] (static method) aoqi@1: aoqi@1: const int total_in_args = method->size_of_parameters(); aoqi@1: int total_c_args = total_in_args; aoqi@1: if (!is_critical_native) { aoqi@1: total_c_args += 1; aoqi@1: if (method->is_static()) { aoqi@1: total_c_args++; aoqi@1: } aoqi@1: } else { aoqi@1: for (int i = 0; i < total_in_args; i++) { aoqi@1: if (in_sig_bt[i] == T_ARRAY) { aoqi@1: total_c_args++; aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); aoqi@1: VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); aoqi@1: BasicType* in_elem_bt = NULL; aoqi@1: aoqi@1: int argc = 0; aoqi@1: if (!is_critical_native) { aoqi@1: out_sig_bt[argc++] = T_ADDRESS; aoqi@1: if (method->is_static()) { aoqi@1: out_sig_bt[argc++] = T_OBJECT; aoqi@1: } aoqi@1: aoqi@1: for (int i = 0; i < total_in_args ; i++ ) { aoqi@1: out_sig_bt[argc++] = in_sig_bt[i]; aoqi@1: } aoqi@1: } else { aoqi@1: Thread* THREAD = Thread::current(); aoqi@1: in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); aoqi@1: SignatureStream ss(method->signature()); aoqi@1: for (int i = 0; i < total_in_args ; i++ ) { aoqi@1: if (in_sig_bt[i] == T_ARRAY) { aoqi@1: // Arrays are passed as int, elem* pair aoqi@1: out_sig_bt[argc++] = T_INT; aoqi@1: out_sig_bt[argc++] = T_ADDRESS; aoqi@1: Symbol* atype = ss.as_symbol(CHECK_NULL); aoqi@1: const char* at = atype->as_C_string(); aoqi@1: if (strlen(at) == 2) { aoqi@1: assert(at[0] == '[', "must be"); aoqi@1: switch (at[1]) { aoqi@1: case 'B': in_elem_bt[i] = T_BYTE; break; aoqi@1: case 'C': in_elem_bt[i] = T_CHAR; break; aoqi@1: case 'D': in_elem_bt[i] = T_DOUBLE; break; aoqi@1: case 'F': in_elem_bt[i] = T_FLOAT; break; aoqi@1: case 'I': in_elem_bt[i] = T_INT; break; aoqi@1: case 'J': in_elem_bt[i] = T_LONG; break; aoqi@1: case 'S': in_elem_bt[i] = T_SHORT; break; aoqi@1: case 'Z': in_elem_bt[i] = T_BOOLEAN; break; aoqi@1: default: ShouldNotReachHere(); aoqi@1: } aoqi@1: } aoqi@1: } else { aoqi@1: out_sig_bt[argc++] = in_sig_bt[i]; aoqi@1: in_elem_bt[i] = T_VOID; aoqi@1: } aoqi@1: if (in_sig_bt[i] != T_VOID) { aoqi@1: assert(in_sig_bt[i] == ss.type(), "must match"); aoqi@1: ss.next(); aoqi@1: } aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: // Now figure out where the args must be stored and how much stack space aoqi@1: // they require (neglecting out_preserve_stack_slots but space for storing aoqi@1: // the 1st six register arguments). It's weird see int_stk_helper. aoqi@1: // aoqi@1: int out_arg_slots; aoqi@1: //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); aoqi@1: out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); aoqi@1: aoqi@1: // Compute framesize for the wrapper. We need to handlize all oops in aoqi@1: // registers. We must create space for them here that is disjoint from aoqi@1: // the windowed save area because we have no control over when we might aoqi@1: // flush the window again and overwrite values that gc has since modified. aoqi@1: // (The live window race) aoqi@1: // aoqi@1: // We always just allocate 6 word for storing down these object. This allow aoqi@1: // us to simply record the base and use the Ireg number to decide which aoqi@1: // slot to use. (Note that the reg number is the inbound number not the aoqi@1: // outbound number). 
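aoqi@1: //
aoqi@1: // A worked example of the counts computed above (hypothetical signature): a
aoqi@1: // static, non-critical native taking (int, Object) gives
aoqi@1: //   total_in_args = 2
aoqi@1: //   total_c_args  = 2 + 1 (JNIEnv*) + 1 (class mirror) = 4
aoqi@1: //   out_sig_bt    = { T_ADDRESS, T_OBJECT, T_INT, T_OBJECT }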
aoqi@1: // We must shuffle args to match the native convention, and include var-args space.
aoqi@1:
aoqi@1:   // Calculate the total number of stack slots we will need.
aoqi@1:
aoqi@1:   // First count the abi requirement plus all of the outgoing args
aoqi@1:   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
aoqi@1:
aoqi@1:   // Now the space for the inbound oop handle area
aoqi@1:   int total_save_slots = 9 * VMRegImpl::slots_per_word;  // 9 arguments passed in registers
aoqi@1:   if (is_critical_native) {
aoqi@1:     // Critical natives may have to call out so they need a save area
aoqi@1:     // for register arguments.
aoqi@1:     int double_slots = 0;
aoqi@1:     int single_slots = 0;
aoqi@1:     for ( int i = 0; i < total_in_args; i++) {
aoqi@1:       if (in_regs[i].first()->is_Register()) {
aoqi@1:         const Register reg = in_regs[i].first()->as_Register();
aoqi@1:         switch (in_sig_bt[i]) {
aoqi@1:           case T_BOOLEAN:
aoqi@1:           case T_BYTE:
aoqi@1:           case T_SHORT:
aoqi@1:           case T_CHAR:
aoqi@1:           case T_INT:  single_slots++; break;
aoqi@1:           case T_ARRAY:  // specific to LP64 (7145024)
aoqi@1:           case T_LONG: double_slots++; break;
aoqi@1:           default:  ShouldNotReachHere();
aoqi@1:         }
aoqi@1:       } else if (in_regs[i].first()->is_FloatRegister()) {
aoqi@1:         switch (in_sig_bt[i]) {
aoqi@1:           case T_FLOAT:  single_slots++; break;
aoqi@1:           case T_DOUBLE: double_slots++; break;
aoqi@1:           default:  ShouldNotReachHere();
aoqi@1:         }
aoqi@1:       }
aoqi@1:     }
aoqi@1:     total_save_slots = double_slots * 2 + single_slots;
aoqi@1:     // align the save area
aoqi@1:     if (double_slots != 0) {
aoqi@1:       stack_slots = round_to(stack_slots, 2);
aoqi@1:     }
aoqi@1:   }
aoqi@1:
aoqi@1:   int oop_handle_offset = stack_slots;
aoqi@1:   // stack_slots += 9*VMRegImpl::slots_per_word; // T0, A0 ~ A7
aoqi@1:   stack_slots += total_save_slots;
aoqi@1:
aoqi@1:   // Now any space we need for handlizing a klass if static method
aoqi@1:
aoqi@1:   int klass_slot_offset = 0;
aoqi@1:   int klass_offset = -1;
aoqi@1:   int lock_slot_offset = 0;
aoqi@1:   bool is_static = false;
aoqi@1:   //int oop_temp_slot_offset = 0;
aoqi@1:
aoqi@1:   if (method->is_static()) {
aoqi@1:     klass_slot_offset = stack_slots;
aoqi@1:     stack_slots += VMRegImpl::slots_per_word;
aoqi@1:     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
aoqi@1:     is_static = true;
aoqi@1:   }
aoqi@1:
aoqi@1:   // Plus a lock if needed
aoqi@1:
aoqi@1:   if (method->is_synchronized()) {
aoqi@1:     lock_slot_offset = stack_slots;
aoqi@1:     stack_slots += VMRegImpl::slots_per_word;
aoqi@1:   }
aoqi@1:
aoqi@1:   // Now a place to save return value or as a temporary for any gpr -> fpr moves
aoqi@1:   // + 2 for return address (which we own) and saved ebp
aoqi@1:   //stack_slots += 2;
aoqi@1:   stack_slots += 2 + 9 * VMRegImpl::slots_per_word;  // (T0, A0, A1, A2, A3, A4, A5, A6, A7)
aoqi@1:
aoqi@1:   // Ok The space we have allocated will look like:
aoqi@1:   //
aoqi@1:   //
aoqi@1:   // FP-> |                     |
aoqi@1:   //      |---------------------|
aoqi@1:   //      | 2 slots for moves   |
aoqi@1:   //      |---------------------|
aoqi@1:   //      | lock box (if sync)  |
aoqi@1:   //      |---------------------| <- lock_slot_offset
aoqi@1:   //      | klass (if static)   |
aoqi@1:   //      |---------------------| <- klass_slot_offset
aoqi@1:   //      | oopHandle area      |
aoqi@1:   //      |---------------------| <- oop_handle_offset
aoqi@1:   //      | outbound memory     |
aoqi@1:   //      | based arguments     |
aoqi@1:   //      |                     |
aoqi@1:   //      |---------------------|
aoqi@1:   //      | vararg area         |
aoqi@1:   //      |---------------------|
aoqi@1:   //      |                     |
aoqi@1:   // SP-> | out_preserved_slots |
aoqi@1:   //
aoqi@1:   //
aoqi@1:
aoqi@1:
aoqi@1:   // Now compute actual number of stack words we
need rounding to make aoqi@1: // stack properly aligned. aoqi@1: stack_slots = round_to(stack_slots, StackAlignmentInSlots); aoqi@1: aoqi@1: int stack_size = stack_slots * VMRegImpl::stack_slot_size; aoqi@1: aoqi@1: intptr_t start = (intptr_t)__ pc(); aoqi@1: aoqi@1: aoqi@1: aoqi@1: // First thing make an ic check to see if we should even be here aoqi@1: address ic_miss = SharedRuntime::get_ic_miss_stub(); aoqi@1: aoqi@1: // We are free to use all registers as temps without saving them and aoqi@1: // restoring them except ebp. ebp is the only callee save register aoqi@1: // as far as the interpreter and the compiler(s) are concerned. aoqi@1: aoqi@1: //refer to register_mips.hpp:IC_Klass aoqi@1: const Register ic_reg = T1; aoqi@1: const Register receiver = T0; aoqi@1: Label hit; aoqi@1: Label exception_pending; aoqi@1: aoqi@1: __ verify_oop(receiver); aoqi@1: //__ lw(AT, receiver, oopDesc::klass_offset_in_bytes()); aoqi@1: //add for compressedoops aoqi@1: __ load_klass(AT, receiver); aoqi@1: __ beq(AT, ic_reg, hit); aoqi@1: __ delayed()->nop(); aoqi@1: __ jmp(ic_miss, relocInfo::runtime_call_type); aoqi@1: __ delayed()->nop(); aoqi@1: // verified entry must be aligned for code patching. aoqi@1: // and the first 5 bytes must be in the same cache line aoqi@1: // if we align at 8 then we will be sure 5 bytes are in the same line aoqi@1: __ align(8); aoqi@1: aoqi@1: __ bind(hit); aoqi@1: aoqi@1: aoqi@1: int vep_offset = ((intptr_t)__ pc()) - start; aoqi@1: #ifdef COMPILER1 aoqi@1: if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { aoqi@1: // Object.hashCode can pull the hashCode from the header word aoqi@1: // instead of doing a full VM transition once it's been computed. aoqi@1: // Since hashCode is usually polymorphic at call sites we can't do aoqi@1: // this optimization at the call site without a lot of work. aoqi@1: Label slowCase; aoqi@1: Register receiver = T0; aoqi@1: Register result = V0; aoqi@1: __ ld ( result, receiver, oopDesc::mark_offset_in_bytes()); aoqi@1: // check if locked aoqi@1: __ andi(AT, result, markOopDesc::unlocked_value); aoqi@1: __ beq(AT, R0, slowCase); aoqi@1: __ delayed()->nop(); aoqi@1: if (UseBiasedLocking) { aoqi@1: // Check if biased and fall through to runtime if so aoqi@1: __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); aoqi@1: __ bne(AT,R0, slowCase); aoqi@1: __ delayed()->nop(); aoqi@1: } aoqi@1: // get hash aoqi@1: __ li(AT, markOopDesc::hash_mask_in_place); aoqi@1: __ andr (AT, result, AT); aoqi@1: // test if hashCode exists aoqi@1: __ beq (AT, R0, slowCase); aoqi@1: __ delayed()->nop(); aoqi@1: __ shr(result, markOopDesc::hash_shift); aoqi@1: __ jr(RA); aoqi@1: __ delayed()->nop(); aoqi@1: __ bind (slowCase); aoqi@1: } aoqi@1: #endif // COMPILER1 aoqi@1: aoqi@1: // The instruction at the verified entry point must be 5 bytes or longer aoqi@1: // because it can be patched on the fly by make_non_entrant. The stack bang aoqi@1: // instruction fits that requirement. aoqi@1: aoqi@1: // Generate stack overflow check aoqi@1: aoqi@1: if (UseStackBanging) { aoqi@1: //this function will modify the value in A0 aoqi@1: __ push(A0); aoqi@1: __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); aoqi@1: __ pop(A0); aoqi@1: } else { aoqi@1: // need a 5 byte instruction to allow MT safe patching to non-entrant aoqi@1: __ nop(); aoqi@1: __ nop(); aoqi@1: __ nop(); aoqi@1: __ nop(); aoqi@1: __ nop(); aoqi@1: } aoqi@1: // Generate a new frame for the wrapper. aoqi@1: // do mips need this ? 
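aoqi@1:   // A reading of the frame-setup sequence below (a sketch; the exact
aoqi@1:   // prologue is whatever enter() emits in this port's macroAssembler):
aoqi@1:   //   1) record the caller SP in last_Java_sp,
aoqi@1:   //   2) round SP down to StackAlignmentInBytes,
aoqi@1:   //   3) enter() presumably saves RA/FP and establishes the new FP,
aoqi@1:   //   4) drop SP by (stack_size - 2*wordSize), the 2*wordSize being the
aoqi@1:   //      RA/FP pair enter() already stored.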
aoqi@1: #ifndef OPT_THREAD aoqi@1: __ get_thread(TREG); aoqi@1: #endif aoqi@1: //FIXME here aoqi@1: __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); aoqi@1: // -2 because return address is already present and so is saved ebp aoqi@1: __ move(AT, -(StackAlignmentInBytes)); aoqi@1: __ andr(SP, SP, AT); aoqi@1: aoqi@1: __ enter(); aoqi@1: __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); aoqi@1: aoqi@1: // Frame is now completed as far a size and linkage. aoqi@1: aoqi@1: int frame_complete = ((intptr_t)__ pc()) - start; aoqi@1: aoqi@1: // Calculate the difference between esp and ebp. We need to know it aoqi@1: // after the native call because on windows Java Natives will pop aoqi@1: // the arguments and it is painful to do esp relative addressing aoqi@1: // in a platform independent way. So after the call we switch to aoqi@1: // ebp relative addressing. aoqi@1: //FIXME actually , the fp_adjustment may not be the right, because andr(sp,sp,at)may change aoqi@1: //the SP aoqi@1: int fp_adjustment = stack_size - 2*wordSize; aoqi@1: aoqi@1: #ifdef COMPILER2 aoqi@1: // C2 may leave the stack dirty if not in SSE2+ mode aoqi@1: // if (UseSSE >= 2) { aoqi@1: // __ verify_FPU(0, "c2i transition should have clean FPU stack"); aoqi@1: //} else { aoqi@1: __ empty_FPU_stack(); aoqi@1: //} aoqi@1: #endif /* COMPILER2 */ aoqi@1: aoqi@1: // Compute the ebp offset for any slots used after the jni call aoqi@1: aoqi@1: int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; aoqi@1: // We use edi as a thread pointer because it is callee save and aoqi@1: // if we load it once it is usable thru the entire wrapper aoqi@1: // const Register thread = edi; aoqi@1: const Register thread = TREG; aoqi@1: aoqi@1: // We use esi as the oop handle for the receiver/klass aoqi@1: // It is callee save so it survives the call to native aoqi@1: aoqi@1: // const Register oop_handle_reg = esi; aoqi@1: const Register oop_handle_reg = S4; aoqi@1: if (is_critical_native) { aoqi@1: __ stop("generate_native_wrapper in sharedRuntime <2>"); aoqi@1: //TODO:Fu aoqi@1: /* aoqi@1: check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, aoqi@1: oop_handle_offset, oop_maps, in_regs, in_sig_bt); aoqi@1: */ aoqi@1: } aoqi@1: aoqi@1: #ifndef OPT_THREAD aoqi@1: __ get_thread(thread); aoqi@1: #endif aoqi@1: aoqi@1: // aoqi@1: // We immediately shuffle the arguments so that any vm call we have to aoqi@1: // make from here on out (sync slow path, jvmpi, etc.) we will have aoqi@1: // captured the oops from our caller and have a valid oopMap for aoqi@1: // them. aoqi@1: aoqi@1: // ----------------- aoqi@1: // The Grand Shuffle aoqi@1: // aoqi@1: // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* aoqi@1: // and, if static, the class mirror instead of a receiver. This pretty much aoqi@1: // guarantees that register layout will not match (and mips doesn't use reg aoqi@1: // parms though amd does). Since the native abi doesn't use register args aoqi@1: // and the java conventions does we don't have to worry about collisions. aoqi@1: // All of our moved are reg->stack or stack->stack. aoqi@1: // We ignore the extra arguments during the shuffle and handle them at the aoqi@1: // last moment. The shuffle is described by the two calling convention aoqi@1: // vectors we have in our possession. We simply walk the java vector to aoqi@1: // get the source locations and the c vector to get the destinations. aoqi@1: aoqi@1: int c_arg = method->is_static() ? 
2 : 1 ;
aoqi@1:
aoqi@1:   // Record SP-based slot for receiver on stack for non-static methods
aoqi@1:   int receiver_offset = -1;
aoqi@1:
aoqi@1:   // This is a trick. We double the stack slots so we can claim
aoqi@1:   // the oops in the caller's frame. Since we are sure to have
aoqi@1:   // more args than the caller, doubling is enough to make
aoqi@1:   // sure we can capture all the incoming oop args from the
aoqi@1:   // caller.
aoqi@1:   //
aoqi@1:   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
aoqi@1:
aoqi@1:   // Mark location of rbp (someday)
aoqi@1:   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
aoqi@1:
aoqi@1:   // (x86 leftover note) Use eax, ebx as temporaries during any memory-memory
aoqi@1:   // moves we have to do; all inbound args are referenced based on rbp and
aoqi@1:   // all outbound args via rsp.
aoqi@1:
aoqi@1:
aoqi@1:
aoqi@1: #ifdef ASSERT
aoqi@1:   bool reg_destroyed[RegisterImpl::number_of_registers];
aoqi@1:   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
aoqi@1:   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
aoqi@1:     reg_destroyed[r] = false;
aoqi@1:   }
aoqi@1:   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
aoqi@1:     freg_destroyed[f] = false;
aoqi@1:   }
aoqi@1:
aoqi@1: #endif /* ASSERT */
aoqi@1:
aoqi@1:   // (x86 leftover note) We know that we only have args in at most two
aoqi@1:   // integer registers (ecx, edx), so eax and ebx are free to use as
aoqi@1:   // temporaries if we have to do stack to stack moves.  On this port all
aoqi@1:   // inbound args are referenced off FP and all outbound args via SP.
aoqi@1:
aoqi@1:   // This may iterate in two different directions depending on the
aoqi@1:   // kind of native it is. The reason is that for regular JNI natives
aoqi@1:   // the incoming and outgoing registers are offset upwards and for
aoqi@1:   // critical natives they are offset down.
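aoqi@1:   // A sketch of the resulting order for a regular JNI native (derived from
aoqi@1:   // the loop below): with total_in_args = 2 and total_c_args = 4 the pairs
aoqi@1:   // pushed are (1,3) then (0,2), i.e. the args are moved last-to-first and
aoqi@1:   // the two hidden slots (JNIEnv* and mirror, c_arg 0 and 1) are filled in
aoqi@1:   // afterwards.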
aoqi@1:   GrowableArray<int> arg_order(2 * total_in_args);
aoqi@1:   VMRegPair tmp_vmreg;
aoqi@1:   // tmp_vmreg.set1(rbx->as_VMReg());
aoqi@1:   tmp_vmreg.set1(T8->as_VMReg());
aoqi@1:
aoqi@1:   if (!is_critical_native) {
aoqi@1:     for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
aoqi@1:       arg_order.push(i);
aoqi@1:       arg_order.push(c_arg);
aoqi@1:     }
aoqi@1:   } else {
aoqi@1:     // Compute a valid move order, using tmp_vmreg to break any cycles
aoqi@1:     __ stop("generate_native_wrapper in sharedRuntime <2>");
aoqi@1:     //TODO:Fu
aoqi@1:     // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg);
aoqi@1:   }
aoqi@1:
aoqi@1:   int temploc = -1;
aoqi@1:   for (int ai = 0; ai < arg_order.length(); ai += 2) {
aoqi@1:     int i = arg_order.at(ai);
aoqi@1:     int c_arg = arg_order.at(ai + 1);
aoqi@1:     __ block_comment(err_msg("move %d -> %d", i, c_arg));
aoqi@1:     if (c_arg == -1) {
aoqi@1:       assert(is_critical_native, "should only be required for critical natives");
aoqi@1:       // This arg needs to be moved to a temporary
aoqi@1:       __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register());
aoqi@1:       in_regs[i] = tmp_vmreg;
aoqi@1:       temploc = i;
aoqi@1:       continue;
aoqi@1:     } else if (i == -1) {
aoqi@1:       assert(is_critical_native, "should only be required for critical natives");
aoqi@1:       // Read from the temporary location
aoqi@1:       assert(temploc != -1, "must be valid");
aoqi@1:       i = temploc;
aoqi@1:       temploc = -1;
aoqi@1:     }
aoqi@1: #ifdef ASSERT
aoqi@1:     if (in_regs[i].first()->is_Register()) {
aoqi@1:       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
aoqi@1:     } else if (in_regs[i].first()->is_FloatRegister()) {
aoqi@1:       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
aoqi@1:     }
aoqi@1:     if (out_regs[c_arg].first()->is_Register()) {
aoqi@1:       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
aoqi@1:     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
aoqi@1:       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
aoqi@1:     }
aoqi@1: #endif /* ASSERT */
aoqi@1:     switch (in_sig_bt[i]) {
aoqi@1:       case T_ARRAY:
aoqi@1:         if (is_critical_native) {
aoqi@1:           __ stop("generate_native_wrapper in sharedRuntime <2>");
aoqi@1:           //TODO:Fu
aoqi@1:           // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
aoqi@1:           c_arg++;
aoqi@1: #ifdef ASSERT
aoqi@1:           if (out_regs[c_arg].first()->is_Register()) {
aoqi@1:             reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
aoqi@1:           } else if (out_regs[c_arg].first()->is_FloatRegister()) {
aoqi@1:             freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
aoqi@1:           }
aoqi@1: #endif
aoqi@1:           break;
aoqi@1:         }
aoqi@1:         // not a critical native: fall through to the T_OBJECT case
aoqi@1:       case T_OBJECT:
aoqi@1:         assert(!is_critical_native, "no oop arguments");
aoqi@1:         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
aoqi@1:                     ((i == 0) && (!is_static)),
aoqi@1:                     &receiver_offset);
aoqi@1:         break;
aoqi@1:       case T_VOID:
aoqi@1:         break;
aoqi@1:
aoqi@1:       case T_FLOAT:
aoqi@1:         float_move(masm, in_regs[i], out_regs[c_arg]);
aoqi@1:         break;
aoqi@1:
aoqi@1:       case T_DOUBLE:
aoqi@1:         assert( i + 1 < total_in_args &&
aoqi@1:                 in_sig_bt[i + 1] == T_VOID &&
aoqi@1:                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
aoqi@1:         double_move(masm, in_regs[i], out_regs[c_arg]);
aoqi@1:         break;
aoqi@1:
aoqi@1:       case T_LONG :
aoqi@1:         long_move(masm, in_regs[i], out_regs[c_arg]);
aoqi@1:         break;
aoqi@1:
aoqi@1:       case
T_ADDRESS: assert(false, "found T_ADDRESS in java args"); aoqi@1: aoqi@1: default: aoqi@1: // move32_64(masm, in_regs[i], out_regs[c_arg]); aoqi@1: simple_move32(masm, in_regs[i], out_regs[c_arg]); aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: // point c_arg at the first arg that is already loaded in case we aoqi@1: // need to spill before we call out aoqi@1: c_arg = total_c_args - total_in_args; aoqi@1: // Pre-load a static method's oop into esi. Used both by locking code and aoqi@1: // the normal JNI call code. aoqi@1: aoqi@1: __ move(oop_handle_reg, A1); aoqi@1: aoqi@1: if (method->is_static() && !is_critical_native) { aoqi@1: aoqi@1: // load opp into a register aoqi@1: int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( aoqi@1: (method->method_holder())->java_mirror())); aoqi@1: aoqi@1: aoqi@1: RelocationHolder rspec = oop_Relocation::spec(oop_index); aoqi@1: __ relocate(rspec); aoqi@1: //__ lui(oop_handle_reg, Assembler::split_high((int)JNIHandles::make_local( aoqi@1: // Klass::cast(method->method_holder())->java_mirror()))); aoqi@1: //__ addiu(oop_handle_reg, oop_handle_reg, Assembler::split_low((int) aoqi@1: // JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()))); fujie@368: __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); aoqi@1: // __ verify_oop(oop_handle_reg); aoqi@1: // Now handlize the static class mirror it's known not-null. aoqi@1: __ sd( oop_handle_reg, SP, klass_offset); aoqi@1: map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); aoqi@1: aoqi@1: // Now get the handle aoqi@1: __ lea(oop_handle_reg, Address(SP, klass_offset)); aoqi@1: // store the klass handle as second argument aoqi@1: __ move(A1, oop_handle_reg); aoqi@1: // and protect the arg if we must spill aoqi@1: c_arg--; aoqi@1: } aoqi@1: // Change state to native (we save the return address in the thread, since it might not aoqi@1: // be pushed on the stack when we do a a stack traversal). It is enough that the pc() aoqi@1: // points into the right code segment. It does not have to be the correct return pc. aoqi@1: // We use the same pc/oopMap repeatedly when we call out aoqi@1: aoqi@1: intptr_t the_pc = (intptr_t) __ pc(); aoqi@1: aoqi@1: oop_maps->add_gc_map(the_pc - start, map); aoqi@1: aoqi@1: //__ set_last_Java_frame(thread, esp, noreg, (address)the_pc); aoqi@1: __ set_last_Java_frame(SP, noreg, NULL); aoqi@1: __ relocate(relocInfo::internal_pc_type); aoqi@1: { aoqi@1: intptr_t save_pc = (intptr_t)the_pc ; fujie@368: __ patchable_set48(AT, save_pc); aoqi@1: } aoqi@1: __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); aoqi@1: aoqi@1: aoqi@1: // We have all of the arguments setup at this point. We must not touch any register aoqi@1: // argument registers at this point (what if we save/restore them there are no oop? 
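aoqi@1:   // The block below is the DTrace method-entry probe: SkipIfEqual branches
aoqi@1:   // around it unless DTraceMethodProbes is set.  It materializes the Method*
aoqi@1:   // into AT (presumably safe, AT being the MIPS assembler scratch register)
aoqi@1:   // and calls SharedRuntime::dtrace_method_entry(thread, method).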
aoqi@1: { aoqi@1: SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); aoqi@1: int metadata_index = __ oop_recorder()->find_index(method()); aoqi@1: RelocationHolder rspec = metadata_Relocation::spec(metadata_index); aoqi@1: __ relocate(rspec); aoqi@1: //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method()))); aoqi@1: //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method()))); fujie@368: __ patchable_set48(AT, (long)(method())); aoqi@1: aoqi@1: __ call_VM_leaf( aoqi@1: CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), aoqi@1: thread, AT); aoqi@1: aoqi@1: } aoqi@1: aoqi@1: // These are register definitions we need for locking/unlocking aoqi@1: // const Register swap_reg = eax; // Must use eax for cmpxchg instruction aoqi@1: // const Register obj_reg = ecx; // Will contain the oop aoqi@1: // const Register lock_reg = edx; // Address of compiler lock object (BasicLock) aoqi@1: //FIXME, I hava no idea which register to use aoqi@1: const Register swap_reg = T8; // Must use eax for cmpxchg instruction aoqi@1: const Register obj_reg = T9; // Will contain the oop aoqi@1: //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) aoqi@1: const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) aoqi@1: aoqi@1: aoqi@1: aoqi@1: Label slow_path_lock; aoqi@1: Label lock_done; aoqi@1: aoqi@1: // Lock a synchronized method aoqi@1: if (method->is_synchronized()) { aoqi@1: assert(!is_critical_native, "unhandled"); aoqi@1: aoqi@1: const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); aoqi@1: aoqi@1: // Get the handle (the 2nd argument) aoqi@1: __ move(oop_handle_reg, A1); aoqi@1: aoqi@1: // Get address of the box aoqi@1: __ lea(lock_reg, Address(FP, lock_slot_ebp_offset)); aoqi@1: aoqi@1: // Load the oop from the handle aoqi@1: __ ld(obj_reg, oop_handle_reg, 0); aoqi@1: aoqi@1: if (UseBiasedLocking) { aoqi@1: // Note that oop_handle_reg is trashed during this call aoqi@1: __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, aoqi@1: false, lock_done, &slow_path_lock); aoqi@1: } aoqi@1: aoqi@1: // Load immediate 1 into swap_reg %eax aoqi@1: __ move(swap_reg, 1); aoqi@1: aoqi@1: __ ld(AT, obj_reg, 0); aoqi@1: __ orr(swap_reg, swap_reg, AT); aoqi@1: aoqi@1: __ sd( swap_reg, lock_reg, mark_word_offset); aoqi@1: __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); aoqi@1: __ bne(AT, R0, lock_done); aoqi@1: __ delayed()->nop(); aoqi@1: // Test if the oopMark is an obvious stack pointer, i.e., aoqi@1: // 1) (mark & 3) == 0, and aoqi@1: // 2) esp <= mark < mark + os::pagesize() aoqi@1: // These 3 tests can be done by evaluating the following aoqi@1: // expression: ((mark - esp) & (3 - os::vm_page_size())), aoqi@1: // assuming both stack pointer and pagesize have their aoqi@1: // least significant 2 bits clear. aoqi@1: // NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg aoqi@1: aoqi@1: __ dsub(swap_reg, swap_reg,SP); aoqi@1: __ move(AT, 3 - os::vm_page_size()); aoqi@1: __ andr(swap_reg , swap_reg, AT); aoqi@1: // Save the test result, for recursive case, the result is zero aoqi@1: __ sd(swap_reg, lock_reg, mark_word_offset); aoqi@1: //FIXME here, Why notEqual? 
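aoqi@1:       // A reading of the "Why notEqual?" question above (the standard
aoqi@1:       // HotSpot stack-locking test, offered as a sketch rather than a
aoqi@1:       // verified fix): after the subtract and mask, swap_reg is zero exactly
aoqi@1:       // when the displaced mark was a pointer into our own stack page, i.e.
aoqi@1:       // a recursive lock we already own; anything non-zero means the fast
aoqi@1:       // path failed, hence branch-if-not-equal to the slow path.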
aoqi@1: __ bne(swap_reg,R0, slow_path_lock); aoqi@1: __ delayed()->nop(); aoqi@1: // Slow path will re-enter here aoqi@1: __ bind(lock_done); aoqi@1: aoqi@1: if (UseBiasedLocking) { aoqi@1: // Re-fetch oop_handle_reg as we trashed it above aoqi@1: __ move(A1, oop_handle_reg); aoqi@1: } aoqi@1: } aoqi@1: aoqi@1: aoqi@1: // Finally just about ready to make the JNI call aoqi@1: aoqi@1: aoqi@1: // get JNIEnv* which is first argument to native aoqi@1: if (!is_critical_native) { aoqi@1: __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset())); aoqi@1: } aoqi@1: aoqi@1: // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) aoqi@1: /* Load the second arguments into A1 */ aoqi@1: //__ ld(A1, SP , wordSize ); // klass aoqi@1: aoqi@1: // Now set thread in native aoqi@1: __ addi(AT, R0, _thread_in_native); aoqi@1: __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); aoqi@1: /* Jin: do the call */ aoqi@1: __ call(method->native_function(), relocInfo::runtime_call_type); aoqi@1: __ delayed()->nop(); aoqi@1: // WARNING - on Windows Java Natives use pascal calling convention and pop the aoqi@1: // arguments off of the stack. We could just re-adjust the stack pointer here aoqi@1: // and continue to do SP relative addressing but we instead switch to FP aoqi@1: // relative addressing. aoqi@1: aoqi@1: // Unpack native results. aoqi@1: switch (ret_type) { aoqi@1: case T_BOOLEAN: __ c2bool(V0); break; aoqi@1: case T_CHAR : __ andi(V0,V0, 0xFFFF); break; aoqi@1: case T_BYTE : __ sign_extend_byte (V0); break; aoqi@1: case T_SHORT : __ sign_extend_short(V0); break; aoqi@1: case T_INT : // nothing to do break; aoqi@1: case T_DOUBLE : aoqi@1: case T_FLOAT : aoqi@1: // Result is in st0 we'll save as needed aoqi@1: break; aoqi@1: case T_ARRAY: // Really a handle aoqi@1: case T_OBJECT: // Really a handle aoqi@1: break; // can't de-handlize until after safepoint check aoqi@1: case T_VOID: break; aoqi@1: case T_LONG: break; aoqi@1: default : ShouldNotReachHere(); aoqi@1: } aoqi@1: // Switch thread to "native transition" state before reading the synchronization state. aoqi@1: // This additional state is necessary because reading and testing the synchronization aoqi@1: // state is not atomic w.r.t. GC, as this scenario demonstrates: aoqi@1: // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. aoqi@1: // VM thread changes sync state to synchronizing and suspends threads for GC. aoqi@1: // Thread A is resumed to finish this native method, but doesn't block here since it aoqi@1: // didn't see any synchronization is progress, and escapes. aoqi@1: // __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); aoqi@1: //__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset()); aoqi@1: // __ move(AT, (int)_thread_in_native_trans); aoqi@1: __ addi(AT, R0, _thread_in_native_trans); aoqi@1: __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); aoqi@1: aoqi@1: Label after_transition; aoqi@1: aoqi@1: // check for safepoint operation in progress and/or pending suspend requests aoqi@1: { Label Continue; aoqi@1: //FIXME here, which regiser should we use? 
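aoqi@1:     // What follows is the usual native-transition safepoint poll (a sketch
aoqi@1:     // of the logic): load SafepointSynchronize::_state and the per-thread
aoqi@1:     // suspend flags; only if both say nothing is pending may the thread go
aoqi@1:     // straight from _thread_in_native_trans to _thread_in_Java without
aoqi@1:     // calling into the VM.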
aoqi@1: // SafepointSynchronize::_not_synchronized); aoqi@1: __ li(AT, SafepointSynchronize::address_of_state()); aoqi@1: __ lw(A0, AT, 0); aoqi@1: __ addi(AT, A0, -SafepointSynchronize::_not_synchronized); aoqi@1: Label L; aoqi@1: __ bne(AT,R0, L); aoqi@1: __ delayed()->nop(); aoqi@1: __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); aoqi@1: __ beq(AT, R0, Continue); aoqi@1: __ delayed()->nop(); aoqi@1: __ bind(L); aoqi@1: aoqi@1: // Don't use call_VM as it will see a possible pending exception and forward it aoqi@1: // and never return here preventing us from clearing _last_native_pc down below. aoqi@1: // Also can't use call_VM_leaf either as it will check to see if esi & edi are aoqi@1: // preserved and correspond to the bcp/locals pointers. So we do a runtime call aoqi@1: // by hand. aoqi@1: // aoqi@1: save_native_result(masm, ret_type, stack_slots); aoqi@1: __ move (A0, thread); aoqi@1: __ addi(SP,SP, -wordSize); aoqi@21: __ push(S2); aoqi@21: __ move(AT, -(StackAlignmentInBytes)); aoqi@21: __ move(S2, SP); // use S2 as a sender SP holder aoqi@21: __ andr(SP, SP, AT); // align stack as required by ABI aoqi@1: if (!is_critical_native) { aoqi@1: __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); aoqi@1: __ delayed()->nop(); aoqi@1: } else { aoqi@1: __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); aoqi@1: __ delayed()->nop(); aoqi@1: } aoqi@21: __ move(SP, S2); // use S2 as a sender SP holder aoqi@21: __ pop(S2); aoqi@1: __ addi(SP,SP, wordSize); aoqi@1: //add for compressedoops aoqi@1: __ reinit_heapbase(); aoqi@1: // Restore any method result value aoqi@1: restore_native_result(masm, ret_type, stack_slots); aoqi@1: aoqi@1: if (is_critical_native) { aoqi@1: // The call above performed the transition to thread_in_Java so aoqi@1: // skip the transition logic below. aoqi@1: __ beq(R0, R0, after_transition); aoqi@1: __ delayed()->nop(); aoqi@1: } aoqi@1: aoqi@1: __ bind(Continue); aoqi@1: } aoqi@1: aoqi@1: // change thread state aoqi@1: __ addi(AT, R0, _thread_in_Java); aoqi@1: __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); aoqi@1: __ bind(after_transition); aoqi@1: Label reguard; aoqi@1: Label reguard_done; Jin@5: __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); aoqi@1: __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); aoqi@1: __ beq(AT, R0, reguard); aoqi@1: __ delayed()->nop(); aoqi@1: // slow path reguard re-enters here aoqi@1: __ bind(reguard_done); aoqi@1: aoqi@1: // Handle possible exception (will unlock if necessary) aoqi@1: aoqi@1: // native result if any is live aoqi@1: aoqi@1: // Unlock aoqi@1: Label slow_path_unlock; aoqi@1: Label unlock_done; aoqi@1: if (method->is_synchronized()) { aoqi@1: aoqi@1: Label done; aoqi@1: aoqi@1: // Get locked oop from the handle we passed to jni aoqi@1: __ ld( obj_reg, oop_handle_reg, 0); aoqi@1: //FIXME aoqi@1: if (UseBiasedLocking) { aoqi@1: __ biased_locking_exit(obj_reg, T8, done); aoqi@1: aoqi@1: } aoqi@1: aoqi@1: // Simple recursive lock? 
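aoqi@1:     // A zero displaced header in the lock slot is how the locking fast path
aoqi@1:     // above recorded a recursive (already-owned) lock ("for recursive case,
aoqi@1:     // the result is zero"), so the load-and-branch below can skip the
aoqi@1:     // unlock entirely in that case.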
aoqi@1:
aoqi@1:   __ ld(AT, FP, lock_slot_ebp_offset);
aoqi@1:   __ beq(AT, R0, done);
aoqi@1:   __ delayed()->nop();
aoqi@1:   // Must save the native result if it is live now, because the unlock
aoqi@1:   // sequence below may clobber it
aoqi@1:   if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
aoqi@1:     save_native_result(masm, ret_type, stack_slots);
aoqi@1:   }
aoqi@1:
aoqi@1:   // get old displaced header
aoqi@1:   __ ld(T8, FP, lock_slot_ebp_offset);
aoqi@1:   // get address of the stack lock
aoqi@1:   //FIXME aoqi
aoqi@1:   //__ addi (T6, FP, lock_slot_ebp_offset);
aoqi@1:   __ addi(c_rarg0, FP, lock_slot_ebp_offset);
aoqi@1:   // Atomic swap old header if oop still contains the stack lock
aoqi@1:   //FIXME aoqi
aoqi@1:   //__ cmpxchg(T8, Address(obj_reg, 0),T6 );
aoqi@1:   __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0);
aoqi@1:
aoqi@1:   __ beq(AT, R0, slow_path_unlock);
aoqi@1:   __ delayed()->nop();
aoqi@1:   // slow path re-enters here
aoqi@1:   __ bind(unlock_done);
aoqi@1:   if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
aoqi@1:     restore_native_result(masm, ret_type, stack_slots);
aoqi@1:   }
aoqi@1:
aoqi@1:   __ bind(done);
aoqi@1:
aoqi@1: }
aoqi@1: {
aoqi@1:   SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
aoqi@1:   // Tell dtrace about this method exit
aoqi@1:   save_native_result(masm, ret_type, stack_slots);
aoqi@1:   int metadata_index = __ oop_recorder()->find_index((method()));
aoqi@1:   RelocationHolder rspec = metadata_Relocation::spec(metadata_index);
aoqi@1:   __ relocate(rspec);
aoqi@1:   //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method())));
aoqi@1:   //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method())));
fujie@368:   __ patchable_set48(AT, (long)(method()));
aoqi@1:
aoqi@1:   __ call_VM_leaf(
aoqi@1:        CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
aoqi@1:        thread, AT);
aoqi@1:   restore_native_result(masm, ret_type, stack_slots);
aoqi@1: }
aoqi@1:
aoqi@1: // We can finally stop using that last_Java_frame we set up ages ago
aoqi@1:
aoqi@1: __ reset_last_Java_frame(false, true);
aoqi@1:
aoqi@1: // Unpack oop result
aoqi@1: if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
aoqi@1:   Label L;
aoqi@1:   // __ cmpl(eax, NULL_WORD);
aoqi@1:   // __ jcc(Assembler::equal, L);
aoqi@1:   __ beq(V0, R0, L);
aoqi@1:   __ delayed()->nop();
aoqi@1:   // __ movl(eax, Address(eax));
aoqi@1:   __ ld(V0, V0, 0);
aoqi@1:   __ bind(L);
aoqi@1:   // __ verify_oop(eax);
aoqi@1:   __ verify_oop(V0);
aoqi@1: }
aoqi@1:
aoqi@1: if (!is_critical_native) {
aoqi@1:   // reset handle block
aoqi@1:   __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset()));
aoqi@1:   __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes());
aoqi@1: }
aoqi@1:
aoqi@1: if (!is_critical_native) {
aoqi@1:   // Any exception pending?
aoqi@1:   __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1:
aoqi@1:   __ bne(AT, R0, exception_pending);
aoqi@1:   __ delayed()->nop();
aoqi@1: }
aoqi@1: // no exception, we're almost done
aoqi@1:
aoqi@1: // check that only the result value is on the FPU stack
aoqi@1: __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0,
aoqi@1:               "native_wrapper normal exit");
aoqi@1:
aoqi@1: // Fixup floating point results so that the result looks like a return from a compiled method
aoqi@1: /* if (ret_type == T_FLOAT) {
aoqi@1:      if (UseSSE >= 1) {
aoqi@1:        // Pop st0 and store as float and reload into xmm register
aoqi@1:        __ fstp_s(Address(ebp, -4));
aoqi@1:        __ movss(xmm0, Address(ebp, -4));
aoqi@1:      }
aoqi@1:    } else if (ret_type == T_DOUBLE) {
aoqi@1:      if (UseSSE >= 2) {
aoqi@1:        // Pop st0 and store as double and reload into xmm register
aoqi@1:        __ fstp_d(Address(ebp, -8));
aoqi@1:        __ movsd(xmm0, Address(ebp, -8));
aoqi@1:      }
aoqi@1:    }
aoqi@1: */
aoqi@1: // Return
aoqi@1: #ifndef OPT_THREAD
aoqi@1: __ get_thread(TREG);
aoqi@1: #endif
aoqi@1: __ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset()));
aoqi@1: __ leave();
aoqi@1:
aoqi@1: __ jr(RA);
aoqi@1: __ delayed()->nop();
aoqi@1: // Unexpected paths are out of line and go here
aoqi@1: /*
aoqi@1: if (!is_critical_native) {
aoqi@1:   // forward the exception
aoqi@1:   __ bind(exception_pending);
aoqi@1:
aoqi@1:   // and forward the exception
aoqi@1:   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
aoqi@1: }
aoqi@1: */
aoqi@1: // Slow path locking & unlocking
aoqi@1: if (method->is_synchronized()) {
aoqi@1:
aoqi@1:   // BEGIN Slow path lock
aoqi@1:
aoqi@1:   __ bind(slow_path_lock);
aoqi@1:
aoqi@1:   // protect the args we've loaded
aoqi@1:   save_args(masm, total_c_args, c_arg, out_regs);
aoqi@1:
aoqi@1:   // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
aoqi@1:   // args are (oop obj, BasicLock* lock, JavaThread* thread)
aoqi@1:
aoqi@1:   __ move(A0, obj_reg);
aoqi@1:   __ move(A1, lock_reg);
aoqi@1:   __ move(A2, thread);
aoqi@1:   __ addi(SP, SP, -3 * wordSize);
aoqi@1:
aoqi@1:   __ move(AT, -(StackAlignmentInBytes));
aoqi@1:   __ move(S2, SP);     // use S2 as a sender SP holder
aoqi@1:   __ andr(SP, SP, AT); // align stack as required by ABI
aoqi@1:
aoqi@1:   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
aoqi@1:   __ delayed()->nop();
aoqi@1:   __ move(SP, S2);
aoqi@1:   __ addi(SP, SP, 3 * wordSize);
aoqi@1:
aoqi@1:   restore_args(masm, total_c_args, c_arg, out_regs);
aoqi@1:
aoqi@1: #ifdef ASSERT
aoqi@1:   { Label L;
aoqi@1:     // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
aoqi@1:     __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1:     //__ jcc(Assembler::equal, L);
aoqi@1:     __ beq(AT, R0, L);
aoqi@1:     __ delayed()->nop();
aoqi@1:     __ stop("no pending exception allowed on exit from monitorenter");
aoqi@1:     __ bind(L);
aoqi@1:   }
aoqi@1: #endif
aoqi@1:   __ b(lock_done);
aoqi@1:   __ delayed()->nop();
aoqi@1:   // END Slow path lock
aoqi@1:
aoqi@1:   // BEGIN Slow path unlock
aoqi@1:   __ bind(slow_path_unlock);
aoqi@1:
aoqi@1:   // Slow path unlock
aoqi@1:
aoqi@1:   if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
aoqi@1:     save_native_result(masm, ret_type, stack_slots);
aoqi@1:   }
aoqi@1:   // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
aoqi@1:
aoqi@1:   __ ld(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1:   __ push(AT);
aoqi@1:   __ sd(R0, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1:
aoqi@1:   __ move(AT, -(StackAlignmentInBytes));
aoqi@1:   __ move(S2, SP);     // use S2 as a sender SP holder
aoqi@1:   __ andr(SP, SP, AT); // align stack as required by ABI
aoqi@1:
aoqi@1:   // should be a lea (load the address of the stack lock);
aoqi@1:   // +wordSize because of the push above
aoqi@1:   __ addi(A1, FP, lock_slot_ebp_offset);
aoqi@1:
aoqi@1:   __ move(A0, obj_reg);
aoqi@1:   __ addi(SP, SP, -2 * wordSize);
aoqi@1:   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
aoqi@1:           relocInfo::runtime_call_type);
aoqi@1:   __ delayed()->nop();
aoqi@1:   __ addi(SP, SP, 2 * wordSize);
aoqi@1:   __ move(SP, S2);
aoqi@1:   // added for compressed oops
aoqi@1:   __ reinit_heapbase();
aoqi@1: #ifdef ASSERT
aoqi@1:   {
aoqi@1:     Label L;
aoqi@1:     // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
aoqi@1:     __ lw(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1:     //__ jcc(Assembler::equal, L);
aoqi@1:     __ beq(AT, R0, L);
aoqi@1:     __ delayed()->nop();
aoqi@1:     __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
aoqi@1:     __ bind(L);
aoqi@1:   }
aoqi@1: #endif /* ASSERT */
aoqi@1:
aoqi@1:   __ pop(AT);
aoqi@1:   __ sd(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1:   if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
aoqi@1:     restore_native_result(masm, ret_type, stack_slots);
aoqi@1:   }
aoqi@1:   __ b(unlock_done);
aoqi@1:   __ delayed()->nop();
aoqi@1:   // END Slow path unlock
aoqi@1:
aoqi@1: }
aoqi@1:
aoqi@1: // SLOW PATH Reguard the stack if needed
aoqi@1:
aoqi@1: __ bind(reguard);
aoqi@1: save_native_result(masm, ret_type, stack_slots);
aoqi@1: __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages),
aoqi@1:         relocInfo::runtime_call_type);
aoqi@1: __ delayed()->nop();
aoqi@1: // added for compressed oops
aoqi@1: __ reinit_heapbase();
aoqi@1: restore_native_result(masm, ret_type, stack_slots);
aoqi@1: __ b(reguard_done);
aoqi@1: __ delayed()->nop();
aoqi@1:
aoqi@1: // BEGIN EXCEPTION PROCESSING
aoqi@1: if (!is_critical_native) {
aoqi@1:   // Forward the exception
aoqi@1:   __ bind(exception_pending);
aoqi@1:
aoqi@1:   // remove possible return value from FPU register stack
aoqi@1:   __ empty_FPU_stack();
aoqi@1:
aoqi@1:   // pop our frame
aoqi@1:   // forward_exception_entry needs the return address on the stack
aoqi@1:   __ addiu(SP, FP, wordSize);
aoqi@1:   __ ld(FP, SP, (-1) * wordSize);
aoqi@1:
aoqi@1:   // and forward the exception
aoqi@1:   __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
aoqi@1:   __ delayed()->nop();
aoqi@1: }
aoqi@1: __ flush();
aoqi@1:
aoqi@1: nmethod *nm = nmethod::new_native_nmethod(method,
aoqi@1:                                           compile_id,
aoqi@1:                                           masm->code(),
aoqi@1:                                           vep_offset,
aoqi@1:                                           frame_complete,
aoqi@1:                                           stack_slots / VMRegImpl::slots_per_word,
aoqi@1:                                           (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
aoqi@1:                                           in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
aoqi@1:                                           oop_maps);
aoqi@1:
aoqi@1: if (is_critical_native) {
aoqi@1:   nm->set_lazy_critical_native(true);
aoqi@1: }
aoqi@1: return nm;
aoqi@1:
aoqi@1: }
aoqi@1:
aoqi@1: #ifdef HAVE_DTRACE_H
aoqi@1: // ---------------------------------------------------------------------------
aoqi@1: // Generate a dtrace nmethod for a given signature. The method takes arguments
aoqi@1: // in the Java compiled code convention, marshals them to the native
aoqi@1: // ABI and then leaves nops at the position where you would expect to call a native
aoqi@1: // function. When the probe is enabled, the nops are replaced with a trap
aoqi@1: // instruction that dtrace inserts, and the trap causes a notification
aoqi@1: // to dtrace.
aoqi@1: //
aoqi@1: // The probes are only able to take primitive types and java/lang/String as
aoqi@1: // arguments. No other java types are allowed. Strings are converted to UTF-8
aoqi@1: // strings so that from the dtrace point of view java strings are converted to C
aoqi@1: // strings. There is an arbitrary fixed limit on the total space that a method
aoqi@1: // can use for converting the strings (256 chars per string in the signature),
aoqi@1: // so any java string larger than this is truncated.
aoqi@1:
aoqi@1: static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
aoqi@1: static bool offsets_initialized = false;
aoqi@1:
aoqi@1: static VMRegPair reg64_to_VMRegPair(Register r) {
aoqi@1:   VMRegPair ret;
aoqi@1:   if (wordSize == 8) {
aoqi@1:     ret.set2(r->as_VMReg());
aoqi@1:   } else {
aoqi@1:     ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
aoqi@1:   }
aoqi@1:   return ret;
aoqi@1: }
aoqi@1:
aoqi@1:
aoqi@1: nmethod *SharedRuntime::generate_dtrace_nmethod(
aoqi@1:     MacroAssembler *masm, methodHandle method) {
aoqi@1:
aoqi@1:
aoqi@1:   // generate_dtrace_nmethod is guarded by a mutex so we are sure to
aoqi@1:   // be single threaded in this method.
aoqi@1:   assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
aoqi@1:
aoqi@1:   // Fill in the signature array, for the calling-convention call.
aoqi@1:   int total_args_passed = method->size_of_parameters();
aoqi@1:
aoqi@1:   BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
aoqi@1:   VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
aoqi@1:
aoqi@1:   // The signature we are going to use for the trap that dtrace will see:
aoqi@1:   // java/lang/String is converted, "this" is dropped, and any other object
aoqi@1:   // is converted to NULL. (A one-slot java/lang/Long object reference
aoqi@1:   // is converted to a two-slot long, which is why we double the allocation).
aoqi@1:   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
aoqi@1:   VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
aoqi@1:
aoqi@1:   int i = 0;
aoqi@1:   int total_strings = 0;
aoqi@1:   int first_arg_to_pass = 0;
aoqi@1:   int total_c_args = 0;
aoqi@1:
aoqi@1:   // Skip the receiver as dtrace doesn't want to see it
aoqi@1:   if (!method->is_static()) {
aoqi@1:     in_sig_bt[i++] = T_OBJECT;
aoqi@1:     first_arg_to_pass = 1;
aoqi@1:   }
aoqi@1:
aoqi@1:   SignatureStream ss(method->signature());
aoqi@1:   for ( ; !ss.at_return_type(); ss.next()) {
aoqi@1:     BasicType bt = ss.type();
aoqi@1:     in_sig_bt[i++] = bt;  // Collect remaining bits of signature
aoqi@1:     out_sig_bt[total_c_args++] = bt;
aoqi@1:     if (bt == T_OBJECT) {
aoqi@1:       symbolOop s = ss.as_symbol_or_null();
aoqi@1:       if (s == vmSymbols::java_lang_String()) {
aoqi@1:         total_strings++;
aoqi@1:         out_sig_bt[total_c_args-1] = T_ADDRESS;
aoqi@1:       } else if (s == vmSymbols::java_lang_Boolean() ||
aoqi@1:                  s == vmSymbols::java_lang_Byte()) {
aoqi@1:         out_sig_bt[total_c_args-1] = T_BYTE;
aoqi@1:       } else if (s == vmSymbols::java_lang_Character() ||
aoqi@1:                  s == vmSymbols::java_lang_Short()) {
aoqi@1:         out_sig_bt[total_c_args-1] = T_SHORT;
aoqi@1:       } else if (s == vmSymbols::java_lang_Integer() ||
aoqi@1:                  s == vmSymbols::java_lang_Float()) {
aoqi@1:         out_sig_bt[total_c_args-1] = T_INT;
aoqi@1:       } else if (s == vmSymbols::java_lang_Long() ||
aoqi@1:                  s == vmSymbols::java_lang_Double()) {
aoqi@1:         out_sig_bt[total_c_args-1] = T_LONG;
aoqi@1:         out_sig_bt[total_c_args++] = T_VOID;
aoqi@1:       }
aoqi@1:     } else if (bt == T_LONG || bt == T_DOUBLE) {
aoqi@1:       in_sig_bt[i++] = T_VOID;  // Longs & doubles take 2 Java slots
aoqi@1:       // We convert double to long
aoqi@1:       out_sig_bt[total_c_args-1] = T_LONG;
aoqi@1:       out_sig_bt[total_c_args++] = T_VOID;
aoqi@1:     } else if (bt == T_FLOAT) {
aoqi@1:       // We convert float to int
aoqi@1:       out_sig_bt[total_c_args-1] = T_INT;
aoqi@1:     }
aoqi@1:   }
aoqi@1:
aoqi@1:   assert(i == total_args_passed, "validly parsed signature");
aoqi@1:
aoqi@1:   // Now get the compiled-Java layout as input arguments
aoqi@1:   int comp_args_on_stack;
aoqi@1:   comp_args_on_stack = SharedRuntime::java_calling_convention(
aoqi@1:       in_sig_bt, in_regs, total_args_passed, false);
aoqi@1:
aoqi@1:   // We have received a description of where all the java args are located
aoqi@1:   // on entry to the wrapper. We need to convert these args to where
aoqi@1:   // a native (non-JNI) function would expect them. To figure out
aoqi@1:   // where they go we convert the java signature to a C signature and remove
aoqi@1:   // T_VOID for any long/double we might have received.
aoqi@1:
aoqi@1:
aoqi@1:   // Now figure out where the args must be stored and how much stack space
aoqi@1:   // they require (neglecting out_preserve_stack_slots but with space for storing
aoqi@1:   // the 1st six register arguments). It's weird; see int_stk_helper.
aoqi@1:   //
aoqi@1:   int out_arg_slots;
aoqi@1:   out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
aoqi@1:
aoqi@1:   // Calculate the total number of stack slots we will need.
aoqi@1:
aoqi@1:   // First count the abi requirement plus all of the outgoing args
aoqi@1:   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
aoqi@1:
aoqi@1:   // Plus a temp for possible conversion of float/double/long register args
aoqi@1:
aoqi@1:   int conversion_temp = stack_slots;
aoqi@1:   stack_slots += 2;
aoqi@1:
aoqi@1:
aoqi@1:   // Now space for the string(s) we must convert
aoqi@1:
aoqi@1:   int string_locs = stack_slots;
aoqi@1:   stack_slots += total_strings *
aoqi@1:                    (max_dtrace_string_size / VMRegImpl::stack_slot_size);
aoqi@1:
aoqi@1:   // OK, the space we have allocated will look like:
aoqi@1:   //
aoqi@1:   //
aoqi@1:   // FP-> |                     |
aoqi@1:   //      |---------------------|
aoqi@1:   //      | string[n]           |
aoqi@1:   //      |---------------------| <- string_locs[n]
aoqi@1:   //      | string[n-1]         |
aoqi@1:   //      |---------------------| <- string_locs[n-1]
aoqi@1:   //      | ...                 |
aoqi@1:   //      | ...                 |
aoqi@1:   //      |---------------------| <- string_locs[1]
aoqi@1:   //      | string[0]           |
aoqi@1:   //      |---------------------| <- string_locs[0]
aoqi@1:   //      | temp                |
aoqi@1:   //      |---------------------| <- conversion_temp
aoqi@1:   //      | outbound memory     |
aoqi@1:   //      | based arguments     |
aoqi@1:   //      |                     |
aoqi@1:   //      |---------------------|
aoqi@1:   //      |                     |
aoqi@1:   // SP-> | out_preserved_slots |
aoqi@1:   //
aoqi@1:   //
aoqi@1:
aoqi@1:   // Now compute the actual number of stack words we need, rounding to keep
aoqi@1:   // the stack properly aligned.
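aoqi@1:   // round_to(x, m) rounds x up to a multiple of m. With stack slots of
aoqi@1:   // 4 bytes and slots_per_word == 2 on a 64-bit VM, 4 * slots_per_word
aoqi@1:   // is 8 slots == 32 bytes, which keeps the frame 16-byte aligned with
aoqi@1:   // room to spare. For example (assumed value, for illustration only):
aoqi@1:   // stack_slots == 21 would round up to 24.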
aoqi@1:   stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
aoqi@1:
aoqi@1:   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
aoqi@1:
aoqi@1:   intptr_t start = (intptr_t)__ pc();
aoqi@1:
aoqi@1:   // First thing: make an IC check to see if we should even be here
aoqi@1:
aoqi@1:   {
aoqi@1:     Label L;
aoqi@1:     const Register temp_reg = G3_scratch;
aoqi@1:     Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub());
aoqi@1:     __ verify_oop(O0);
aoqi@1:     __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
aoqi@1:     __ cmp(temp_reg, G5_inline_cache_reg);
aoqi@1:     __ brx(Assembler::equal, true, Assembler::pt, L);
aoqi@1:     __ delayed()->nop();
aoqi@1:
aoqi@1:     __ jump_to(ic_miss, 0);
aoqi@1:     __ delayed()->nop();
aoqi@1:     __ align(CodeEntryAlignment);
aoqi@1:     __ bind(L);
aoqi@1:   }
aoqi@1:
aoqi@1:   int vep_offset = ((intptr_t)__ pc()) - start;
aoqi@1:
aoqi@1:
aoqi@1:   // The instruction at the verified entry point must be 5 bytes or longer
aoqi@1:   // because it can be patched on the fly by make_non_entrant. The stack bang
aoqi@1:   // instruction fits that requirement.
aoqi@1:
aoqi@1:   // Generate stack overflow check before creating frame
aoqi@1:   __ generate_stack_overflow_check(stack_size);
aoqi@1:
aoqi@1:   assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
aoqi@1:          "valid size for make_non_entrant");
aoqi@1:
aoqi@1:   // Generate a new frame for the wrapper.
aoqi@1:   __ save(SP, -stack_size, SP);
aoqi@1:
aoqi@1:   // Frame is now completed as far as size and linkage.
aoqi@1:
aoqi@1:   int frame_complete = ((intptr_t)__ pc()) - start;
aoqi@1:
aoqi@1: #ifdef ASSERT
aoqi@1:   bool reg_destroyed[RegisterImpl::number_of_registers];
aoqi@1:   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
aoqi@1:   for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
aoqi@1:     reg_destroyed[r] = false;
aoqi@1:   }
aoqi@1:   for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
aoqi@1:     freg_destroyed[f] = false;
aoqi@1:   }
aoqi@1:
aoqi@1: #endif /* ASSERT */
aoqi@1:
aoqi@1:   VMRegPair zero;
aoqi@1:   const Register g0 = G0;  // without this we get a compiler warning (why??)
aoqi@1:   zero.set2(g0->as_VMReg());
aoqi@1:
aoqi@1:   int c_arg, j_arg;
aoqi@1:
aoqi@1:   Register conversion_off = noreg;
aoqi@1:
aoqi@1:   for (j_arg = first_arg_to_pass, c_arg = 0;
aoqi@1:        j_arg < total_args_passed; j_arg++, c_arg++) {
aoqi@1:
aoqi@1:     VMRegPair src = in_regs[j_arg];
aoqi@1:     VMRegPair dst = out_regs[c_arg];
aoqi@1:
aoqi@1: #ifdef ASSERT
aoqi@1:     if (src.first()->is_Register()) {
aoqi@1:       assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
aoqi@1:     } else if (src.first()->is_FloatRegister()) {
aoqi@1:       assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
aoqi@1:                                FloatRegisterImpl::S)], "ack!");
aoqi@1:     }
aoqi@1:     if (dst.first()->is_Register()) {
aoqi@1:       reg_destroyed[dst.first()->as_Register()->encoding()] = true;
aoqi@1:     } else if (dst.first()->is_FloatRegister()) {
aoqi@1:       freg_destroyed[dst.first()->as_FloatRegister()->encoding(
aoqi@1:                        FloatRegisterImpl::S)] = true;
aoqi@1:     }
aoqi@1: #endif /* ASSERT */
aoqi@1:
aoqi@1:     switch (in_sig_bt[j_arg]) {
aoqi@1:       case T_ARRAY:
aoqi@1:       case T_OBJECT:
aoqi@1:         {
aoqi@1:           if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT ||
aoqi@1:               out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
aoqi@1:             // need to unbox a one-slot value
aoqi@1:             Register in_reg = L0;
aoqi@1:             Register tmp = L2;
aoqi@1:             if (src.first()->is_reg()) {
aoqi@1:               in_reg = src.first()->as_Register();
aoqi@1:             } else {
aoqi@1:               assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
aoqi@1:                      "must be");
aoqi@1:               __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
aoqi@1:             }
aoqi@1:             // If the final destination is an acceptable register
aoqi@1:             if (dst.first()->is_reg()) {
aoqi@1:               if (dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG) {
aoqi@1:                 tmp = dst.first()->as_Register();
aoqi@1:               }
aoqi@1:             }
aoqi@1:
aoqi@1:             Label skipUnbox;
aoqi@1:             if (wordSize == 4 && out_sig_bt[c_arg] == T_LONG) {
aoqi@1:               __ mov(G0, tmp->successor());
aoqi@1:             }
aoqi@1:             __ br_null(in_reg, true, Assembler::pn, skipUnbox);
aoqi@1:             __ delayed()->mov(G0, tmp);
aoqi@1:
aoqi@1:             BasicType bt = out_sig_bt[c_arg];
aoqi@1:             int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
aoqi@1:             switch (bt) {
aoqi@1:               case T_BYTE:
aoqi@1:                 __ ldub(in_reg, box_offset, tmp); break;
aoqi@1:               case T_SHORT:
aoqi@1:                 __ lduh(in_reg, box_offset, tmp); break;
aoqi@1:               case T_INT:
aoqi@1:                 __ ld(in_reg, box_offset, tmp); break;
aoqi@1:               case T_LONG:
aoqi@1:                 __ ld_long(in_reg, box_offset, tmp); break;
aoqi@1:               default: ShouldNotReachHere();
aoqi@1:             }
aoqi@1:
aoqi@1:             __ bind(skipUnbox);
aoqi@1:             // If tmp wasn't the final destination, copy to the final destination
aoqi@1:             if (tmp == L2) {
aoqi@1:               VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
aoqi@1:               if (out_sig_bt[c_arg] == T_LONG) {
aoqi@1:                 long_move(masm, tmp_as_VM, dst);
aoqi@1:               } else {
aoqi@1:                 move32_64(masm, tmp_as_VM, out_regs[c_arg]);
aoqi@1:               }
aoqi@1:             }
aoqi@1:             if (out_sig_bt[c_arg] == T_LONG) {
aoqi@1:               assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
aoqi@1:               ++c_arg;  // move over the T_VOID to keep the loop indices in sync
aoqi@1:             }
aoqi@1:           } else if (out_sig_bt[c_arg] == T_ADDRESS) {
aoqi@1:             Register s =
aoqi@1:                 src.first()->is_reg() ? src.first()->as_Register() : L2;
aoqi@1:             Register d =
aoqi@1:                 dst.first()->is_reg() ? dst.first()->as_Register() : L2;
aoqi@1:
aoqi@1:             // We store the oop now so that the conversion pass can reach it
aoqi@1:             // while in the inner frame. This will be the only store if
aoqi@1:             // the oop is NULL.
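aoqi@1:             // Four cases below, depending on whether src and dst are
aoqi@1:             // registers or stack slots (L2 is the scratch register when
aoqi@1:             // either side lives on the stack):
aoqi@1:             //   reg   -> reg   : mov
aoqi@1:             //   reg   -> stack : st_ptr
aoqi@1:             //   stack -> reg   : ld_ptr
aoqi@1:             //   stack -> stack : ld_ptr into L2, then st_ptr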
aoqi@1:             if (s != L2) {
aoqi@1:               // src is register
aoqi@1:               if (d != L2) {
aoqi@1:                 // dst is register
aoqi@1:                 __ mov(s, d);
aoqi@1:               } else {
aoqi@1:                 assert(Assembler::is_simm13(reg2offset(dst.first()) +
aoqi@1:                                             STACK_BIAS), "must be");
aoqi@1:                 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
aoqi@1:               }
aoqi@1:             } else {
aoqi@1:               // src not a register
aoqi@1:               assert(Assembler::is_simm13(reg2offset(src.first()) +
aoqi@1:                                           STACK_BIAS), "must be");
aoqi@1:               __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
aoqi@1:               if (d == L2) {
aoqi@1:                 assert(Assembler::is_simm13(reg2offset(dst.first()) +
aoqi@1:                                             STACK_BIAS), "must be");
aoqi@1:                 __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
aoqi@1:               }
aoqi@1:             }
aoqi@1:           } else if (out_sig_bt[c_arg] != T_VOID) {
aoqi@1:             // Convert the arg to NULL
aoqi@1:             if (dst.first()->is_reg()) {
aoqi@1:               __ mov(G0, dst.first()->as_Register());
aoqi@1:             } else {
aoqi@1:               assert(Assembler::is_simm13(reg2offset(dst.first()) +
aoqi@1:                                           STACK_BIAS), "must be");
aoqi@1:               __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
aoqi@1:             }
aoqi@1:           }
aoqi@1:         }
aoqi@1:         break;
aoqi@1:       case T_VOID:
aoqi@1:         break;
aoqi@1:
aoqi@1:       case T_FLOAT:
aoqi@1:         if (src.first()->is_stack()) {
aoqi@1:           // Stack to stack/reg is simple
aoqi@1:           move32_64(masm, src, dst);
aoqi@1:         } else {
aoqi@1:           if (dst.first()->is_reg()) {
aoqi@1:             // freg -> reg
aoqi@1:             int off =
aoqi@1:               STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
aoqi@1:             Register d = dst.first()->as_Register();
aoqi@1:             if (Assembler::is_simm13(off)) {
aoqi@1:               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
aoqi@1:                      SP, off);
aoqi@1:               __ ld(SP, off, d);
aoqi@1:             } else {
aoqi@1:               if (conversion_off == noreg) {
aoqi@1:                 __ set(off, L6);
aoqi@1:                 conversion_off = L6;
aoqi@1:               }
aoqi@1:               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
aoqi@1:                      SP, conversion_off);
aoqi@1:               __ ld(SP, conversion_off, d);
aoqi@1:             }
aoqi@1:           } else {
aoqi@1:             // freg -> mem
aoqi@1:             int off = STACK_BIAS + reg2offset(dst.first());
aoqi@1:             if (Assembler::is_simm13(off)) {
aoqi@1:               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
aoqi@1:                      SP, off);
aoqi@1:             } else {
aoqi@1:               if (conversion_off == noreg) {
aoqi@1:                 __ set(off, L6);
aoqi@1:                 conversion_off = L6;
aoqi@1:               }
aoqi@1:               __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
aoqi@1:                      SP, conversion_off);
aoqi@1:             }
aoqi@1:           }
aoqi@1:         }
aoqi@1:         break;
aoqi@1:
aoqi@1:       case T_DOUBLE:
aoqi@1:         assert( j_arg + 1 < total_args_passed &&
aoqi@1:                 in_sig_bt[j_arg + 1] == T_VOID &&
aoqi@1:                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
aoqi@1:         if (src.first()->is_stack()) {
aoqi@1:           // Stack to stack/reg is simple
aoqi@1:           long_move(masm, src, dst);
aoqi@1:         } else {
aoqi@1:           Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;
aoqi@1:
aoqi@1:           // Destination could be an odd reg on 32bit in which case
aoqi@1:           // we can't load direct to the destination.
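aoqi@1:           // On a 32-bit VM, ld_long needs an even/odd register pair (the
aoqi@1:           // value occupies d and d->successor()), so an odd destination is
aoqi@1:           // staged through L2 and then copied with long_move. Illustrative
aoqi@1:           // shape of the code below (pseudo-C, not generated code):
aoqi@1:           //
aoqi@1:           //   d = (odd(d) && wordSize == 4) ? L2 : d;
aoqi@1:           //   store the double to [SP + off]; ld_long [SP + off] -> d;
aoqi@1:           //   if (d == L2) long_move(L2, dst);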
aoqi@1:
aoqi@1:           if (!d->is_even() && wordSize == 4) {
aoqi@1:             d = L2;
aoqi@1:           }
aoqi@1:           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
aoqi@1:           if (Assembler::is_simm13(off)) {
aoqi@1:             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
aoqi@1:                    SP, off);
aoqi@1:             __ ld_long(SP, off, d);
aoqi@1:           } else {
aoqi@1:             if (conversion_off == noreg) {
aoqi@1:               __ set(off, L6);
aoqi@1:               conversion_off = L6;
aoqi@1:             }
aoqi@1:             __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
aoqi@1:                    SP, conversion_off);
aoqi@1:             __ ld_long(SP, conversion_off, d);
aoqi@1:           }
aoqi@1:           if (d == L2) {
aoqi@1:             long_move(masm, reg64_to_VMRegPair(L2), dst);
aoqi@1:           }
aoqi@1:         }
aoqi@1:         break;
aoqi@1:
aoqi@1:       case T_LONG :
aoqi@1:         // 32bit can't do a split move of something like g1 -> O0, O1
aoqi@1:         // so use a memory temp
aoqi@1:         if (src.is_single_phys_reg() && wordSize == 4) {
aoqi@1:           Register tmp = L2;
aoqi@1:           if (dst.first()->is_reg() &&
aoqi@1:               (wordSize == 8 || dst.first()->as_Register()->is_even())) {
aoqi@1:             tmp = dst.first()->as_Register();
aoqi@1:           }
aoqi@1:
aoqi@1:           int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
aoqi@1:           if (Assembler::is_simm13(off)) {
aoqi@1:             __ stx(src.first()->as_Register(), SP, off);
aoqi@1:             __ ld_long(SP, off, tmp);
aoqi@1:           } else {
aoqi@1:             if (conversion_off == noreg) {
aoqi@1:               __ set(off, L6);
aoqi@1:               conversion_off = L6;
aoqi@1:             }
aoqi@1:             __ stx(src.first()->as_Register(), SP, conversion_off);
aoqi@1:             __ ld_long(SP, conversion_off, tmp);
aoqi@1:           }
aoqi@1:
aoqi@1:           if (tmp == L2) {
aoqi@1:             long_move(masm, reg64_to_VMRegPair(L2), dst);
aoqi@1:           }
aoqi@1:         } else {
aoqi@1:           long_move(masm, src, dst);
aoqi@1:         }
aoqi@1:         break;
aoqi@1:
aoqi@1:       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
aoqi@1:
aoqi@1:       default:
aoqi@1:         move32_64(masm, src, dst);
aoqi@1:     }
aoqi@1:   }
aoqi@1:
aoqi@1:
aoqi@1:   // If we have any strings we must store any register-based arg to the stack.
aoqi@1:   // This includes any still-live floating point registers too.
aoqi@1:
aoqi@1:   if (total_strings > 0) {
aoqi@1:
aoqi@1:     // protect all the arg registers
aoqi@1:     __ save_frame(0);
aoqi@1:     __ mov(G2_thread, L7_thread_cache);
aoqi@1:     const Register L2_string_off = L2;
aoqi@1:
aoqi@1:     // Get first string offset
aoqi@1:     __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);
aoqi@1:
aoqi@1:     for (c_arg = 0; c_arg < total_c_args; c_arg++) {
aoqi@1:       if (out_sig_bt[c_arg] == T_ADDRESS) {
aoqi@1:
aoqi@1:         VMRegPair dst = out_regs[c_arg];
aoqi@1:         const Register d = dst.first()->is_reg() ?
aoqi@1:             dst.first()->as_Register()->after_save() : noreg;
aoqi@1:
aoqi@1:         // It's a string; the oop was already copied to the out arg
aoqi@1:         // position.
aoqi@1:         if (d != noreg) {
aoqi@1:           __ mov(d, O0);
aoqi@1:         } else {
aoqi@1:           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
aoqi@1:                  "must be");
aoqi@1:           __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
aoqi@1:         }
aoqi@1:         Label skip;
aoqi@1:
aoqi@1:         __ br_null(O0, false, Assembler::pn, skip);
aoqi@1:         __ delayed()->add(FP, L2_string_off, O1);
aoqi@1:
aoqi@1:         if (d != noreg) {
aoqi@1:           __ mov(O1, d);
aoqi@1:         } else {
aoqi@1:           assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
aoqi@1:                  "must be");
aoqi@1:           __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
aoqi@1:         }
aoqi@1:
aoqi@1:         __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
aoqi@1:                 relocInfo::runtime_call_type);
aoqi@1:         __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);
aoqi@1:
aoqi@1:         __ bind(skip);
aoqi@1:
aoqi@1:       }
aoqi@1:
aoqi@1:     }
aoqi@1:     __ mov(L7_thread_cache, G2_thread);
aoqi@1:     __ restore();
aoqi@1:
aoqi@1:   }
aoqi@1:
aoqi@1:
aoqi@1:   // OK, now we are done. Need to place the nop that dtrace wants in order to
aoqi@1:   // patch in the trap.
aoqi@1:
aoqi@1:   int patch_offset = ((intptr_t)__ pc()) - start;
aoqi@1:
aoqi@1:   __ nop();
aoqi@1:
aoqi@1:
aoqi@1:   // Return
aoqi@1:
aoqi@1:   __ ret();
aoqi@1:   __ delayed()->restore();
aoqi@1:
aoqi@1:   __ flush();
aoqi@1:
aoqi@1:   nmethod *nm = nmethod::new_dtrace_nmethod(
aoqi@1:       method, masm->code(), vep_offset, patch_offset, frame_complete,
aoqi@1:       stack_slots / VMRegImpl::slots_per_word);
aoqi@1:   return nm;
aoqi@1:
aoqi@1: }
aoqi@1:
aoqi@1: #endif // HAVE_DTRACE_H
aoqi@1:
aoqi@1: // this function returns the adjustment size (in number of words) to a c2i adapter
aoqi@1: // activation for use during deoptimization
aoqi@1: int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
aoqi@1:   return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
aoqi@1: }
aoqi@1:
aoqi@1: // "Top of Stack" slots that may be unused by the calling convention but must
aoqi@1: // otherwise be preserved.
aoqi@1: // On Intel these are not necessary and the value can be zero.
aoqi@1: // On Sparc this describes the words reserved for storing a register window
aoqi@1: // when an interrupt occurs.
aoqi@1: uint SharedRuntime::out_preserve_stack_slots() {
aoqi@1:   //return frame::register_save_words * VMRegImpl::slots_per_word;
aoqi@1:   return 0;
aoqi@1: }
aoqi@1: /*
aoqi@1: static void gen_new_frame(MacroAssembler* masm, bool deopt) {
aoqi@1:   //
aoqi@1:   // Common out the new frame generation for deopt and uncommon trap
aoqi@1:   //
aoqi@1:   Register G3pcs = G3_scratch;  // Array of new pcs (input)
aoqi@1:   Register Oreturn0 = O0;
aoqi@1:   Register Oreturn1 = O1;
aoqi@1:   Register O2UnrollBlock = O2;
aoqi@1:   Register O3array = O3;        // Array of frame sizes (input)
aoqi@1:   Register O4array_size = O4;   // number of frames (input)
aoqi@1:   Register O7frame_size = O7;   // frame size (temp)
aoqi@1:
aoqi@1:   __ ld_ptr(O3array, 0, O7frame_size);
aoqi@1:   __ sub(G0, O7frame_size, O7frame_size);
aoqi@1:   __ save(SP, O7frame_size, SP);
aoqi@1:   __ ld_ptr(G3pcs, 0, I7);      // load frame's new pc
aoqi@1:
aoqi@1: #ifdef ASSERT
aoqi@1:   // make sure that the frames are aligned properly
aoqi@1: #ifndef _LP64
aoqi@1:   __ btst(wordSize*2-1, SP);
aoqi@1:   __ breakpoint_trap(Assembler::notZero);
aoqi@1: #endif
aoqi@1: #endif
aoqi@1:
aoqi@1:   // Deopt needs to pass some extra live values from frame to frame
aoqi@1:
aoqi@1:   if (deopt) {
aoqi@1:     __ mov(Oreturn0->after_save(), Oreturn0);
aoqi@1:     __ mov(Oreturn1->after_save(), Oreturn1);
aoqi@1:   }
aoqi@1:
aoqi@1:   __ mov(O4array_size->after_save(), O4array_size);
aoqi@1:   __ sub(O4array_size, 1, O4array_size);
aoqi@1:   __ mov(O3array->after_save(), O3array);
aoqi@1:   __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
aoqi@1:   __ add(G3pcs, wordSize, G3pcs);  // point to next pc value
aoqi@1:
aoqi@1: #ifdef ASSERT
aoqi@1:   // trash registers to show a clear pattern in backtraces
aoqi@1:   __ set(0xDEAD0000, I0);
aoqi@1:   __ add(I0, 2, I1);
aoqi@1:   __ add(I0, 4, I2);
aoqi@1:   __ add(I0, 6, I3);
aoqi@1:   __ add(I0, 8, I4);
aoqi@1:   // Don't touch I5: it could have valuable savedSP
aoqi@1:   __ set(0xDEADBEEF, L0);
aoqi@1:   __ mov(L0, L1);
aoqi@1:   __ mov(L0, L2);
aoqi@1:   __ mov(L0, L3);
aoqi@1:   __ mov(L0, L4);
aoqi@1:   __ mov(L0, L5);
aoqi@1:
aoqi@1:   // trash the return value as there is nothing to return yet
aoqi@1:   __ set(0xDEAD0001, O7);
aoqi@1: #endif
aoqi@1:
aoqi@1:   __ mov(SP, O5_savedSP);
aoqi@1: }
aoqi@1:
aoqi@1:
aoqi@1: static void make_new_frames(MacroAssembler* masm, bool deopt) {
aoqi@1:   //
aoqi@1:   // loop through the UnrollBlock info and create new frames
aoqi@1:   //
aoqi@1:   Register G3pcs = G3_scratch;
aoqi@1:   Register Oreturn0 = O0;
aoqi@1:   Register Oreturn1 = O1;
aoqi@1:   Register O2UnrollBlock = O2;
aoqi@1:   Register O3array = O3;
aoqi@1:   Register O4array_size = O4;
aoqi@1:   Label loop;
aoqi@1:
aoqi@1:   // Before we make new frames, check to see if stack is available.
aoqi@1:   // Do this after the caller's return address is on top of stack
aoqi@1:   if (UseStackBanging) {
aoqi@1:     // Get total frame size for interpreted frames
aoqi@1:     __ ld(Address(O2UnrollBlock, 0,
aoqi@1:                   Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4);
aoqi@1:     __ bang_stack_size(O4, O3, G3_scratch);
aoqi@1:   }
aoqi@1:
aoqi@1:   __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size);
aoqi@1:   __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs);
aoqi@1:
aoqi@1:   __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array);
aoqi@1:
aoqi@1:   // Adjust old interpreter frame to make space for new frame's extra java locals
aoqi@1:   //
aoqi@1:   // We capture the original sp for the transition frame only because it is needed in
aoqi@1:   // order to properly calculate interpreter_sp_adjustment. Even though in real life
aoqi@1:   // every interpreter frame captures a savedSP it is only needed at the transition
aoqi@1:   // (fortunately). If we had to have it correct everywhere then we would need to
aoqi@1:   // be told the sp_adjustment for each frame we create. If the frame size array
aoqi@1:   // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size]
aoqi@1:   // for each frame we create and keep up the illusion everywhere.
aoqi@1:   //
aoqi@1:
aoqi@1:   __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7);
aoqi@1:   __ mov(SP, O5_savedSP);  // remember initial sender's original sp before adjustment
aoqi@1:   __ sub(SP, O7, SP);
aoqi@1:
aoqi@1: #ifdef ASSERT
aoqi@1:   // make sure that there is at least one entry in the array
aoqi@1:   __ tst(O4array_size);
aoqi@1:   __ breakpoint_trap(Assembler::zero);
aoqi@1: #endif
aoqi@1:
aoqi@1:   // Now push the new interpreter frames
aoqi@1:   __ bind(loop);
aoqi@1:
aoqi@1:   // allocate a new frame, filling the registers
aoqi@1:
aoqi@1:   gen_new_frame(masm, deopt);  // allocate an interpreter frame
aoqi@1:
aoqi@1:   __ tst(O4array_size);
aoqi@1:   __ br(Assembler::notZero, false, Assembler::pn, loop);
aoqi@1:   __ delayed()->add(O3array, wordSize, O3array);
aoqi@1:   __ ld_ptr(G3pcs, 0, O7);  // load final frame new pc
aoqi@1:
aoqi@1: }
aoqi@1: */
aoqi@1:
aoqi@1: //------------------------------generate_deopt_blob----------------------------
aoqi@1: // Ought to generate an ideal graph & compile, but here's some MIPS ASM
aoqi@1: // instead.
aoqi@1: void SharedRuntime::generate_deopt_blob() {
aoqi@1:   // allocate space for the code
aoqi@1:   ResourceMark rm;
aoqi@1:   // setup code generation tools
aoqi@1:   //CodeBuffer buffer ("deopt_blob", 4000, 2048);
aoqi@1:   CodeBuffer buffer ("deopt_blob", 8000, 2048);  //aoqi FIXME for debug
aoqi@1:   MacroAssembler* masm = new MacroAssembler(&buffer);
aoqi@1:   int frame_size_in_words;
aoqi@1:   OopMap* map = NULL;
aoqi@1:   // Account for the extra args we place on the stack
aoqi@1:   // by the time we call fetch_unroll_info
aoqi@1:   const int additional_words = 2;  // deopt kind, thread
aoqi@1:
aoqi@1:   OopMapSet *oop_maps = new OopMapSet();
aoqi@1:
aoqi@1:   address start = __ pc();
aoqi@1:   Label cont;
aoqi@1:   // we use S3 for the DeOpt reason register
aoqi@1:   Register reason = S3;
aoqi@1:   // use S6 for thread register
aoqi@1:   Register thread = TREG;
aoqi@1:   // use S7 for the fetch_unroll_info returned UnrollBlock
aoqi@1:   Register unroll = S7;
aoqi@1:   // Prolog for the non-exception case!
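aoqi@1:   // The blob has three entry points, whose offsets are recorded for
aoqi@1:   // DeoptimizationBlob::create at the bottom of this function:
aoqi@1:   //   0                : normal deopt, reason = Unpack_deopt
aoqi@1:   //   reexecute_offset : re-execute the current bytecode, reason = Unpack_reexecute
aoqi@1:   //   exception_offset : deopt with a pending exception, reason = Unpack_exception
aoqi@1:   // (plus exception_in_tls_offset, entered with the exception already in TLS).
aoqi@1:   // All of them save the full register set and meet at 'cont'.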
aoqi@1:   // Correct the return address we were given.
aoqi@1:   //FIXME, is the return address on the tos or in RA?
fujie@375:   __ addi(RA, RA, - (NativeCall::return_address_offset_long));
aoqi@1:   // Save everything in sight.
aoqi@1:   map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
aoqi@1:   // Normal deoptimization
aoqi@1:   __ move(reason, Deoptimization::Unpack_deopt);
aoqi@1:   __ b(cont);
aoqi@1:   __ delayed()->nop();
aoqi@1:
aoqi@1:   int reexecute_offset = __ pc() - start;
aoqi@1:
aoqi@1:   // Reexecute case
aoqi@1:   // The return address is the pc that describes what bci to re-execute at.
aoqi@1:
aoqi@1:   // No need to update map as each call to save_live_registers will produce an identical oopmap
aoqi@1:   //__ addi(RA, RA, - (NativeCall::return_address_offset));
aoqi@1:   (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
aoqi@1:   __ move(reason, Deoptimization::Unpack_reexecute);
aoqi@1:   __ b(cont);
aoqi@1:   __ delayed()->nop();
aoqi@1:
aoqi@1:   int exception_offset = __ pc() - start;
aoqi@1:   // Prolog for exception case
aoqi@1:
aoqi@1:   // all registers are dead at this entry point, except for V0 and
aoqi@1:   // V1 which contain the exception oop and exception pc
aoqi@1:   // respectively. Set them in TLS and fall thru to the
aoqi@1:   // unpack_with_exception_in_tls entry point.
aoqi@1:
aoqi@1:   __ get_thread(thread);
aoqi@1:   __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1:   __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
aoqi@1:   int exception_in_tls_offset = __ pc() - start;
aoqi@1:   // new implementation because the exception oop is now passed in JavaThread
aoqi@1:
aoqi@1:   // Prolog for exception case
aoqi@1:   // All registers must be preserved because they might be used by LinearScan
aoqi@1:   // Exception oop and throwing PC are passed in JavaThread
aoqi@1:   // tos: stack at point of call to method that threw the exception (i.e. only
aoqi@1:   // args are on the stack, no return address)
aoqi@1:
aoqi@1:   // The return address will be patched later with the throwing pc. The correct value is not
aoqi@1:   // available now because loading it from memory would destroy registers.
aoqi@1:   // Save everything in sight.
aoqi@1:   // No need to update map as each call to save_live_registers will produce an identical oopmap
fujie@375:   __ addi(RA, RA, - (NativeCall::return_address_offset_long));
aoqi@1:   (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words);
aoqi@1:
aoqi@1:   // Now it is safe to overwrite any register
aoqi@1:   // store the correct deoptimization type
aoqi@1:   __ move(reason, Deoptimization::Unpack_exception);
aoqi@1:   // load the throwing pc from JavaThread and patch it as the return address
aoqi@1:   // of the current frame. Then clear the field in JavaThread.
aoqi@1:   __ get_thread(thread);
aoqi@1:   __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1:   __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize);  // save RA
aoqi@1:   __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1:
aoqi@1:
aoqi@1: #ifdef ASSERT
aoqi@1:   // verify that there is really an exception oop in JavaThread
aoqi@1:   __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset()));
aoqi@1:   __ verify_oop(AT);
aoqi@1:   // verify that there is no pending exception
aoqi@1:   Label no_pending_exception;
aoqi@1:   __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
aoqi@1:   __ beq(AT, R0, no_pending_exception);
aoqi@1:   __ delayed()->nop();
aoqi@1:   __ stop("must not have pending exception here");
aoqi@1:   __ bind(no_pending_exception);
aoqi@1: #endif
aoqi@1:   __ bind(cont);
aoqi@1:   // Compiled code leaves the floating point stack dirty, empty it.
aoqi@1:   __ empty_FPU_stack();
aoqi@1:
aoqi@1:
aoqi@1:   // Call C code. Need thread and this frame, but NOT official VM entry
aoqi@1:   // crud. We cannot block on this call, no GC can happen.
aoqi@1: #ifndef OPT_THREAD
aoqi@1:   __ get_thread(thread);
aoqi@1: #endif
aoqi@1:
aoqi@1:   __ move(A0, thread);
aoqi@1:   __ addi(SP, SP, -additional_words * wordSize);
aoqi@1:
aoqi@1:   __ set_last_Java_frame(NOREG, NOREG, NULL);
aoqi@1:
aoqi@1:   // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on
aoqi@1:   // this call, no GC can happen. Call should capture return values.
aoqi@1:
aoqi@1:   __ relocate(relocInfo::internal_pc_type);
aoqi@1:   {
fujie@373:     intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
fujie@368:     __ patchable_set48(AT, save_pc);
aoqi@1:   }
aoqi@1:   __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
aoqi@1:
aoqi@1:   __ call((address)Deoptimization::fetch_unroll_info);
aoqi@1:   //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
aoqi@1:   __ delayed()->nop();
aoqi@1:   oop_maps->add_gc_map(__ pc() - start, map);
aoqi@1:   __ addiu(SP, SP, additional_words * wordSize);
aoqi@1:   __ get_thread(thread);
aoqi@1:   __ reset_last_Java_frame(false, true);
aoqi@1:
aoqi@1:   // Load the UnrollBlock into S7
aoqi@1:   __ move(unroll, V0);
aoqi@1:
aoqi@1:
aoqi@1:   // Move the unpack kind to a safe place in the UnrollBlock because
aoqi@1:   // we are very short of registers.
aoqi@1:
aoqi@1:   Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
aoqi@1:   //__ pop(reason);
aoqi@1:   __ sw(reason, unpack_kind);
aoqi@1:   // save the unpack_kind value
aoqi@1:   // Retrieve the possible live values (return values).
aoqi@1:   // All callee save registers representing jvm state
aoqi@1:   // are now in the vframeArray.
aoqi@1:
aoqi@1:   Label noException;
aoqi@1:   __ move(AT, Deoptimization::Unpack_exception);
aoqi@1:   __ bne(AT, reason, noException);  // Was exception pending?
aoqi@1:   __ delayed()->nop();
aoqi@1:   __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset()));
aoqi@1:   __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1:   __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset()));
aoqi@1:   __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset()));
aoqi@1:
aoqi@1:   __ verify_oop(V0);
aoqi@1:
aoqi@1:   // Overwrite the result registers with the exception results.
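aoqi@1:   // V0/V1 (exception oop and pc) are written back into the register
aoqi@1:   // save area so that the unpack code below hands them on to the
aoqi@1:   // interpreter's exception handling as the "return values".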
aoqi@1:   __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
aoqi@1:   __ st_ptr(V1, SP, RegisterSaver::v1Offset() * wordSize);
aoqi@1:
aoqi@1:   __ bind(noException);
aoqi@1:
aoqi@1:
aoqi@1:   // Stack is back to only having register save data on the stack.
aoqi@1:   // Now restore the result registers. Everything else is either dead or captured
aoqi@1:   // in the vframeArray.
aoqi@1:
aoqi@1:   RegisterSaver::restore_result_registers(masm);
aoqi@1:   // All of the register save area has been popped off the stack. Only the
aoqi@1:   // return address remains.
aoqi@1:   // Pop all the frames we must move/replace.
aoqi@1:   // Frame picture (youngest to oldest)
aoqi@1:   // 1: self-frame (no frame link)
aoqi@1:   // 2: deopting frame (no frame link)
aoqi@1:   // 3: caller of deopting frame (could be compiled/interpreted).
aoqi@1:   //
aoqi@1:   // Note: by leaving the return address of self-frame on the stack
aoqi@1:   // and using the size of frame 2 to adjust the stack,
aoqi@1:   // when we are done the return to frame 3 will still be on the stack.
aoqi@1:
aoqi@1:   // register for the sender's sp
aoqi@1:   Register sender_sp = Rsender;
aoqi@1:   // register for frame pcs
aoqi@1:   Register pcs = T0;
aoqi@1:   // register for frame sizes
aoqi@1:   Register sizes = T1;
aoqi@1:   // register for frame count
aoqi@1:   Register count = T3;
aoqi@1:
aoqi@1:   // Pop the deoptimized frame
aoqi@1:   __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
aoqi@1:   __ add(SP, SP, AT);
aoqi@1:   // sp should be pointing at the return address to the caller (3)
aoqi@1:
aoqi@1:   // Load the array of frame pcs into pcs
aoqi@1:   __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
aoqi@1:   __ addi(SP, SP, wordSize);  // trash the old pc
aoqi@1:   // Load the array of frame sizes into sizes
aoqi@1:   __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
aoqi@1:
aoqi@1:
aoqi@1:
aoqi@1:   // Load the count of frames into T3
aoqi@1:   __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
aoqi@1:   // Pick up the initial fp we should save
aoqi@1:   __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
aoqi@1:   // Now adjust the caller's stack to make up for the extra locals,
aoqi@1:   // but record the original sp so that we can save it in the skeletal interpreter
aoqi@1:   // frame and the stack walking of interpreter_sender will get the unextended sp
aoqi@1:   // value and not the "real" sp value.
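aoqi@1:   // caller_adjustment is the number of bytes by which the caller's SP must
aoqi@1:   // be extended to hold the callee's extra java locals. Sketch of the two
aoqi@1:   // instructions below (illustrative, not generated code):
aoqi@1:   //
aoqi@1:   //   sender_sp = SP;               // unextended sp, saved in the skeleton frame
aoqi@1:   //   SP       -= caller_adjustment; // the "real" sp used for the new frames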
aoqi@1:   __ move(sender_sp, SP);
aoqi@1:   __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
aoqi@1:   __ sub(SP, SP, AT);
aoqi@1:
aoqi@1:   // Push interpreter frames in a loop
aoqi@1:   /*
aoqi@1:    *
aoqi@1:    Loop:
aoqi@1:      0x000000555bd82d18: lw t2, 0x0(t1)           ; lw sizes[i]    <--- error lw->ld
aoqi@1:      0x000000555bd82d1c: ld at, 0x0(t0)           ; ld pcs[i]
aoqi@1:      0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16
aoqi@1:      0x000000555bd82d24: daddi sp, sp, 0xfffffff0
aoqi@1:      0x000000555bd82d28: sd fp, 0x0(sp)           ; push fp
aoqi@1:      0x000000555bd82d2c: sd at, 0x8(sp)           ; push at
aoqi@1:      0x000000555bd82d30: dadd fp, sp, zero        ; fp <- sp
aoqi@1:      0x000000555bd82d34: dsub sp, sp, t2          ; sp -= t2
aoqi@1:      0x000000555bd82d38: sd zero, 0xfffffff0(fp)  ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
aoqi@1:      0x000000555bd82d3c: sd s4, 0xfffffff8(fp)    ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
aoqi@1:      0x000000555bd82d40: dadd s4, sp, zero        ; move(sender_sp, SP);
aoqi@1:      0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count --
aoqi@1:      0x000000555bd82d48: daddi t1, t1, 0x4        ; sizes += 4
aoqi@1:      0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18
aoqi@1:      0x000000555bd82d50: daddi t0, t0, 0x4        ; <--- error t0 += 8
aoqi@1:   */
aoqi@1:
aoqi@1:   // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc();
aoqi@1:   Label loop;
aoqi@1:   __ bind(loop);
aoqi@1:   __ ld(T2, sizes, 0);             // Load frame size
aoqi@1:   __ ld_ptr(AT, pcs, 0);           // save return address
aoqi@1:   __ addi(T2, T2, -2 * wordSize);  // we'll push pc and fp by hand
aoqi@1:   __ push2(AT, FP);
aoqi@1:   __ move(FP, SP);
aoqi@1:   __ sub(SP, SP, T2);              // Prolog!
aoqi@1:   // This value is corrected by layout_activation_impl
aoqi@1:   __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
aoqi@1:   __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);  // Make it walkable
aoqi@1:   __ move(sender_sp, SP);          // pass to next frame
aoqi@1:   __ addi(count, count, -1);       // decrement counter
aoqi@1:   __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes)
aoqi@1:   __ bne(count, R0, loop);
aoqi@1:   __ delayed()->addi(pcs, pcs, wordSize);  // Bump array pointer (pcs)
aoqi@1:   __ ld(AT, pcs, 0);  // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0);
aoqi@1:   // Re-push self-frame
aoqi@1:   __ push2(AT, FP);
aoqi@1:   __ move(FP, SP);
aoqi@1:   __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
aoqi@1:   __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);
aoqi@1:   __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize);
aoqi@1:
aoqi@1:   // Restore frame locals after moving the frame
aoqi@1:   __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize);
aoqi@1:   __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize);
aoqi@1:   __ sdc1(F0, SP, RegisterSaver::fpResultOffset() * wordSize);  // store the float result
aoqi@1:   __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
aoqi@1:
aoqi@1:
aoqi@1:   // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on
aoqi@1:   // this call, no GC can happen.
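aoqi@1:   // unpack_frames(JavaThread* thread, int exec_mode) fills in the
aoqi@1:   // skeletal interpreter frames pushed above; A0/A1 are loaded below to
aoqi@1:   // match that C signature. exec_mode is the Unpack_* reason we kept
aoqi@1:   // in S3.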
aoqi@1:   __ move(A1, reason);  // exec_mode
aoqi@1:   __ get_thread(thread);
aoqi@1:   __ move(A0, thread);  // thread
aoqi@1:   __ addi(SP, SP, (-additional_words) * wordSize);
aoqi@1:
aoqi@1:   // set last_Java_sp, last_Java_fp
aoqi@1:   __ set_last_Java_frame(NOREG, FP, NULL);
aoqi@1:
aoqi@1:   __ move(AT, -(StackAlignmentInBytes));
aoqi@1:   __ andr(SP, SP, AT);  // Fix stack alignment as required by ABI
aoqi@1:
aoqi@1:   __ relocate(relocInfo::internal_pc_type);
aoqi@1:   {
fujie@373:     intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28;
fujie@368:     __ patchable_set48(AT, save_pc);
aoqi@1:   }
aoqi@1:   __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
aoqi@1:
aoqi@1:   //__ call(Deoptimization::unpack_frames);
aoqi@1:   __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type);
aoqi@1:   __ delayed()->nop();
aoqi@1:   // Revert SP alignment after call since we're going to do some SP relative addressing below
aoqi@1:   __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
aoqi@1:   // Set an oopmap for the call site
aoqi@1:   oop_maps->add_gc_map(__ offset(), new OopMap(frame_size_in_words, 0));
aoqi@1:
aoqi@1:   __ push(V0);
aoqi@1:
aoqi@1:   __ get_thread(thread);
chenhaoxuan@361:   __ reset_last_Java_frame(true, true);
aoqi@1:
aoqi@1:   // Collect return values
aoqi@1:   __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize);
aoqi@1:   __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize);
aoqi@1:   __ ldc1(F0, SP, RegisterSaver::fpResultOffset() * wordSize);  // reload the float result
aoqi@1:   __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize);
aoqi@1:   //FIXME,
aoqi@1:   // Clear the floating point stack before returning to the interpreter
aoqi@1:   __ empty_FPU_stack();
aoqi@1:   //FIXME, we should consider float and double
aoqi@1:   // Push a float or double return value if necessary.
aoqi@1:   __ leave();
aoqi@1:
aoqi@1:   // Jump to the interpreter
aoqi@1:   __ jr(RA);
aoqi@1:   __ delayed()->nop();
aoqi@1:
aoqi@1:   masm->flush();
aoqi@1:   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
aoqi@1:   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
aoqi@1: }
aoqi@1:
aoqi@1: #ifdef COMPILER2
aoqi@1:
aoqi@1: //------------------------------generate_uncommon_trap_blob--------------------
aoqi@1: // Ought to generate an ideal graph & compile, but here's some MIPS ASM
aoqi@1: // instead.
aoqi@1: void SharedRuntime::generate_uncommon_trap_blob() {
aoqi@1:   // allocate space for the code
aoqi@1:   ResourceMark rm;
aoqi@1:   // setup code generation tools
aoqi@1:   CodeBuffer buffer ("uncommon_trap_blob", 512*80, 512*40);
aoqi@1:   MacroAssembler* masm = new MacroAssembler(&buffer);
aoqi@1:
aoqi@1:   enum frame_layout {
aoqi@1:     s0_off, s0_off2,
aoqi@1:     s1_off, s1_off2,
aoqi@1:     s2_off, s2_off2,
aoqi@1:     s3_off, s3_off2,
aoqi@1:     s4_off, s4_off2,
aoqi@1:     s5_off, s5_off2,
aoqi@1:     s6_off, s6_off2,
aoqi@1:     s7_off, s7_off2,
aoqi@1:     fp_off, fp_off2,
aoqi@1:     return_off, return_off2,  // slot for return address sp + 9
aoqi@1:     framesize
aoqi@1:   };
aoqi@1:   assert(framesize % 4 == 0, "sp not 16-byte aligned");
aoqi@1:
aoqi@1:   address start = __ pc();
aoqi@1:
aoqi@1:   // Push self-frame.
aoqi@1:   __ daddiu(SP, SP, -framesize * BytesPerInt);
aoqi@1:
aoqi@1:   __ sd(RA, SP, return_off * BytesPerInt);
aoqi@1:   __ sd(FP, SP, fp_off * BytesPerInt);
aoqi@1:
aoqi@1:   // Save callee-saved registers (S0..S7).
aoqi@1:   __ sd(S0, SP, s0_off * BytesPerInt);
aoqi@1:   __ sd(S1, SP, s1_off * BytesPerInt);
aoqi@1:   __ sd(S2, SP, s2_off * BytesPerInt);
aoqi@1:   __ sd(S3, SP, s3_off * BytesPerInt);
aoqi@1:   __ sd(S4, SP, s4_off * BytesPerInt);
aoqi@1:   __ sd(S5, SP, s5_off * BytesPerInt);
aoqi@1:   __ sd(S6, SP, s6_off * BytesPerInt);
aoqi@1:   __ sd(S7, SP, s7_off * BytesPerInt);
aoqi@1:
aoqi@1:   __ daddi(FP, SP, fp_off * BytesPerInt);
aoqi@1:
aoqi@1:   // Clear the floating point exception stack
aoqi@1:   __ empty_FPU_stack();
aoqi@1:
aoqi@1:   Register thread = TREG;
aoqi@1:
aoqi@1: #ifndef OPT_THREAD
aoqi@1:   __ get_thread(thread);
aoqi@1: #endif
aoqi@1:   // set last_Java_sp
aoqi@1:   __ set_last_Java_frame(NOREG, FP, NULL);
aoqi@1:   __ relocate(relocInfo::internal_pc_type);
aoqi@1:   {
fujie@373:     long save_pc = (long)__ pc() + 52;
fujie@368:     __ patchable_set48(AT, (long)save_pc);
aoqi@1:     __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
aoqi@1:   }
aoqi@1:   // Call C code. Need thread but NOT official VM entry
aoqi@1:   // crud. We cannot block on this call, no GC can happen. Call should
aoqi@1:   // capture callee-saved registers as well as return values.
aoqi@1:   __ move(A0, thread);
aoqi@1:   // argument already in T0
aoqi@1:   __ move(A1, T0);
fujie@373:   __ patchable_set48(T9, (long)Deoptimization::uncommon_trap);
aoqi@1:   __ jalr(T9);
aoqi@1:   __ delayed()->nop();
aoqi@1:
aoqi@1:   // Set an oopmap for the call site
aoqi@1:   OopMapSet *oop_maps = new OopMapSet();
aoqi@1:   OopMap* map = new OopMap(framesize, 0);
aoqi@1:
aoqi@1:   map->set_callee_saved(VMRegImpl::stack2reg(s0_off), S0->as_VMReg());
aoqi@1:   map->set_callee_saved(VMRegImpl::stack2reg(s1_off), S1->as_VMReg());
aoqi@1:   map->set_callee_saved(VMRegImpl::stack2reg(s2_off), S2->as_VMReg());
aoqi@1:   map->set_callee_saved(VMRegImpl::stack2reg(s3_off), S3->as_VMReg());
aoqi@1:   map->set_callee_saved(VMRegImpl::stack2reg(s4_off), S4->as_VMReg());
aoqi@1:   map->set_callee_saved(VMRegImpl::stack2reg(s5_off), S5->as_VMReg());
aoqi@1:   map->set_callee_saved(VMRegImpl::stack2reg(s6_off), S6->as_VMReg());
aoqi@1:   map->set_callee_saved(VMRegImpl::stack2reg(s7_off), S7->as_VMReg());
aoqi@1:
aoqi@1:   //oop_maps->add_gc_map( __ offset(), true, map);
aoqi@1:   oop_maps->add_gc_map(__ offset(), map);
aoqi@1:
aoqi@1: #ifndef OPT_THREAD
aoqi@1:   __ get_thread(thread);
aoqi@1: #endif
aoqi@1:   __ reset_last_Java_frame(false, false);
aoqi@1:
aoqi@1:   // Load the UnrollBlock into S7
aoqi@1:   Register unroll = S7;
aoqi@1:   __ move(unroll, V0);
aoqi@1:
aoqi@1:   // Pop all the frames we must move/replace.
aoqi@1:   //
aoqi@1:   // Frame picture (youngest to oldest)
aoqi@1:   // 1: self-frame (no frame link)
aoqi@1:   // 2: deopting frame (no frame link)
aoqi@1:   // 3: possible-i2c-adapter-frame
aoqi@1:   // 4: caller of deopting frame (could be compiled/interpreted; if interpreted
aoqi@1:   //    we will create a c2i adapter here)
aoqi@1:
aoqi@1:   // Pop self-frame. We have no frame, and must rely only on V0 and SP.
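aoqi@1:   // The self-frame was allocated with a single daddiu above, so popping it
aoqi@1:   // is just the inverse adjustment; after this, SP points at the
aoqi@1:   // deoptimized frame, whose size the UnrollBlock tells us next.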
aoqi@1:   __ daddiu(SP, SP, framesize * BytesPerInt);
aoqi@1:
aoqi@1:   // Pop the deoptimized frame
aoqi@1:   __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes());
aoqi@1:   __ dadd(SP, SP, AT);
aoqi@1:
aoqi@1:   // register for frame pcs
aoqi@1:   Register pcs = T8;
aoqi@1:   // register for frame sizes
aoqi@1:   Register sizes = T9;
aoqi@1:   // register for frame count
aoqi@1:   Register count = T3;
aoqi@1:   // register for the sender's sp
aoqi@1:   Register sender_sp = T1;
aoqi@1:
aoqi@1:   // sp should be pointing at the return address to the caller (4)
aoqi@1:   // Load the array of frame pcs into pcs (T8)
aoqi@1:   __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes());
aoqi@1:
aoqi@1:   /* 2012/9/7 Not needed in MIPS
aoqi@1:   __ addiu(SP, SP, wordSize);
aoqi@1:   */
aoqi@1:
aoqi@1:   // Load the array of frame sizes into sizes (T9)
aoqi@1:   __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes());
aoqi@1:   __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes());
aoqi@1:
aoqi@1:   // Pick up the initial fp we should save
aoqi@1:   __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes());
aoqi@1:   // Now adjust the caller's stack to make up for the extra locals,
aoqi@1:   // but record the original sp so that we can save it in the skeletal interpreter
aoqi@1:   // frame and the stack walking of interpreter_sender will get the unextended sp
aoqi@1:   // value and not the "real" sp value.
aoqi@1:
aoqi@1:   __ move(sender_sp, SP);
aoqi@1:   __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes());
aoqi@1:   __ dsub(SP, SP, AT);
aoqi@1:   // Push interpreter frames in a loop
aoqi@1:   Label loop;
aoqi@1:   __ bind(loop);
aoqi@1:   __ ld(T2, sizes, 0);              // Load frame size
aoqi@1:   __ ld(AT, pcs, 0);                // save return address
aoqi@1:   __ daddi(T2, T2, -2 * wordSize);  // we'll push pc and fp by hand
aoqi@1:   __ push2(AT, FP);
aoqi@1:   __ move(FP, SP);
aoqi@1:   __ dsub(SP, SP, T2);              // Prolog!
aoqi@1:   // This value is corrected by layout_activation_impl
aoqi@1:   __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize);
aoqi@1:   __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);  // Make it walkable
aoqi@1:   __ move(sender_sp, SP);           // pass to next frame
aoqi@1:   __ daddi(count, count, -1);       // decrement counter
aoqi@1:   __ daddi(sizes, sizes, wordSize); // Bump array pointer (sizes)
aoqi@1:   __ addi(pcs, pcs, wordSize);      // Bump array pointer (pcs)
aoqi@1:   __ bne(count, R0, loop);
aoqi@1:   __ delayed()->nop();
aoqi@1:
aoqi@1:   __ ld(RA, pcs, 0);
aoqi@1:
aoqi@1:   // Re-push self-frame
aoqi@1:   __ daddi(SP, SP, -2 * wordSize);
aoqi@1:   __ sd(FP, SP, 0 * wordSize);      // save old FP
aoqi@1:   __ sd(RA, SP, 1 * wordSize);      // save final return address
aoqi@1:   __ move(FP, SP);                  // set new FP
aoqi@1:   __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize);
aoqi@1:
aoqi@1:   // set last_Java_sp, last_Java_fp
aoqi@1:   __ set_last_Java_frame(NOREG, FP, NULL);
aoqi@1:
aoqi@1:   __ move(AT, -(StackAlignmentInBytes));
aoqi@1:   __ andr(SP, SP, AT);  // Fix stack alignment as required by ABI
aoqi@1:
aoqi@1:   __ relocate(relocInfo::internal_pc_type);
aoqi@1:   {
fujie@373:     long save_pc = (long)__ pc() + 52;
fujie@368:     __ patchable_set48(AT, (long)save_pc);
aoqi@1:   }
aoqi@1:   __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
aoqi@1:
aoqi@1:   // Call C code. Need thread but NOT official VM entry
aoqi@1:   // crud. We cannot block on this call, no GC can happen. Call should
  __ move(A0, thread);
  __ move(A1, Deoptimization::Unpack_uncommon_trap);
  __ patchable_set48(T9, (long)Deoptimization::unpack_frames);
  __ jalr(T9);
  __ delayed()->nop();

  // Set an oopmap for the call site
  //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) );
  oop_maps->add_gc_map(__ offset(), new OopMap(framesize, 0));

  __ reset_last_Java_frame(true, true);

  // Pop self-frame.
  __ leave();     // Epilog!

  // Jump to interpreter
  __ jr(RA);
  __ delayed()->nop();

  // -------------
  // make sure all code is generated
  masm->flush();

  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2);
}

#endif // COMPILER2

//------------------------------generate_handler_blob-------------------
//
// Generate a special Compile2Runtime blob that saves all registers, and sets
// up an OopMap and calls safepoint code to stop the compiled code for
// a safepoint.
//
// This blob is jumped to (via a breakpoint and the signal handler) from a
// safepoint in compiled code.

SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) {

  // Account for thread arg in our frame
  const int additional_words = 0;
  int frame_size_in_words;

  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  ResourceMark rm;
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map;

  // allocate space for the code
  // setup code generation tools
  CodeBuffer buffer("handler_blob", 2048, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  const Register thread = TREG;
  address start = __ pc();
  address call_pc = NULL;
  bool cause_return = (pool_type == POLL_AT_RETURN);
  bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP);

  // If cause_return is true we are at a poll_return, and RA holds the return
  // address to the caller of the nmethod that is being safepointed. We can
  // leave this return address in RA and effectively complete the return and
  // the safepoint in the caller. Otherwise we load the saved exception pc
  // into RA.
  __ push(thread);
#ifndef OPT_THREAD
  __ get_thread(thread);
#endif

  if (!cause_return) {
    __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset()));
  }

  __ pop(thread);
  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors);

#ifndef OPT_THREAD
  __ get_thread(thread);
#endif
  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
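  // __ offset() is sampled immediately after the call below (including its
  // delay slot), so the oopmap is keyed on the call's return address; nothing
  // must be emitted between the call sequence and add_gc_map.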
  __ move(A0, thread);
  __ set_last_Java_frame(NOREG, NOREG, NULL);

  //__ relocate(relocInfo::internal_pc_type);
  if (!cause_return) {
    /*
    intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4;
    __ li48(AT, save_pc);
    __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));
    */
  }

  // do the call
  //__ lui(T9, Assembler::split_high((int)call_ptr));
  //__ addiu(T9, T9, Assembler::split_low((int)call_ptr));
  __ call(call_ptr);
  __ delayed()->nop();

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
  oop_maps->add_gc_map(__ offset(), map);

  Label noException;

  // Clear last_Java_sp again
  __ reset_last_Java_frame(false, false);

  __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  __ beq(AT, R0, noException);
  __ delayed()->nop();

  // Exception pending

  RegisterSaver::restore_live_registers(masm, save_vectors);
  // forward_exception_entry needs the return address on the stack
  __ push(RA);
  //__ lui(T9, Assembler::split_high((int)StubRoutines::forward_exception_entry()));
  //__ addiu(T9, T9, Assembler::split_low((int)StubRoutines::forward_exception_entry()));
  __ li(T9, StubRoutines::forward_exception_entry());
  __ jr(T9);
  __ delayed()->nop();

  // No exception case
  __ bind(noException);
  // Normal exit, restore registers and return
  RegisterSaver::restore_live_registers(masm, save_vectors);
  __ jr(RA);
  __ delayed()->nop();

  masm->flush();

  // Fill out other meta info
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
}

//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into the VM to find out the proper destination
// of a Java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any GC of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;

  //CodeBuffer buffer(name, 1000, 512);
  //FIXME. aoqi. code_size
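  // The generous 20000-byte buffer below is presumably what the FIXME refers
  // to: save_live_registers/restore_live_registers expand to one store/load
  // per saved register on this port, so the stub is far larger than the bare
  // call sequence.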
  CodeBuffer buffer(name, 20000, 2048);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  int frame_size_words;
  // we put the thread in A0

  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = NULL;

  int start = __ offset();
  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  int frame_complete = __ offset();

  const Register thread = T8;
  __ get_thread(thread);

  __ move(A0, thread);
  __ set_last_Java_frame(noreg, FP, NULL);
  //__ addi(SP, SP, -wordSize);
  // align the stack before the native call
  __ move(AT, -(StackAlignmentInBytes));
  __ andr(SP, SP, AT);
  __ relocate(relocInfo::internal_pc_type);
  {
    intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord;
    //tty->print_cr(" %s :%d, name:%s, pc: %lx, save_pc: %lx, frame_size_words: %lx", __func__, __LINE__, name, __ pc(), save_pc, frame_size_words); //aoqi_test
    __ patchable_set48(AT, save_pc);
  }
  __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset()));

  __ call(destination);
  __ delayed()->nop();

  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.
  oop_maps->add_gc_map(__ offset() - start, map);

  // V0 contains the address we are going to jump to assuming no exception got installed
  __ get_thread(thread);
  __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
  // clear last_Java_sp
  __ reset_last_Java_frame(true, true);

  // check for pending exceptions
  Label pending;
  __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset()));
  __ bne(AT, R0, pending);
  __ delayed()->nop();

  // get the returned Method*
  // FIXME: does MIPS need this?
  __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8
  __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
  __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry and ready to go to the callee method.
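  // Rmethod and V0 were stored into their register-save slots above, so
  // restore_live_registers reloaded them: Rmethod now holds the resolved
  // Method* and V0 the entry point we are about to jump to.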
  __ jr(V0);
  __ delayed()->nop();

  // Pending exception after the safepoint
  __ bind(pending);

  RegisterSaver::restore_live_registers(masm);

  // exception pending => remove activation and forward to exception handler
  // forward_exception_entry needs the return address on the stack
  __ push(RA);
  __ get_thread(thread);
  __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
  __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
  __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
  __ delayed()->nop();

  // -------------
  // make sure all code is generated
  masm->flush();

  RuntimeStub* tmp = RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
  return tmp;
}

/*void SharedRuntime::generate_stubs() {
  _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
      SharedRuntime::handle_wrong_method), "wrong_method_stub");
  _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
      SharedRuntime::handle_wrong_method_ic_miss), "ic_miss_stub");
  _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
      SharedRuntime::resolve_opt_virtual_call_C), "resolve_opt_virtual_call");
  _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
      SharedRuntime::resolve_virtual_call_C), "resolve_virtual_call");
  _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
      SharedRuntime::resolve_static_call_C), "resolve_static_call");
  _polling_page_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address,
      SafepointSynchronize::handle_polling_page_exception), false);
  _polling_page_return_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address,
      SafepointSynchronize::handle_polling_page_exception), true);
  generate_deopt_blob();
#ifdef COMPILER2
  generate_uncommon_trap_blob();
#endif // COMPILER2
}*/

extern "C" int SpinPause() { return 0; }
//extern "C" int SafeFetch32(int* adr, int errValue) { return 0; }
//extern "C" intptr_t SafeFetchN(intptr_t* adr, intptr_t errValue) { return *adr; }
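// SpinPause deliberately does nothing on this port and returns 0, i.e. no
// spin-pause hint is emitted for spin loops here.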