diff -r 11d997b1e656 -r 52ea28d233d2 src/cpu/mips/vm/sharedRuntime_mips_64.cpp --- a/src/cpu/mips/vm/sharedRuntime_mips_64.cpp Fri Sep 01 10:28:22 2017 +0800 +++ b/src/cpu/mips/vm/sharedRuntime_mips_64.cpp Thu Sep 07 09:12:16 2017 +0800 @@ -43,126 +43,95 @@ #endif #define __ masm-> + const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; class RegisterSaver { - enum { FPU_regs_live = 32 }; - // Capture info about frame layout - enum layout { + enum { FPU_regs_live = 32 }; + // Capture info about frame layout + enum layout { #define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, - DEF_LAYOUT_OFFS(for_16_bytes_aligned) - DEF_LAYOUT_OFFS(fpr0) - DEF_LAYOUT_OFFS(fpr1) - DEF_LAYOUT_OFFS(fpr2) - DEF_LAYOUT_OFFS(fpr3) - DEF_LAYOUT_OFFS(fpr4) - DEF_LAYOUT_OFFS(fpr5) - DEF_LAYOUT_OFFS(fpr6) - DEF_LAYOUT_OFFS(fpr7) - DEF_LAYOUT_OFFS(fpr8) - DEF_LAYOUT_OFFS(fpr9) - DEF_LAYOUT_OFFS(fpr10) - DEF_LAYOUT_OFFS(fpr11) - DEF_LAYOUT_OFFS(fpr12) - DEF_LAYOUT_OFFS(fpr13) - DEF_LAYOUT_OFFS(fpr14) - DEF_LAYOUT_OFFS(fpr15) - DEF_LAYOUT_OFFS(fpr16) - DEF_LAYOUT_OFFS(fpr17) - DEF_LAYOUT_OFFS(fpr18) - DEF_LAYOUT_OFFS(fpr19) - DEF_LAYOUT_OFFS(fpr20) - DEF_LAYOUT_OFFS(fpr21) - DEF_LAYOUT_OFFS(fpr22) - DEF_LAYOUT_OFFS(fpr23) - DEF_LAYOUT_OFFS(fpr24) - DEF_LAYOUT_OFFS(fpr25) - DEF_LAYOUT_OFFS(fpr26) - DEF_LAYOUT_OFFS(fpr27) - DEF_LAYOUT_OFFS(fpr28) - DEF_LAYOUT_OFFS(fpr29) - DEF_LAYOUT_OFFS(fpr30) - DEF_LAYOUT_OFFS(fpr31) - - DEF_LAYOUT_OFFS(v0) - DEF_LAYOUT_OFFS(v1) - DEF_LAYOUT_OFFS(a0) - DEF_LAYOUT_OFFS(a1) - DEF_LAYOUT_OFFS(a2) - DEF_LAYOUT_OFFS(a3) - DEF_LAYOUT_OFFS(a4) - DEF_LAYOUT_OFFS(a5) - DEF_LAYOUT_OFFS(a6) - DEF_LAYOUT_OFFS(a7) - DEF_LAYOUT_OFFS(t0) - DEF_LAYOUT_OFFS(t1) - DEF_LAYOUT_OFFS(t2) - DEF_LAYOUT_OFFS(t3) - DEF_LAYOUT_OFFS(s0) - DEF_LAYOUT_OFFS(s1) - DEF_LAYOUT_OFFS(s2) - DEF_LAYOUT_OFFS(s3) - DEF_LAYOUT_OFFS(s4) - DEF_LAYOUT_OFFS(s5) - DEF_LAYOUT_OFFS(s6) - DEF_LAYOUT_OFFS(s7) - DEF_LAYOUT_OFFS(t8) - DEF_LAYOUT_OFFS(t9) - - DEF_LAYOUT_OFFS(gp) - DEF_LAYOUT_OFFS(fp) - DEF_LAYOUT_OFFS(return) -/* - fpr0_off, fpr1_off, - fpr2_off, fpr3_off, - fpr4_off, fpr5_off, - fpr6_off, fpr7_off, - fpr8_off, fpr9_off, - fpr10_off, fpr11_off, - fpr12_off, fpr13_off, - fpr14_off, fpr15_off, - fpr16_off, fpr17_off, - fpr18_off, fpr19_off, - fpr20_off, fpr21_off, - fpr22_off, fpr23_off, - fpr24_off, fpr25_off, - fpr26_off, fpr27_off, - fpr28_off, fpr29_off, - fpr30_off, fpr31_off, - - v0_off, v1_off, - a0_off, a1_off, - a2_off, a3_off, - a4_off, a5_off, - a6_off, a7_off, - t0_off, t1_off, t2_off, t3_off, - s0_off, s1_off, s2_off, s3_off, s4_off, s5_off, s6_off, s7_off, - t8_off, t9_off, - - gp_off, fp_off, - return_off, -*/ - reg_save_size - }; + DEF_LAYOUT_OFFS(for_16_bytes_aligned) + DEF_LAYOUT_OFFS(fpr0) + DEF_LAYOUT_OFFS(fpr1) + DEF_LAYOUT_OFFS(fpr2) + DEF_LAYOUT_OFFS(fpr3) + DEF_LAYOUT_OFFS(fpr4) + DEF_LAYOUT_OFFS(fpr5) + DEF_LAYOUT_OFFS(fpr6) + DEF_LAYOUT_OFFS(fpr7) + DEF_LAYOUT_OFFS(fpr8) + DEF_LAYOUT_OFFS(fpr9) + DEF_LAYOUT_OFFS(fpr10) + DEF_LAYOUT_OFFS(fpr11) + DEF_LAYOUT_OFFS(fpr12) + DEF_LAYOUT_OFFS(fpr13) + DEF_LAYOUT_OFFS(fpr14) + DEF_LAYOUT_OFFS(fpr15) + DEF_LAYOUT_OFFS(fpr16) + DEF_LAYOUT_OFFS(fpr17) + DEF_LAYOUT_OFFS(fpr18) + DEF_LAYOUT_OFFS(fpr19) + DEF_LAYOUT_OFFS(fpr20) + DEF_LAYOUT_OFFS(fpr21) + DEF_LAYOUT_OFFS(fpr22) + DEF_LAYOUT_OFFS(fpr23) + DEF_LAYOUT_OFFS(fpr24) + DEF_LAYOUT_OFFS(fpr25) + DEF_LAYOUT_OFFS(fpr26) + DEF_LAYOUT_OFFS(fpr27) + DEF_LAYOUT_OFFS(fpr28) + DEF_LAYOUT_OFFS(fpr29) + DEF_LAYOUT_OFFS(fpr30) + DEF_LAYOUT_OFFS(fpr31) + + 
DEF_LAYOUT_OFFS(v0) + DEF_LAYOUT_OFFS(v1) + DEF_LAYOUT_OFFS(a0) + DEF_LAYOUT_OFFS(a1) + DEF_LAYOUT_OFFS(a2) + DEF_LAYOUT_OFFS(a3) + DEF_LAYOUT_OFFS(a4) + DEF_LAYOUT_OFFS(a5) + DEF_LAYOUT_OFFS(a6) + DEF_LAYOUT_OFFS(a7) + DEF_LAYOUT_OFFS(t0) + DEF_LAYOUT_OFFS(t1) + DEF_LAYOUT_OFFS(t2) + DEF_LAYOUT_OFFS(t3) + DEF_LAYOUT_OFFS(s0) + DEF_LAYOUT_OFFS(s1) + DEF_LAYOUT_OFFS(s2) + DEF_LAYOUT_OFFS(s3) + DEF_LAYOUT_OFFS(s4) + DEF_LAYOUT_OFFS(s5) + DEF_LAYOUT_OFFS(s6) + DEF_LAYOUT_OFFS(s7) + DEF_LAYOUT_OFFS(t8) + DEF_LAYOUT_OFFS(t9) + + DEF_LAYOUT_OFFS(gp) + DEF_LAYOUT_OFFS(fp) + DEF_LAYOUT_OFFS(return) + reg_save_size + }; public: - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); - static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); - //FIXME, I have no idea which register to use - static int raOffset(void) { return return_off / 2; } - //Rmethod - static int methodOffset(void) { return s3_off / 2; } - - static int v0Offset(void) { return v0_off / 2; } - static int v1Offset(void) { return v1_off / 2; } - - static int fpResultOffset(void) { return fpr0_off / 2; } - - // During deoptimization only the result register need to be restored - // all the other values have already been extracted. - - static void restore_result_registers(MacroAssembler* masm); + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); + static int raOffset(void) { return return_off / 2; } + //Rmethod + static int methodOffset(void) { return s3_off / 2; } + + static int v0Offset(void) { return v0_off / 2; } + static int v1Offset(void) { return v1_off / 2; } + + static int fpResultOffset(void) { return fpr0_off / 2; } + + // During deoptimization only the result register need to be restored + // all the other values have already been extracted. 
+ static void restore_result_registers(MacroAssembler* masm); }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { @@ -183,7 +152,7 @@ int frame_size_in_words = frame_size_in_bytes / wordSize; *total_frame_words = frame_size_in_words; - // save registers, fpu state, and flags + // save registers, fpu state, and flags // We assume caller has already has return address slot on the stack // We push epb twice in this sequence because we want the real ebp // to be under the return like a normal enter and we want to use pushad @@ -194,24 +163,24 @@ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); - __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); - __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); - __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); - __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); - __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); - __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); - __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); - __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); - __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); - __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); - __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); - __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); - __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); - __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); - __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); - __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); - __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); - __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); + __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); + __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); + __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); + __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); + __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); + __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); + __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); + __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); + __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); + __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); + __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); + __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); + __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); + __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); + __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); + __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); + __ sd(A4, 
SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); + __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); __ sd(T0, SP, t0_off * jintSize); __ sd(T1, SP, t1_off * jintSize); __ sd(T2, SP, t2_off * jintSize); @@ -234,8 +203,8 @@ __ daddi(FP, SP, fp_off * jintSize); OopMapSet *oop_maps = new OopMapSet(); - //OopMap* map = new OopMap( frame_words, 0 ); - OopMap* map = new OopMap( frame_size_in_slots, 0 ); + //OopMap* map = new OopMap( frame_words, 0 ); + OopMap* map = new OopMap( frame_size_in_slots, 0 ); //#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) @@ -301,54 +270,6 @@ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); -/* - if (true) { - map->set_callee_saved(STACK_OFFSET( v0H_off), V0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( v1H_off), V1->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a0H_off), A0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a1H_off), A1->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a2H_off), A2->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a3H_off), A3->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a4H_off), A4->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a5H_off), A5->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a6H_off), A6->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( a7H_off), A7->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t0H_off), T0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t1H_off), T1->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t2H_off), T2->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t3H_off), T3->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s0H_off), S0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s1H_off), S1->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s2H_off), S2->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s3H_off), S3->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s4H_off), S4->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s5H_off), S5->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s6H_off), S6->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( s7H_off), S7->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t8H_off), T8->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( t9H_off), T9->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( gpH_off), GP->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpH_off), FP->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( returnH_off), RA->as_VMReg()->next()); - - map->set_callee_saved(STACK_OFFSET( fpr0H_off), F0->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr2H_off), F2->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr4H_off), F4->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr6H_off), F6->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr8H_off), F8->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr10H_off), F10->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr12H_off), F12->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr14H_off), F14->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr16H_off), F16->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr18H_off), F18->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( 
fpr20H_off), F20->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr22H_off), F22->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr24H_off), F24->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr26H_off), F26->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr28H_off), F28->as_VMReg()->next()); - map->set_callee_saved(STACK_OFFSET( fpr30H_off), F30->as_VMReg()->next()); - } -*/ #undef STACK_OFFSET return map; } @@ -360,25 +281,25 @@ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); - __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); - __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); - __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); - __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); - __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); - __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); - __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); - __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); - __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); - __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); - __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); - __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); - __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); - - __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); - __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); - __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); - __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); - __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); + __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); + __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); + __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); + __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); + __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); + __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); + __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); + __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); + __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); + __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); + __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); + __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); + __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); + + __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); + __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); + __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); + __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); + __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); __ ld(T0, SP, 
t0_off * jintSize); __ ld(T1, SP, t1_off * jintSize); __ ld(T2, SP, t2_off * jintSize); @@ -406,36 +327,37 @@ // a result. // FIXME, if the result is float? void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + // Just restore result register. Only used by deoptimization. By // now any callee save register that needs to be restore to a c2 // caller of the deoptee has been extracted into the vframeArray // and will be stuffed into the c2i adapter we create for later // restoration so only result registers need to be restored here. - // + __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); - __ addiu(SP, SP, return_off * jintSize); + __ addiu(SP, SP, return_off * jintSize); } - // Is vector's size (in bytes) bigger than a size saved by default? - // 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. - bool SharedRuntime::is_wide_vector(int size) { - return size > 16; - } +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} // The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions // (like the placement of the register window) the slots must be biased by // the following value. -static int reg2offset_in(VMReg r) { - // Account for saved ebp and return address - // This should really be in_preserve_stack_slots - return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); +static int reg2offset_in(VMReg r) { + // Account for saved ebp and return address + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); } -static int reg2offset_out(VMReg r) { - return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; } // --------------------------------------------------------------------------- @@ -470,10 +392,6 @@ VMRegPair *regs, int total_args_passed, int is_outgoing) { -//#define aoqi_test -#ifdef aoqi_test -tty->print_cr(" SharedRuntime::%s :%d, total_args_passed: %d", __func__, __LINE__, total_args_passed); -#endif // Create the mapping between argument positions and // registers. @@ -543,85 +461,9 @@ ShouldNotReachHere(); break; } -#ifdef aoqi_test -tty->print_cr(" SharedRuntime::%s :%d, sig_bt[%d]: %d, reg[%d]:%d|%d, stk_args:%d", __func__, __LINE__, i, sig_bt[i], i, regs[i].first(), regs[i].second(), stk_args); -#endif } return round_to(stk_args, 2); -/* - // Starting stack position for args on stack - uint stack = 0; - - // Pass first five oop/int args in registers T0, A0 - A3. - uint reg_arg0 = 9999; - uint reg_arg1 = 9999; - uint reg_arg2 = 9999; - uint reg_arg3 = 9999; - uint reg_arg4 = 9999; - - - // Pass doubles & longs &float ligned on the stack. 
First count stack slots for doubles - int i; - for( i = 0; i < total_args_passed; i++) { - if( sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG ) { - stack += 2; - } - } - int dstack = 0; // Separate counter for placing doubles - for( i = 0; i < total_args_passed; i++) { - // From the type and the argument number (count) compute the location - switch( sig_bt[i] ) { - case T_SHORT: - case T_CHAR: - case T_BYTE: - case T_BOOLEAN: - case T_INT: - case T_ARRAY: - case T_OBJECT: - case T_ADDRESS: - if( reg_arg0 == 9999 ) { - reg_arg0 = i; - regs[i].set1(T0->as_VMReg()); - } else if( reg_arg1 == 9999 ) { - reg_arg1 = i; - regs[i].set1(A0->as_VMReg()); - } else if( reg_arg2 == 9999 ) { - reg_arg2 = i; - regs[i].set1(A1->as_VMReg()); - }else if( reg_arg3 == 9999 ) { - reg_arg3 = i; - regs[i].set1(A2->as_VMReg()); - }else if( reg_arg4 == 9999 ) { - reg_arg4 = i; - regs[i].set1(A3->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stack++)); - } - break; - case T_FLOAT: - regs[i].set1(VMRegImpl::stack2reg(stack++)); - break; - case T_LONG: - assert(sig_bt[i+1] == T_VOID, "missing Half" ); - regs[i].set2(VMRegImpl::stack2reg(dstack)); - dstack += 2; - break; - case T_DOUBLE: - assert(sig_bt[i+1] == T_VOID, "missing Half" ); - regs[i].set2(VMRegImpl::stack2reg(dstack)); - dstack += 2; - break; - case T_VOID: regs[i].set_bad(); break; - break; - default: - ShouldNotReachHere(); - break; - } - } - // return value can be odd number of VMRegImpl stack slots make multiple of 2 - return round_to(stack, 2); -*/ } // Helper class mostly to avoid passing masm everywhere, and handle store @@ -634,7 +476,6 @@ #endif // _LP64 void patch_callers_callsite(); -// void tag_c2i_arg(frame::Tag t, Register base, int st_off, Register scratch); // base+st_off points to top of argument int arg_offset(const int st_off) { return st_off; } @@ -684,130 +525,77 @@ // Patch the callers callsite with entry to compiled code if it exists. void AdapterGenerator::patch_callers_callsite() { - Label L; - //FIXME , what is stored in eax? - //__ verify_oop(ebx); - __ verify_oop(Rmethod); - // __ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD); - __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); - //__ jcc(Assembler::equal, L); - __ beq(AT,R0,L); - __ delayed()->nop(); - // Schedule the branch target address early. - // Call into the VM to patch the caller, then jump to compiled callee - // eax isn't live so capture return address while we easily can - // __ movl(eax, Address(esp, 0)); -// __ lw(T5,SP,0); - __ move(V0, RA); - - __ pushad(); - //jerome_for_debug - // __ pushad(); - // __ pushfd(); + Label L; + __ verify_oop(Rmethod); + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT,R0,L); + __ delayed()->nop(); + // Schedule the branch target address early. 
+ // Call into the VM to patch the caller, then jump to compiled callee + // eax isn't live so capture return address while we easily can + __ move(V0, RA); + + __ pushad(); #ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - __ empty_FPU_stack(); + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); #endif /* COMPILER2 */ - // VM needs caller's callsite - // __ pushl(eax); - - // VM needs target method - // __ pushl(ebx); - // __ push(Rmethod); - // __ verify_oop(ebx); - - __ move(A0, Rmethod); - __ move(A1, V0); -// __ addi(SP, SP, -8); + // VM needs caller's callsite + // VM needs target method + + __ move(A0, Rmethod); + __ move(A1, V0); //we should preserve the return address - __ verify_oop(Rmethod); - __ move(S0, SP); - __ move(AT, -(StackAlignmentInBytes)); // align the stack - __ andr(SP, SP, AT); - __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), - relocInfo::runtime_call_type); - //__ addl(esp, 2*wordSize); - - __ delayed()->nop(); - // __ addi(SP, SP, 8); - // __ popfd(); - __ move(SP, S0); - __ popad(); - __ bind(L); + __ verify_oop(Rmethod); + __ move(S0, SP); + __ move(AT, -(StackAlignmentInBytes)); // align the stack + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), + relocInfo::runtime_call_type); + + __ delayed()->nop(); + __ move(SP, S0); + __ popad(); + __ bind(L); } -/* -void AdapterGenerator::tag_c2i_arg(frame::Tag t, Register base, int st_off, - Register scratch) { - Unimplemented(); -}*/ #ifdef _LP64 Register AdapterGenerator::arg_slot(const int st_off) { - Unimplemented(); + Unimplemented(); } Register AdapterGenerator::next_arg_slot(const int st_off){ - Unimplemented(); + Unimplemented(); } #endif // _LP64 // Stores long into offset pointed to by base void AdapterGenerator::store_c2i_long(Register r, Register base, const int st_off, bool is_stack) { - Unimplemented(); + Unimplemented(); } void AdapterGenerator::store_c2i_object(Register r, Register base, - const int st_off) { - Unimplemented(); + const int st_off) { + Unimplemented(); } void AdapterGenerator::store_c2i_int(Register r, Register base, - const int st_off) { - Unimplemented(); + const int st_off) { + Unimplemented(); } // Stores into offset pointed to by base void AdapterGenerator::store_c2i_double(VMReg r_2, VMReg r_1, Register base, const int st_off) { - Unimplemented(); + Unimplemented(); } void AdapterGenerator::store_c2i_float(FloatRegister f, Register base, const int st_off) { - Unimplemented(); + Unimplemented(); } -/* -void AdapterGenerator::tag_stack(const BasicType sig, int st_off) { - if (TaggedStackInterpreter) { - int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0); - if (sig == T_OBJECT || sig == T_ARRAY) { - // __ movl(Address(esp, tag_offset), frame::TagReference); - // __ addi(AT,R0, frame::TagReference); - - __ move(AT, frame::TagReference); - __ sw (AT, SP, tag_offset); - } else if (sig == T_LONG || sig == T_DOUBLE) { - int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1); - // __ movl(Address(esp, next_tag_offset), frame::TagValue); - // __ addi(AT,R0, frame::TagValue); - __ move(AT, frame::TagValue); - __ sw (AT, SP, next_tag_offset); - //__ movl(Address(esp, tag_offset), frame::TagValue); - // __ addi(AT,R0, frame::TagValue); - __ move(AT, frame::TagValue); - __ sw (AT, SP, tag_offset); - - } else { - // __ movl(Address(esp, tag_offset), frame::TagValue); - //__ addi(AT,R0, frame::TagValue); - __ move(AT, frame::TagValue); - __ 
sw (AT, SP, tag_offset); - - } - } -}*/ void AdapterGenerator::gen_c2i_adapter( int total_args_passed, @@ -834,113 +622,77 @@ // call and not bother building another interpreter arg area. We don't // do that at this point. - patch_callers_callsite(); - - __ bind(skip_fixup); + patch_callers_callsite(); + + __ bind(skip_fixup); #ifdef COMPILER2 - __ empty_FPU_stack(); + __ empty_FPU_stack(); #endif /* COMPILER2 */ - //this is for native ? - // Since all args are passed on the stack, total_args_passed * interpreter_ - // stack_element_size is the - // space we need. - int extraspace = total_args_passed * Interpreter::stackElementSize; - - // stack is aligned, keep it that way - extraspace = round_to(extraspace, 2*wordSize); - - // Get return address - // __ popl(eax); - //__ pop(T4); - __ move(V0, RA); - // set senderSP value - // __ movl(esi, esp); -//refer to interpreter_mips.cpp:generate_asm_entry - __ move(Rsender, SP); - //__ subl(esp, extraspace); - __ addi(SP, SP, -extraspace); - - // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), - "missing half"); - continue; - } - - // st_off points to lowest address on stack. - int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off); -#endif - // Say 4 args: - // i st_off - // 0 12 T_LONG - // 1 8 T_VOID - // 2 4 T_OBJECT - // 3 0 T_BOOL - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - - if (r_1->is_stack()) { - // memory to memory use fpu stack top - int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_stack, ld_off:%x", __func__, __LINE__, ld_off); -#endif - - if (!r_2->is_valid()) { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, !r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off); -#endif - __ ld_ptr(AT, SP, ld_off); - __ st_ptr(AT, SP, st_off); - //tag_stack(sig_bt[i], st_off); - } else { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, r_2->is_valid, ld_off:%x", __func__, __LINE__, ld_off); -#endif - - // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW - // st_off == MSW, st_off-wordSize == LSW - - int next_off = st_off - Interpreter::stackElementSize; - /* - __ lw(AT, SP, ld_off); - __ sw(AT, SP, next_off); - __ lw(AT, SP, ld_off + wordSize); - __ sw(AT, SP, st_off); - */ - __ ld_ptr(AT, SP, ld_off); - __ st_ptr(AT, SP, st_off); - - /* Ref to is_Register condition */ - if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) - __ st_ptr(AT,SP,st_off - 8); - //tag_stack(sig_bt[i], next_off); - } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, !r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off); -#endif - // __ movl(Address(esp, st_off), r); - __ sd(r,SP, st_off); //aoqi_test FIXME - //tag_stack(sig_bt[i], st_off); - } else { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, r_1->is_Register, r_2->is_valid, st_off: %lx", __func__, __LINE__, st_off); -#endif - //FIXME, mips will not enter here - // long/double in gpr - __ sd(r,SP, st_off); //aoqi_test FIXME 
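// A standalone sketch (not part of this patch) of the interpreter-slot math the
// c2i store loop above relies on: st_off counts down from the highest interpreter
// slot, and for T_LONG / T_DOUBLE the payload is additionally stored at
// st_off - 8 so the interpreter finds it in the lower of its two slots, which is
// what the extra sd(r, SP, st_off - 8) is for (see the comment that follows).
// The value 8 for Interpreter::stackElementSize is an assumption for LP64, and
// the example signature is hypothetical.
#include <cstdio>

int main() {
  const int stack_element_size = 8;  // assumed Interpreter::stackElementSize on LP64
  const char* sig[] = { "T_LONG", "T_VOID", "T_OBJECT", "T_BOOLEAN" };
  const int total_args_passed = 4;
  for (int i = 0; i < total_args_passed; i++) {
    // Same formula as the loop above: slots are laid out highest-index first.
    int st_off = ((total_args_passed - 1) - i) * stack_element_size;
    std::printf("arg %d (%s) -> st_off %d\n", i, sig[i], st_off);
  }
  return 0;
}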
-/* Jin: In [java/util/zip/ZipFile.java] + //this is for native ? + // Since all args are passed on the stack, total_args_passed * interpreter_ + // stack_element_size is the + // space we need. + int extraspace = total_args_passed * Interpreter::stackElementSize; + + // stack is aligned, keep it that way + extraspace = round_to(extraspace, 2*wordSize); + + // Get return address + __ move(V0, RA); + // set senderSP value + //refer to interpreter_mips.cpp:generate_asm_entry + __ move(Rsender, SP); + __ addi(SP, SP, -extraspace); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // st_off points to lowest address on stack. + int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; + // Say 4 args: + // i st_off + // 0 12 T_LONG + // 1 8 T_VOID + // 2 4 T_OBJECT + // 3 0 T_BOOL + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use fpu stack top + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + } else { + + + int next_off = st_off - Interpreter::stackElementSize; + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + /* Ref to is_Register condition */ + if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ st_ptr(AT,SP,st_off - 8); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + __ sd(r,SP, st_off); //aoqi_test FIXME + } else { + //FIXME, mips will not enter here + // long/double in gpr + __ sd(r,SP, st_off); //aoqi_test FIXME +/* Jin: In [java/util/zip/ZipFile.java] private static native long open(String name, int mode, long lastModified); private static native int getTotal(long jzfile); @@ -950,9 +702,9 @@ * * Caller -> lir_static_call -> gen_resolve_stub -> -- resolve_static_call_C - `- gen_c2i_adapter() [*] + `- gen_c2i_adapter() [*] | - `- AdapterHandlerLibrary::get_create_apapter_index + `- AdapterHandlerLibrary::get_create_apapter_index -> generate_native_entry -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] @@ -970,7 +722,7 @@ | | (low) * - * However, the sequence is reversed here: + * However, the sequence is reversed here: * (high) | | @@ -986,42 +738,36 @@ * * So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). */ - if (sig_bt[i] == T_LONG) - __ sd(r,SP, st_off - 8); - // ShouldNotReachHere(); - // int next_off = st_off - Interpreter::stackElementSize; - // __ sw(r_2->as_Register(),SP, st_off); - // __ sw(r,SP, next_off); - // tag_stack(masm, sig_bt[i], next_off); - } - } else if (r_1->is_FloatRegister()) { - assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); - - FloatRegister fr = r_1->as_FloatRegister(); - if (sig_bt[i] == T_FLOAT) - __ swc1(fr,SP, st_off); - else - { - __ sdc1(fr,SP, st_off); - __ sdc1(fr,SP, st_off - 8); /* T_DOUBLE needs two slots */ - } - } - } - - // Schedule the branch target address early. 
- __ ld_ptr(AT, Rmethod,in_bytes(Method::interpreter_entry_offset()) ); - // And repush original return address - __ move(RA, V0); - __ jr (AT); - __ delayed()->nop(); + if (sig_bt[i] == T_LONG) + __ sd(r,SP, st_off - 8); + } + } else if (r_1->is_FloatRegister()) { + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ swc1(fr,SP, st_off); + else { + __ sdc1(fr,SP, st_off); + __ sdc1(fr,SP, st_off - 8); /* T_DOUBLE needs two slots */ + } + } + } + + // Schedule the branch target address early. + __ ld_ptr(AT, Rmethod,in_bytes(Method::interpreter_entry_offset()) ); + // And repush original return address + __ move(RA, V0); + __ jr (AT); + __ delayed()->nop(); } void AdapterGenerator::gen_i2c_adapter( - int total_args_passed, - // VMReg max_arg, - int comp_args_on_stack, // VMRegStackSlots - const BasicType *sig_bt, - const VMRegPair *regs) { + int total_args_passed, + // VMReg max_arg, + int comp_args_on_stack, // VMRegStackSlots + const BasicType *sig_bt, + const VMRegPair *regs) { // Generate an I2C adapter: adjust the I-frame to make space for the C-frame // layout. Lesp was saved by the calling I-frame and will be restored on @@ -1058,7 +804,7 @@ // Align the outgoing SP __ move(AT, -(StackAlignmentInBytes)); - __ andr(SP, SP, AT); + __ andr(SP, SP, AT); // push the return address on the stack (note that pushing, rather // than storing it, yields the correct frame alignment for the callee) // Put saved SP in another register @@ -1081,16 +827,16 @@ continue; } - // Pick up 0, 1 or 2 words from SP+offset. - - //FIXME. aoqi. just delete the assert + // Pick up 0, 1 or 2 words from SP+offset. + + //FIXME. aoqi. just delete the assert //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); // Load in argument order going down. int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; // Point to interpreter value (vs. tag) int next_off = ld_off - Interpreter::stackElementSize; // - // + // // VMReg r_1 = regs[i].first(); VMReg r_2 = regs[i].second(); @@ -1098,114 +844,96 @@ assert(!r_2->is_valid(), ""); continue; } -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d, ld_off:%d, next_off: %d", __func__, __LINE__, i, sig_bt[i], total_args_passed, ld_off, next_off); -#endif - if (r_1->is_stack()) { - // Convert stack slot to an SP offset (+ wordSize to + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to // account for return address ) - //NOTICE HERE!!!! I sub a wordSize here - int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + //NOTICE HERE!!!! I sub a wordSize here + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; //+ wordSize; - // We can use esi as a temp here because compiled code doesn't + // We can use esi as a temp here because compiled code doesn't // need esi as an input - // and if we end up going thru a c2i because of a miss a reasonable - // value of esi - // we be generated. + // and if we end up going thru a c2i because of a miss a reasonable + // value of esi + // we be generated. 
if (!r_2->is_valid()) { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() !r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off); -#endif - __ ld(AT, saved_sp, ld_off); - __ sd(AT, SP, st_off); + __ ld(AT, saved_sp, ld_off); + __ sd(AT, SP, st_off); } else { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_stack() r_2->is_valid(), st_off:%d", __func__, __LINE__, i, sig_bt[i], total_args_passed, st_off); -#endif - // Interpreter local[n] == MSW, local[n+1] == LSW however locals - // are accessed as negative so LSW is at LOW address - - // ld_off is MSW so get LSW - // st_off is LSW (i.e. reg.first()) - /* - __ ld(AT, saved_sp, next_off); - __ sd(AT, SP, st_off); - __ ld(AT, saved_sp, ld_off); - __ sd(AT, SP, st_off + wordSize); - */ - - /* 2012/4/9 Jin - * [./org/eclipse/swt/graphics/GC.java] - * void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, - int destX, int destY, int destWidth, int destHeight, - boolean simple, - int imgWidth, int imgHeight, - long maskPixmap, <-- Pass T_LONG in stack - int maskType); - * Before this modification, Eclipse displays icons with solid black background. - */ - __ ld(AT, saved_sp, ld_off); + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + // st_off is LSW (i.e. reg.first()) + /* + __ ld(AT, saved_sp, next_off); + __ sd(AT, SP, st_off); + __ ld(AT, saved_sp, ld_off); + __ sd(AT, SP, st_off + wordSize); + */ + + /* 2012/4/9 Jin + * [./org/eclipse/swt/graphics/GC.java] + * void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, + int destX, int destY, int destWidth, int destHeight, + boolean simple, + int imgWidth, int imgHeight, + long maskPixmap, <-- Pass T_LONG in stack + int maskType); + * Before this modification, Eclipse displays icons with solid black background. + */ + __ ld(AT, saved_sp, ld_off); if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) - __ ld(AT, saved_sp, ld_off - 8); - __ sd(AT, SP, st_off); - //__ ld(AT, saved_sp, next_off); - //__ sd(AT, SP, st_off + wordSize); + __ ld(AT, saved_sp, ld_off - 8); + __ sd(AT, SP, st_off); } } else if (r_1->is_Register()) { // Register argument Register r = r_1->as_Register(); // assert(r != eax, "must be different"); if (r_2->is_valid()) { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed); -#endif - // assert(r_2->as_Register() != eax, "need another temporary register"); - // Remember r_1 is low address (and LSB on mips) - // So r_2 gets loaded from high address regardless of the platform - //aoqi - assert(r_2->as_Register() == r_1->as_Register(), ""); - //__ ld(r_2->as_Register(), saved_sp, ld_off); - //__ ld(r, saved_sp, next_off); - __ ld(r, saved_sp, ld_off); - -/* Jin: - * - * For T_LONG type, the real layout is as below: - - (high) - | | - ----------- - | 8 bytes | - | (void) | - ----------- - | 8 bytes | - | (long) | - ----------- - | | - (low) - * - * We should load the low-8 bytes. 
- */ - if (sig_bt[i] == T_LONG) - __ ld(r, saved_sp, ld_off - 8); + // assert(r_2->as_Register() != eax, "need another temporary register"); + // Remember r_1 is low address (and LSB on mips) + // So r_2 gets loaded from high address regardless of the platform + //aoqi + assert(r_2->as_Register() == r_1->as_Register(), ""); + //__ ld(r_2->as_Register(), saved_sp, ld_off); + //__ ld(r, saved_sp, next_off); + __ ld(r, saved_sp, ld_off); + + /* Jin: + * + * For T_LONG type, the real layout is as below: + + (high) + | | + ----------- + | 8 bytes | + | (void) | + ----------- + | 8 bytes | + | (long) | + ----------- + | | + (low) + * + * We should load the low-8 bytes. + */ + if (sig_bt[i] == T_LONG) + __ ld(r, saved_sp, ld_off - 8); } else { -#ifdef aoqi_test -tty->print_cr(" AdapterGenerator::%s :%d, sig_bt[%d]:%d, total_args_passed:%d r_1->is_Register() !r_2->is_valid()", __func__, __LINE__, i, sig_bt[i], total_args_passed); -#endif - __ lw(r, saved_sp, ld_off); + __ lw(r, saved_sp, ld_off); } } else if (r_1->is_FloatRegister()) { // Float Register - assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); - - FloatRegister fr = r_1->as_FloatRegister(); - if (sig_bt[i] == T_FLOAT) - __ lwc1(fr, saved_sp, ld_off); - else - { - __ ldc1(fr, saved_sp, ld_off); - __ ldc1(fr, saved_sp, ld_off - 8); - } - } + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ lwc1(fr, saved_sp, ld_off); + else { + __ ldc1(fr, saved_sp, ld_off); + __ ldc1(fr, saved_sp, ld_off - 8); + } + } } // 6243940 We might end up in handle_wrong_method if @@ -1223,8 +951,8 @@ // move methodOop to eax in case we end up in an c2i adapter. // the c2i adapters expect methodOop in eax (c2) because c2's // resolve stubs return the result (the method) in eax. - // I'd love to fix this. - __ move(V0, Rmethod); + // I'd love to fix this. + __ move(V0, Rmethod); __ jr(T9); __ delayed()->nop(); } @@ -1264,33 +992,24 @@ Label missed; __ verify_oop(holder); - // __ movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes())); - //__ ld_ptr(temp, receiver, oopDesc::klass_offset_in_bytes()); //add for compressedoops __ load_klass(temp, receiver); __ verify_oop(temp); - // __ cmpl(temp, Address(holder, CompiledICHolder::holder_klass_offset())); - __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); - //__ movl(ebx, Address(holder, CompiledICHolder::holder_method_offset())); + __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_method_offset()); - //__ jcc(Assembler::notEqual, missed); - __ bne(AT, temp, missed); - __ delayed()->nop(); + __ bne(AT, temp, missed); + __ delayed()->nop(); // Method might have been compiled since the call site was patched to // interpreted if that is the case treat it as a miss so we can get // the call site corrected. 
- //__ cmpl(Address(ebx, in_bytes(Method::code_offset())), NULL_WORD); - //__ jcc(Assembler::equal, skip_fixup); __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); - __ beq(AT, R0, skip_fixup); - __ delayed()->nop(); + __ beq(AT, R0, skip_fixup); + __ delayed()->nop(); __ bind(missed); - // __ move(AT, (int)&jerome7); - // __ sw(RA, AT, 0); __ jmp(ic_miss, relocInfo::runtime_call_type); - __ delayed()->nop(); + __ delayed()->nop(); } address c2i_entry = __ pc(); @@ -1299,72 +1018,34 @@ __ flush(); return AdapterHandlerLibrary::new_entry(fingerprint,i2c_entry, c2i_entry, c2i_unverified_entry); - } -/* -// Helper function for native calling conventions -static VMReg int_stk_helper( int i ) { - // Bias any stack based VMReg we get by ignoring the window area - // but not the register parameter save area. - // - // This is strange for the following reasons. We'd normally expect - // the calling convention to return an VMReg for a stack slot - // completely ignoring any abi reserved area. C2 thinks of that - // abi area as only out_preserve_stack_slots. This does not include - // the area allocated by the C abi to store down integer arguments - // because the java calling convention does not use it. So - // since c2 assumes that there are only out_preserve_stack_slots - // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack - // location the c calling convention must add in this bias amount - // to make up for the fact that the out_preserve_stack_slots is - // insufficient for C calls. What a mess. I sure hope those 6 - // stack words were worth it on every java call! - - // Another way of cleaning this up would be for out_preserve_stack_slots - // to take a parameter to say whether it was C or java calling conventions. - // Then things might look a little better (but not much). - - int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM; - if( mem_parm_offset < 0 ) { - return as_oRegister(i)->as_VMReg(); - } else { - int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word; - // Now return a biased offset that will be correct when out_preserve_slots is added back in - return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots()); - } -} -*/ - int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, VMRegPair *regs2, int total_args_passed) { - assert(regs2 == NULL, "not needed on MIPS"); -#ifdef aoqi_test -tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed); -#endif - // Return the number of VMReg stack_slots needed for the args. - // This value does not include an abi space (like register window - // save area). - - // The native convention is V8 if !LP64 - // The LP64 convention is the V9 convention which is slightly more sane. - - // We return the amount of VMReg stack slots we need to reserve for all - // the arguments NOT counting out_preserve_stack_slots. Since we always - // have space for storing at least 6 registers to memory we start with that. - // See int_stk_helper for a further discussion. - // We return the amount of VMRegImpl stack slots we need to reserve for all - // the arguments NOT counting out_preserve_stack_slots. + assert(regs2 == NULL, "not needed on MIPS"); + // Return the number of VMReg stack_slots needed for the args. + // This value does not include an abi space (like register window + // save area). 
+ + // The native convention is V8 if !LP64 + // The LP64 convention is the V9 convention which is slightly more sane. + + // We return the amount of VMReg stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. Since we always + // have space for storing at least 6 registers to memory we start with that. + // See int_stk_helper for a further discussion. + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. static const Register INT_ArgReg[Argument::n_register_parameters] = { A0, A1, A2, A3, A4, A5, A6, A7 }; static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { F12, F13, F14, F15, F16, F17, F18, F19 }; - uint args = 0; - uint stk_args = 0; // inc by 2 each time + uint args = 0; + uint stk_args = 0; // inc by 2 each time /* Example: --- n java.lang.UNIXProcess::forkAndExec @@ -1389,192 +1070,144 @@ jobject stderr_fd) ::c_calling_convention -0: // env <-- a0 -1: L // klass/obj <-- t0 => a1 -2: [ // prog[] <-- a0 => a2 -3: [ // argBlock[] <-- a1 => a3 -4: I // argc -5: [ // envBlock[] <-- a3 => a5 -6: I // envc -7: [ // dir[] <-- a5 => a7 -8: Z // redirectErrorStream a6 => sp[0] -9: L // stdin a7 => sp[8] -10: L // stdout fp[16] => sp[16] -11: L // stderr fp[24] => sp[24] +0: // env <-- a0 +1: L // klass/obj <-- t0 => a1 +2: [ // prog[] <-- a0 => a2 +3: [ // argBlock[] <-- a1 => a3 +4: I // argc +5: [ // envBlock[] <-- a3 => a5 +6: I // envc +7: [ // dir[] <-- a5 => a7 +8: Z // redirectErrorStream a6 => sp[0] +9: L // stdin a7 => sp[8] +10: L // stdout fp[16] => sp[16] +11: L // stderr fp[24] => sp[24] */ - for (int i = 0; i < total_args_passed; i++) { - switch (sig_bt[i]) { - case T_VOID: // Halves of longs and doubles - assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); - regs[i].set_bad(); - break; - case T_BOOLEAN: - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: - if (args < Argument::n_register_parameters) { - regs[i].set1(INT_ArgReg[args++]->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stk_args)); - stk_args += 2; - } - break; - case T_LONG: - assert(sig_bt[i + 1] == T_VOID, "expecting half"); - // fall through - case T_OBJECT: - case T_ARRAY: - case T_ADDRESS: - case T_METADATA: - if (args < Argument::n_register_parameters) { - regs[i].set2(INT_ArgReg[args++]->as_VMReg()); - } else { - regs[i].set2(VMRegImpl::stack2reg(stk_args)); - stk_args += 2; - } - break; - case T_FLOAT: - if (args < Argument::n_float_register_parameters) { - regs[i].set1(FP_ArgReg[args++]->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stk_args)); - stk_args += 2; - } - break; - case T_DOUBLE: - assert(sig_bt[i + 1] == T_VOID, "expecting half"); - if (args < Argument::n_float_register_parameters) { - regs[i].set2(FP_ArgReg[args++]->as_VMReg()); - } else { - regs[i].set2(VMRegImpl::stack2reg(stk_args)); - stk_args += 2; - } - break; - default: - ShouldNotReachHere(); - break; - } - } - - return round_to(stk_args, 2); -} -/* -int SharedRuntime::c_calling_convention_jni(const BasicType *sig_bt, - VMRegPair *regs, - int total_args_passed) { -// We return the amount of VMRegImpl stack slots we need to reserve for all -// the arguments NOT counting out_preserve_stack_slots. 
- bool unalign = 0; - uint stack = 0; // All arguments on stack -#ifdef aoqi_test -tty->print_cr(" SharedRuntime::%s :%d total_args_passed:%d", __func__, __LINE__, total_args_passed); -#endif - - for( int i = 0; i < total_args_passed; i++) { - // From the type and the argument number (count) compute the location - switch( sig_bt[i] ) { + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; case T_BOOLEAN: case T_CHAR: - case T_FLOAT: case T_BYTE: case T_SHORT: case T_INT: + if (args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through case T_OBJECT: case T_ARRAY: case T_ADDRESS: - regs[i].set1(VMRegImpl::stack2reg(stack++)); - unalign = !unalign; + case T_METADATA: + if (args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } break; - case T_LONG: - case T_DOUBLE: // The stack numbering is reversed from Java - // Since C arguments do not get reversed, the ordering for - // doubles on the stack must be opposite the Java convention - assert(sig_bt[i+1] == T_VOID, "missing Half" ); - if(unalign){ - stack += 1; - unalign = ! unalign; - } - regs[i].set2(VMRegImpl::stack2reg(stack)); - stack += 2; + case T_FLOAT: + if (args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } break; - case T_VOID: regs[i].set_bad(); break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; default: ShouldNotReachHere(); break; } } - return stack; + + return round_to(stk_args, 2); } -*/ // --------------------------------------------------------------------------- void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { - // We always ignore the frame_slots arg and just use the space just below frame pointer - // which by this time is free to use - switch (ret_type) { - case T_FLOAT: - __ swc1(FSF, FP, -wordSize); - break; - case T_DOUBLE: - __ sdc1(FSF, FP, -wordSize ); - break; - case T_VOID: break; - case T_LONG: - __ sd(V0, FP, -wordSize); - break; - case T_OBJECT: - case T_ARRAY: - __ sd(V0, FP, -wordSize); - break; - default: { - __ sw(V0, FP, -wordSize); - } - } + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ swc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ sdc1(FSF, FP, -wordSize ); + break; + case T_VOID: break; + case T_LONG: + __ sd(V0, FP, -wordSize); + break; + case T_OBJECT: + case T_ARRAY: + __ sd(V0, FP, -wordSize); + break; + default: { + __ sw(V0, FP, -wordSize); + } + } } void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { - // We always ignore the frame_slots arg and just use the space just below frame pointer - // which by this time is free to use - 
switch (ret_type) { - case T_FLOAT: - __ lwc1(FSF, FP, -wordSize); - break; - case T_DOUBLE: - __ ldc1(FSF, FP, -wordSize ); - break; - case T_LONG: - __ ld(V0, FP, -wordSize); - break; - case T_VOID: break; - case T_OBJECT: - case T_ARRAY: - __ ld(V0, FP, -wordSize); - break; - default: { - __ lw(V0, FP, -wordSize); - } - } + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ lwc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ ldc1(FSF, FP, -wordSize ); + break; + case T_LONG: + __ ld(V0, FP, -wordSize); + break; + case T_VOID: break; + case T_OBJECT: + case T_ARRAY: + __ ld(V0, FP, -wordSize); + break; + default: { + __ lw(V0, FP, -wordSize); + } + } } static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { - for ( int i = first_arg ; i < arg_count ; i++ ) { - if (args[i].first()->is_Register()) { - __ push(args[i].first()->as_Register()); - } else if (args[i].first()->is_FloatRegister()) { - __ push(args[i].first()->as_FloatRegister()); - } + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + __ push(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ push(args[i].first()->as_FloatRegister()); } + } } static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { - for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { - if (args[i].first()->is_Register()) { - __ pop(args[i].first()->as_Register()); - } else if (args[i].first()->is_FloatRegister()) { - __ pop(args[i].first()->as_FloatRegister()); - } + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + __ pop(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ pop(args[i].first()->as_FloatRegister()); } + } } // A simple move of integer like type @@ -1582,46 +1215,21 @@ if (src.first()->is_stack()) { if (dst.first()->is_stack()) { // stack to stack - __ lw(AT, FP, reg2offset_in(src.first())); - __ sd(AT,SP, reg2offset_out(dst.first())); + __ lw(AT, FP, reg2offset_in(src.first())); + __ sd(AT,SP, reg2offset_out(dst.first())); } else { // stack to reg - //__ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); - __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); } } else if (dst.first()->is_stack()) { // reg to stack - __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); } else { - //__ mov(src.first()->as_Register(), dst.first()->as_Register()); - if (dst.first() != src.first()){ - __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() - } + if (dst.first() != src.first()){ + __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() + } } } -/* -// On 64 bit we will store integer like items to the stack as -// 64 bits items (sparc abi) even though java would only store -// 32bits for a parameter. 
On 32bit it will simply be 32 bits -// So this routine will do 32->32 on 32bit and 32->64 on 64bit -static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - if (src.first()->is_stack()) { - if (dst.first()->is_stack()) { - // stack to stack - __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); - __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); - } else { - // stack to reg - __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); - } - } else if (dst.first()->is_stack()) { - // reg to stack - __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); - } else { - __ mov(src.first()->as_Register(), dst.first()->as_Register()); - } -} -*/ // An oop arg. Must pass a handle not the oop itself static void object_move(MacroAssembler* masm, @@ -1635,181 +1243,133 @@ // must pass a handle. First figure out the location we use as a handle - //FIXME, for mips, dst can be register - if (src.first()->is_stack()) { - // Oop is already on the stack as an argument - Register rHandle = V0; - Label nil; - //__ xorl(rHandle, rHandle); - __ xorr(rHandle, rHandle, rHandle); - //__ cmpl(Address(ebp, reg2offset_in(src.first())), NULL_WORD); - __ ld(AT, FP, reg2offset_in(src.first())); - //__ jcc(Assembler::equal, nil); - __ beq(AT,R0, nil); - __ delayed()->nop(); - // __ leal(rHandle, Address(ebp, reg2offset_in(src.first()))); - __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); - __ bind(nil); - //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle); - if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); - else __ move( (dst.first())->as_Register(),rHandle); - //if dst is register - //FIXME, do mips need out preserve stack slots? - int offset_in_older_frame = src.first()->reg2stack() - + SharedRuntime::out_preserve_stack_slots(); - map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); - if (is_receiver) { - *receiver_offset = (offset_in_older_frame - + framesize_in_slots) * VMRegImpl::stack_slot_size; - } - } else { - // Oop is in an a register we must store it to the space we reserve - // on the stack for oop_handles - const Register rOop = src.first()->as_Register(); - assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); - // const Register rHandle = eax; - const Register rHandle = V0; - //Important: refer to java_calling_convertion - int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; - int offset = oop_slot*VMRegImpl::stack_slot_size; - Label skip; - // __ movl(Address(esp, offset), rOop); - __ sd( rOop , SP, offset ); - map->set_oop(VMRegImpl::stack2reg(oop_slot)); - // __ xorl(rHandle, rHandle); - __ xorr( rHandle, rHandle, rHandle); - //__ cmpl(rOop, NULL_WORD); - // __ jcc(Assembler::equal, skip); - __ beq(rOop, R0, skip); - __ delayed()->nop(); - // __ leal(rHandle, Address(esp, offset)); - __ lea(rHandle, Address(SP, offset)); - __ bind(skip); - // Store the handle parameter - //__ movl(Address(esp, reg2offset_out(dst.first())), rHandle); - if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); - else __ move((dst.first())->as_Register(), rHandle); - //if dst is register - - if (is_receiver) { - *receiver_offset = offset; - } - } + //FIXME, for mips, dst can be register + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = V0; + Label nil; + __ xorr(rHandle, rHandle, rHandle); + __ ld(AT, FP, 
reg2offset_in(src.first())); + __ beq(AT,R0, nil); + __ delayed()->nop(); + __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); + __ bind(nil); + if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(),rHandle); + //if dst is register + //FIXME, do mips need out preserve stack slots? + int offset_in_older_frame = src.first()->reg2stack() + + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + } else { + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles + const Register rOop = src.first()->as_Register(); + assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); + const Register rHandle = V0; + //Important: refer to java_calling_convertion + int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + Label skip; + __ sd( rOop , SP, offset ); + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + __ xorr( rHandle, rHandle, rHandle); + __ beq(rOop, R0, skip); + __ delayed()->nop(); + __ lea(rHandle, Address(SP, offset)); + __ bind(skip); + // Store the handle parameter + if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); + else __ move((dst.first())->as_Register(), rHandle); + //if dst is register + + if (is_receiver) { + *receiver_offset = offset; + } + } } // A float arg may have to do float reg int reg conversion static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); - if (src.first()->is_stack()) { - if(dst.first()->is_stack()){ - // __ movl(eax, Address(ebp, reg2offset_in(src.first()))); - __ lwc1(F12 , FP, reg2offset_in(src.first())); - // __ movl(Address(esp, reg2offset_out(dst.first())), eax); - __ swc1(F12 ,SP, reg2offset_out(dst.first())); - } - else - __ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); - } else { - // reg to stack - // __ movss(Address(esp, reg2offset_out(dst.first())), - // src.first()->as_XMMRegister()); - // __ movl(Address(esp, reg2offset_out(dst.first())), eax); - if(dst.first()->is_stack()) - __ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); - else - __ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); - } + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + __ lwc1(F12 , FP, reg2offset_in(src.first())); + __ swc1(F12 ,SP, reg2offset_out(dst.first())); + } + else + __ lwc1( dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + // reg to stack + if(dst.first()->is_stack()) + __ swc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); + else + __ mov_s( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } } -/* -static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - VMRegPair src_lo(src.first()); - VMRegPair src_hi(src.second()); - VMRegPair dst_lo(dst.first()); - VMRegPair dst_hi(dst.second()); - simple_move32(masm, src_lo, dst_lo); - simple_move32(masm, src_hi, dst_hi); -} -*/ + // A long move static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - // The only legal possibility for a 
long_move VMRegPair is: - // 1: two stack slots (possibly unaligned) - // as neither the java or C calling convention will use registers - // for longs. - - if (src.first()->is_stack()) { - assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); - // __ movl(eax, Address(ebp, reg2offset_in(src.first()))); - if( dst.first()->is_stack()){ - __ ld(AT, FP, reg2offset_in(src.first())); - // __ movl(ebx, address(ebp, reg2offset_in(src.second()))); - //__ lw(V0, FP, reg2offset_in(src.second())); - // __ movl(address(esp, reg2offset_out(dst.first())), eax); - __ sd(AT, SP, reg2offset_out(dst.first())); - // __ movl(address(esp, reg2offset_out(dst.second())), ebx); - //__ sw(V0, SP, reg2offset_out(dst.second())); - } else{ - __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); - //__ lw( (dst.second())->as_Register(), FP, reg2offset_in(src.second())); - } - } else { - if( dst.first()->is_stack()){ - __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); - //__ sw( (src.second())->as_Register(), SP, reg2offset_out(dst.second())); - } else{ - __ move( (dst.first())->as_Register() , (src.first())->as_Register()); - //__ move( (dst.second())->as_Register(), (src.second())->as_Register()); - } - } + // The only legal possibility for a long_move VMRegPair is: + // 1: two stack slots (possibly unaligned) + // as neither the java or C calling convention will use registers + // for longs. + + if (src.first()->is_stack()) { + assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); + if( dst.first()->is_stack()){ + __ ld(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); + } + } else { + if( dst.first()->is_stack()){ + __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); + } else{ + __ move( (dst.first())->as_Register() , (src.first())->as_Register()); + } + } } // A double move static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - // The only legal possibilities for a double_move VMRegPair are: - // The painful thing here is that like long_move a VMRegPair might be - - // Because of the calling convention we know that src is either - // 1: a single physical register (xmm registers only) - // 2: two stack slots (possibly unaligned) - // dst can only be a pair of stack slots. 
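// long_move and double_move both lean on the convention restated above: a 64-bit
// value that is not in a register occupies two adjacent 32-bit stack slots, and
// only the first slot's offset is needed for the single 64-bit ld/ldc1/sd/sdc1.
// A standalone sketch of that pairing; the 4-byte slot size is an assumption.
#include <cstdio>
#include <cstring>

static const int stack_slot_size = 4;

int main() {
  unsigned char frame[64] = {0};              // toy stack frame
  long long value = 0x1122334455667788LL;

  int first_slot = 6;                         // pretend VMRegPair.first() landed here
  std::memcpy(frame + first_slot * stack_slot_size, &value, sizeof value);

  long long reloaded;                         // the one 64-bit load long_move issues
  std::memcpy(&reloaded, frame + first_slot * stack_slot_size, sizeof reloaded);
  std::printf("%#llx spans slots %d and %d\n",
              (unsigned long long) reloaded, first_slot, first_slot + 1);
  return 0;
}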
- - // assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || - // src.first()->is_stack()), "bad args"); - // assert(dst.first()->is_stack() || src.first()->is_stack()), "bad args"); - - if (src.first()->is_stack()) { - // source is all stack - // __ movl(eax, Address(ebp, reg2offset_in(src.first()))); - if( dst.first()->is_stack()){ - __ ldc1(F12, FP, reg2offset_in(src.first())); - //__ movl(ebx, Address(ebp, reg2offset_in(src.second()))); - //__ lwc1(F14, FP, reg2offset_in(src.second())); - - // __ movl(Address(esp, reg2offset_out(dst.first())), eax); - __ sdc1(F12, SP, reg2offset_out(dst.first())); - // __ movl(Address(esp, reg2offset_out(dst.second())), ebx); - //__ swc1(F14, SP, reg2offset_out(dst.second())); - } else{ - __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); - //__ lwc1( (dst.second())->as_FloatRegister(), FP, reg2offset_in(src.second())); - } - - } else { - // reg to stack - // No worries about stack alignment - // __ movsd(Address(esp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); - if( dst.first()->is_stack()){ - __ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); - //__ swc1( src.second()->as_FloatRegister(),SP, reg2offset_out(dst.second())); - } - else - __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); - //__ mov_s( dst.second()->as_FloatRegister(), src.second()->as_FloatRegister()); - - } + // The only legal possibilities for a double_move VMRegPair are: + // The painful thing here is that like long_move a VMRegPair might be + + // Because of the calling convention we know that src is either + // 1: a single physical register (xmm registers only) + // 2: two stack slots (possibly unaligned) + // dst can only be a pair of stack slots. + + + if (src.first()->is_stack()) { + // source is all stack + if( dst.first()->is_stack()){ + __ ldc1(F12, FP, reg2offset_in(src.first())); + + __ sdc1(F12, SP, reg2offset_out(dst.first())); + } else{ + __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); + } + + } else { + // reg to stack + // No worries about stack alignment + if( dst.first()->is_stack()){ + __ sdc1( src.first()->as_FloatRegister(),SP, reg2offset_out(dst.first())); + } + else + __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + + } } static void verify_oop_args(MacroAssembler* masm, @@ -1824,7 +1384,6 @@ VMReg r = regs[i].first(); assert(r->is_valid(), "bad oop arg"); if (r->is_stack()) { -// __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); __ verify_oop(temp_reg); } else { @@ -1850,7 +1409,6 @@ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); if (ref_kind != 0) { member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument -// member_reg = rbx; // known to be free at this point member_reg = S3; // known to be free at this point has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); } else if (iid == vmIntrinsics::_invokeBasic) { @@ -1882,7 +1440,6 @@ // pass the receiver oop in a register. If this is not true on some // platform, pick a temp and load the receiver from stack. 
fatal("receiver always in a register"); -// receiver_reg = j_rarg0; // known to be free at this point receiver_reg = SSR; // known to be free at this point __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); } else { @@ -1905,20 +1462,17 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, methodHandle method, int compile_id, - BasicType *in_sig_bt, - VMRegPair *in_regs, + BasicType* in_sig_bt, + VMRegPair* in_regs, BasicType ret_type) { - if (method->is_method_handle_intrinsic()) { vmIntrinsics::ID iid = method->intrinsic_id(); intptr_t start = (intptr_t)__ pc(); int vep_offset = ((intptr_t)__ pc()) - start; - gen_special_dispatch(masm, method, in_sig_bt, in_regs); - int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period __ flush(); int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually @@ -1947,11 +1501,11 @@ // An OopMap for lock (and class if static), and one for the VM call itself OopMapSet *oop_maps = new OopMapSet(); - // We have received a description of where all the java arg are located - // on entry to the wrapper. We need to convert these args to where - // the jni function will expect them. To figure out where they go - // we convert the java signature to a C signature by inserting - // the hidden arguments as arg[0] and possibly arg[1] (static method) + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) const int total_in_args = method->size_of_parameters(); int total_c_args = total_in_args; @@ -1968,8 +1522,8 @@ } } - BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); - VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); BasicType* in_elem_bt = NULL; int argc = 0; @@ -2024,7 +1578,7 @@ // int out_arg_slots; //out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); // Compute framesize for the wrapper. We need to handlize all oops in // registers. 
We must create space for them here that is disjoint from @@ -2079,16 +1633,14 @@ } int oop_handle_offset = stack_slots; -// stack_slots += 9*VMRegImpl::slots_per_word; // T0, A0 ~ A7 stack_slots += total_save_slots; // Now any space we need for handlizing a klass if static method - int klass_slot_offset = 0; - int klass_offset = -1; - int lock_slot_offset = 0; - bool is_static = false; - //int oop_temp_slot_offset = 0; + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; if (method->is_static()) { klass_slot_offset = stack_slots; @@ -2105,9 +1657,8 @@ } // Now a place to save return value or as a temporary for any gpr -> fpr moves - // + 2 for return address (which we own) and saved ebp - //stack_slots += 2; - stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) + // + 2 for return address (which we own) and saved ebp + stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) // Ok The space we have allocated will look like: // @@ -2140,141 +1691,137 @@ int stack_size = stack_slots * VMRegImpl::stack_slot_size; - intptr_t start = (intptr_t)__ pc(); - - - - // First thing make an ic check to see if we should even be here - address ic_miss = SharedRuntime::get_ic_miss_stub(); - - // We are free to use all registers as temps without saving them and - // restoring them except ebp. ebp is the only callee save register - // as far as the interpreter and the compiler(s) are concerned. + intptr_t start = (intptr_t)__ pc(); + + + + // First thing make an ic check to see if we should even be here + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + // We are free to use all registers as temps without saving them and + // restoring them except ebp. ebp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. //refer to register_mips.hpp:IC_Klass - const Register ic_reg = T1; - const Register receiver = T0; - Label hit; - Label exception_pending; - - __ verify_oop(receiver); - //__ lw(AT, receiver, oopDesc::klass_offset_in_bytes()); - //add for compressedoops - __ load_klass(T9, receiver); - __ beq(T9, ic_reg, hit); - __ delayed()->nop(); - __ jmp(ic_miss, relocInfo::runtime_call_type); - __ delayed()->nop(); - // verified entry must be aligned for code patching. - // and the first 5 bytes must be in the same cache line - // if we align at 8 then we will be sure 5 bytes are in the same line - __ align(8); - - __ bind(hit); - - - int vep_offset = ((intptr_t)__ pc()) - start; + const Register ic_reg = T1; + const Register receiver = T0; + + Label hit; + Label exception_pending; + + __ verify_oop(receiver); + //add for compressedoops + __ load_klass(T9, receiver); + __ beq(T9, ic_reg, hit); + __ delayed()->nop(); + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ delayed()->nop(); + // verified entry must be aligned for code patching. + // and the first 5 bytes must be in the same cache line + // if we align at 8 then we will be sure 5 bytes are in the same line + __ align(8); + + __ bind(hit); + + + int vep_offset = ((intptr_t)__ pc()) - start; #ifdef COMPILER1 - if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { - // Object.hashCode can pull the hashCode from the header word - // instead of doing a full VM transition once it's been computed. - // Since hashCode is usually polymorphic at call sites we can't do - // this optimization at the call site without a lot of work. 
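// The InlineObjectHash fast path that follows pulls the identity hash straight
// out of the mark word when the object is unlocked, unbiased, and already has a
// hash installed, and only otherwise falls into slowCase.  A standalone sketch
// of that bit-test sequence; the bit positions below are illustrative
// assumptions, not the real markOop layout.
#include <cstdint>
#include <cstdio>

static const uint64_t unlocked_value     = 0x1;  // assumed "unlocked" low bit
static const uint64_t biased_lock_bit    = 0x4;  // assumed biased-locking bit
static const int      hash_shift         = 8;    // assumed hash position
static const uint64_t hash_mask_in_place = 0x7fffffffULL << hash_shift;

// Returns true and fills *hash when the fast path applies; false means slowCase.
static bool fast_hash(uint64_t mark, uint32_t* hash) {
  if ((mark & unlocked_value) == 0) return false;      // andi + beq: locked, go slow
  if ((mark & biased_lock_bit) != 0) return false;     // biased, go slow
  if ((mark & hash_mask_in_place) == 0) return false;  // no hash installed yet
  *hash = (uint32_t)(mark >> hash_shift);              // shr(result, hash_shift)
  return true;
}

int main() {
  uint64_t mark = (0xCAFEULL << hash_shift) | unlocked_value;
  uint32_t h = 0;
  bool taken = fast_hash(mark, &h);
  std::printf("fast path taken: %d, hash = %#x\n", taken ? 1 : 0, h);
  return 0;
}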
- Label slowCase; - Register receiver = T0; - Register result = V0; - __ ld ( result, receiver, oopDesc::mark_offset_in_bytes()); - // check if locked - __ andi(AT, result, markOopDesc::unlocked_value); - __ beq(AT, R0, slowCase); - __ delayed()->nop(); - if (UseBiasedLocking) { - // Check if biased and fall through to runtime if so - __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); - __ bne(AT,R0, slowCase); - __ delayed()->nop(); - } - // get hash - __ li(AT, markOopDesc::hash_mask_in_place); - __ andr (AT, result, AT); - // test if hashCode exists - __ beq (AT, R0, slowCase); - __ delayed()->nop(); - __ shr(result, markOopDesc::hash_shift); - __ jr(RA); - __ delayed()->nop(); - __ bind (slowCase); - } + if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { + // Object.hashCode can pull the hashCode from the header word + // instead of doing a full VM transition once it's been computed. + // Since hashCode is usually polymorphic at call sites we can't do + // this optimization at the call site without a lot of work. + Label slowCase; + Register receiver = T0; + Register result = V0; + __ ld ( result, receiver, oopDesc::mark_offset_in_bytes()); + // check if locked + __ andi(AT, result, markOopDesc::unlocked_value); + __ beq(AT, R0, slowCase); + __ delayed()->nop(); + if (UseBiasedLocking) { + // Check if biased and fall through to runtime if so + __ andi (AT, result, markOopDesc::biased_lock_bit_in_place); + __ bne(AT,R0, slowCase); + __ delayed()->nop(); + } + // get hash + __ li(AT, markOopDesc::hash_mask_in_place); + __ andr (AT, result, AT); + // test if hashCode exists + __ beq (AT, R0, slowCase); + __ delayed()->nop(); + __ shr(result, markOopDesc::hash_shift); + __ jr(RA); + __ delayed()->nop(); + __ bind (slowCase); + } #endif // COMPILER1 - // The instruction at the verified entry point must be 5 bytes or longer - // because it can be patched on the fly by make_non_entrant. The stack bang - // instruction fits that requirement. - - // Generate stack overflow check - - if (UseStackBanging) { - //this function will modify the value in A0 - __ push(A0); - __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); - __ pop(A0); - } else { - // need a 5 byte instruction to allow MT safe patching to non-entrant - __ nop(); - __ nop(); - __ nop(); - __ nop(); - __ nop(); - } - // Generate a new frame for the wrapper. - // do mips need this ? + // The instruction at the verified entry point must be 5 bytes or longer + // because it can be patched on the fly by make_non_entrant. The stack bang + // instruction fits that requirement. + + // Generate stack overflow check + + if (UseStackBanging) { + //this function will modify the value in A0 + __ push(A0); + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); + __ pop(A0); + } else { + // need a 5 byte instruction to allow MT safe patching to non-entrant + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + } + // Generate a new frame for the wrapper. + // do mips need this ? #ifndef OPT_THREAD - __ get_thread(TREG); + __ get_thread(TREG); #endif //FIXME here - __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); - // -2 because return address is already present and so is saved ebp - __ move(AT, -(StackAlignmentInBytes)); - __ andr(SP, SP, AT); - - __ enter(); - __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); - - // Frame is now completed as far a size and linkage. 
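// The frame setup just above aligns SP with andr(SP, SP, -StackAlignmentInBytes)
// and then carves out stack_size bytes, minus the two words enter() has already
// pushed for the saved RA and FP.  A standalone sketch of that arithmetic with
// assumed values for the alignment and frame size.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t StackAlignmentInBytes = 16;     // assumed ABI stack alignment
  const uint64_t wordSize              = 8;
  const uint64_t stack_size            = 0x90;   // assumed total wrapper frame size

  uint64_t sp = 0x7fff0000cafcULL;                               // some unaligned incoming SP
  sp &= ~(StackAlignmentInBytes - 1);                            // andr(SP, SP, AT) with AT = -16
  uint64_t after_enter = sp - 2 * wordSize;                      // enter(): saved RA and FP
  uint64_t frame_sp = after_enter - (stack_size - 2 * wordSize); // addiu(SP, SP, -(stack_size - 2*wordSize))

  std::printf("aligned sp = %#llx, frame sp = %#llx, frame = %llu bytes\n",
              (unsigned long long) sp, (unsigned long long) frame_sp,
              (unsigned long long) (sp - frame_sp));
  return 0;
}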
- - int frame_complete = ((intptr_t)__ pc()) - start; - - // Calculate the difference between esp and ebp. We need to know it - // after the native call because on windows Java Natives will pop - // the arguments and it is painful to do esp relative addressing - // in a platform independent way. So after the call we switch to - // ebp relative addressing. -//FIXME actually , the fp_adjustment may not be the right, because andr(sp,sp,at)may change -//the SP - int fp_adjustment = stack_size - 2*wordSize; + __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + // -2 because return address is already present and so is saved ebp + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ enter(); + __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + + // Calculate the difference between esp and ebp. We need to know it + // after the native call because on windows Java Natives will pop + // the arguments and it is painful to do esp relative addressing + // in a platform independent way. So after the call we switch to + // ebp relative addressing. + //FIXME actually , the fp_adjustment may not be the right, because andr(sp,sp,at)may change + //the SP + int fp_adjustment = stack_size - 2*wordSize; #ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - // if (UseSSE >= 2) { - // __ verify_FPU(0, "c2i transition should have clean FPU stack"); - //} else { - __ empty_FPU_stack(); - //} + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); #endif /* COMPILER2 */ - // Compute the ebp offset for any slots used after the jni call - - int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; - // We use edi as a thread pointer because it is callee save and - // if we load it once it is usable thru the entire wrapper - // const Register thread = edi; - const Register thread = TREG; - - // We use esi as the oop handle for the receiver/klass - // It is callee save so it survives the call to native - - // const Register oop_handle_reg = esi; - const Register oop_handle_reg = S4; + // Compute the ebp offset for any slots used after the jni call + + int lock_slot_ebp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; + // We use edi as a thread pointer because it is callee save and + // if we load it once it is usable thru the entire wrapper + // const Register thread = edi; + const Register thread = TREG; + + // We use esi as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + // const Register oop_handle_reg = esi; + const Register oop_handle_reg = S4; if (is_critical_native) { __ stop("generate_native_wrapper in sharedRuntime <2>"); //TODO:Fu @@ -2285,7 +1832,7 @@ } #ifndef OPT_THREAD - __ get_thread(thread); + __ get_thread(thread); #endif // @@ -2295,7 +1842,7 @@ // them. // ----------------- - // The Grand Shuffle + // The Grand Shuffle // // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* // and, if static, the class mirror instead of a receiver. This pretty much @@ -2308,18 +1855,18 @@ // vectors we have in our possession. We simply walk the java vector to // get the source locations and the c vector to get the destinations. - int c_arg = method->is_static() ? 2 : 1 ; - - // Record esp-based slot for receiver on stack for non-static methods - int receiver_offset = -1; - - // This is a trick. 
We double the stack slots so we can claim - // the oops in the caller's frame. Since we are sure to have - // more args than the caller doubling is enough to make - // sure we can capture all the incoming oop args from the - // caller. - // - OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + int c_arg = method->is_static() ? 2 : 1 ; + + // Record esp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. + // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); // Mark location of rbp (someday) // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); @@ -2341,9 +1888,9 @@ #endif /* ASSERT */ - // We know that we only have args in at most two integer registers (ecx, edx). So eax, ebx - // Are free to temporaries if we have to do stack to steck moves. - // All inbound args are referenced based on ebp and all outbound args via esp. + // We know that we only have args in at most two integer registers (ecx, edx). So eax, ebx + // Are free to temporaries if we have to do stack to steck moves. + // All inbound args are referenced based on ebp and all outbound args via esp. // This may iterate in two different directions depending on the // kind of native it is. The reason is that for regular JNI natives @@ -2351,7 +1898,6 @@ // critical natives they are offset down. GrowableArray arg_order(2 * total_in_args); VMRegPair tmp_vmreg; -// tmp_vmreg.set1(rbx->as_VMReg()); tmp_vmreg.set1(T8->as_VMReg()); if (!is_critical_native) { @@ -2400,7 +1946,7 @@ switch (in_sig_bt[i]) { case T_ARRAY: if (is_critical_native) { - __ stop("generate_native_wrapper in sharedRuntime <2>"); + __ stop("generate_native_wrapper in sharedRuntime <2>"); //TODO:Fu // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); c_arg++; @@ -2440,7 +1986,6 @@ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); default: -// move32_64(masm, in_regs[i], out_regs[c_arg]); simple_move32(masm, in_regs[i], out_regs[c_arg]); } } @@ -2448,397 +1993,359 @@ // point c_arg at the first arg that is already loaded in case we // need to spill before we call out c_arg = total_c_args - total_in_args; - // Pre-load a static method's oop into esi. Used both by locking code and - // the normal JNI call code. - - __ move(oop_handle_reg, A1); - - if (method->is_static() && !is_critical_native) { - - // load opp into a register - int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( - (method->method_holder())->java_mirror())); - - - RelocationHolder rspec = oop_Relocation::spec(oop_index); - __ relocate(rspec); - //__ lui(oop_handle_reg, Assembler::split_high((int)JNIHandles::make_local( - // Klass::cast(method->method_holder())->java_mirror()))); - //__ addiu(oop_handle_reg, oop_handle_reg, Assembler::split_low((int) - // JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()))); - __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); - // __ verify_oop(oop_handle_reg); - // Now handlize the static class mirror it's known not-null. 
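// For a static native the mirror loaded above is spilled to a fixed frame slot
// and the address of that slot becomes the jclass argument; because the mirror
// is known non-null, the NULL check object_move performs for ordinary oop args
// is skipped.  Minimal sketch of both flavours with a stand-in oop type, not
// the real HotSpot types.
#include <cassert>

typedef void* oop;   // stand-in for a HotSpot oop, illustration only

// Ordinary oop arg: NULL oop gives a NULL handle, otherwise the slot's address
// (the xorr / beq / lea pattern in object_move).
static oop* handlize(oop* slot) { return (*slot == nullptr) ? nullptr : slot; }

// Static-method mirror: known non-null, so the handle is unconditionally the slot
// (sd to SP + klass_offset, then lea of that same location).
static oop* handlize_mirror(oop* klass_slot) { return klass_slot; }

int main() {
  int mirror_storage;
  oop klass_slot = &mirror_storage;
  oop null_arg   = nullptr;
  assert(handlize_mirror(&klass_slot) == &klass_slot);
  assert(handlize(&null_arg) == nullptr);
  return 0;
}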
- __ sd( oop_handle_reg, SP, klass_offset); - map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); - - // Now get the handle - __ lea(oop_handle_reg, Address(SP, klass_offset)); - // store the klass handle as second argument - __ move(A1, oop_handle_reg); - // and protect the arg if we must spill - c_arg--; - } + // Pre-load a static method's oop into esi. Used both by locking code and + // the normal JNI call code. + + __ move(oop_handle_reg, A1); + + if (method->is_static() && !is_critical_native) { + + // load opp into a register + int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( + (method->method_holder())->java_mirror())); + + + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); + // Now handlize the static class mirror it's known not-null. + __ sd( oop_handle_reg, SP, klass_offset); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(SP, klass_offset)); + // store the klass handle as second argument + __ move(A1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; + } + // Change state to native (we save the return address in the thread, since it might not // be pushed on the stack when we do a a stack traversal). It is enough that the pc() // points into the right code segment. It does not have to be the correct return pc. // We use the same pc/oopMap repeatedly when we call out - intptr_t the_pc = (intptr_t) __ pc(); - - oop_maps->add_gc_map(the_pc - start, map); - - //__ set_last_Java_frame(thread, esp, noreg, (address)the_pc); - __ set_last_Java_frame(SP, noreg, NULL); - __ relocate(relocInfo::internal_pc_type); - { - intptr_t save_pc = (intptr_t)the_pc ; - __ patchable_set48(AT, save_pc); - } - __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); - - - // We have all of the arguments setup at this point. We must not touch any register - // argument registers at this point (what if we save/restore them there are no oop? 
- { - SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); - int metadata_index = __ oop_recorder()->find_index(method()); - RelocationHolder rspec = metadata_Relocation::spec(metadata_index); - __ relocate(rspec); - //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method()))); - //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method()))); - __ patchable_set48(AT, (long)(method())); - - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), - thread, AT); - - } - - // These are register definitions we need for locking/unlocking -// const Register swap_reg = eax; // Must use eax for cmpxchg instruction -// const Register obj_reg = ecx; // Will contain the oop - // const Register lock_reg = edx; // Address of compiler lock object (BasicLock) -//FIXME, I hava no idea which register to use - const Register swap_reg = T8; // Must use eax for cmpxchg instruction - const Register obj_reg = T9; // Will contain the oop - //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) - const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) - - - - Label slow_path_lock; - Label lock_done; - - // Lock a synchronized method - if (method->is_synchronized()) { - assert(!is_critical_native, "unhandled"); - - const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); - - // Get the handle (the 2nd argument) - __ move(oop_handle_reg, A1); - - // Get address of the box - __ lea(lock_reg, Address(FP, lock_slot_ebp_offset)); - - // Load the oop from the handle - __ ld(obj_reg, oop_handle_reg, 0); - - if (UseBiasedLocking) { - // Note that oop_handle_reg is trashed during this call - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, - false, lock_done, &slow_path_lock); - } - - // Load immediate 1 into swap_reg %eax - __ move(swap_reg, 1); - - __ ld(AT, obj_reg, 0); - __ orr(swap_reg, swap_reg, AT); - - __ sd( swap_reg, lock_reg, mark_word_offset); - __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); - __ bne(AT, R0, lock_done); - __ delayed()->nop(); - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 3) == 0, and - // 2) esp <= mark < mark + os::pagesize() - // These 3 tests can be done by evaluating the following - // expression: ((mark - esp) & (3 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 2 bits clear. - // NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg - - __ dsub(swap_reg, swap_reg,SP); - __ move(AT, 3 - os::vm_page_size()); - __ andr(swap_reg , swap_reg, AT); - // Save the test result, for recursive case, the result is zero - __ sd(swap_reg, lock_reg, mark_word_offset); - //FIXME here, Why notEqual? - __ bne(swap_reg,R0, slow_path_lock); - __ delayed()->nop(); - // Slow path will re-enter here - __ bind(lock_done); - - if (UseBiasedLocking) { - // Re-fetch oop_handle_reg as we trashed it above - __ move(A1, oop_handle_reg); - } - } - - - // Finally just about ready to make the JNI call - - - // get JNIEnv* which is first argument to native + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + + __ set_last_Java_frame(SP, noreg, NULL); + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)the_pc ; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + + // We have all of the arguments setup at this point. 
We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + int metadata_index = __ oop_recorder()->find_index(method()); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + thread, AT); + + } + + // These are register definitions we need for locking/unlocking + const Register swap_reg = T8; // Must use eax for cmpxchg instruction + const Register obj_reg = T9; // Will contain the oop + //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) + const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) + + + + Label slow_path_lock; + Label lock_done; + + // Lock a synchronized method + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ move(oop_handle_reg, A1); + + // Get address of the box + __ lea(lock_reg, Address(FP, lock_slot_ebp_offset)); + + // Load the oop from the handle + __ ld(obj_reg, oop_handle_reg, 0); + + if (UseBiasedLocking) { + // Note that oop_handle_reg is trashed during this call + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); + } + + // Load immediate 1 into swap_reg %eax + __ move(swap_reg, 1); + + __ ld(AT, obj_reg, 0); + __ orr(swap_reg, swap_reg, AT); + + __ sd( swap_reg, lock_reg, mark_word_offset); + __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); + __ bne(AT, R0, lock_done); + __ delayed()->nop(); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) esp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - esp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %eax as the result of cmpxchg + + __ dsub(swap_reg, swap_reg,SP); + __ move(AT, 3 - os::vm_page_size()); + __ andr(swap_reg , swap_reg, AT); + // Save the test result, for recursive case, the result is zero + __ sd(swap_reg, lock_reg, mark_word_offset); + //FIXME here, Why notEqual? + __ bne(swap_reg,R0, slow_path_lock); + __ delayed()->nop(); + // Slow path will re-enter here + __ bind(lock_done); + + if (UseBiasedLocking) { + // Re-fetch oop_handle_reg as we trashed it above + __ move(A1, oop_handle_reg); + } + } + + + // Finally just about ready to make the JNI call + + + // get JNIEnv* which is first argument to native if (!is_critical_native) { - __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset())); + __ addi(A0, thread, in_bytes(JavaThread::jni_environment_offset())); } - // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) - /* Load the second arguments into A1 */ - //__ ld(A1, SP , wordSize ); // klass - - // Now set thread in native - __ addi(AT, R0, _thread_in_native); - __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); - /* Jin: do the call */ - __ call(method->native_function(), relocInfo::runtime_call_type); - __ delayed()->nop(); - // WARNING - on Windows Java Natives use pascal calling convention and pop the - // arguments off of the stack. 
We could just re-adjust the stack pointer here - // and continue to do SP relative addressing but we instead switch to FP - // relative addressing. - - // Unpack native results. - switch (ret_type) { - case T_BOOLEAN: __ c2bool(V0); break; - case T_CHAR : __ andi(V0,V0, 0xFFFF); break; - case T_BYTE : __ sign_extend_byte (V0); break; - case T_SHORT : __ sign_extend_short(V0); break; - case T_INT : // nothing to do break; - case T_DOUBLE : - case T_FLOAT : - // Result is in st0 we'll save as needed - break; - case T_ARRAY: // Really a handle - case T_OBJECT: // Really a handle - break; // can't de-handlize until after safepoint check - case T_VOID: break; - case T_LONG: break; - default : ShouldNotReachHere(); - } - // Switch thread to "native transition" state before reading the synchronization state. - // This additional state is necessary because reading and testing the synchronization - // state is not atomic w.r.t. GC, as this scenario demonstrates: - // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. - // VM thread changes sync state to synchronizing and suspends threads for GC. - // Thread A is resumed to finish this native method, but doesn't block here since it - // didn't see any synchronization is progress, and escapes. - // __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); - //__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset()); - // __ move(AT, (int)_thread_in_native_trans); - __ addi(AT, R0, _thread_in_native_trans); - __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); - + // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) + /* Load the second arguments into A1 */ + //__ ld(A1, SP , wordSize ); // klass + + // Now set thread in native + __ addi(AT, R0, _thread_in_native); + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + /* Jin: do the call */ + __ call(method->native_function(), relocInfo::runtime_call_type); + __ delayed()->nop(); + // WARNING - on Windows Java Natives use pascal calling convention and pop the + // arguments off of the stack. We could just re-adjust the stack pointer here + // and continue to do SP relative addressing but we instead switch to FP + // relative addressing. + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ andi(V0,V0, 0xFFFF); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : // nothing to do break; + case T_DOUBLE : + case T_FLOAT : + // Result is in st0 we'll save as needed + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. 
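// The comment above is the key ordering constraint: after the native call the
// thread is parked in _thread_in_native_trans before the safepoint/suspend
// check, so the VM thread can never observe a jump straight from in_native to
// in_Java across a pending safepoint.  Toy state machine, illustrative names
// and behaviour only.
#include <cstdio>

enum ThreadState { thread_in_Java, thread_in_native, thread_in_native_trans };

static bool safepoint_pending = true;  // pretend the VM thread has begun a safepoint

static void block_for_safepoint() { std::puts("  blocked until the safepoint completes"); }

static ThreadState return_from_native() {
  ThreadState state = thread_in_native_trans;   // sw(_thread_in_native_trans, thread_state)
  std::printf("  state %d: native_trans, now check safepoint/suspend flags\n", (int) state);
  if (safepoint_pending)                         // SafepointSynchronize state != _not_synchronized
    block_for_safepoint();                       // check_special_condition_for_native_trans
  return thread_in_Java;                         // sw(_thread_in_Java, thread_state)
}

int main() {
  ThreadState s = return_from_native();
  std::printf("returned to Java in state %d\n", (int) s);
  return 0;
}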
+ // __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); + //__ sw(_thread_in_native_trans, thread, JavaThread::thread_state_offset()); + // __ move(AT, (int)_thread_in_native_trans); + __ addi(AT, R0, _thread_in_native_trans); + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + Label after_transition; - // check for safepoint operation in progress and/or pending suspend requests - { Label Continue; + // check for safepoint operation in progress and/or pending suspend requests + { Label Continue; //FIXME here, which regiser should we use? - // SafepointSynchronize::_not_synchronized); - __ li(AT, SafepointSynchronize::address_of_state()); - __ lw(A0, AT, 0); - __ addi(AT, A0, -SafepointSynchronize::_not_synchronized); - Label L; - __ bne(AT,R0, L); - __ delayed()->nop(); - __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); - __ beq(AT, R0, Continue); - __ delayed()->nop(); - __ bind(L); - - // Don't use call_VM as it will see a possible pending exception and forward it - // and never return here preventing us from clearing _last_native_pc down below. - // Also can't use call_VM_leaf either as it will check to see if esi & edi are - // preserved and correspond to the bcp/locals pointers. So we do a runtime call - // by hand. - // - save_native_result(masm, ret_type, stack_slots); - __ move (A0, thread); - __ addi(SP,SP, -wordSize); + // SafepointSynchronize::_not_synchronized); + __ li(AT, SafepointSynchronize::address_of_state()); + __ lw(A0, AT, 0); + __ addi(AT, A0, -SafepointSynchronize::_not_synchronized); + Label L; + __ bne(AT,R0, L); + __ delayed()->nop(); + __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ delayed()->nop(); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // Also can't use call_VM_leaf either as it will check to see if esi & edi are + // preserved and correspond to the bcp/locals pointers. So we do a runtime call + // by hand. + // + save_native_result(masm, ret_type, stack_slots); + __ move (A0, thread); + __ addi(SP,SP, -wordSize); __ push(S2); __ move(AT, -(StackAlignmentInBytes)); __ move(S2, SP); // use S2 as a sender SP holder __ andr(SP, SP, AT); // align stack as required by ABI if (!is_critical_native) { __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); - __ delayed()->nop(); + __ delayed()->nop(); } else { __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); - __ delayed()->nop(); + __ delayed()->nop(); } __ move(SP, S2); // use S2 as a sender SP holder __ pop(S2); - __ addi(SP,SP, wordSize); - //add for compressedoops - __ reinit_heapbase(); - // Restore any method result value - restore_native_result(masm, ret_type, stack_slots); + __ addi(SP,SP, wordSize); + //add for compressedoops + __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); if (is_critical_native) { // The call above performed the transition to thread_in_Java so // skip the transition logic below. 
__ beq(R0, R0, after_transition); - __ delayed()->nop(); + __ delayed()->nop(); } - __ bind(Continue); - } - - // change thread state - __ addi(AT, R0, _thread_in_Java); - __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + __ bind(Continue); + } + + // change thread state + __ addi(AT, R0, _thread_in_Java); + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); __ bind(after_transition); - Label reguard; - Label reguard_done; - __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); - __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); - __ beq(AT, R0, reguard); - __ delayed()->nop(); - // slow path reguard re-enters here - __ bind(reguard_done); - - // Handle possible exception (will unlock if necessary) - - // native result if any is live - - // Unlock - Label slow_path_unlock; - Label unlock_done; - if (method->is_synchronized()) { - - Label done; - - // Get locked oop from the handle we passed to jni - __ ld( obj_reg, oop_handle_reg, 0); - //FIXME - if (UseBiasedLocking) { - __ biased_locking_exit(obj_reg, T8, done); - - } - - // Simple recursive lock? - - __ ld(AT, FP, lock_slot_ebp_offset); - __ beq(AT, R0, done); - __ delayed()->nop(); - // Must save eax if if it is live now because cmpxchg must use it - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - save_native_result(masm, ret_type, stack_slots); - } - - // get old displaced header - __ ld (T8, FP, lock_slot_ebp_offset); - // get address of the stack lock - //FIXME aoqi - //__ addi (T6, FP, lock_slot_ebp_offset); - __ addi (c_rarg0, FP, lock_slot_ebp_offset); - // Atomic swap old header if oop still contains the stack lock - //FIXME aoqi - //__ cmpxchg(T8, Address(obj_reg, 0),T6 ); - __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0); - - __ beq(AT, R0, slow_path_unlock); - __ delayed()->nop(); - // slow path re-enters here - __ bind(unlock_done); - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - restore_native_result(masm, ret_type, stack_slots); - } - - __ bind(done); - - } - { - SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); - // Tell dtrace about this method exit - save_native_result(masm, ret_type, stack_slots); - int metadata_index = __ oop_recorder()->find_index( (method())); - RelocationHolder rspec = metadata_Relocation::spec(metadata_index); - __ relocate(rspec); - //__ lui(T6, Assembler::split_high((int)JNIHandles::make_local(method()))); - //__ addiu(T6, T6, Assembler::split_low((int)JNIHandles::make_local(method()))); - __ patchable_set48(AT, (long)(method())); - - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), - thread, AT); - restore_native_result(masm, ret_type, stack_slots); - } - - // We can finally stop using that last_Java_frame we setup ages ago - - __ reset_last_Java_frame(false, true); - - // Unpack oop result - if (ret_type == T_OBJECT || ret_type == T_ARRAY) { - Label L; - // __ cmpl(eax, NULL_WORD); - // __ jcc(Assembler::equal, L); - __ beq(V0, R0,L ); - __ delayed()->nop(); - // __ movl(eax, Address(eax)); - __ ld(V0, V0, 0); - __ bind(L); - // __ verify_oop(eax); - __ verify_oop(V0); - } + Label reguard; + Label reguard_done; + __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ addi(AT, AT, -JavaThread::stack_guard_yellow_disabled); + __ beq(AT, R0, reguard); + __ delayed()->nop(); + // slow path reguard re-enters here + __ bind(reguard_done); + + // Handle possible exception (will unlock if necessary) + + // native result if any is live + + // 
Unlock + Label slow_path_unlock; + Label unlock_done; + if (method->is_synchronized()) { + + Label done; + + // Get locked oop from the handle we passed to jni + __ ld( obj_reg, oop_handle_reg, 0); + //FIXME + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, T8, done); + + } + + // Simple recursive lock? + + __ ld(AT, FP, lock_slot_ebp_offset); + __ beq(AT, R0, done); + __ delayed()->nop(); + // Must save eax if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + // get old displaced header + __ ld (T8, FP, lock_slot_ebp_offset); + // get address of the stack lock + __ addi (c_rarg0, FP, lock_slot_ebp_offset); + // Atomic swap old header if oop still contains the stack lock + __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0); + + __ beq(AT, R0, slow_path_unlock); + __ delayed()->nop(); + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + + } + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + // Tell dtrace about this method exit + save_native_result(masm, ret_type, stack_slots); + int metadata_index = __ oop_recorder()->find_index( (method())); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + thread, AT); + restore_native_result(masm, ret_type, stack_slots); + } + + // We can finally stop using that last_Java_frame we setup ages ago + + __ reset_last_Java_frame(false, true); + + // Unpack oop result + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + Label L; + __ beq(V0, R0,L ); + __ delayed()->nop(); + __ ld(V0, V0, 0); + __ bind(L); + __ verify_oop(V0); + } if (!is_critical_native) { - // reset handle block - __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); - __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); + // reset handle block + __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); + __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); } if (!is_critical_native) { - // Any exception pending? - __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); - - __ bne(AT, R0, exception_pending); - __ delayed()->nop(); + // Any exception pending? + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + + __ bne(AT, R0, exception_pending); + __ delayed()->nop(); } - // no exception, we're almost done - - // check that only result value is on FPU stack - __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); - - // Fixup floating pointer results so that result looks like a return from a compiled method -/* if (ret_type == T_FLOAT) { - if (UseSSE >= 1) { - // Pop st0 and store as float and reload into xmm register - __ fstp_s(Address(ebp, -4)); - __ movss(xmm0, Address(ebp, -4)); - } - } else if (ret_type == T_DOUBLE) { - if (UseSSE >= 2) { - // Pop st0 and store as double and reload into xmm register - __ fstp_d(Address(ebp, -8)); - __ movsd(xmm0, Address(ebp, -8)); - } - } -*/ + // no exception, we're almost done + + // check that only result value is on FPU stack + __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); + // Return #ifndef OPT_THREAD - __ get_thread(TREG); + __ get_thread(TREG); #endif - __ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); - __ leave(); - - __ jr(RA); - __ delayed()->nop(); - // Unexpected paths are out of line and go here + __ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ leave(); + + __ jr(RA); + __ delayed()->nop(); + // Unexpected paths are out of line and go here /* if (!is_critical_native) { // forward the exception @@ -2848,152 +2355,147 @@ __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); } */ - // Slow path locking & unlocking - if (method->is_synchronized()) { - - // BEGIN Slow path lock - - __ bind(slow_path_lock); - - // protect the args we've loaded - save_args(masm, total_c_args, c_arg, out_regs); - - // has last_Java_frame setup. No exceptions so do vanilla call not call_VM - // args are (oop obj, BasicLock* lock, JavaThread* thread) - - __ move(A0, obj_reg); - __ move(A1, lock_reg); - __ move(A2, thread); - __ addi(SP, SP, - 3*wordSize); + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + __ move(A0, obj_reg); + __ move(A1, lock_reg); + __ move(A2, thread); + __ addi(SP, SP, - 3*wordSize); + + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ move(SP, S2); + __ addi(SP, SP, 3*wordSize); + + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + __ delayed()->nop(); + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // Slow path unlock + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ push(AT); + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); __ move(AT, -(StackAlignmentInBytes)); __ move(S2, SP); // use S2 as a sender SP holder __ andr(SP, SP, AT); // align stack as required by ABI - __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); - __ delayed()->nop(); + // should be a peal + // +wordSize because of the push above + __ addi(A1, FP, lock_slot_ebp_offset); + + __ move(A0, obj_reg); + __ addi(SP,SP, -2*wordSize); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), + relocInfo::runtime_call_type); + __ delayed()->nop(); + __ addi(SP,SP, 2*wordSize); __ move(SP, S2); - __ addi(SP, SP, 3*wordSize); - - restore_args(masm, total_c_args, c_arg, out_regs); - + //add for compressedoops + __ reinit_heapbase(); #ifdef ASSERT - { Label L; - // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); - __ ld(AT, 
thread, in_bytes(Thread::pending_exception_offset())); - //__ jcc(Assembler::equal, L); - __ beq(AT, R0, L); - __ delayed()->nop(); - __ stop("no pending exception allowed on exit from monitorenter"); - __ bind(L); - } -#endif - __ b(lock_done); - __ delayed()->nop(); - // END Slow path lock - - // BEGIN Slow path unlock - __ bind(slow_path_unlock); - - // Slow path unlock - - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - save_native_result(masm, ret_type, stack_slots); - } - // Save pending exception around call to VM (which contains an EXCEPTION_MARK) - - __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); - __ push(AT); - __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); - - __ move(AT, -(StackAlignmentInBytes)); - __ move(S2, SP); // use S2 as a sender SP holder - __ andr(SP, SP, AT); // align stack as required by ABI - - // should be a peal - // +wordSize because of the push above - __ addi(A1, FP, lock_slot_ebp_offset); - - __ move(A0, obj_reg); - __ addi(SP,SP, -2*wordSize); - __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), - relocInfo::runtime_call_type); - __ delayed()->nop(); - __ addi(SP,SP, 2*wordSize); - __ move(SP, S2); - //add for compressedoops - __ reinit_heapbase(); -#ifdef ASSERT - { - Label L; - // __ cmpl(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - __ lw( AT, thread, in_bytes(Thread::pending_exception_offset())); - //__ jcc(Assembler::equal, L); - __ beq(AT, R0, L); - __ delayed()->nop(); - __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); - __ bind(L); - } + { + Label L; + __ lw( AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } #endif /* ASSERT */ - __ pop(AT); - __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - restore_native_result(masm, ret_type, stack_slots); - } - __ b(unlock_done); - __ delayed()->nop(); - // END Slow path unlock - - } - - // SLOW PATH Reguard the stack if needed - - __ bind(reguard); - save_native_result(masm, ret_type, stack_slots); - __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), - relocInfo::runtime_call_type); - __ delayed()->nop(); - //add for compressedoops - __ reinit_heapbase(); - restore_native_result(masm, ret_type, stack_slots); - __ b(reguard_done); - __ delayed()->nop(); - - // BEGIN EXCEPTION PROCESSING - if (!is_critical_native) { - // Forward the exception - __ bind(exception_pending); - - // remove possible return value from FPU register stack - __ empty_FPU_stack(); - - // pop our frame - //forward_exception_entry need return address on stack - __ addiu(SP, FP, wordSize); - __ ld(FP, SP, (-1) * wordSize); - - // and forward the exception - __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); - __ delayed()->nop(); + __ pop(AT); + __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); } - __ flush(); - - nmethod *nm = nmethod::new_native_nmethod(method, - compile_id, - masm->code(), - vep_offset, - frame_complete, - stack_slots / VMRegImpl::slots_per_word, - (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), - in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), - oop_maps); + __ b(unlock_done); + __ delayed()->nop(); + // END Slow path unlock + + } + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); + __ delayed()->nop(); + //add for compressedoops + __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + __ delayed()->nop(); + + // BEGIN EXCEPTION PROCESSING + if (!is_critical_native) { + // Forward the exception + __ bind(exception_pending); + + // remove possible return value from FPU register stack + __ empty_FPU_stack(); + + // pop our frame + //forward_exception_entry need return address on stack + __ addiu(SP, FP, wordSize); + __ ld(FP, SP, (-1) * wordSize); + + // and forward the exception + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + } + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); if (is_critical_native) { nm->set_lazy_critical_native(true); } - return nm; - + + return nm; } @@ -3027,8 +2529,8 @@ } -nmethod *SharedRuntime::generate_dtrace_nmethod( - MacroAssembler *masm, methodHandle method) { +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, + methodHandle method) { // generate_dtrace_nmethod is guarded by a mutex so we are sure to @@ -3111,7 +2613,7 @@ // Now figure out where the args must be stored and how much stack space // they require (neglecting out_preserve_stack_slots but space for storing // the 1st six register arguments). It's weird see int_stk_helper. - // + int out_arg_slots; out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); @@ -3547,7 +3049,7 @@ // this function returns the adjust size (in number of words) to a c2i adapter // activation for use during deoptimization int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { - return (callee_locals - callee_parameters) * Interpreter::stackElementWords; + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; } // "Top of Stack" slots that may be unused by the calling convention but must @@ -3557,131 +3059,8 @@ // when an interrupt occurs. 
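// Deoptimization::last_frame_adjust above sizes the extra space an interpreter
// frame needs when a compiled callee is deoptimized: the c2i activation only
// reserved room for the parameters, so the difference to the full local count
// is paid for here.  Worked example assuming one word per Java stack element.
#include <cstdio>

static int last_frame_adjust(int callee_parameters, int callee_locals,
                             int stackElementWords) {
  return (callee_locals - callee_parameters) * stackElementWords;
}

int main() {
  // A method with 3 parameters but 7 locals needs 4 extra stack elements.
  std::printf("adjust = %d words\n", last_frame_adjust(3, 7, 1));
  return 0;
}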
uint SharedRuntime::out_preserve_stack_slots() { //return frame::register_save_words * VMRegImpl::slots_per_word; - return 0; + return 0; } -/* -static void gen_new_frame(MacroAssembler* masm, bool deopt) { -// -// Common out the new frame generation for deopt and uncommon trap -// - Register G3pcs = G3_scratch; // Array of new pcs (input) - Register Oreturn0 = O0; - Register Oreturn1 = O1; - Register O2UnrollBlock = O2; - Register O3array = O3; // Array of frame sizes (input) - Register O4array_size = O4; // number of frames (input) - Register O7frame_size = O7; // number of frames (input) - - __ ld_ptr(O3array, 0, O7frame_size); - __ sub(G0, O7frame_size, O7frame_size); - __ save(SP, O7frame_size, SP); - __ ld_ptr(G3pcs, 0, I7); // load frame's new pc - - #ifdef ASSERT - // make sure that the frames are aligned properly -#ifndef _LP64 - __ btst(wordSize*2-1, SP); - __ breakpoint_trap(Assembler::notZero); -#endif - #endif - - // Deopt needs to pass some extra live values from frame to frame - - if (deopt) { - __ mov(Oreturn0->after_save(), Oreturn0); - __ mov(Oreturn1->after_save(), Oreturn1); - } - - __ mov(O4array_size->after_save(), O4array_size); - __ sub(O4array_size, 1, O4array_size); - __ mov(O3array->after_save(), O3array); - __ mov(O2UnrollBlock->after_save(), O2UnrollBlock); - __ add(G3pcs, wordSize, G3pcs); // point to next pc value - - #ifdef ASSERT - // trash registers to show a clear pattern in backtraces - __ set(0xDEAD0000, I0); - __ add(I0, 2, I1); - __ add(I0, 4, I2); - __ add(I0, 6, I3); - __ add(I0, 8, I4); - // Don't touch I5 could have valuable savedSP - __ set(0xDEADBEEF, L0); - __ mov(L0, L1); - __ mov(L0, L2); - __ mov(L0, L3); - __ mov(L0, L4); - __ mov(L0, L5); - - // trash the return value as there is nothing to return yet - __ set(0xDEAD0001, O7); - #endif - - __ mov(SP, O5_savedSP); -} - - -static void make_new_frames(MacroAssembler* masm, bool deopt) { - // - // loop through the UnrollBlock info and create new frames - // - Register G3pcs = G3_scratch; - Register Oreturn0 = O0; - Register Oreturn1 = O1; - Register O2UnrollBlock = O2; - Register O3array = O3; - Register O4array_size = O4; - Label loop; - - // Before we make new frames, check to see if stack is available. - // Do this after the caller's return address is on top of stack - if (UseStackBanging) { - // Get total frame size for interpreted frames - __ ld(Address(O2UnrollBlock, 0, - Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()), O4); - __ bang_stack_size(O4, O3, G3_scratch); - } - - __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()), O4array_size); - __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()), G3pcs); - - __ ld_ptr(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()), O3array); - - // Adjust old interpreter frame to make space for new frame's extra java locals - // - // We capture the original sp for the transition frame only because it is needed in - // order to properly calculate interpreter_sp_adjustment. Even though in real life - // every interpreter frame captures a savedSP it is only needed at the transition - // (fortunately). If we had to have it correct everywhere then we would need to - // be told the sp_adjustment for each frame we create. If the frame size array - // were to have twice the frame count entries then we could have pairs [sp_adjustment, frame_size] - // for each frame we create and keep up the illusion every where. 
- // - - __ ld(Address(O2UnrollBlock, 0, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()), O7); - __ mov(SP, O5_savedSP); // remember initial sender's original sp before adjustment - __ sub(SP, O7, SP); - -#ifdef ASSERT - // make sure that there is at least one entry in the array - __ tst(O4array_size); - __ breakpoint_trap(Assembler::zero); -#endif - - // Now push the new interpreter frames - __ bind(loop); - - // allocate a new frame, filling the registers - - gen_new_frame(masm, deopt); // allocate an interpreter frame - - __ tst(O4array_size); - __ br(Assembler::notZero, false, Assembler::pn, loop); - __ delayed()->add(O3array, wordSize, O3array); - __ ld_ptr(G3pcs, 0, O7); // load final frame new pc - -} -*/ //------------------------------generate_deopt_blob---------------------------- // Ought to generate an ideal graph & compile, but here's some SPARC ASM @@ -3711,7 +3090,7 @@ Register unroll = S7; // Prolog for non exception case! // Correct the return address we were given. - //FIXME, return address is on the tos or Ra? + //FIXME, return address is on the tos or Ra? __ addi(RA, RA, - (NativeCall::return_address_offset_long)); // Save everything in sight. map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); @@ -3722,13 +3101,12 @@ int reexecute_offset = __ pc() - start; - // Reexecute case - // return address is the pc describes what bci to do re-execute at - - // No need to update map as each call to save_live_registers will produce identical oopmap - //__ addi(RA, RA, - (NativeCall::return_address_offset)); + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); - __ move(reason, Deoptimization::Unpack_reexecute); + __ move(reason, Deoptimization::Unpack_reexecute); __ b(cont); __ delayed()->nop(); @@ -3739,9 +3117,9 @@ // edx which contain the exception oop and exception pc // respectively. Set them in TLS and fall thru to the // unpack_with_exception_in_tls entry point. - + __ get_thread(thread); - __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); int exception_in_tls_offset = __ pc() - start; // new implementation because exception oop is now passed in JavaThread @@ -3752,9 +3130,9 @@ // tos: stack at point of call to method that threw the exception (i.e. only // args are on the stack, no return address) - // Return address will be patched later with the throwing pc. The correct value is not + // Return address will be patched later with the throwing pc. The correct value is not // available now because loading it from memory would destroy registers. - // Save everything in sight. + // Save everything in sight. 
// No need to update map as each call to save_live_registers will produce identical oopmap __ addi(RA, RA, - (NativeCall::return_address_offset_long)); (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); @@ -3762,7 +3140,7 @@ // Now it is safe to overwrite any register // store the correct deoptimization type __ move(reason, Deoptimization::Unpack_exception); - // load throwing pc from JavaThread and patch it as the return address + // load throwing pc from JavaThread and patch it as the return address // of the current frame. Then clear the field in JavaThread __ get_thread(thread); __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); @@ -3777,8 +3155,8 @@ // verify that there is no pending exception Label no_pending_exception; __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); - __ beq(AT, R0, no_pending_exception); - __ delayed()->nop(); + __ beq(AT, R0, no_pending_exception); + __ delayed()->nop(); __ stop("must not have pending exception here"); __ bind(no_pending_exception); #endif @@ -3788,7 +3166,7 @@ // Call C code. Need thread and this frame, but NOT official VM entry - // crud. We cannot block on this call, no GC can happen. + // crud. We cannot block on this call, no GC can happen. #ifndef OPT_THREAD __ get_thread(thread); #endif @@ -3801,8 +3179,8 @@ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on // this call, no GC can happen. Call should capture return values. - __ relocate(relocInfo::internal_pc_type); - { + __ relocate(relocInfo::internal_pc_type); + { intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; __ patchable_set48(AT, save_pc); } @@ -3824,7 +3202,6 @@ // we are very short of registers Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); - //__ pop(reason); __ sw(reason, unpack_kind); // save the unpack_kind value // Retrieve the possible live values (return values) @@ -3839,13 +3216,13 @@ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); - + __ verify_oop(V0); // Overwrite the result registers with the exception results. - __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); + __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); - + __ bind(noException); @@ -3856,11 +3233,11 @@ RegisterSaver::restore_result_registers(masm); // All of the register save area has been popped of the stack. Only the // return address remains. - // Pop all the frames we must move/replace. + // Pop all the frames we must move/replace. // Frame picture (youngest to oldest) // 1: self-frame (no frame link) // 2: deopting frame (no frame link) - // 3: caller of deopting frame (could be compiled/interpreted). + // 3: caller of deopting frame (could be compiled/interpreted). 
// // Note: by leaving the return address of self-frame on the stack // and using the size of frame 2 to adjust the stack @@ -3874,19 +3251,19 @@ Register sizes = T1; // register for frame count Register count = T3; - + // Pop deoptimized frame __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); __ add(SP, SP, AT); // sp should be pointing at the return address to the caller (3) - + // Load array of frame pcs into pcs __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); __ addi(SP, SP, wordSize); // trash the old pc // Load array of frame sizes into T6 __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); - + // Load count of frams into T3 __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); @@ -3904,14 +3281,14 @@ /* * Loop: - 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld + 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] - 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 + 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 0x000000555bd82d24: daddi sp, sp, 0xfffffff0 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp 0x000000555bd82d2c: sd at, 0x8(sp) ; push at - 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp - 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2 + 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp + 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); 0x000000555bd82d40: dadd s4, sp, zero ; move(sender_sp, SP); @@ -3924,26 +3301,26 @@ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split Label loop; __ bind(loop); - __ ld(T2, sizes, 0); // Load frame size - __ ld_ptr(AT, pcs, 0); // save return address + __ ld(T2, sizes, 0); // Load frame size + __ ld_ptr(AT, pcs, 0); // save return address __ addi(T2, T2, -2*wordSize); // we'll push pc and rbp, by hand - __ push2(AT, FP); + __ push2(AT, FP); __ move(FP, SP); - __ sub(SP, SP, T2); // Prolog! + __ sub(SP, SP, T2); // Prolog! 
// This value is corrected by layout_activation_impl - __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable - __ move(sender_sp, SP); // pass to next frame - __ addi(count, count, -1); // decrement counter - __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ move(sender_sp, SP); // pass to next frame + __ addi(count, count, -1); // decrement counter + __ addi(sizes, sizes, wordSize); // Bump array pointer (sizes) __ bne(count, R0, loop); - __ delayed()->addi(pcs, pcs, wordSize); // Bump array pointer (pcs) - __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); + __ delayed()->addi(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); // Re-push self-frame - __ push2(AT, FP); + __ push2(AT, FP); __ move(FP, SP); - __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); - __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); __ addi(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); // Restore frame locals after moving the frame @@ -3952,12 +3329,12 @@ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); - + // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on // this call, no GC can happen. - __ move(A1, reason); // exec_mode + __ move(A1, reason); // exec_mode __ get_thread(thread); - __ move(A0, thread); // thread + __ move(A0, thread); // thread __ addi(SP, SP, (-additional_words) *wordSize); // set last_Java_sp, last_Java_fp @@ -3966,14 +3343,13 @@ __ move(AT, -(StackAlignmentInBytes)); __ andr(SP, SP, AT); // Fix stack alignment as required by ABI - __ relocate(relocInfo::internal_pc_type); - { + __ relocate(relocInfo::internal_pc_type); + { intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; __ patchable_set48(AT, save_pc); } __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); - - //__ call(Deoptimization::unpack_frames); + __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); __ delayed()->nop(); // Revert SP alignment after call since we're going to do some SP relative addressing below @@ -3982,7 +3358,7 @@ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); __ push(V0); - + __ get_thread(thread); __ reset_last_Java_frame(true, true); @@ -3991,7 +3367,7 @@ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words +1) * wordSize); __ ldc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); - //FIXME, + //FIXME, // Clear floating point stack before returning to interpreter __ empty_FPU_stack(); //FIXME, we should consider about float and double @@ -4016,20 +3392,20 @@ // allocate space for the code ResourceMark rm; // setup code generation tools - CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); - MacroAssembler* masm = new MacroAssembler(&buffer); + CodeBuffer buffer 
("uncommon_trap_blob", 512*80 , 512*40 ); + MacroAssembler* masm = new MacroAssembler(&buffer); enum frame_layout { - s0_off, s0_off2, - s1_off, s1_off2, - s2_off, s2_off2, - s3_off, s3_off2, - s4_off, s4_off2, - s5_off, s5_off2, - s6_off, s6_off2, - s7_off, s7_off2, - fp_off, fp_off2, - return_off, return_off2, // slot for return address sp + 9 + s0_off, s0_off2, + s1_off, s1_off2, + s2_off, s2_off2, + s3_off, s3_off2, + s4_off, s4_off2, + s5_off, s5_off2, + s6_off, s6_off2, + s7_off, s7_off2, + fp_off, fp_off2, + return_off, return_off2, // slot for return address sp + 9 framesize }; assert(framesize % 4 == 0, "sp not 16-byte aligned"); @@ -4042,7 +3418,7 @@ __ sd(RA, SP, return_off * BytesPerInt); __ sd(FP, SP, fp_off * BytesPerInt); - // Save callee saved registers. None for UseSSE=0, + // Save callee saved registers. None for UseSSE=0, // floats-only for UseSSE=1, and doubles for UseSSE=2. __ sd(S0, SP, s0_off * BytesPerInt); __ sd(S1, SP, s1_off * BytesPerInt); @@ -4065,8 +3441,8 @@ #endif // set last_Java_sp __ set_last_Java_frame(NOREG, FP, NULL); - __ relocate(relocInfo::internal_pc_type); - { + __ relocate(relocInfo::internal_pc_type); + { long save_pc = (long)__ pc() + 52; __ patchable_set48(AT, (long)save_pc); __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); @@ -4083,7 +3459,7 @@ OopMapSet *oop_maps = new OopMapSet(); OopMap* map = new OopMap( framesize, 0 ); - map->set_callee_saved( VMRegImpl::stack2reg(s0_off ), S0->as_VMReg() ); + map->set_callee_saved( VMRegImpl::stack2reg(s0_off ), S0->as_VMReg() ); map->set_callee_saved( VMRegImpl::stack2reg(s1_off ), S1->as_VMReg() ); map->set_callee_saved( VMRegImpl::stack2reg(s2_off ), S2->as_VMReg() ); map->set_callee_saved( VMRegImpl::stack2reg(s3_off ), S3->as_VMReg() ); @@ -4093,7 +3469,7 @@ map->set_callee_saved( VMRegImpl::stack2reg(s7_off ), S7->as_VMReg() ); //oop_maps->add_gc_map( __ offset(), true, map); - oop_maps->add_gc_map( __ offset(), map); + oop_maps->add_gc_map( __ offset(), map); #ifndef OPT_THREAD __ get_thread(thread); @@ -4104,12 +3480,12 @@ Register unroll = S7; __ move(unroll, V0); - // Pop all the frames we must move/replace. - // + // Pop all the frames we must move/replace. + // // Frame picture (youngest to oldest) // 1: self-frame (no frame link) // 2: deopting frame (no frame link) - // 3: possible-i2c-adapter-frame + // 3: possible-i2c-adapter-frame // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an // and c2i here) @@ -4133,10 +3509,6 @@ // Load array of frame pcs into ECX __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); -/* 2012/9/7 Not needed in MIPS - __ addiu(SP, SP, wordSize); -*/ - // Load array of frame sizes into ESI __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); @@ -4176,7 +3548,7 @@ __ daddi(SP, SP, - 2 * wordSize); // save old & set new FP __ sd(FP, SP, 0 * wordSize); // save final return address __ sd(RA, SP, 1 * wordSize); - __ move(FP, SP); + __ move(FP, SP); __ daddi(SP, SP, -(framesize / 2 - 2) * wordSize); // set last_Java_sp, last_Java_fp @@ -4185,8 +3557,8 @@ __ move(AT, -(StackAlignmentInBytes)); __ andr(SP, SP, AT); // Fix stack alignment as required by ABI - __ relocate(relocInfo::internal_pc_type); - { + __ relocate(relocInfo::internal_pc_type); + { long save_pc = (long)__ pc() + 52; __ patchable_set48(AT, (long)save_pc); } @@ -4199,7 +3571,7 @@ __ move(A1, Deoptimization::Unpack_uncommon_trap); __ patchable_call((address)Deoptimization::unpack_frames); // Set an oopmap for the call site - //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) ); + //oop_maps->add_gc_map( __ offset(), true, new OopMap( framesize, 0 ) ); oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) );//Fu __ reset_last_Java_frame(true,true); @@ -4226,28 +3598,28 @@ // a safepoint. // // This blob is jumped to (via a breakpoint and the signal handler) from a -// safepoint in compiled code. - +// safepoint in compiled code. + SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { // Account for thread arg in our frame - const int additional_words = 0; + const int additional_words = 0; int frame_size_in_words; - assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ResourceMark rm; OopMapSet *oop_maps = new OopMapSet(); OopMap* map; // allocate space for the code - // setup code generation tools + // setup code generation tools CodeBuffer buffer ("handler_blob", 2048, 512); MacroAssembler* masm = new MacroAssembler( &buffer); - - const Register thread = TREG; - address start = __ pc(); - address call_pc = NULL; + + const Register thread = TREG; + address start = __ pc(); + address call_pc = NULL; bool cause_return = (pool_type == POLL_AT_RETURN); bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); @@ -4264,7 +3636,7 @@ if(!cause_return) { __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); } - + __ pop(thread); map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); @@ -4278,16 +3650,6 @@ __ move(A0, thread); __ set_last_Java_frame(NOREG, NOREG, NULL); - //__ relocate(relocInfo::internal_pc_type); - if (!cause_return) - { -/* - intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + NativeCall::return_address_offset + 4; - __ li48(AT, save_pc); - __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); -*/ - } - // do the call //__ lui(T9, Assembler::split_high((int)call_ptr)); @@ -4315,21 +3677,19 @@ RegisterSaver::restore_live_registers(masm, save_vectors); //forward_exception_entry need return address on the stack __ push(RA); - //__ lui(T9, Assembler::split_high((int)StubRoutines::forward_exception_entry())); - //__ addiu(T9, T9, 
Assembler::split_low((int)StubRoutines::forward_exception_entry())); __ patchable_jump((address)StubRoutines::forward_exception_entry()); // No exception case __ bind(noException); - // Normal exit, register restoring and exit + // Normal exit, register restoring and exit RegisterSaver::restore_live_registers(masm, save_vectors); __ jr(RA); __ delayed()->nop(); - - masm->flush(); + + masm->flush(); // Fill-out other meta info - return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); } // @@ -4352,7 +3712,7 @@ MacroAssembler* masm = new MacroAssembler(&buffer); int frame_size_words; - //we put the thread in A0 + //we put the thread in A0 OopMapSet *oop_maps = new OopMapSet(); OopMap* map = NULL; @@ -4366,16 +3726,14 @@ const Register thread = T8; __ get_thread(thread); - __ move(A0, thread); + __ move(A0, thread); __ set_last_Java_frame(noreg, FP, NULL); - //__ addi(SP, SP, -wordSize); - //align the stack before invoke native + //align the stack before invoke native __ move(AT, -(StackAlignmentInBytes)); - __ andr(SP, SP, AT); - __ relocate(relocInfo::internal_pc_type); - { + __ andr(SP, SP, AT); + __ relocate(relocInfo::internal_pc_type); + { intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; -//tty->print_cr(" %s :%d, name:%s, pc: %lx, save_pc: %lx, frame_size_words: %lx", __func__, __LINE__, name, __ pc(), save_pc, frame_size_words); //aoqi_test __ patchable_set48(AT, save_pc); } __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); @@ -4396,9 +3754,9 @@ Label pending; __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); __ bne(AT, R0, pending); - __ delayed()->nop(); - // get the returned Method* - //FIXME, do mips need this ? + __ delayed()->nop(); + // get the returned Method* + //FIXME, do mips need this ? 
   __ get_vm_result_2(Rmethod, thread);  // Refer to OpenJDK8
   __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize);
   __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize);
@@ -4414,42 +3772,21 @@
   RegisterSaver::restore_live_registers(masm);

   // exception pending => remove activation and forward to exception handler
-  //forward_exception_entry need return address on the stack
+  //forward_exception_entry need return address on the stack
   __ push(RA);
   __ get_thread(thread);
-  __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
+  __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset()));
   __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset()));
   __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
   __ delayed() -> nop();

   // -------------
   // make sure all code is generated
-  masm->flush();
+  masm->flush();

   RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
   return tmp;
 }
-
-/*void SharedRuntime::generate_stubs() {
-  _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::handle_wrong_method),"wrong_method_stub");
-  _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::handle_wrong_method_ic_miss),"ic_miss_stub");
-  _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::resolve_opt_virtual_call_C),"resolve_opt_virtual_call");
-  _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::resolve_virtual_call_C),"resolve_virtual_call");
-  _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address,
-                        SharedRuntime::resolve_static_call_C),"resolve_static_call");
-  _polling_page_safepoint_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
-                        SafepointSynchronize::handle_polling_page_exception), false);
-  _polling_page_return_handler_blob =generate_handler_blob(CAST_FROM_FN_PTR(address,
-                        SafepointSynchronize::handle_polling_page_exception), true);
-  generate_deopt_blob();
-#ifdef COMPILER2
-  generate_uncommon_trap_blob();
-#endif // COMPILER2
-}*/
-
 extern "C" int SpinPause() {return 0;}
 // extern "C" int SafeFetch32 (int * adr, int errValue) {return 0;} ;
 // extern "C" intptr_t SafeFetchN (intptr_t * adr, intptr_t errValue) {return *adr; } ;
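
For readers following the deopt blob above, here is a minimal sketch, not part of the patch, of what the skeletal-frame-pushing loop computes: it walks the UnrollBlock's frame_sizes/frame_pcs arrays and carves one interpreter frame per entry out of the stack. UnrollInfo, the concrete sizes, pcs, and starting SP below are hypothetical stand-ins for Deoptimization::UnrollBlock and live register values; the real code emits MIPS instructions through the MacroAssembler rather than doing this arithmetic directly.

#include <cstdint>
#include <cstdio>
#include <vector>

struct UnrollInfo {                      // stand-in for Deoptimization::UnrollBlock
  std::vector<intptr_t> frame_sizes;     // bytes needed by each interpreter frame
  std::vector<intptr_t> frame_pcs;       // return pc recorded for each frame
};

int main() {
  const intptr_t wordSize = 8;                         // 64-bit word, as in the port
  UnrollInfo info{{96, 128}, {0x1000, 0x2000}};        // made-up frame sizes and pcs

  intptr_t sp = 0x8000;                                // hypothetical SP after popping
  intptr_t fp = 0;                                     // the deoptimized frame
  intptr_t sender_sp = sp;

  for (size_t i = 0; i < info.frame_sizes.size(); i++) {
    // addi(T2, T2, -2*wordSize): the pc and old fp are pushed by hand below
    intptr_t body = info.frame_sizes[i] - 2 * wordSize;
    sp -= 2 * wordSize;                                // push2(AT, FP): return pc + saved FP
    fp = sp;                                           // move(FP, SP): new frame pointer
    sp -= body;                                        // sub(SP, SP, T2): reserve frame body
    // sd(R0, FP, ...last_sp_offset) and sd(sender_sp, FP, ...sender_sp_offset)
    // make the skeletal frame walkable; here we only track and print sender_sp.
    std::printf("frame %zu: ra=0x%lx fp=0x%lx sp=0x%lx sender_sp=0x%lx\n",
                i, (long)info.frame_pcs[i], (long)fp, (long)sp, (long)sender_sp);
    sender_sp = sp;                                    // move(sender_sp, SP): pass to next frame
  }
  return 0;
}

Compiled with any C++11 compiler this prints one line per skeletal frame, mirroring the SP/FP bookkeeping the generated loop performs before unpack_frames() fills the frames in.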
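
The wrapper, deopt, and resolve stubs above all realign SP with the same two-instruction idiom, move(AT, -(StackAlignmentInBytes)) followed by andr(SP, SP, AT). The throwaway program below is only an illustration of why that AND rounds the stack pointer down to the required boundary; the 16-byte alignment value and the address are assumptions, not taken from the patch.

#include <cstdint>
#include <cstdio>

int main() {
  const uintptr_t StackAlignmentInBytes = 16;          // assumed ABI requirement
  uintptr_t sp = 0x7fff0038;                           // hypothetical unaligned SP
  uintptr_t mask = (uintptr_t)-(intptr_t)StackAlignmentInBytes;  // two's complement: ...fff0
  uintptr_t aligned = sp & mask;                       // same effect as andr(SP, SP, AT)
  std::printf("sp      = 0x%lx\nmask    = 0x%lx\naligned = 0x%lx\n",
              (unsigned long)sp, (unsigned long)mask, (unsigned long)aligned);
  return 0;                                            // aligned = 0x7fff0030: rounded down
}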