Fri, 07 Dec 2018 14:53:37 +0800
#7987 implements tiered compilation in the MIPS template interpreter
------- Effect (scores, higher is better) ----------
SPECjvm2008 Benchmarks C2-only Tiered
startup.helloworld 70.51 74.81
startup.compiler.compiler 2.17 4.68
startup.compiler.sunflow 2.31 3.76
startup.compress 12.64 12.90
startup.crypto.aes 4.19 4.30
startup.crypto.rsa 13.86 14.10
startup.crypto.signverify 13.97 15.22
startup.mpegaudio 5.76 6.18
startup.scimark.fft 20.58 20.86
startup.scimark.lu 20.32 21.23
startup.scimark.monte_carlo 7.21 6.10
startup.scimark.sor 8.50 8.58
startup.scimark.sparse 10.35 10.80
startup.serial 4.82 5.26
startup.sunflow 5.52 7.02
startup.xml.transform 0.57 0.61
startup.xml.validation 5.48 8.71
1.1 --- a/src/cpu/mips/vm/assembler_mips.hpp Fri Dec 07 14:41:48 2018 +0800 1.2 +++ b/src/cpu/mips/vm/assembler_mips.hpp Fri Dec 07 14:53:37 2018 +0800 1.3 @@ -342,6 +342,21 @@ 1.4 friend class StubGenerator; 1.5 1.6 public: 1.7 + enum Condition { 1.8 + zero , 1.9 + notZero , 1.10 + equal , 1.11 + notEqual , 1.12 + less , 1.13 + lessEqual , 1.14 + greater , 1.15 + greaterEqual , 1.16 + below , 1.17 + belowEqual , 1.18 + above , 1.19 + aboveEqual 1.20 + }; 1.21 + 1.22 static const int LogInstructionSize = 2; 1.23 static const int InstructionSize = 1 << LogInstructionSize; 1.24
2.1 --- a/src/cpu/mips/vm/interp_masm_mips_64.cpp Fri Dec 07 14:41:48 2018 +0800 2.2 +++ b/src/cpu/mips/vm/interp_masm_mips_64.cpp Fri Dec 07 14:53:37 2018 +0800 2.3 @@ -2136,3 +2136,27 @@ 2.4 pop(state); 2.5 } 2.6 } 2.7 + 2.8 +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 2.9 +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, 2.10 + int increment, int mask, 2.11 + Register scratch, bool preloaded, 2.12 + Condition cond, Label* where) { 2.13 + assert_different_registers(scratch, AT); 2.14 + 2.15 + if (!preloaded) { 2.16 + lw(scratch, counter_addr); 2.17 + } 2.18 + addiu32(scratch, scratch, increment); 2.19 + sw(scratch, counter_addr); 2.20 + 2.21 + move(AT, mask); 2.22 + andr(scratch, scratch, AT); 2.23 + 2.24 + if (cond == Assembler::zero) { 2.25 + beq(scratch, R0, *where); 2.26 + delayed()->nop(); 2.27 + } else { 2.28 + unimplemented(); 2.29 + } 2.30 +}
3.1 --- a/src/cpu/mips/vm/interp_masm_mips_64.hpp Fri Dec 07 14:41:48 2018 +0800 3.2 +++ b/src/cpu/mips/vm/interp_masm_mips_64.hpp Fri Dec 07 14:53:37 2018 +0800 3.3 @@ -216,10 +216,10 @@ 3.4 bool decrement = false); 3.5 void increment_mdp_data_at(Register mdp_in, Register reg, int constant, 3.6 bool decrement = false); 3.7 -/* void increment_mask_and_jump(Address counter_addr, 3.8 + void increment_mask_and_jump(Address counter_addr, 3.9 int increment, int mask, 3.10 Register scratch, bool preloaded, 3.11 - Condition cond, Label* where); */ 3.12 + Condition cond, Label* where); 3.13 void set_mdp_flag_at(Register mdp_in, int flag_constant); 3.14 void test_mdp_data_at(Register mdp_in, int offset, Register value, 3.15 Register test_value_out,
4.1 --- a/src/cpu/mips/vm/templateInterpreter_mips_64.cpp Fri Dec 07 14:41:48 2018 +0800 4.2 +++ b/src/cpu/mips/vm/templateInterpreter_mips_64.cpp Fri Dec 07 14:53:37 2018 +0800 4.3 @@ -302,64 +302,92 @@ 4.4 // Note: checking for negative value instead of overflow 4.5 // so we have a 'sticky' overflow test 4.6 // 4.7 -// prerequisites : method in T0, invocation counter in T3 4.8 +// Rmethod: method 4.9 +// T3 : invocation counter 4.10 +// 4.11 void InterpreterGenerator::generate_counter_incr( 4.12 Label* overflow, 4.13 Label* profile_method, 4.14 Label* profile_method_continue) { 4.15 Label done; 4.16 - const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) 4.17 - + in_bytes(InvocationCounter::counter_offset())); 4.18 - const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) 4.19 - + in_bytes(InvocationCounter::counter_offset())); 4.20 + if (TieredCompilation) { 4.21 + int increment = InvocationCounter::count_increment; 4.22 + int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; 4.23 + Label no_mdo; 4.24 + if (ProfileInterpreter) { 4.25 + // Are we profiling? 
4.26 + __ ld(FSR, Address(Rmethod, Method::method_data_offset())); 4.27 + __ beq(FSR, R0, no_mdo); 4.28 + __ delayed()->nop(); 4.29 + // Increment counter in the MDO 4.30 + const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + 4.31 + in_bytes(InvocationCounter::counter_offset())); 4.32 + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); 4.33 + __ beq(R0, R0, done); 4.34 + __ delayed()->nop(); 4.35 + } 4.36 + __ bind(no_mdo); 4.37 + // Increment counter in MethodCounters 4.38 + const Address invocation_counter(FSR, 4.39 + MethodCounters::invocation_counter_offset() + 4.40 + InvocationCounter::counter_offset()); 4.41 + __ get_method_counters(Rmethod, FSR, done); 4.42 + __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); 4.43 + __ bind(done); 4.44 + } else { 4.45 + const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) 4.46 + + in_bytes(InvocationCounter::counter_offset())); 4.47 + const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) 4.48 + + in_bytes(InvocationCounter::counter_offset())); 4.49 4.50 - __ get_method_counters(Rmethod, FSR, done); 4.51 + __ get_method_counters(Rmethod, FSR, done); 4.52 4.53 - if (ProfileInterpreter) { // %%% Merge this into methodDataOop 4.54 - __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); 4.55 - __ incrementl(T9, 1); 4.56 - __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); 4.57 - } 4.58 - // Update standard invocation counters 4.59 - __ lw(T3, invocation_counter); 4.60 - __ increment(T3, InvocationCounter::count_increment); 4.61 - __ sw(T3, invocation_counter); // save invocation count 4.62 + if (ProfileInterpreter) { // %%% Merge this into methodDataOop 4.63 + __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); 4.64 + 
__ incrementl(T9, 1); 4.65 + __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); 4.66 + } 4.67 + // Update standard invocation counters 4.68 + __ lw(T3, invocation_counter); 4.69 + __ increment(T3, InvocationCounter::count_increment); 4.70 + __ sw(T3, invocation_counter); // save invocation count 4.71 4.72 - __ lw(FSR, backedge_counter); // load backedge counter 4.73 - __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits 4.74 - __ andr(FSR, FSR, AT); 4.75 + __ lw(FSR, backedge_counter); // load backedge counter 4.76 + __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits 4.77 + __ andr(FSR, FSR, AT); 4.78 4.79 - __ dadd(T3, T3, FSR); // add both counters 4.80 + __ dadd(T3, T3, FSR); // add both counters 4.81 4.82 - if (ProfileInterpreter && profile_method != NULL) { 4.83 - // Test to see if we should create a method data oop 4.84 - if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { 4.85 - __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); 4.86 + if (ProfileInterpreter && profile_method != NULL) { 4.87 + // Test to see if we should create a method data oop 4.88 + if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { 4.89 + __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); 4.90 + } else { 4.91 + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); 4.92 + __ lw(AT, AT, 0); 4.93 + __ slt(AT, T3, AT); 4.94 + } 4.95 + 4.96 + __ bne_far(AT, R0, *profile_method_continue); 4.97 + __ delayed()->nop(); 4.98 + 4.99 + // if no method data exists, go to profile_method 4.100 + __ test_method_data_pointer(FSR, *profile_method); 4.101 + } 4.102 + 4.103 + if (Assembler::is_simm16(CompileThreshold)) { 4.104 + __ srl(AT, T3, InvocationCounter::count_shift); 4.105 + __ slti(AT, AT, CompileThreshold); 4.106 } else { 4.107 - __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); 4.108 + __ li(AT, 
(long)&InvocationCounter::InterpreterInvocationLimit); 4.109 __ lw(AT, AT, 0); 4.110 __ slt(AT, T3, AT); 4.111 } 4.112 4.113 - __ bne_far(AT, R0, *profile_method_continue); 4.114 + __ beq_far(AT, R0, *overflow); 4.115 __ delayed()->nop(); 4.116 - 4.117 - // if no method data exists, go to profile_method 4.118 - __ test_method_data_pointer(FSR, *profile_method); 4.119 + __ bind(done); 4.120 } 4.121 - 4.122 - if (Assembler::is_simm16(CompileThreshold)) { 4.123 - __ srl(AT, T3, InvocationCounter::count_shift); 4.124 - __ slti(AT, AT, CompileThreshold); 4.125 - } else { 4.126 - __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); 4.127 - __ lw(AT, AT, 0); 4.128 - __ slt(AT, T3, AT); 4.129 - } 4.130 - 4.131 - __ beq_far(AT, R0, *overflow); 4.132 - __ delayed()->nop(); 4.133 - __ bind(done); 4.134 } 4.135 4.136 void InterpreterGenerator::generate_counter_overflow(Label* do_continue) {
5.1 --- a/src/cpu/mips/vm/templateTable_mips_64.cpp Fri Dec 07 14:41:48 2018 +0800 5.2 +++ b/src/cpu/mips/vm/templateTable_mips_64.cpp Fri Dec 07 14:53:37 2018 +0800 5.3 @@ -1938,12 +1938,10 @@ 5.4 __ get_method(T3); 5.5 __ profile_taken_branch(A7, T2); // only C2 meaningful 5.6 5.7 -#ifndef CORE 5.8 const ByteSize be_offset = MethodCounters::backedge_counter_offset() + 5.9 InvocationCounter::counter_offset(); 5.10 const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + 5.11 InvocationCounter::counter_offset(); 5.12 -#endif // CORE 5.13 5.14 // Load up T4 with the branch displacement 5.15 if (!is_wide) { 5.16 @@ -1984,12 +1982,6 @@ 5.17 // Adjust the bcp in S0 by the displacement in T4 5.18 __ dadd(BCP, BCP, A7); 5.19 5.20 -#ifdef CORE 5.21 - // Pre-load the next target bytecode into EBX 5.22 - __ lbu(Rnext, BCP, 0); 5.23 - // continue with the bytecode @ target 5.24 - __ dispatch_only(vtos); 5.25 -#else 5.26 assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); 5.27 Label backedge_counter_overflow; 5.28 Label profile_method; 5.29 @@ -2021,68 +2013,92 @@ 5.30 __ delayed()->nop(); 5.31 __ bind(has_counters); 5.32 5.33 - // increment back edge counter 5.34 - __ ld(T1, T3, in_bytes(Method::method_counters_offset())); 5.35 - __ lw(T0, T1, in_bytes(be_offset)); 5.36 - __ increment(T0, InvocationCounter::count_increment); 5.37 - __ sw(T0, T1, in_bytes(be_offset)); 5.38 - 5.39 - // load invocation counter 5.40 - __ lw(T1, T1, in_bytes(inv_offset)); 5.41 - // buffer bit added, mask no needed 5.42 - 5.43 - // dadd backedge counter & invocation counter 5.44 - __ dadd(T1, T1, T0); 5.45 - 5.46 - if (ProfileInterpreter) { 5.47 - // Test to see if we should create a method data oop 5.48 - // T1 : backedge counter & invocation counter 5.49 - if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { 5.50 - __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); 5.51 + if (TieredCompilation) { 5.52 + 
Label no_mdo; 5.53 + int increment = InvocationCounter::count_increment; 5.54 + int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; 5.55 + if (ProfileInterpreter) { 5.56 + // Are we profiling? 5.57 + __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); 5.58 + __ beq(T0, R0, no_mdo); 5.59 + __ delayed()->nop(); 5.60 + // Increment the MDO backedge counter 5.61 + const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + 5.62 + in_bytes(InvocationCounter::counter_offset())); 5.63 + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, 5.64 + T1, false, Assembler::zero, &backedge_counter_overflow); 5.65 + __ beq(R0, R0, dispatch); 5.66 + __ delayed()->nop(); 5.67 + } 5.68 + __ bind(no_mdo); 5.69 + // Increment backedge counter in MethodCounters* 5.70 + __ ld(T0, Address(T3, Method::method_counters_offset())); 5.71 + __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, 5.72 + T1, false, Assembler::zero, &backedge_counter_overflow); 5.73 + } else { 5.74 + // increment back edge counter 5.75 + __ ld(T1, T3, in_bytes(Method::method_counters_offset())); 5.76 + __ lw(T0, T1, in_bytes(be_offset)); 5.77 + __ increment(T0, InvocationCounter::count_increment); 5.78 + __ sw(T0, T1, in_bytes(be_offset)); 5.79 + 5.80 + // load invocation counter 5.81 + __ lw(T1, T1, in_bytes(inv_offset)); 5.82 + // buffer bit added, mask no needed 5.83 + 5.84 + // dadd backedge counter & invocation counter 5.85 + __ dadd(T1, T1, T0); 5.86 + 5.87 + if (ProfileInterpreter) { 5.88 + // Test to see if we should create a method data oop 5.89 + // T1 : backedge counter & invocation counter 5.90 + if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { 5.91 + __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); 5.92 + } else { 5.93 + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); 5.94 + __ lw(AT, AT, 0); 5.95 + __ slt(AT, T1, AT); 5.96 + } 5.97 + 5.98 + __ bne(AT, 
R0, dispatch); 5.99 + __ delayed()->nop(); 5.100 + 5.101 + // if no method data exists, go to profile method 5.102 + __ test_method_data_pointer(T1, profile_method); 5.103 + 5.104 + if (UseOnStackReplacement) { 5.105 + if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { 5.106 + __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); 5.107 + } else { 5.108 + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); 5.109 + __ lw(AT, AT, 0); 5.110 + __ slt(AT, T2, AT); 5.111 + } 5.112 + 5.113 + __ bne(AT, R0, dispatch); 5.114 + __ delayed()->nop(); 5.115 + 5.116 + // When ProfileInterpreter is on, the backedge_count comes 5.117 + // from the methodDataOop, which value does not get reset on 5.118 + // the call to frequency_counter_overflow(). 5.119 + // To avoid excessive calls to the overflow routine while 5.120 + // the method is being compiled, dadd a second test to make 5.121 + // sure the overflow function is called only once every 5.122 + // overflow_frequency. 
5.123 + const int overflow_frequency = 1024; 5.124 + __ andi(AT, T2, overflow_frequency-1); 5.125 + __ beq(AT, R0, backedge_counter_overflow); 5.126 + __ delayed()->nop(); 5.127 + } 5.128 } else { 5.129 - __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); 5.130 - __ lw(AT, AT, 0); 5.131 - __ slt(AT, T1, AT); 5.132 - } 5.133 - 5.134 - __ bne(AT, R0, dispatch); 5.135 - __ delayed()->nop(); 5.136 - 5.137 - // if no method data exists, go to profile method 5.138 - __ test_method_data_pointer(T1, profile_method); 5.139 - 5.140 - if (UseOnStackReplacement) { 5.141 - if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { 5.142 - __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); 5.143 - } else { 5.144 + if (UseOnStackReplacement) { 5.145 + // check for overflow against eax, which is the sum of the counters 5.146 __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); 5.147 __ lw(AT, AT, 0); 5.148 - __ slt(AT, T2, AT); 5.149 + __ slt(AT, T1, AT); 5.150 + __ beq(AT, R0, backedge_counter_overflow); 5.151 + __ delayed()->nop(); 5.152 } 5.153 - 5.154 - __ bne(AT, R0, dispatch); 5.155 - __ delayed()->nop(); 5.156 - 5.157 - // When ProfileInterpreter is on, the backedge_count comes 5.158 - // from the methodDataOop, which value does not get reset on 5.159 - // the call to frequency_counter_overflow(). 5.160 - // To avoid excessive calls to the overflow routine while 5.161 - // the method is being compiled, dadd a second test to make 5.162 - // sure the overflow function is called only once every 5.163 - // overflow_frequency. 
5.164 - const int overflow_frequency = 1024; 5.165 - __ andi(AT, T2, overflow_frequency-1); 5.166 - __ beq(AT, R0, backedge_counter_overflow); 5.167 - __ delayed()->nop(); 5.168 - } 5.169 - } else { 5.170 - if (UseOnStackReplacement) { 5.171 - // check for overflow against eax, which is the sum of the counters 5.172 - __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); 5.173 - __ lw(AT, AT, 0); 5.174 - __ slt(AT, T1, AT); 5.175 - __ beq(AT, R0, backedge_counter_overflow); 5.176 - __ delayed()->nop(); 5.177 } 5.178 } 5.179 __ bind(dispatch); 5.180 @@ -2164,7 +2180,6 @@ 5.181 __ delayed()->nop(); 5.182 } 5.183 } 5.184 -#endif // not CORE 5.185 } 5.186 5.187