#7987 implements tiered compilation in the MIPS template interpreter

Fri, 07 Dec 2018 14:53:37 +0800

author
fujie
date
Fri, 07 Dec 2018 14:53:37 +0800
changeset 9274
0e232e568230
parent 9273
1baeed12b00e
child 9275
8a9136007601

#7987 implements tiered compilation in the MIPS template interpreter

------- Effect (scores, higher is better) ----------
SPECjvm2008 Benchmarks C2-only Tiered
startup.helloworld 70.51 74.81
startup.compiler.compiler 2.17 4.68
startup.compiler.sunflow 2.31 3.76
startup.compress 12.64 12.90
startup.crypto.aes 4.19 4.30
startup.crypto.rsa 13.86 14.10
startup.crypto.signverify 13.97 15.22
startup.mpegaudio 5.76 6.18
startup.scimark.fft 20.58 20.86
startup.scimark.lu 20.32 21.23
startup.scimark.monte_carlo 7.21 6.10
startup.scimark.sor 8.50 8.58
startup.scimark.sparse 10.35 10.80
startup.serial 4.82 5.26
startup.sunflow 5.52 7.02
startup.xml.transform 0.57 0.61
startup.xml.validation 5.48 8.71

src/cpu/mips/vm/assembler_mips.hpp file | annotate | diff | comparison | revisions
src/cpu/mips/vm/interp_masm_mips_64.cpp file | annotate | diff | comparison | revisions
src/cpu/mips/vm/interp_masm_mips_64.hpp file | annotate | diff | comparison | revisions
src/cpu/mips/vm/templateInterpreter_mips_64.cpp file | annotate | diff | comparison | revisions
src/cpu/mips/vm/templateTable_mips_64.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/assembler_mips.hpp	Fri Dec 07 14:41:48 2018 +0800
     1.2 +++ b/src/cpu/mips/vm/assembler_mips.hpp	Fri Dec 07 14:53:37 2018 +0800
     1.3 @@ -342,6 +342,21 @@
     1.4    friend class StubGenerator;
     1.5  
     1.6   public:
     1.7 +  enum Condition {
     1.8 +    zero         ,
     1.9 +    notZero      ,
    1.10 +    equal        ,
    1.11 +    notEqual     ,
    1.12 +    less         ,
    1.13 +    lessEqual    ,
    1.14 +    greater      ,
    1.15 +    greaterEqual ,
    1.16 +    below        ,
    1.17 +    belowEqual   ,
    1.18 +    above        ,
    1.19 +    aboveEqual
    1.20 +  };
    1.21 +
    1.22    static const int LogInstructionSize = 2;
    1.23    static const int InstructionSize    = 1 << LogInstructionSize;
    1.24  
     2.1 --- a/src/cpu/mips/vm/interp_masm_mips_64.cpp	Fri Dec 07 14:41:48 2018 +0800
     2.2 +++ b/src/cpu/mips/vm/interp_masm_mips_64.cpp	Fri Dec 07 14:53:37 2018 +0800
     2.3 @@ -2136,3 +2136,27 @@
     2.4      pop(state);
     2.5    }
     2.6  }
     2.7 +
     2.8 +// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
     2.9 +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
    2.10 +                                                        int increment, int mask,
    2.11 +                                                        Register scratch, bool preloaded,
    2.12 +                                                        Condition cond, Label* where) {
    2.13 +  assert_different_registers(scratch, AT);
    2.14 +
    2.15 +  if (!preloaded) {
    2.16 +    lw(scratch, counter_addr);
    2.17 +  }
    2.18 +  addiu32(scratch, scratch, increment);
    2.19 +  sw(scratch, counter_addr);
    2.20 +
    2.21 +  move(AT, mask);
    2.22 +  andr(scratch, scratch, AT);
    2.23 +
    2.24 +  if (cond == Assembler::zero) {
    2.25 +    beq(scratch, R0, *where);
    2.26 +    delayed()->nop();
    2.27 +  } else {
    2.28 +    unimplemented();
    2.29 +  }
    2.30 +}
     3.1 --- a/src/cpu/mips/vm/interp_masm_mips_64.hpp	Fri Dec 07 14:41:48 2018 +0800
     3.2 +++ b/src/cpu/mips/vm/interp_masm_mips_64.hpp	Fri Dec 07 14:53:37 2018 +0800
     3.3 @@ -216,10 +216,10 @@
     3.4                               bool decrement = false);
     3.5    void increment_mdp_data_at(Register mdp_in, Register reg, int constant,
     3.6                               bool decrement = false);
     3.7 -/*  void increment_mask_and_jump(Address counter_addr,
     3.8 +  void increment_mask_and_jump(Address counter_addr,
     3.9                                 int increment, int mask,
    3.10                                 Register scratch, bool preloaded,
    3.11 -                               Condition cond, Label* where); */
    3.12 +                               Condition cond, Label* where);
    3.13    void set_mdp_flag_at(Register mdp_in, int flag_constant);
    3.14    void test_mdp_data_at(Register mdp_in, int offset, Register value,
    3.15                          Register test_value_out,
     4.1 --- a/src/cpu/mips/vm/templateInterpreter_mips_64.cpp	Fri Dec 07 14:41:48 2018 +0800
     4.2 +++ b/src/cpu/mips/vm/templateInterpreter_mips_64.cpp	Fri Dec 07 14:53:37 2018 +0800
     4.3 @@ -302,64 +302,92 @@
     4.4  // Note: checking for negative value instead of overflow
     4.5  //       so we have a 'sticky' overflow test
     4.6  //
     4.7 -// prerequisites : method in T0, invocation counter in T3
     4.8 +// Rmethod: method
     4.9 +// T3     : invocation counter
    4.10 +//
    4.11  void InterpreterGenerator::generate_counter_incr(
    4.12          Label* overflow,
    4.13          Label* profile_method,
    4.14          Label* profile_method_continue) {
    4.15    Label done;
    4.16 -  const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset())
    4.17 -      + in_bytes(InvocationCounter::counter_offset()));
    4.18 -  const Address backedge_counter  (FSR, in_bytes(MethodCounters::backedge_counter_offset())
    4.19 -      + in_bytes(InvocationCounter::counter_offset()));
    4.20 +  if (TieredCompilation) {
    4.21 +    int increment = InvocationCounter::count_increment;
    4.22 +    int mask = ((1 << Tier0InvokeNotifyFreqLog)  - 1) << InvocationCounter::count_shift;
    4.23 +    Label no_mdo;
    4.24 +    if (ProfileInterpreter) {
    4.25 +      // Are we profiling?
    4.26 +      __ ld(FSR, Address(Rmethod, Method::method_data_offset()));
    4.27 +      __ beq(FSR, R0, no_mdo);
    4.28 +      __ delayed()->nop();
    4.29 +      // Increment counter in the MDO
    4.30 +      const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) +
    4.31 +                                                in_bytes(InvocationCounter::counter_offset()));
    4.32 +      __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow);
    4.33 +      __ beq(R0, R0, done);
    4.34 +      __ delayed()->nop();
    4.35 +    }
    4.36 +    __ bind(no_mdo);
    4.37 +    // Increment counter in MethodCounters
    4.38 +    const Address invocation_counter(FSR,
    4.39 +                  MethodCounters::invocation_counter_offset() +
    4.40 +                  InvocationCounter::counter_offset());
    4.41 +    __ get_method_counters(Rmethod, FSR, done);
    4.42 +    __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow);
    4.43 +    __ bind(done);
    4.44 +  } else {
    4.45 +    const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset())
    4.46 +        + in_bytes(InvocationCounter::counter_offset()));
    4.47 +    const Address backedge_counter  (FSR, in_bytes(MethodCounters::backedge_counter_offset())
    4.48 +        + in_bytes(InvocationCounter::counter_offset()));
    4.49  
    4.50 -  __ get_method_counters(Rmethod, FSR, done);
    4.51 +    __ get_method_counters(Rmethod, FSR, done);
    4.52  
    4.53 -  if (ProfileInterpreter) { // %%% Merge this into methodDataOop
    4.54 -    __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset()));
    4.55 -    __ incrementl(T9, 1);
    4.56 -    __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset()));
    4.57 -  }
    4.58 -  // Update standard invocation counters
    4.59 -  __ lw(T3, invocation_counter);
    4.60 -  __ increment(T3, InvocationCounter::count_increment);
    4.61 -  __ sw(T3, invocation_counter);  // save invocation count
    4.62 +    if (ProfileInterpreter) { // %%% Merge this into methodDataOop
    4.63 +      __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset()));
    4.64 +      __ incrementl(T9, 1);
    4.65 +      __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset()));
    4.66 +    }
    4.67 +    // Update standard invocation counters
    4.68 +    __ lw(T3, invocation_counter);
    4.69 +    __ increment(T3, InvocationCounter::count_increment);
    4.70 +    __ sw(T3, invocation_counter);  // save invocation count
    4.71  
    4.72 -  __ lw(FSR, backedge_counter);  // load backedge counter
    4.73 -  __ li(AT, InvocationCounter::count_mask_value);   // mask out the status bits
    4.74 -  __ andr(FSR, FSR, AT);
    4.75 +    __ lw(FSR, backedge_counter);  // load backedge counter
    4.76 +    __ li(AT, InvocationCounter::count_mask_value);   // mask out the status bits
    4.77 +    __ andr(FSR, FSR, AT);
    4.78  
    4.79 -  __ dadd(T3, T3, FSR);          // add both counters
    4.80 +    __ dadd(T3, T3, FSR);          // add both counters
    4.81  
    4.82 -  if (ProfileInterpreter && profile_method != NULL) {
    4.83 -    // Test to see if we should create a method data oop
    4.84 -    if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) {
    4.85 -      __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit);
    4.86 +    if (ProfileInterpreter && profile_method != NULL) {
    4.87 +      // Test to see if we should create a method data oop
    4.88 +      if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) {
    4.89 +        __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit);
    4.90 +      } else {
    4.91 +        __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit);
    4.92 +        __ lw(AT, AT, 0);
    4.93 +        __ slt(AT, T3, AT);
    4.94 +      }
    4.95 +
    4.96 +      __ bne_far(AT, R0, *profile_method_continue);
    4.97 +      __ delayed()->nop();
    4.98 +
    4.99 +      // if no method data exists, go to profile_method
   4.100 +      __ test_method_data_pointer(FSR, *profile_method);
   4.101 +    }
   4.102 +
   4.103 +    if (Assembler::is_simm16(CompileThreshold)) {
   4.104 +      __ srl(AT, T3, InvocationCounter::count_shift);
   4.105 +      __ slti(AT, AT, CompileThreshold);
   4.106      } else {
   4.107 -      __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit);
   4.108 +      __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit);
   4.109        __ lw(AT, AT, 0);
   4.110        __ slt(AT, T3, AT);
   4.111      }
   4.112  
   4.113 -    __ bne_far(AT, R0, *profile_method_continue);
   4.114 +    __ beq_far(AT, R0, *overflow);
   4.115      __ delayed()->nop();
   4.116 -
   4.117 -    // if no method data exists, go to profile_method
   4.118 -    __ test_method_data_pointer(FSR, *profile_method);
   4.119 +    __ bind(done);
   4.120    }
   4.121 -
   4.122 -  if (Assembler::is_simm16(CompileThreshold)) {
   4.123 -    __ srl(AT, T3, InvocationCounter::count_shift);
   4.124 -    __ slti(AT, AT, CompileThreshold);
   4.125 -  } else {
   4.126 -    __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit);
   4.127 -    __ lw(AT, AT, 0);
   4.128 -    __ slt(AT, T3, AT);
   4.129 -  }
   4.130 -
   4.131 -  __ beq_far(AT, R0, *overflow);
   4.132 -  __ delayed()->nop();
   4.133 -  __ bind(done);
   4.134  }
   4.135  
   4.136  void InterpreterGenerator::generate_counter_overflow(Label* do_continue) {
     5.1 --- a/src/cpu/mips/vm/templateTable_mips_64.cpp	Fri Dec 07 14:41:48 2018 +0800
     5.2 +++ b/src/cpu/mips/vm/templateTable_mips_64.cpp	Fri Dec 07 14:53:37 2018 +0800
     5.3 @@ -1938,12 +1938,10 @@
     5.4    __ get_method(T3);
     5.5    __ profile_taken_branch(A7, T2);    // only C2 meaningful
     5.6  
     5.7 -#ifndef CORE
     5.8    const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
     5.9                               InvocationCounter::counter_offset();
    5.10    const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
    5.11                                InvocationCounter::counter_offset();
    5.12 -#endif // CORE
    5.13  
    5.14    // Load up T4 with the branch displacement
    5.15    if (!is_wide) {
    5.16 @@ -1984,12 +1982,6 @@
    5.17    // Adjust the bcp in S0 by the displacement in T4
    5.18    __ dadd(BCP, BCP, A7);
    5.19  
    5.20 -#ifdef CORE
    5.21 -  // Pre-load the next target bytecode into EBX
    5.22 -  __ lbu(Rnext, BCP, 0);
    5.23 -  // continue with the bytecode @ target
    5.24 -  __ dispatch_only(vtos);
    5.25 -#else
    5.26    assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
    5.27    Label backedge_counter_overflow;
    5.28    Label profile_method;
    5.29 @@ -2021,68 +2013,92 @@
    5.30      __ delayed()->nop();
    5.31      __ bind(has_counters);
    5.32  
    5.33 -    // increment back edge counter
    5.34 -    __ ld(T1, T3, in_bytes(Method::method_counters_offset()));
    5.35 -    __ lw(T0, T1, in_bytes(be_offset));
    5.36 -    __ increment(T0, InvocationCounter::count_increment);
    5.37 -    __ sw(T0, T1, in_bytes(be_offset));
    5.38 -
    5.39 -    // load invocation counter
    5.40 -    __ lw(T1, T1, in_bytes(inv_offset));
    5.41 -    // buffer bit added, mask no needed
    5.42 -
    5.43 -    // dadd backedge counter & invocation counter
    5.44 -    __ dadd(T1, T1, T0);
    5.45 -
    5.46 -    if (ProfileInterpreter) {
    5.47 -      // Test to see if we should create a method data oop
    5.48 -      // T1 : backedge counter & invocation counter
    5.49 -      if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) {
    5.50 -        __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit);
    5.51 +    if (TieredCompilation) {
    5.52 +      Label no_mdo;
    5.53 +      int increment = InvocationCounter::count_increment;
    5.54 +      int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
    5.55 +      if (ProfileInterpreter) {
    5.56 +        // Are we profiling?
    5.57 +        __ ld(T0, Address(T3, in_bytes(Method::method_data_offset())));
    5.58 +        __ beq(T0, R0, no_mdo);
    5.59 +        __ delayed()->nop();
    5.60 +        // Increment the MDO backedge counter
    5.61 +        const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) +
    5.62 +                                           in_bytes(InvocationCounter::counter_offset()));
    5.63 +        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
    5.64 +                                   T1, false, Assembler::zero, &backedge_counter_overflow);
    5.65 +        __ beq(R0, R0, dispatch);
    5.66 +        __ delayed()->nop();
    5.67 +      }
    5.68 +      __ bind(no_mdo);
    5.69 +      // Increment backedge counter in MethodCounters*
    5.70 +      __ ld(T0, Address(T3, Method::method_counters_offset()));
    5.71 +      __ increment_mask_and_jump(Address(T0, be_offset), increment, mask,
    5.72 +                                 T1, false, Assembler::zero, &backedge_counter_overflow);
    5.73 +    } else {
    5.74 +      // increment back edge counter
    5.75 +      __ ld(T1, T3, in_bytes(Method::method_counters_offset()));
    5.76 +      __ lw(T0, T1, in_bytes(be_offset));
    5.77 +      __ increment(T0, InvocationCounter::count_increment);
    5.78 +      __ sw(T0, T1, in_bytes(be_offset));
    5.79 +
    5.80 +      // load invocation counter
    5.81 +      __ lw(T1, T1, in_bytes(inv_offset));
    5.82 +      // buffer bit added, mask no needed
    5.83 +
    5.84 +      // dadd backedge counter & invocation counter
    5.85 +      __ dadd(T1, T1, T0);
    5.86 +
    5.87 +      if (ProfileInterpreter) {
    5.88 +        // Test to see if we should create a method data oop
    5.89 +        // T1 : backedge counter & invocation counter
    5.90 +        if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) {
    5.91 +          __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit);
    5.92 +        } else {
    5.93 +          __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit);
    5.94 +          __ lw(AT, AT, 0);
    5.95 +          __ slt(AT, T1, AT);
    5.96 +        }
    5.97 +
    5.98 +        __ bne(AT, R0, dispatch);
    5.99 +        __ delayed()->nop();
   5.100 +
   5.101 +        // if no method data exists, go to profile method
   5.102 +        __ test_method_data_pointer(T1, profile_method);
   5.103 +
   5.104 +        if (UseOnStackReplacement) {
   5.105 +          if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) {
   5.106 +            __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit);
   5.107 +          } else {
   5.108 +            __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
   5.109 +            __ lw(AT, AT, 0);
   5.110 +            __ slt(AT, T2, AT);
   5.111 +          }
   5.112 +
   5.113 +          __ bne(AT, R0, dispatch);
   5.114 +          __ delayed()->nop();
   5.115 +
   5.116 +          // When ProfileInterpreter is on, the backedge_count comes
   5.117 +          // from the methodDataOop, which value does not get reset on
   5.118 +          // the call to  frequency_counter_overflow().
   5.119 +          // To avoid excessive calls to the overflow routine while
   5.120 +          // the method is being compiled, dadd a second test to make
   5.121 +          // sure the overflow function is called only once every
   5.122 +          // overflow_frequency.
   5.123 +          const int overflow_frequency = 1024;
   5.124 +          __ andi(AT, T2, overflow_frequency-1);
   5.125 +          __ beq(AT, R0, backedge_counter_overflow);
   5.126 +          __ delayed()->nop();
   5.127 +        }
   5.128        } else {
   5.129 -        __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit);
   5.130 -        __ lw(AT, AT, 0);
   5.131 -        __ slt(AT, T1, AT);
   5.132 -      }
   5.133 -
   5.134 -      __ bne(AT, R0, dispatch);
   5.135 -      __ delayed()->nop();
   5.136 -
   5.137 -      // if no method data exists, go to profile method
   5.138 -      __ test_method_data_pointer(T1, profile_method);
   5.139 -
   5.140 -      if (UseOnStackReplacement) {
   5.141 -        if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) {
   5.142 -          __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit);
   5.143 -        } else {
   5.144 +        if (UseOnStackReplacement) {
   5.145 +          // check for overflow against eax, which is the sum of the counters
   5.146            __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
   5.147            __ lw(AT, AT, 0);
   5.148 -          __ slt(AT, T2, AT);
   5.149 +          __ slt(AT, T1, AT);
   5.150 +          __ beq(AT, R0, backedge_counter_overflow);
   5.151 +          __ delayed()->nop();
   5.152          }
   5.153 -
   5.154 -        __ bne(AT, R0, dispatch);
   5.155 -        __ delayed()->nop();
   5.156 -
   5.157 -        // When ProfileInterpreter is on, the backedge_count comes
   5.158 -        // from the methodDataOop, which value does not get reset on
   5.159 -        // the call to  frequency_counter_overflow().
   5.160 -        // To avoid excessive calls to the overflow routine while
   5.161 -        // the method is being compiled, dadd a second test to make
   5.162 -        // sure the overflow function is called only once every
   5.163 -        // overflow_frequency.
   5.164 -        const int overflow_frequency = 1024;
   5.165 -        __ andi(AT, T2, overflow_frequency-1);
   5.166 -        __ beq(AT, R0, backedge_counter_overflow);
   5.167 -        __ delayed()->nop();
   5.168 -      }
   5.169 -    } else {
   5.170 -      if (UseOnStackReplacement) {
   5.171 -        // check for overflow against eax, which is the sum of the counters
   5.172 -        __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit);
   5.173 -        __ lw(AT, AT, 0);
   5.174 -        __ slt(AT, T1, AT);
   5.175 -        __ beq(AT, R0, backedge_counter_overflow);
   5.176 -        __ delayed()->nop();
   5.177        }
   5.178      }
   5.179      __ bind(dispatch);
   5.180 @@ -2164,7 +2180,6 @@
   5.181        __ delayed()->nop();
   5.182      }
   5.183    }
   5.184 -#endif // not CORE
   5.185  }
   5.186  
   5.187  

mercurial