[Interpreter] Performance of the `new` operation is improved by about 10%.

Mon, 18 Sep 2017 14:54:20 +0800

author
fujie
date
Mon, 18 Sep 2017 14:54:20 +0800
changeset 6884
37fd1c756f31
parent 6883
57b216e5c8e3
child 6885
75ee8543b584

[Interpreter] Performance of the `new` operation is improved by about 10%.

Running time of the test program: 206ms --> 187ms (on 3A3000-1way @ 1.4 GHz)

// Micro-benchmark quoted in the commit message: it times the allocation of
// MAX Test instances, i.e. MAX executions of `new Test()`.
public class Test {

// Number of Test objects allocated by main().
private static final int MAX = 400000;

// Instance fields of several primitive types; the constructor sets each to 1.
private int a;
private char b;
private char c;
private long d;
private short e;
private byte f;

// Assigns 1 to every instance field.
Test() {
a = 1;
b = 1;
c = 1;
d = 1;
e = 1;
f = 1;
}

// Returns the sum of all six fields as a long (6 for a freshly constructed object).
public long sum() {
return a + b + c + d + e + f;
}

// Fills an array with MAX newly allocated Test objects, prints the elapsed
// wall-clock time in milliseconds, then prints arr[0].sum().
public static void main(String[] args) throws Exception {
// The timer starts before the array itself is allocated, so the reported
// time covers the array allocation as well as the MAX object allocations.
long t = System.currentTimeMillis();
Test[] arr = new Test[MAX];
for (int i = 0; i < MAX; i++) {
arr[i] = new Test();
}
System.out.println((System.currentTimeMillis() - t) + "ms");
//System.gc();
// NOTE(review): presumably printed so the allocated objects are actually used — confirm.
System.out.println(arr[0].sum());
}

}

src/cpu/mips/vm/interp_masm_mips_64.cpp file | annotate | diff | comparison | revisions
src/cpu/mips/vm/templateTable_mips_64.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/interp_masm_mips_64.cpp	Mon Sep 18 16:49:35 2017 +0800
     1.2 +++ b/src/cpu/mips/vm/interp_masm_mips_64.cpp	Mon Sep 18 14:54:20 2017 +0800
     1.3 @@ -272,12 +272,12 @@
     1.4  }
     1.5  
     1.6  
     1.7 -void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(
     1.8 -                                                                 Register reg,
     1.9 +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg,
    1.10                                                                   int bcp_offset) {
    1.11    assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
    1.12 -  get_2_byte_integer_at_bcp(reg, AT, bcp_offset);
    1.13 -  hswap(reg);
    1.14 +  lbu(AT, BCP, bcp_offset);
    1.15 +  lbu(reg, BCP, bcp_offset + 1);
    1.16 +  ins(reg, AT, 8, 8);
    1.17  }
    1.18  
    1.19  
     2.1 --- a/src/cpu/mips/vm/templateTable_mips_64.cpp	Mon Sep 18 16:49:35 2017 +0800
     2.2 +++ b/src/cpu/mips/vm/templateTable_mips_64.cpp	Mon Sep 18 14:54:20 2017 +0800
     2.3 @@ -3915,8 +3915,7 @@
     2.4  // return object in FSR
     2.5  void TemplateTable::_new() {
     2.6    transition(vtos, atos);
     2.7 -  __ get_2_byte_integer_at_bcp(A2, AT, 1);
     2.8 -  __ huswap(A2);
     2.9 +  __ get_unsigned_2_byte_index_at_bcp(A2, 1);
    2.10  
    2.11    Label slow_case;
    2.12    Label done;
    2.13 @@ -3926,18 +3925,27 @@
    2.14  
    2.15    // get InstanceKlass in T3
    2.16    __ get_cpool_and_tags(A1, T1);
    2.17 +
    2.18    __ dsll(AT, A2, Address::times_8);
    2.19 -  __ dadd(AT, A1, AT);
    2.20 -  __ ld(T3, AT, sizeof(ConstantPool));
    2.21 +  if (UseLoongsonISA && Assembler::is_simm(sizeof(ConstantPool), 8)) {
    2.22 +    __ gsldx(T3, A1, AT, sizeof(ConstantPool));
    2.23 +  } else {
    2.24 +    __ dadd(AT, A1, AT);
    2.25 +    __ ld(T3, AT, sizeof(ConstantPool));
    2.26 +  }
    2.27  
    2.28    // make sure the class we're about to instantiate has been resolved.
    2.29    // Note: slow_case does a pop of stack, which is why we loaded class/pushed above
    2.30    const int tags_offset = Array<u1>::base_offset_in_bytes();
    2.31 -  __ dadd(T1, T1, A2);
    2.32 -  __ lb(AT, T1, tags_offset);
    2.33 +  if (UseLoongsonISA && Assembler::is_simm(tags_offset, 8)) {
    2.34 +    __ gslbx(AT, T1, A2, tags_offset);
    2.35 +  } else {
    2.36 +    __ dadd(T1, T1, A2);
    2.37 +    __ lb(AT, T1, tags_offset);
    2.38 +  }
    2.39    __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
    2.40    __ bne(AT, R0, slow_case);
    2.41 -  __ delayed()->nop();
    2.42 +  //__ delayed()->nop();
    2.43  
    2.44  
    2.45    // make sure klass is initialized & doesn't have finalizer
    2.46 @@ -3945,20 +3953,13 @@
    2.47    __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset()));
    2.48    __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized);
    2.49    __ bne(AT, R0, slow_case);
    2.50 -  __ delayed()->nop();
    2.51 +  //__ delayed()->nop();
    2.52  
    2.53    // has_finalizer
    2.54 -  //__ lw(T1, T3, Klass::access_flags_offset() + sizeof(oopDesc));
    2.55 -  //__ move(AT, JVM_ACC_CAN_BE_FASTPATH_ALLOCATED);
    2.56 -  //__ andr(AT, T1, AT);
    2.57 -  __ lw(T1, T3, in_bytes(Klass::layout_helper_offset()) );
    2.58 -  __ andi(AT, T1, Klass::_lh_instance_slow_path_bit);
    2.59 +  __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) );
    2.60 +  __ andi(AT, T0, Klass::_lh_instance_slow_path_bit);
    2.61    __ bne(AT, R0, slow_case);
    2.62 -  __ delayed()->nop();
    2.63 -
    2.64 -  // get instance_size in InstanceKlass (already aligned) in T0,
    2.65 -  // be sure to preserve this value
    2.66 -  __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) );
    2.67 +  //__ delayed()->nop();
    2.68  
    2.69    // Allocate the instance
    2.70    // 1) Try to allocate in the TLAB
    2.71 @@ -3978,9 +3979,9 @@
    2.72  #endif
    2.73      // get tlab_top
    2.74      __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset()));
    2.75 -    __ dadd(T2, FSR, T0);
    2.76      // get tlab_end
    2.77      __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset()));
    2.78 +    __ dadd(T2, FSR, T0);
    2.79      __ slt(AT, AT, T2);
    2.80      __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case);
    2.81      __ delayed()->nop();
    2.82 @@ -3988,10 +3989,10 @@
    2.83  
    2.84      if (ZeroTLAB) {
    2.85        // the fields have been already cleared
    2.86 -      __ b_far(initialize_header);
    2.87 +      __ beq(R0, R0, initialize_header);
    2.88      } else {
    2.89        // initialize both the header and fields
    2.90 -      __ b_far(initialize_object);
    2.91 +      __ beq(R0, R0, initialize_object);
    2.92      }
    2.93      __ delayed()->nop();
    2.94    }
    2.95 @@ -4003,13 +4004,13 @@
    2.96  
    2.97      Label retry;
    2.98      Address heap_top(T1);
    2.99 -    __ li(T1, (long)Universe::heap()->top_addr());
   2.100 -
   2.101 +    __ set64(T1, (long)Universe::heap()->top_addr());
   2.102      __ ld(FSR, heap_top);
   2.103 +
   2.104      __ bind(retry);
   2.105 +    __ set64(AT, (long)Universe::heap()->end_addr());
   2.106 +    __ ld(AT, AT, 0);
   2.107      __ dadd(T2, FSR, T0);
   2.108 -    __ li(AT, (long)Universe::heap()->end_addr());
   2.109 -    __ ld(AT, AT, 0);
   2.110      __ slt(AT, AT, T2);
   2.111      __ bne(AT, R0, slow_case);
   2.112      __ delayed()->nop();
   2.113 @@ -4032,24 +4033,11 @@
   2.114      // The object is initialized before the header.  If the object size is
   2.115      // zero, go directly to the header initialization.
   2.116      __ bind(initialize_object);
   2.117 -    __ li(AT, - sizeof(oopDesc));
   2.118 +    __ set64(AT, - sizeof(oopDesc));
   2.119      __ daddu(T0, T0, AT);
   2.120 -    __ beq_far(T0, R0, initialize_header);
   2.121 +    __ beq(T0, R0, initialize_header);
   2.122      __ delayed()->nop();
   2.123  
   2.124 -
   2.125 -    // T0 must have been multiple of 2
   2.126 -#ifdef ASSERT
   2.127 -    // make sure T0 was multiple of 2
   2.128 -    Label L;
   2.129 -    __ andi(AT, T0, 1);
   2.130 -    __ beq(AT, R0, L);
   2.131 -    __ delayed()->nop();
   2.132 -    __ stop("object size is not multiple of 2 - adjust this code");
   2.133 -    __ bind(L);
   2.134 -    // edx must be > 0, no extra check needed here
   2.135 -#endif
   2.136 -
   2.137      // initialize remaining object fields: T0 is a multiple of 2
   2.138      {
   2.139        Label loop;
   2.140 @@ -4060,9 +4048,8 @@
   2.141        __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize);
   2.142        __ bne(T1, FSR, loop); //dont clear header
   2.143        __ delayed()->daddi(T1, T1, -oopSize);
   2.144 -      // actually sizeof(oopDesc)==8, so we can move
   2.145 -      // __ addiu(AT, AT, -8) to delay slot, and compare FSR with T1
   2.146      }
   2.147 +
   2.148      //klass in T3,
   2.149      // initialize object header only.
   2.150      __ bind(initialize_header);
   2.151 @@ -4070,7 +4057,7 @@
   2.152        __ ld(AT, T3, in_bytes(Klass::prototype_header_offset()));
   2.153        __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ());
   2.154      } else {
   2.155 -      __ li(AT, (long)markOopDesc::prototype());
   2.156 +      __ set64(AT, (long)markOopDesc::prototype());
   2.157        __ sd(AT, FSR, oopDesc::mark_offset_in_bytes());
   2.158      }
   2.159  

mercurial