Mon, 18 Sep 2017 14:54:20 +0800
[Interpreter] Improve the performance of the `new` operation by about 10%.
Running time of the test program: 206ms --> 187ms (on 3A3000-1way @ 1.4 GHz)
/**
 * Allocation micro-benchmark: times how long it takes to create
 * {@link #MAX} instances of this class and store them in an array.
 *
 * Every instance carries one int, two chars, one long, one short and one
 * byte field, each set to 1 during construction.
 */
public class Test {
    /** Number of instances created by the timed section of {@link #main}. */
    private static final int MAX = 400000;

    // All fields start at 1; the initializers run as part of construction.
    private int a = 1;
    private char b = 1;
    private char c = 1;
    private long d = 1;
    private short e = 1;
    private byte f = 1;

    /** Creates an instance whose fields all hold 1 (see the initializers above). */
    Test() {
    }

    /**
     * Adds up all six fields.
     *
     * @return the sum of the fields; the int/char part is added in int
     *         arithmetic first and then widened when the long field joins in
     */
    public long sum() {
        // a + b + c is evaluated with int arithmetic before d widens the sum.
        int narrowPart = a + b + c;
        return narrowPart + d + e + f;
    }

    /**
     * Runs the benchmark: prints the elapsed wall-clock milliseconds for
     * allocating the array and filling it with new instances, then prints
     * the field sum of the first element.
     *
     * @param args unused
     */
    public static void main(String[] args) throws Exception {
        // The clock starts before the array itself is allocated.
        long startMillis = System.currentTimeMillis();

        Test[] instances = new Test[MAX];
        for (int idx = 0; idx < MAX; idx++) {
            instances[idx] = new Test();
        }

        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println(elapsedMillis + "ms");

        // An explicit collection is intentionally left disabled here:
        //System.gc();

        // Reading one element back keeps the array in use after the timed loop.
        System.out.println(instances[0].sum());
    }
}
src/cpu/mips/vm/interp_masm_mips_64.cpp | file | annotate | diff | comparison | revisions | |
src/cpu/mips/vm/templateTable_mips_64.cpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cpu/mips/vm/interp_masm_mips_64.cpp Mon Sep 18 16:49:35 2017 +0800 1.2 +++ b/src/cpu/mips/vm/interp_masm_mips_64.cpp Mon Sep 18 14:54:20 2017 +0800 1.3 @@ -272,12 +272,12 @@ 1.4 } 1.5 1.6 1.7 -void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp( 1.8 - Register reg, 1.9 +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, 1.10 int bcp_offset) { 1.11 assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); 1.12 - get_2_byte_integer_at_bcp(reg, AT, bcp_offset); 1.13 - hswap(reg); 1.14 + lbu(AT, BCP, bcp_offset); 1.15 + lbu(reg, BCP, bcp_offset + 1); 1.16 + ins(reg, AT, 8, 8); 1.17 } 1.18 1.19
2.1 --- a/src/cpu/mips/vm/templateTable_mips_64.cpp Mon Sep 18 16:49:35 2017 +0800 2.2 +++ b/src/cpu/mips/vm/templateTable_mips_64.cpp Mon Sep 18 14:54:20 2017 +0800 2.3 @@ -3915,8 +3915,7 @@ 2.4 // return object in FSR 2.5 void TemplateTable::_new() { 2.6 transition(vtos, atos); 2.7 - __ get_2_byte_integer_at_bcp(A2, AT, 1); 2.8 - __ huswap(A2); 2.9 + __ get_unsigned_2_byte_index_at_bcp(A2, 1); 2.10 2.11 Label slow_case; 2.12 Label done; 2.13 @@ -3926,18 +3925,27 @@ 2.14 2.15 // get InstanceKlass in T3 2.16 __ get_cpool_and_tags(A1, T1); 2.17 + 2.18 __ dsll(AT, A2, Address::times_8); 2.19 - __ dadd(AT, A1, AT); 2.20 - __ ld(T3, AT, sizeof(ConstantPool)); 2.21 + if (UseLoongsonISA && Assembler::is_simm(sizeof(ConstantPool), 8)) { 2.22 + __ gsldx(T3, A1, AT, sizeof(ConstantPool)); 2.23 + } else { 2.24 + __ dadd(AT, A1, AT); 2.25 + __ ld(T3, AT, sizeof(ConstantPool)); 2.26 + } 2.27 2.28 // make sure the class we're about to instantiate has been resolved. 2.29 // Note: slow_case does a pop of stack, which is why we loaded class/pushed above 2.30 const int tags_offset = Array<u1>::base_offset_in_bytes(); 2.31 - __ dadd(T1, T1, A2); 2.32 - __ lb(AT, T1, tags_offset); 2.33 + if (UseLoongsonISA && Assembler::is_simm(tags_offset, 8)) { 2.34 + __ gslbx(AT, T1, A2, tags_offset); 2.35 + } else { 2.36 + __ dadd(T1, T1, A2); 2.37 + __ lb(AT, T1, tags_offset); 2.38 + } 2.39 __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); 2.40 __ bne(AT, R0, slow_case); 2.41 - __ delayed()->nop(); 2.42 + //__ delayed()->nop(); 2.43 2.44 2.45 // make sure klass is initialized & doesn't have finalizer 2.46 @@ -3945,20 +3953,13 @@ 2.47 __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); 2.48 __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); 2.49 __ bne(AT, R0, slow_case); 2.50 - __ delayed()->nop(); 2.51 + //__ delayed()->nop(); 2.52 2.53 // has_finalizer 2.54 - //__ lw(T1, T3, Klass::access_flags_offset() + sizeof(oopDesc)); 2.55 - //__ move(AT, JVM_ACC_CAN_BE_FASTPATH_ALLOCATED); 
2.56 - //__ andr(AT, T1, AT); 2.57 - __ lw(T1, T3, in_bytes(Klass::layout_helper_offset()) ); 2.58 - __ andi(AT, T1, Klass::_lh_instance_slow_path_bit); 2.59 + __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); 2.60 + __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); 2.61 __ bne(AT, R0, slow_case); 2.62 - __ delayed()->nop(); 2.63 - 2.64 - // get instance_size in InstanceKlass (already aligned) in T0, 2.65 - // be sure to preserve this value 2.66 - __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); 2.67 + //__ delayed()->nop(); 2.68 2.69 // Allocate the instance 2.70 // 1) Try to allocate in the TLAB 2.71 @@ -3978,9 +3979,9 @@ 2.72 #endif 2.73 // get tlab_top 2.74 __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); 2.75 - __ dadd(T2, FSR, T0); 2.76 // get tlab_end 2.77 __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); 2.78 + __ dadd(T2, FSR, T0); 2.79 __ slt(AT, AT, T2); 2.80 __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); 2.81 __ delayed()->nop(); 2.82 @@ -3988,10 +3989,10 @@ 2.83 2.84 if (ZeroTLAB) { 2.85 // the fields have been already cleared 2.86 - __ b_far(initialize_header); 2.87 + __ beq(R0, R0, initialize_header); 2.88 } else { 2.89 // initialize both the header and fields 2.90 - __ b_far(initialize_object); 2.91 + __ beq(R0, R0, initialize_object); 2.92 } 2.93 __ delayed()->nop(); 2.94 } 2.95 @@ -4003,13 +4004,13 @@ 2.96 2.97 Label retry; 2.98 Address heap_top(T1); 2.99 - __ li(T1, (long)Universe::heap()->top_addr()); 2.100 - 2.101 + __ set64(T1, (long)Universe::heap()->top_addr()); 2.102 __ ld(FSR, heap_top); 2.103 + 2.104 __ bind(retry); 2.105 + __ set64(AT, (long)Universe::heap()->end_addr()); 2.106 + __ ld(AT, AT, 0); 2.107 __ dadd(T2, FSR, T0); 2.108 - __ li(AT, (long)Universe::heap()->end_addr()); 2.109 - __ ld(AT, AT, 0); 2.110 __ slt(AT, AT, T2); 2.111 __ bne(AT, R0, slow_case); 2.112 __ delayed()->nop(); 2.113 @@ -4032,24 +4033,11 @@ 2.114 // The object is initialized before the header. 
If the object size is 2.115 // zero, go directly to the header initialization. 2.116 __ bind(initialize_object); 2.117 - __ li(AT, - sizeof(oopDesc)); 2.118 + __ set64(AT, - sizeof(oopDesc)); 2.119 __ daddu(T0, T0, AT); 2.120 - __ beq_far(T0, R0, initialize_header); 2.121 + __ beq(T0, R0, initialize_header); 2.122 __ delayed()->nop(); 2.123 2.124 - 2.125 - // T0 must have been multiple of 2 2.126 -#ifdef ASSERT 2.127 - // make sure T0 was multiple of 2 2.128 - Label L; 2.129 - __ andi(AT, T0, 1); 2.130 - __ beq(AT, R0, L); 2.131 - __ delayed()->nop(); 2.132 - __ stop("object size is not multiple of 2 - adjust this code"); 2.133 - __ bind(L); 2.134 - // edx must be > 0, no extra check needed here 2.135 -#endif 2.136 - 2.137 // initialize remaining object fields: T0 is a multiple of 2 2.138 { 2.139 Label loop; 2.140 @@ -4060,9 +4048,8 @@ 2.141 __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); 2.142 __ bne(T1, FSR, loop); //dont clear header 2.143 __ delayed()->daddi(T1, T1, -oopSize); 2.144 - // actually sizeof(oopDesc)==8, so we can move 2.145 - // __ addiu(AT, AT, -8) to delay slot, and compare FSR with T1 2.146 } 2.147 + 2.148 //klass in T3, 2.149 // initialize object header only. 2.150 __ bind(initialize_header); 2.151 @@ -4070,7 +4057,7 @@ 2.152 __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); 2.153 __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); 2.154 } else { 2.155 - __ li(AT, (long)markOopDesc::prototype()); 2.156 + __ set64(AT, (long)markOopDesc::prototype()); 2.157 __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); 2.158 } 2.159