Mon, 30 May 2016 02:35:49 -0400
[Interpreter] Fixed unaligned load in InterpreterMacroAssembler::get_cache_index_at_bcp
java -jar SPECjvm2008.jar -ikv -ict -bt 1 startup.helloworld
Effect:
eliminated 24 unaligned loads in startup.helloworld and many other benchmarks.
1.1 --- a/src/cpu/mips/vm/assembler_mips.cpp Mon May 30 02:01:38 2016 -0400 1.2 +++ b/src/cpu/mips/vm/assembler_mips.cpp Mon May 30 02:35:49 2016 -0400 1.3 @@ -3303,6 +3303,39 @@ 1.4 else 1.5 sh(reg, BCP, offset); 1.6 } 1.7 + 1.8 +void MacroAssembler::get_4_byte_integer_at_bcp( 1.9 + Register reg, 1.10 + Register tmp, 1.11 + int offset) { 1.12 + assert(reg != tmp, "need separate temp register"); 1.13 + if (offset & 3) { // Offset unaligned? 1.14 + lbu(reg, BCP, offset+3); 1.15 + lbu(tmp, BCP, offset+2); 1.16 +#ifdef _LP64 1.17 + dsll(reg, reg, 8); 1.18 + daddu(reg, tmp, reg); 1.19 + lbu(tmp, BCP, offset+1); 1.20 + dsll(reg, reg, 8); 1.21 + daddu(reg, tmp, reg); 1.22 + lbu(tmp, BCP, offset); 1.23 + dsll(reg, reg, 8); 1.24 + daddu(reg, tmp, reg); 1.25 +#else 1.26 + sll(reg, reg, 8); 1.27 + addu(reg, tmp, reg); 1.28 + lbu(tmp, BCP, offset+1); 1.29 + sll(reg, reg, 8); 1.30 + addu(reg, tmp, reg); 1.31 + lbu(tmp, BCP, offset); 1.32 + sll(reg, reg, 8); 1.33 + addu(reg, tmp, reg); 1.34 +#endif 1.35 + } else { 1.36 + lwu(reg, BCP, offset); 1.37 + } 1.38 +} 1.39 + 1.40 //for UseCompressedOops Option 1.41 void MacroAssembler::load_klass(Register dst, Register src) { 1.42 #ifdef _LP64
2.1 --- a/src/cpu/mips/vm/assembler_mips.hpp Mon May 30 02:01:38 2016 -0400 2.2 +++ b/src/cpu/mips/vm/assembler_mips.hpp Mon May 30 02:35:49 2016 -0400 2.3 @@ -1881,8 +1881,10 @@ 2.4 void pushad(); 2.5 void popad(); 2.6 2.7 + // load_two_bytes_from_at_bcp should be get_2_byte_integer_at_bcp, and defined in InterpreterMacroAssembler. 2.8 void load_two_bytes_from_at_bcp(Register reg, Register tmp, int offset); 2.9 void store_two_byts_to_at_bcp(Register reg, Register tmp, int offset); 2.10 + void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); 2.11 // Test sub_klass against super_klass, with fast and slow paths. 2.12 2.13 // The fast path produces a tri-state answer: yes / no / maybe-slow.
3.1 --- a/src/cpu/mips/vm/interp_masm_mips_64.cpp Mon May 30 02:01:38 2016 -0400 3.2 +++ b/src/cpu/mips/vm/interp_masm_mips_64.cpp Mon May 30 02:35:49 2016 -0400 3.3 @@ -384,7 +384,7 @@ 3.4 load_two_bytes_from_at_bcp(index, AT, bcp_offset); 3.5 } else if (index_size == sizeof(u4)) { 3.6 assert(EnableInvokeDynamic, "giant index used only for JSR 292"); 3.7 - lwu(index, BCP, bcp_offset); 3.8 + get_4_byte_integer_at_bcp(index, AT, bcp_offset); 3.9 // Check if the secondary index definition is still ~x, otherwise 3.10 // we have to change the following assembler code to calculate the 3.11 // plain index.