[Interpreter] Fixed unaligned load in InterpreterMacroAssembler::get_cache_index_at_bcp

Mon, 30 May 2016 02:35:49 -0400

author
aoqi
date
Mon, 30 May 2016 02:35:49 -0400
changeset 14
92759d406e78
parent 13
bc227c49eaae
child 15
b0af19080e55

[Interpreter] Fixed unaligned load in InterpreterMacroAssembler::get_cache_index_at_bcp
java -jar SPECjvm2008.jar -ikv -ict -bt 1 startup.helloworld
Effect:
Eliminated 24 unaligned loads in startup.helloworld and many other benchmarks.

src/cpu/mips/vm/assembler_mips.cpp file | annotate | diff | comparison | revisions
src/cpu/mips/vm/assembler_mips.hpp file | annotate | diff | comparison | revisions
src/cpu/mips/vm/interp_masm_mips_64.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/assembler_mips.cpp	Mon May 30 02:01:38 2016 -0400
     1.2 +++ b/src/cpu/mips/vm/assembler_mips.cpp	Mon May 30 02:35:49 2016 -0400
     1.3 @@ -3303,6 +3303,39 @@
     1.4  	else
     1.5  		sh(reg, BCP, offset);
     1.6  }
     1.7 +
     1.8 +void MacroAssembler::get_4_byte_integer_at_bcp(
     1.9 +                                  Register   reg,
    1.10 +                                  Register   tmp,
    1.11 +                                  int        offset) {
    1.12 +  assert(reg != tmp, "need separate temp register");
    1.13 +  if (offset & 3) { // Offset unaligned?
    1.14 +    lbu(reg, BCP, offset+3);
    1.15 +    lbu(tmp, BCP, offset+2);
    1.16 +#ifdef _LP64
    1.17 +    dsll(reg, reg, 8);
    1.18 +    daddu(reg, tmp, reg);
    1.19 +    lbu(tmp, BCP, offset+1);
    1.20 +    dsll(reg, reg, 8);
    1.21 +    daddu(reg, tmp, reg);
    1.22 +    lbu(tmp, BCP, offset);
    1.23 +    dsll(reg, reg, 8);
    1.24 +    daddu(reg, tmp, reg);
    1.25 +#else
    1.26 +    sll(reg, reg, 8);
    1.27 +    addu(reg, tmp, reg);
    1.28 +    lbu(tmp, BCP, offset+1);
    1.29 +    sll(reg, reg, 8);
    1.30 +    addu(reg, tmp, reg);
    1.31 +    lbu(tmp, BCP, offset);
    1.32 +    sll(reg, reg, 8);
    1.33 +    addu(reg, tmp, reg);
    1.34 +#endif
    1.35 +  } else {
    1.36 +    lwu(reg, BCP, offset);
    1.37 +  }
    1.38 +}
    1.39 +
    1.40  //for UseCompressedOops Option
    1.41  void MacroAssembler::load_klass(Register dst, Register src) {
    1.42  #ifdef _LP64
     2.1 --- a/src/cpu/mips/vm/assembler_mips.hpp	Mon May 30 02:01:38 2016 -0400
     2.2 +++ b/src/cpu/mips/vm/assembler_mips.hpp	Mon May 30 02:35:49 2016 -0400
     2.3 @@ -1881,8 +1881,10 @@
     2.4  	void pushad();
     2.5  	void popad();
     2.6  
     2.7 +  // load_two_bytes_from_at_bcp should be named get_2_byte_integer_at_bcp and be defined in InterpreterMacroAssembler.
     2.8  	void  load_two_bytes_from_at_bcp(Register reg, Register tmp, int offset);
     2.9  	void  store_two_byts_to_at_bcp(Register reg, Register tmp, int offset);
    2.10 +  void  get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset);
    2.11    // Test sub_klass against super_klass, with fast and slow paths.
    2.12  
    2.13    // The fast path produces a tri-state answer: yes / no / maybe-slow.
     3.1 --- a/src/cpu/mips/vm/interp_masm_mips_64.cpp	Mon May 30 02:01:38 2016 -0400
     3.2 +++ b/src/cpu/mips/vm/interp_masm_mips_64.cpp	Mon May 30 02:35:49 2016 -0400
     3.3 @@ -384,7 +384,7 @@
     3.4      load_two_bytes_from_at_bcp(index, AT, bcp_offset);
     3.5    } else if (index_size == sizeof(u4)) {
     3.6      assert(EnableInvokeDynamic, "giant index used only for JSR 292");
     3.7 -    lwu(index, BCP, bcp_offset);
     3.8 +    get_4_byte_integer_at_bcp(index, AT, bcp_offset);
     3.9      // Check if the secondary index definition is still ~x, otherwise
    3.10      // we have to change the following assembler code to calculate the
    3.11      // plain index.

mercurial