Sun, 08 May 2016 22:17:54 -0400
[C2] disjoint_byte_copy: fix unaligned LW/SW
/mnt/j2sdk-image/bin/java \
-XX:+PrintCompilation \
-XX:CompileCommand="compileonly,java/util/jar/Manifest\$FastInputStream.*" \
-jar SPECjvm2008.jar -ikv -coe -ict -bt 1 \
compress
Running the command above triggers 35000 unaligned memory-access instructions.
280 CALL_LEAF_NOFP,runtime jbyte_arraycopy
a0: (-122610748, .) fc127a6b pc: ffe9809004 <-- src is an odd address
a1: (-122610748, .) fc128100 pc: ffe980910c <-- dest is an even address
a2: (-122610748, .) 2b pc: ffe9809210
t3: (-122610748, .) fc127a6c pc: ffe98093a4 <-- the stub tries to align src to a 4-byte boundary,
t0: (-122610748, .) fc128101 pc: ffe98094a8 <-- which leaves dest misaligned, so the sw causes an unaligned access
t3: (-122610747, *) fc127a70 pc: ffe98093a4
t0: (-122610747, .) fc128105 pc: ffe98094a8 <-- every later access to dest is unaligned as well
t3: (-122610746, *) fc127a74 pc: ffe98093a4
t0: (-122610746, .) fc128109 pc: ffe98094a8
Effect:
* 0 unaligned instructions.
src/cpu/mips/vm/stubGenerator_mips_64.cpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp Sun May 08 17:53:51 2016 -0400 1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp Sun May 08 22:17:54 2016 -0400 1.3 @@ -638,6 +638,12 @@ 1.4 __ blez(AT, l_4); 1.5 __ delayed()->nop(); 1.6 if (!aligned) { 1.7 + // 2016/5/8 Jin: only when src and dest has the same alignment can we do lw/sw */ 1.8 + __ andi(AT, T3, 3); 1.9 + __ andi(T9, T0, 3); 1.10 + __ bne(AT, T9, l_5); 1.11 + __ delayed()->nop(); 1.12 + 1.13 // align source address at dword address boundary 1.14 __ move(T1, 4); 1.15 __ sub(T1, T1, T3); 1.16 @@ -711,6 +717,7 @@ 1.17 // 1.18 address generate_conjoint_byte_copy(bool aligned, const char *name) { 1.19 Label l_1, l_2, l_3, l_4, l_5; 1.20 + Label l_unaligned, l_aligned; 1.21 StubCodeMark mark(this, "StubRoutines", name); 1.22 __ align(CodeEntryAlignment); 1.23 address start = __ pc(); 1.24 @@ -725,19 +732,42 @@ 1.25 __ push(T1); 1.26 __ push(T8); 1.27 1.28 - 1.29 // copy from high to low 1.30 __ move(T3, A0); 1.31 __ move(T0, A1); 1.32 __ move(T1, A2); 1.33 - __ dadd(AT, T3, T1); 1.34 - __ lea(T3, Address(AT, -4)); 1.35 - __ dadd(AT, T0, T1); 1.36 - __ lea(T0, Address(AT, -4)); 1.37 + __ dadd(T3, T3, T1); 1.38 + __ dadd(T0, T0, T1); 1.39 + 1.40 + // 2016/5/8 Jin: copy starting unalinged bytes 1.41 + __ bind(l_unaligned); 1.42 + __ beq(T1, R0, l_5); 1.43 + __ delayed()->nop(); 1.44 + 1.45 + __ andi(AT, T3, 3); 1.46 + __ beq(AT, R0, l_aligned); 1.47 + __ delayed()->nop(); 1.48 + __ lb(AT, T3, -1); 1.49 + __ sb(AT, T0, -1); 1.50 + __ daddi(AT, T1, -1); 1.51 + __ daddi(AT, T3, -1); 1.52 + __ daddi(AT, T0, -1); 1.53 + __ b(l_unaligned); 1.54 + __ delayed()->nop(); 1.55 + 1.56 + // now T0, T3 point to 4-byte aligned high-ends 1.57 + // T1 contains byte count that is not copied. 
1.58 + __ bind(l_aligned); 1.59 + 1.60 __ move(T8, T1); 1.61 __ daddi(AT, T1, -3); 1.62 __ blez(AT, l_3); 1.63 __ delayed()->nop(); 1.64 + 1.65 + __ andi(T8, T8, 3); 1.66 + __ lea(T3, Address(T3, -4)); 1.67 + __ lea(T0, Address(T0, -4)); 1.68 + 1.69 __ dsrl(T1, T1, 2); 1.70 __ align(16); 1.71 __ bind(l_1);