# HG changeset patch
# User Jin
# Date 1462760274 14400
# Node ID e26ad49b719414c091797674dfc1bf20485e9bfa
# Parent  fbd9470e188d645f7bfcd8225bc69c1725ae75e2
[C2] disjoint_byte_copy: fix unaligned LW/SW

/mnt/j2sdk-image/bin/java \
    -XX:+PrintCompilation \
    -XX:CompileCommand="compileonly,java/util/jar/Manifest\$FastInputStream.*" \
    -jar SPECjvm2008.jar -ikv -coe -ict -bt 1 \
    compress

It triggers 35000 unaligned instructions.

280 CALL_LEAF_NOFP,runtime jbyte_arraycopy

a0: (-122610748, .) fc127a6b pc: ffe9809004 <-- src is an odd address
a1: (-122610748, .) fc128100 pc: ffe980910c <-- dest is an even address
a2: (-122610748, .) 2b pc: ffe9809210
t3: (-122610748, .) fc127a6c pc: ffe98093a4 <-- the stub tries to align src to a 4-byte boundary,
t0: (-122610748, .) fc128101 pc: ffe98094a8 <-- but then dest is misaligned, so sw raises an unaligned access
t3: (-122610747, *) fc127a70 pc: ffe98093a4
t0: (-122610747, .) fc128105 pc: ffe98094a8 <-- every later access to dest is unaligned as well
t3: (-122610746, *) fc127a74 pc: ffe98093a4
t0: (-122610746, .) fc128109 pc: ffe98094a8

Effect:
 * 0 unaligned instructions.

Review note: the byte-wise pre-alignment loop in generate_conjoint_byte_copy
must decrement T1 (count), T3 (src) and T0 (dest) in place. Writing the three
results to AT leaves the loop state unchanged, so the loop never terminates
whenever the source high end is not 4-byte aligned.

diff -r fbd9470e188d -r e26ad49b7194 src/cpu/mips/vm/stubGenerator_mips_64.cpp
--- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Sun May 08 17:53:51 2016 -0400
+++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Sun May 08 22:17:54 2016 -0400
@@ -638,6 +638,12 @@
     __ blez(AT, l_4);
     __ delayed()->nop();
     if (!aligned) {
+      // 2016/5/8 Jin: lw/sw are only usable when src and dest share the same 4-byte alignment
+      __ andi(AT, T3, 3);
+      __ andi(T9, T0, 3);
+      __ bne(AT, T9, l_5);
+      __ delayed()->nop();
+
       // align source address at dword address boundary
       __ move(T1, 4);
       __ sub(T1, T1, T3);
@@ -711,6 +717,7 @@
   //
   address generate_conjoint_byte_copy(bool aligned, const char *name) {
     Label l_1, l_2, l_3, l_4, l_5;
+    Label l_unaligned, l_aligned;
     StubCodeMark mark(this, "StubRoutines", name);
     __ align(CodeEntryAlignment);
     address start = __ pc();
@@ -725,19 +732,42 @@
     __ push(T1);
     __ push(T8);
 
-    // copy from high to low
     __ move(T3, A0);
     __ move(T0, A1);
     __ move(T1, A2);
-    __ dadd(AT, T3, T1);
-    __ lea(T3, Address(AT, -4));
-    __ dadd(AT, T0, T1);
-    __ lea(T0, Address(AT, -4));
+    __ dadd(T3, T3, T1);
+    __ dadd(T0, T0, T1);
+
+    // 2016/5/8 Jin: copy bytes from the high end one at a time until the src high end (T3) is 4-byte aligned
+    __ bind(l_unaligned);
+    __ beq(T1, R0, l_5);
+    __ delayed()->nop();
+
+    __ andi(AT, T3, 3);
+    __ beq(AT, R0, l_aligned);
+    __ delayed()->nop();
+    __ lb(AT, T3, -1);
+    __ sb(AT, T0, -1);
+    __ daddi(T1, T1, -1);
+    __ daddi(T3, T3, -1);
+    __ daddi(T0, T0, -1);
+    __ b(l_unaligned);
+    __ delayed()->nop();
+
+    // now T3 (src high end) is 4-byte aligned; T0 has moved down by the same amount
+    // T1 contains the count of bytes that have not been copied yet.
+    __ bind(l_aligned);
+
 
     __ move(T8, T1);
     __ daddi(AT, T1, -3);
     __ blez(AT, l_3);
     __ delayed()->nop();
+
+    __ andi(T8, T8, 3);
+    __ lea(T3, Address(T3, -4));
+    __ lea(T0, Address(T0, -4));
+
     __ dsrl(T1, T1, 2);
     __ align(16);
     __ bind(l_1);