# HG changeset patch # User aoqi # Date 1462907331 14400 # Node ID cf5765c81f87853c425b2df66d7ef6b8abe3e37e # Parent e26ad49b719414c091797674dfc1bf20485e9bfa Fixed changeset 108. diff -r e26ad49b7194 -r cf5765c81f87 src/cpu/mips/vm/stubGenerator_mips_64.cpp --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp Sun May 08 22:17:54 2016 -0400 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp Tue May 10 15:08:51 2016 -0400 @@ -550,6 +550,14 @@ __ bgtz(AT, no_overlap_target); __ delayed()->nop(); + // 2016/05/10 aoqi: If A0 = 0xf... and A1 = 0x0..., then go to no_overlap_target + Label L; + __ bgez(A0, L); + __ delayed()->nop(); + __ bgtz(A1, no_overlap_target); + __ delayed()->nop(); + __ bind(L); + } // @@ -638,6 +646,7 @@ __ blez(AT, l_4); __ delayed()->nop(); if (!aligned) { + //TODO: copy 8 bytes at one time // 2016/5/8 Jin: only when src and dest has the same alignment can we do lw/sw */ __ andi(AT, T3, 3); __ andi(T9, T0, 3); @@ -706,9 +715,9 @@ // name - stub name string // // Inputs: - // c_rarg0 - source array address - // c_rarg1 - destination array address - // c_rarg2 - element count, treated as ssize_t, can be zero + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero // // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, // we let the hardware handle it. The one to eight bytes within words, @@ -716,95 +725,128 @@ // and stored atomically. // address generate_conjoint_byte_copy(bool aligned, const char *name) { - Label l_1, l_2, l_3, l_4, l_5; - Label l_unaligned, l_aligned; - StubCodeMark mark(this, "StubRoutines", name); - __ align(CodeEntryAlignment); - address start = __ pc(); - address nooverlap_target = aligned ? 
- StubRoutines::arrayof_jbyte_disjoint_arraycopy() : - StubRoutines::jbyte_disjoint_arraycopy(); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); - array_overlap_test(nooverlap_target, 0); + Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; + Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; - __ push(T3); - __ push(T0); - __ push(T1); - __ push(T8); + address nooverlap_target = aligned ? + StubRoutines::arrayof_jbyte_disjoint_arraycopy() : + StubRoutines::jbyte_disjoint_arraycopy(); - // copy from high to low - __ move(T3, A0); - __ move(T0, A1); - __ move(T1, A2); - __ dadd(T3, T3, T1); - __ dadd(T0, T0, T1); + array_overlap_test(nooverlap_target, 0); - // 2016/5/8 Jin: copy starting unalinged bytes - __ bind(l_unaligned); - __ beq(T1, R0, l_5); - __ delayed()->nop(); + const Register from = A0; // source array address + const Register to = A1; // destination array address + const Register count = A2; // elements count + const Register end_from = T3; // source array end address + const Register end_to = T0; // destination array end address + const Register end_count = T1; // count of bytes not yet copied - __ andi(AT, T3, 3); - __ beq(AT, R0, l_aligned); - __ delayed()->nop(); - __ lb(AT, T3, -1); - __ sb(AT, T0, -1); - __ daddi(AT, T1, -1); - __ daddi(AT, T3, -1); - __ daddi(AT, T0, -1); - __ b(l_unaligned); - __ delayed()->nop(); - - // now T0, T3 point to 4-byte aligned high-ends - // T1 contains byte count that is not copied. 
- __ bind(l_aligned); + __ push(end_from); + __ push(end_to); + __ push(end_count); + __ push(T8); - __ move(T8, T1); - __ daddi(AT, T1, -3); - __ blez(AT, l_3); - __ delayed()->nop(); + // copy from high to low + __ move(end_count, count); + __ dadd(end_from, from, end_count); + __ dadd(end_to, to, end_count); - __ andi(T8, T8, 3); - __ lea(T3, Address(T3, -4)); - __ lea(T0, Address(T0, -4)); + // 2016/05/08 aoqi: If end_from and end_to have different alignment, unaligned copy is performed. + __ andi(AT, end_from, 3); + __ andi(T8, end_to, 3); + __ bne(AT, T8, l_copy_byte); + __ delayed()->nop(); - __ dsrl(T1, T1, 2); - __ align(16); - __ bind(l_1); - __ lw(AT, T3, 0); - __ sw(AT, T0, 0); - __ addi(T3, T3, -4); - __ addi(T0, T0, -4); - __ addi(T1, T1, -1); - __ bne(T1, R0, l_1); - __ delayed()->nop(); - __ b(l_3); - __ delayed()->nop(); - // copy dwords aligned or not with repeat move - __ bind(l_2); - __ bind(l_3); - // copy suffix (0-3 bytes) - __ andi(T8, T8, 3); - __ beq(T8, R0, l_5); - __ delayed()->nop(); - __ addi(T3, T3, 3); - __ addi(T0, T0, 3); - __ bind(l_4); - __ lb(AT, T3, 0); - __ sb(AT, T0, 0); - __ addi(T3, T3, -1); - __ addi(T0, T0, -1); - __ addi(T8, T8, -1); - __ bne(T8, R0, l_4); - __ delayed()->nop(); - __ bind(l_5); - __ pop(T8); - __ pop(T1); - __ pop(T0); - __ pop(T3); - __ jr(RA); - __ delayed()->nop(); - return start; + // First deal with the unaligned data at the top. 
+ __ bind(l_unaligned); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + + __ andi(AT, end_from, 3); + __ bne(AT, R0, l_from_unaligned); + __ delayed()->nop(); + + __ andi(AT, end_to, 3); + __ beq(AT, R0, l_4_bytes_aligned); + __ delayed()->nop(); + + __ bind(l_from_unaligned); + __ lb(AT, end_from, -1); + __ sb(AT, end_to, -1); + __ daddi(end_from, end_from, -1); + __ daddi(end_to, end_to, -1); + __ daddi(end_count, end_count, -1); + __ b(l_unaligned); + __ delayed()->nop(); + + // now end_to, end_from point to 4-byte aligned high-ends + // end_count contains byte count that is not copied. + // copy 4 bytes at a time + __ bind(l_4_bytes_aligned); + + __ move(T8, end_count); + __ daddi(AT, end_count, -3); + __ blez(AT, l_copy_suffix); + __ delayed()->nop(); + + //__ andi(T8, T8, 3); + __ lea(end_from, Address(end_from, -4)); + __ lea(end_to, Address(end_to, -4)); + + __ dsrl(end_count, end_count, 2); + __ align(16); + __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes + __ lw(AT, end_from, 0); + __ sw(AT, end_to, 0); + __ addi(end_from, end_from, -4); + __ addi(end_to, end_to, -4); + __ addi(end_count, end_count, -1); + __ bne(end_count, R0, l_copy_4_bytes_loop); + __ delayed()->nop(); + + __ b(l_copy_suffix); + __ delayed()->nop(); + // copy dwords aligned or not with repeat move + // l_copy_suffix + // copy suffix (0-3 bytes) + __ bind(l_copy_suffix); + __ andi(T8, T8, 3); + __ beq(T8, R0, l_exit); + __ delayed()->nop(); + __ addi(end_from, end_from, 3); + __ addi(end_to, end_to, 3); + __ bind(l_copy_suffix_loop); + __ lb(AT, end_from, 0); + __ sb(AT, end_to, 0); + __ addi(end_from, end_from, -1); + __ addi(end_to, end_to, -1); + __ addi(T8, T8, -1); + __ bne(T8, R0, l_copy_suffix_loop); + __ delayed()->nop(); + + __ bind(l_copy_byte); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + __ lb(AT, end_from, -1); + __ sb(AT, end_to, -1); + __ daddi(end_from, end_from, -1); + __ daddi(end_to, end_to, -1); + __ daddi(end_count, end_count, -1); + __ 
b(l_copy_byte); + __ delayed()->nop(); + + __ bind(l_exit); + __ pop(T8); + __ pop(end_count); + __ pop(end_to); + __ pop(end_from); + __ jr(RA); + __ delayed()->nop(); + return start; } // Arguments: