Sat, 29 Dec 2018 15:50:23 +0800
#7999 Fixed unaligned load in generate_conjoint_short_copy().
Reviewed-by: aoqi
src/cpu/mips/vm/stubGenerator_mips_64.cpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp Thu Dec 27 12:42:35 2018 +0800 1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp Sat Dec 29 15:50:23 2018 +0800 1.3 @@ -1336,63 +1336,99 @@ 1.4 // and stored atomically. 1.5 // 1.6 address generate_conjoint_short_copy(bool aligned, const char *name) { 1.7 - Label l_1, l_2, l_3, l_4, l_5; 1.8 StubCodeMark mark(this, "StubRoutines", name); 1.9 __ align(CodeEntryAlignment); 1.10 address start = __ pc(); 1.11 + 1.12 + Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; 1.13 + 1.14 address nooverlap_target = aligned ? 1.15 StubRoutines::arrayof_jshort_disjoint_arraycopy() : 1.16 StubRoutines::jshort_disjoint_arraycopy(); 1.17 1.18 array_overlap_test(nooverlap_target, 1); 1.19 1.20 - __ push(T3); 1.21 - __ push(T0); 1.22 - __ push(T1); 1.23 + const Register from = A0; // source array address 1.24 + const Register to = A1; // destination array address 1.25 + const Register count = A2; // elements count 1.26 + const Register end_from = T3; // source array end address 1.27 + const Register end_to = T0; // destination array end address 1.28 + const Register end_count = T1; // number of elements remaining to be copied 1.29 + 1.30 + __ push(end_from); 1.31 + __ push(end_to); 1.32 + __ push(end_count); 1.33 __ push(T8); 1.34 1.35 - __ move(T1, A2); 1.36 - __ move(T3, A0); 1.37 - __ move(T0, A1); 1.38 + // copy from high to low 1.39 + __ move(end_count, count); 1.40 + __ sll(AT, end_count, Address::times_2); 1.41 + __ dadd(end_from, from, AT); 1.42 + __ dadd(end_to, to, AT); 1.43 1.44 + // If end_from and end_to have different alignments, an unaligned copy is performed. 
1.45 + __ andi(AT, end_from, 3); 1.46 + __ andi(T8, end_to, 3); 1.47 + __ bne(AT, T8, l_copy_short); 1.48 + __ delayed()->nop(); 1.49 1.50 - // copy dwords from high to low 1.51 - __ sll(AT, T1, Address::times_2); 1.52 - __ add(AT, T3, AT); 1.53 - __ daddiu(T3, AT, -4); 1.54 - __ sll(AT,T1 , Address::times_2); 1.55 - __ add(AT, T0, AT); 1.56 - __ daddiu(T0, AT, -4); 1.57 - __ move(T8, T1); 1.58 - __ bind(l_1); 1.59 - __ sra(T1,T1, 1); 1.60 - __ beq(T1, R0, l_4); 1.61 + // First deal with the unaligned data at the top. 1.62 + __ bind(l_unaligned); 1.63 + __ beq(end_count, R0, l_exit); 1.64 __ delayed()->nop(); 1.65 - __ align(16); 1.66 - __ bind(l_2); 1.67 - __ lw(AT, T3, 0); 1.68 - __ sw(AT, T0, 0); 1.69 - __ addi(T3, T3, -4); 1.70 - __ addi(T0, T0, -4); 1.71 - __ addi(T1, T1, -1); 1.72 - __ bne(T1, R0, l_2); 1.73 + 1.74 + __ andi(AT, end_from, 3); 1.75 + __ bne(AT, R0, l_from_unaligned); 1.76 __ delayed()->nop(); 1.77 - __ b(l_4); 1.78 + 1.79 + __ andi(AT, end_to, 3); 1.80 + __ beq(AT, R0, l_4_bytes_aligned); 1.81 __ delayed()->nop(); 1.82 - // copy dwords with repeat move 1.83 - __ bind(l_3); 1.84 - __ bind(l_4); 1.85 - __ andi(T8, T8, 1); // suffix count 1.86 - __ beq(T8, R0, l_5 ); 1.87 + 1.88 + // Copy 1 element if necessary to align to 4 bytes. 1.89 + __ bind(l_from_unaligned); 1.90 + __ lhu(AT, end_from, -2); 1.91 + __ sh(AT, end_to, -2); 1.92 + __ daddi(end_from, end_from, -2); 1.93 + __ daddi(end_to, end_to, -2); 1.94 + __ daddi(end_count, end_count, -1); 1.95 + __ b(l_unaligned); 1.96 __ delayed()->nop(); 1.97 - // copy suffix 1.98 - __ lh(AT, T3, 2); 1.99 - __ sh(AT, T0, 2); 1.100 - __ bind(l_5); 1.101 + 1.102 + // now end_to, end_from point to 4-byte aligned high-ends 1.103 + // end_count contains the number of elements not yet copied. 
1.104 + // copy 4 bytes at a time 1.105 + __ bind(l_4_bytes_aligned); 1.106 + 1.107 + __ daddi(AT, end_count, -1); 1.108 + __ blez(AT, l_copy_short); 1.109 + __ delayed()->nop(); 1.110 + 1.111 + __ lw(AT, end_from, -4); 1.112 + __ sw(AT, end_to, -4); 1.113 + __ addi(end_from, end_from, -4); 1.114 + __ addi(end_to, end_to, -4); 1.115 + __ addi(end_count, end_count, -2); 1.116 + __ b(l_4_bytes_aligned); 1.117 + __ delayed()->nop(); 1.118 + 1.119 + // copy 1 element at a time 1.120 + __ bind(l_copy_short); 1.121 + __ beq(end_count, R0, l_exit); 1.122 + __ delayed()->nop(); 1.123 + __ lhu(AT, end_from, -2); 1.124 + __ sh(AT, end_to, -2); 1.125 + __ daddi(end_from, end_from, -2); 1.126 + __ daddi(end_to, end_to, -2); 1.127 + __ daddi(end_count, end_count, -1); 1.128 + __ b(l_copy_short); 1.129 + __ delayed()->nop(); 1.130 + 1.131 + __ bind(l_exit); 1.132 __ pop(T8); 1.133 - __ pop(T1); 1.134 - __ pop(T0); 1.135 - __ pop(T3); 1.136 + __ pop(end_count); 1.137 + __ pop(end_to); 1.138 + __ pop(end_from); 1.139 __ jr(RA); 1.140 __ delayed()->nop(); 1.141 return start;