#7999 Fixed unaligned load in generate_conjoint_short_copy().

Sat, 29 Dec 2018 15:50:23 +0800

author
wangxue
date
Sat, 29 Dec 2018 15:50:23 +0800
changeset 9452
2fac4ec98618
parent 9451
24bab1ef457b
child 9453
dca904de5de5

#7999 Fixed unaligned load in generate_conjoint_short_copy().
Reviewed-by: aoqi

src/cpu/mips/vm/stubGenerator_mips_64.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Thu Dec 27 12:42:35 2018 +0800
     1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Sat Dec 29 15:50:23 2018 +0800
     1.3 @@ -1336,63 +1336,99 @@
     1.4    // and stored atomically.
     1.5    //
     1.6    address generate_conjoint_short_copy(bool aligned, const char *name) {
     1.7 -    Label l_1, l_2, l_3, l_4, l_5;
     1.8      StubCodeMark mark(this, "StubRoutines", name);
     1.9      __ align(CodeEntryAlignment);
    1.10      address start = __ pc();
    1.11 +
    1.12 +    Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned;
    1.13 +
    1.14      address nooverlap_target = aligned ?
    1.15              StubRoutines::arrayof_jshort_disjoint_arraycopy() :
    1.16              StubRoutines::jshort_disjoint_arraycopy();
    1.17  
    1.18      array_overlap_test(nooverlap_target, 1);
    1.19  
    1.20 -    __ push(T3);
    1.21 -    __ push(T0);
    1.22 -    __ push(T1);
    1.23 +    const Register from      = A0;   // source array address
    1.24 +    const Register to        = A1;   // destination array address
    1.25 +    const Register count     = A2;   // elements count
    1.26 +    const Register end_from  = T3;   // source array end address
    1.27 +    const Register end_to    = T0;   // destination array end address
    1.28 +    const Register end_count = T1;   // count of elements remaining to be copied
    1.29 +
    1.30 +    __ push(end_from);
    1.31 +    __ push(end_to);
    1.32 +    __ push(end_count);
    1.33      __ push(T8);
    1.34  
    1.35 -    __ move(T1, A2);
    1.36 -    __ move(T3, A0);
    1.37 -    __ move(T0, A1);
    1.38 +    // copy from high to low
    1.39 +    __ move(end_count, count);
    1.40 +    __ sll(AT, end_count, Address::times_2);
    1.41 +    __ dadd(end_from, from, AT);
    1.42 +    __ dadd(end_to, to, AT);
    1.43  
    1.44 +    // If end_from and end_to have different alignment, unaligned copy is performed.
    1.45 +    __ andi(AT, end_from, 3);
    1.46 +    __ andi(T8, end_to, 3);
    1.47 +    __ bne(AT, T8, l_copy_short);
    1.48 +    __ delayed()->nop();
    1.49  
    1.50 -    // copy dwords from high to low
    1.51 -    __ sll(AT, T1, Address::times_2);
    1.52 -    __ add(AT, T3, AT);
    1.53 -    __ daddiu(T3, AT, -4);
    1.54 -    __ sll(AT,T1 , Address::times_2);
    1.55 -    __ add(AT, T0, AT);
    1.56 -    __ daddiu(T0, AT, -4);
    1.57 -    __ move(T8, T1);
    1.58 -    __ bind(l_1);
    1.59 -    __ sra(T1,T1, 1);
    1.60 -    __ beq(T1, R0, l_4);
    1.61 +    // First deal with the unaligned data at the top.
    1.62 +    __ bind(l_unaligned);
    1.63 +    __ beq(end_count, R0, l_exit);
    1.64      __ delayed()->nop();
    1.65 -    __ align(16);
    1.66 -    __ bind(l_2);
    1.67 -    __ lw(AT, T3, 0);
    1.68 -    __ sw(AT, T0, 0);
    1.69 -    __ addi(T3, T3, -4);
    1.70 -    __ addi(T0, T0, -4);
    1.71 -    __ addi(T1, T1, -1);
    1.72 -    __ bne(T1, R0, l_2);
    1.73 +
    1.74 +    __ andi(AT, end_from, 3);
    1.75 +    __ bne(AT, R0, l_from_unaligned);
    1.76      __ delayed()->nop();
    1.77 -    __ b(l_4);
    1.78 +
    1.79 +    __ andi(AT, end_to, 3);
    1.80 +    __ beq(AT, R0, l_4_bytes_aligned);
    1.81      __ delayed()->nop();
    1.82 -    // copy dwords with repeat move
    1.83 -    __ bind(l_3);
    1.84 -    __ bind(l_4);
    1.85 -    __ andi(T8, T8, 1);              // suffix count
    1.86 -    __ beq(T8, R0, l_5 );
    1.87 +
    1.88 +    // Copy 1 element if necessary to align to 4 bytes.
    1.89 +    __ bind(l_from_unaligned);
    1.90 +    __ lhu(AT, end_from, -2);
    1.91 +    __ sh(AT, end_to, -2);
    1.92 +    __ daddi(end_from, end_from, -2);
    1.93 +    __ daddi(end_to, end_to, -2);
    1.94 +    __ daddi(end_count, end_count, -1);
    1.95 +    __ b(l_unaligned);
    1.96      __ delayed()->nop();
    1.97 -    // copy suffix
    1.98 -    __ lh(AT, T3, 2);
    1.99 -    __ sh(AT, T0, 2);
   1.100 -    __ bind(l_5);
   1.101 +
   1.102 +    // now end_to, end_from point to 4-byte aligned high-ends
   1.103 +    //     end_count contains the count of elements that are not yet copied.
   1.104 +    // copy 4 bytes at a time
   1.105 +    __ bind(l_4_bytes_aligned);
   1.106 +
   1.107 +    __ daddi(AT, end_count, -1);
   1.108 +    __ blez(AT, l_copy_short);
   1.109 +    __ delayed()->nop();
   1.110 +
   1.111 +    __ lw(AT, end_from, -4);
   1.112 +    __ sw(AT, end_to, -4);
   1.113 +    __ addi(end_from, end_from, -4);
   1.114 +    __ addi(end_to, end_to, -4);
   1.115 +    __ addi(end_count, end_count, -2);
   1.116 +    __ b(l_4_bytes_aligned);
   1.117 +    __ delayed()->nop();
   1.118 +
   1.119 +    // copy 1 element at a time
   1.120 +    __ bind(l_copy_short);
   1.121 +    __ beq(end_count, R0, l_exit);
   1.122 +    __ delayed()->nop();
   1.123 +    __ lhu(AT, end_from, -2);
   1.124 +    __ sh(AT, end_to, -2);
   1.125 +    __ daddi(end_from, end_from, -2);
   1.126 +    __ daddi(end_to, end_to, -2);
   1.127 +    __ daddi(end_count, end_count, -1);
   1.128 +    __ b(l_copy_short);
   1.129 +    __ delayed()->nop();
   1.130 +
   1.131 +    __ bind(l_exit);
   1.132      __ pop(T8);
   1.133 -    __ pop(T1);
   1.134 -    __ pop(T0);
   1.135 -    __ pop(T3);
   1.136 +    __ pop(end_count);
   1.137 +    __ pop(end_to);
   1.138 +    __ pop(end_from);
   1.139      __ jr(RA);
   1.140      __ delayed()->nop();
   1.141      return start;

mercurial