Tue, 20 Sep 2016 10:40:42 +0800
#4537 Rewrite generate_disjoint_int_oop_copy
Eliminated unaligned accesses and optimized the copy algorithm. changeset 114
The unaligned-access count does not increase, and the change has passed the SPECjvm2008 test.
src/cpu/mips/vm/stubGenerator_mips_64.cpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp Tue Sep 20 11:48:21 2016 +0800 1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp Tue Sep 20 10:40:42 2016 +0800 1.3 @@ -1276,85 +1276,92 @@ 1.4 // used by generate_conjoint_int_oop_copy(). 1.5 // 1.6 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) { 1.7 - Label l_2, l_3, l_4, l_stchk; 1.8 - StubCodeMark mark(this, "StubRoutines", name); 1.9 - __ align(CodeEntryAlignment); 1.10 - address start = __ pc(); 1.11 - /* 1.12 - __ pushl(esi); 1.13 - __ movl(ecx, Address(esp, 4+12)); // count 1.14 - __ pushl(edi); 1.15 - __ movl(esi, Address(esp, 8+ 4)); // from 1.16 - __ movl(edi, Address(esp, 8+ 8)); // to 1.17 - */ 1.18 - __ push(T3); 1.19 - __ push(T0); 1.20 - __ push(T1); 1.21 - __ push(T8); 1.22 - __ move(T1, A2); 1.23 - __ move(T3, A0); 1.24 - __ move(T0, A1); 1.25 - 1.26 - // __ cmpl(ecx, 32); 1.27 - // __ jcc(Assembler::belowEqual, l_2); // <= 32 dwords 1.28 - // __ rep_movl(); 1.29 - __ b(l_2); 1.30 - __ delayed()->nop(); 1.31 - if (is_oop) { 1.32 - // __ jmp(l_stchk); 1.33 - __ b(l_stchk); 1.34 - __ delayed()->nop(); 1.35 - } 1.36 - // __ popl(edi); 1.37 - // __ popl(esi); 1.38 - // __ ret(0); 1.39 - __ pop(T8); 1.40 - __ pop(T1); 1.41 - __ pop(T0); 1.42 - __ pop(T3); 1.43 - __ jr(RA); 1.44 - __ delayed()->nop(); 1.45 - 1.46 - __ bind(l_2); 1.47 - // __ subl(edi, esi); 1.48 - // __ testl(ecx, ecx); 1.49 - // __ jcc(Assembler::zero, l_4); 1.50 - __ beq(T1, R0, l_4); 1.51 - __ delayed()->nop(); 1.52 - __ align(16); 1.53 - __ bind(l_3); 1.54 - //__ movl(edx, Address(esi)); 1.55 - __ lw(AT, T3, 0); 1.56 - // __ movl(Address(edi, esi, Address::times_1), edx); 1.57 - __ sw(AT, T0, 0); 1.58 - // __ addl(esi, 4); 1.59 - __ addi(T3, T3, 4); 1.60 - __ addi(T0, T0, 4); 1.61 - // __ decl(ecx); 1.62 - __ addi(T1, T1, -1); 1.63 - // __ jcc(Assembler::notEqual, l_3); 1.64 - __ bne(T1, R0, l_3); 1.65 - __ delayed()->nop(); 1.66 - if (is_oop) { 1.67 - __ bind(l_stchk); 1.68 - // __ movl(edi, 
Address(esp, 8+ 8)); 1.69 - // __ movl(ecx, Address(esp, 8+ 12)); 1.70 - __ move(T0, A1); 1.71 - __ move(T1, A2); 1.72 - array_store_check(); 1.73 - } 1.74 - __ bind(l_4); 1.75 - // __ popl(edi); 1.76 - // __ popl(esi); 1.77 - // __ ret(0); 1.78 - __ pop(T8); 1.79 - __ pop(T1); 1.80 - __ pop(T0); 1.81 - __ pop(T3); 1.82 - __ jr(RA); 1.83 - __ delayed()->nop(); 1.84 - return start; 1.85 - } 1.86 + Label l_2, l_3, l_4, l_5, l_6, l_7, l_stchk; 1.87 + StubCodeMark mark(this, "StubRoutines", name); 1.88 + __ align(CodeEntryAlignment); 1.89 + address start = __ pc(); 1.90 + __ push(T3); 1.91 + __ push(T0); 1.92 + __ push(T1); 1.93 + __ push(T8); 1.94 + __ move(T1, A2); 1.95 + __ move(T3, A0); 1.96 + __ move(T0, A1); 1.97 + 1.98 + __ b(l_2); 1.99 + __ delayed()->nop(); 1.100 + if (is_oop) { 1.101 + __ b(l_stchk); 1.102 + __ delayed()->nop(); 1.103 + } 1.104 + __ pop(T8); 1.105 + __ pop(T1); 1.106 + __ pop(T0); 1.107 + __ pop(T3); 1.108 + __ jr(RA); 1.109 + __ delayed()->nop(); 1.110 + 1.111 + __ bind(l_2); 1.112 + if(!aligned) { 1.113 + __ xorr(AT, T3, T0); 1.114 + __ andi(AT, AT, 7); 1.115 + __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time 1.116 + __ delayed()->nop(); 1.117 + 1.118 + __ andi(AT, T3, 7); 1.119 + __ beq(AT, R0, l_6); //copy 2 elements each time 1.120 + __ delayed()->nop(); 1.121 + 1.122 + __ lw(AT, T3, 0); 1.123 + __ daddi(T1, T1, -1); 1.124 + __ sw(AT, T0, 0); 1.125 + __ daddi(T3, T3, 4); 1.126 + __ daddi(T0, T0, 4); 1.127 + } 1.128 + __ bind(l_6); 1.129 + { 1.130 + __ daddi(AT, T1, -1); 1.131 + __ blez(AT, l_5); 1.132 + __ delayed()->nop(); 1.133 + 1.134 + __ bind(l_7); 1.135 + __ ld(AT, T3, 0); 1.136 + __ sd(AT, T0, 0); 1.137 + __ daddi(T3, T3, 8); 1.138 + __ daddi(T0, T0, 8); 1.139 + __ daddi(T1, T1, -2); 1.140 + __ daddi(AT, T1, -2); 1.141 + __ bgez(AT, l_7); 1.142 + __ delayed()->nop(); 1.143 + } 1.144 + 1.145 + __ bind(l_5); 1.146 + __ beq(T1, R0, l_4); 1.147 + __ delayed()->nop(); 1.148 + __ align(16); 1.149 + __ 
bind(l_3); 1.150 + __ lw(AT, T3, 0); 1.151 + __ sw(AT, T0, 0); 1.152 + __ addi(T3, T3, 4); 1.153 + __ addi(T0, T0, 4); 1.154 + __ addi(T1, T1, -1); 1.155 + __ bne(T1, R0, l_3); 1.156 + __ delayed()->nop(); 1.157 + if (is_oop) { 1.158 + __ bind(l_stchk); 1.159 + __ move(T0, A1); 1.160 + __ move(T1, A2); 1.161 + array_store_check(); 1.162 + } 1.163 + __ bind(l_4); 1.164 + __ pop(T8); 1.165 + __ pop(T1); 1.166 + __ pop(T0); 1.167 + __ pop(T3); 1.168 + __ jr(RA); 1.169 + __ delayed()->nop(); 1.170 + return start; 1.171 + } 1.172 1.173 // Arguments: 1.174 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary