#4537 Rewrite generate_disjoint_int_oop_copy

Tue, 20 Sep 2016 10:40:42 +0800

author
jiangshaofeng
date
Tue, 20 Sep 2016 10:40:42 +0800
changeset 118
bf4b1d1988a6
parent 117
89e1dfe996be
child 119
659662477125

#4537 Rewrite generate_disjoint_int_oop_copy
Eliminated unaligned accesses and optimized the copy algorithm (cf. changeset 114).
The unaligned access count does not increase, and the change has passed the SPECjvm2008 test.

src/cpu/mips/vm/stubGenerator_mips_64.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Tue Sep 20 11:48:21 2016 +0800
     1.2 +++ b/src/cpu/mips/vm/stubGenerator_mips_64.cpp	Tue Sep 20 10:40:42 2016 +0800
     1.3 @@ -1276,85 +1276,92 @@
     1.4    //   used by generate_conjoint_int_oop_copy().
     1.5    //
     1.6    address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name) {
     1.7 -		Label l_2, l_3, l_4, l_stchk;
     1.8 -		StubCodeMark mark(this, "StubRoutines", name);
     1.9 -		__ align(CodeEntryAlignment);
    1.10 -		address start = __ pc();
    1.11 -		/*
    1.12 -			 __ pushl(esi);
    1.13 -			 __ movl(ecx, Address(esp, 4+12));      // count
    1.14 -			 __ pushl(edi);
    1.15 -			 __ movl(esi, Address(esp, 8+ 4));      // from
    1.16 -			 __ movl(edi, Address(esp, 8+ 8));      // to
    1.17 -		 */
    1.18 -		__ push(T3);	
    1.19 -		__ push(T0);	
    1.20 -		__ push(T1);	
    1.21 -		__ push(T8);	
    1.22 -		__ move(T1, A2);  
    1.23 -		__ move(T3, A0); 
    1.24 -		__ move(T0, A1);
    1.25 -
    1.26 -		// __ cmpl(ecx, 32);
    1.27 -		// __ jcc(Assembler::belowEqual, l_2);                   // <= 32 dwords
    1.28 -		// __ rep_movl();
    1.29 -		__ b(l_2); 	
    1.30 -		__ delayed()->nop();	
    1.31 -		if (is_oop) {
    1.32 -		//  __ jmp(l_stchk);
    1.33 -			__ b(l_stchk); 
    1.34 -			__ delayed()->nop(); 
    1.35 -		}
    1.36 -		//    __ popl(edi);
    1.37 -		//   __ popl(esi);
    1.38 -		//  __ ret(0);
    1.39 -		__ pop(T8);	
    1.40 -		__ pop(T1);	
    1.41 -		__ pop(T0);	
    1.42 -		__ pop(T3);	
    1.43 -		__ jr(RA); 
    1.44 -		__ delayed()->nop(); 
    1.45 -
    1.46 -		__ bind(l_2);
    1.47 -		//  __ subl(edi, esi);
    1.48 -		//  __ testl(ecx, ecx);
    1.49 -		// __ jcc(Assembler::zero, l_4);
    1.50 -		__ beq(T1, R0, l_4);  
    1.51 -		__ delayed()->nop(); 
    1.52 -		__ align(16);
    1.53 -		__ bind(l_3);
    1.54 -		//__ movl(edx, Address(esi));
    1.55 -		__ lw(AT, T3, 0);   
    1.56 -		// __ movl(Address(edi, esi, Address::times_1), edx);
    1.57 -		__ sw(AT, T0, 0); 
    1.58 -		// __ addl(esi, 4);
    1.59 -		__ addi(T3, T3, 4);
    1.60 -		__ addi(T0, T0, 4);
    1.61 -		//   __ decl(ecx);
    1.62 -		__ addi(T1, T1, -1); 
    1.63 -		//    __ jcc(Assembler::notEqual, l_3);
    1.64 -		__ bne(T1, R0, l_3); 
    1.65 -		__ delayed()->nop(); 
    1.66 -		if (is_oop) {
    1.67 -			__ bind(l_stchk);
    1.68 -			//      __ movl(edi, Address(esp, 8+ 8));
    1.69 -			//     __ movl(ecx, Address(esp, 8+ 12));
    1.70 -			__ move(T0, A1); 
    1.71 -			__ move(T1, A2); 
    1.72 -			array_store_check();
    1.73 -		}
    1.74 -		__ bind(l_4);
    1.75 -		//    __ popl(edi);
    1.76 -		//   __ popl(esi);
    1.77 -		//  __ ret(0);
    1.78 -		__ pop(T8);
    1.79 -		__ pop(T1);
    1.80 -		__ pop(T0);
    1.81 -		__ pop(T3);
    1.82 -		__ jr(RA); 
    1.83 -		__ delayed()->nop(); 
    1.84 -		return start;
    1.85 -	}
    1.86 +    Label l_2, l_3, l_4, l_5, l_6, l_7, l_stchk;
    1.87 +    StubCodeMark mark(this, "StubRoutines", name);
    1.88 +    __ align(CodeEntryAlignment);
    1.89 +    address start = __ pc();
    1.90 +    __ push(T3);	
    1.91 +    __ push(T0);	
    1.92 +    __ push(T1);	
    1.93 +    __ push(T8);	
    1.94 +    __ move(T1, A2);  
    1.95 +    __ move(T3, A0); 
    1.96 +    __ move(T0, A1);
    1.97 +
    1.98 +    __ b(l_2); 	
    1.99 +    __ delayed()->nop();	
   1.100 +    if (is_oop) {
   1.101 +      __ b(l_stchk); 
   1.102 +      __ delayed()->nop(); 
   1.103 +    }
   1.104 +    __ pop(T8);	
   1.105 +    __ pop(T1);	
   1.106 +    __ pop(T0);	
   1.107 +    __ pop(T3);	
   1.108 +    __ jr(RA); 
   1.109 +    __ delayed()->nop(); 
   1.110 +
   1.111 +    __ bind(l_2);
   1.112 +    if(!aligned) {
   1.113 +      __ xorr(AT, T3, T0);
   1.114 +      __ andi(AT, AT, 7);
   1.115 +      __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time
   1.116 +      __ delayed()->nop();
   1.117 +
   1.118 +      __ andi(AT, T3, 7);
   1.119 +      __ beq(AT, R0, l_6); //copy 2 elements each time
   1.120 +      __ delayed()->nop();
   1.121 +
   1.122 +      __ lw(AT, T3, 0);
   1.123 +      __ daddi(T1, T1, -1);
   1.124 +      __ sw(AT, T0, 0);
   1.125 +      __ daddi(T3, T3, 4);
   1.126 +      __ daddi(T0, T0, 4);
   1.127 +    }
   1.128 +    __ bind(l_6);
   1.129 +    {
   1.130 +      __ daddi(AT, T1, -1);
   1.131 +      __ blez(AT, l_5);
   1.132 +      __ delayed()->nop();
   1.133 +
   1.134 +      __ bind(l_7);
   1.135 +      __ ld(AT, T3, 0);
   1.136 +      __ sd(AT, T0, 0);
   1.137 +      __ daddi(T3, T3, 8);
   1.138 +      __ daddi(T0, T0, 8);
   1.139 +      __ daddi(T1, T1, -2);
   1.140 +      __ daddi(AT, T1, -2);
   1.141 +      __ bgez(AT, l_7);
   1.142 +      __ delayed()->nop();
   1.143 +    }
   1.144 +
   1.145 +    __ bind(l_5);
   1.146 +    __ beq(T1, R0, l_4);  
   1.147 +    __ delayed()->nop(); 
   1.148 +    __ align(16);
   1.149 +    __ bind(l_3);
   1.150 +    __ lw(AT, T3, 0);   
   1.151 +    __ sw(AT, T0, 0); 
   1.152 +    __ addi(T3, T3, 4);
   1.153 +    __ addi(T0, T0, 4);
   1.154 +    __ addi(T1, T1, -1); 
   1.155 +    __ bne(T1, R0, l_3); 
   1.156 +    __ delayed()->nop(); 
   1.157 +    if (is_oop) {
   1.158 +      __ bind(l_stchk);
   1.159 +      __ move(T0, A1); 
   1.160 +      __ move(T1, A2); 
   1.161 +      array_store_check();
   1.162 +    }
   1.163 +    __ bind(l_4);
   1.164 +    __ pop(T8);
   1.165 +    __ pop(T1);
   1.166 +    __ pop(T0);
   1.167 +    __ pop(T3);
   1.168 +    __ jr(RA); 
   1.169 +    __ delayed()->nop(); 
   1.170 +    return start;
   1.171 +  }
   1.172  
   1.173    // Arguments:
   1.174    //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary

mercurial