Thu, 25 Aug 2016 22:31:58 +0800
#4428: Use gsdmult to optimize the lmul template of the interpreter.
I ran the test program on a 3A2000 computer.
Effects:
[loongson@localhost project]$ ./LmulTest.sh
before
time:3225ms
after
time:3115ms
The Java test program:
// Micro-benchmark for 64-bit (long) multiplication: runs a fixed number of
// loop iterations, each performing one long * long multiply, and prints the
// elapsed wall-clock time in milliseconds.
public class LmulTest{
// Entry point; args is not used.
public static void main(String args[]){
// Number of loop iterations, i.e. number of multiplications that are timed.
int count = 10000000;
// Timing uses millisecond-resolution wall-clock time; the nanoTime variant
// is kept below as a commented-out alternative.
long startTime = System.currentTimeMillis();
//long startTime = System.nanoTime();
for(int i = 0; i < count; i++){
long a, b, c;
// Operands are assigned and then incremented on every iteration, so each
// pass multiplies 12345679 by 87654322.
a = 12345678;
a++;
b = 87654321;
b++;
// The long multiplication being measured.
c = a * b;
// NOTE(review): c is never read while the println below stays commented out.
// Presumably the benchmark is meant to run in the interpreter, where the
// multiply is still executed; a JIT compiler may remove it as dead code --
// confirm how the test was launched.
//System.out.println(c);
}
long endTime = System.currentTimeMillis();
//long endTime = System.nanoTime();
// Report the elapsed time in the form "time:<N>ms".
System.out.println("time:" + (endTime - startTime) + "ms");
}
}
src/cpu/mips/vm/templateTable_mips_64.cpp | file | annotate | diff | comparison | revisions |
--- a/src/cpu/mips/vm/templateTable_mips_64.cpp Fri Aug 26 20:31:57 2016 +0800
+++ b/src/cpu/mips/vm/templateTable_mips_64.cpp Thu Aug 25 22:31:58 2016 +0800
@@ -1302,39 +1302,16 @@
   __ mfhi(FSR);
 }
 
-// the multiplier in SSR||FSR, the multiplicand in stack
-// the result in SSR||FSR
-// used registers : T2, T3
 void TemplateTable::lmul() {
   transition(ltos, ltos);
-  Label done;
-
-  __ pop_l(T2, T3);
-#ifdef ASSERT
-  {
-    Label L;
-    __ orr(AT, T3, SSR);
-    __ beq(AT, R0, L);
-    __ delayed()->nop();
-    //FIXME, aoqi
-    //__ stop("lmul, wrong stack");
-    __ bind(L);
-  }
-#endif
-  __ orr(AT, T2, FSR);
-  __ beq(AT, R0, done);
-  __ delayed()->nop();
-
-  __ dmultu(T2, FSR);
-  __ daddu(SSR, SSR, T3);
-  __ nop();
-  __ mflo(FSR);
-  __ mfhi(SSR);
-  __ b(done);
-  __ delayed()->nop();
-
-  __ bind(done);
-}
+  __ pop_l(T2);
+  if(UseLoongsonISA){
+    __ gsdmult(FSR, T2, FSR);
+  } else {
+    __ dmult(T2, FSR);
+    __ mflo(FSR);
+  }
+}
 
 // NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry
 void TemplateTable::ldiv() {