6708714: Optimize long LShift on 32-bits x86

Mon, 23 Jun 2008 14:11:12 -0700

author
kvn
date
Mon, 23 Jun 2008 14:11:12 -0700
changeset 654
ab65a4c9b2e8
parent 652
411c61adc994
child 655
30369db7f5d2

6708714: Optimize long LShift on 32-bits x86
Summary: For small (1-3 bit) left shifts of long values in the 32-bit VM, use sequences of ADD+ADC instructions instead of SHLD+SHL on new AMD CPUs.
Reviewed-by: never
Contributed-by: shrinivas.joshi@amd.com

src/cpu/x86/vm/vm_version_x86_32.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_32.ad file | annotate | diff | comparison | revisions
src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/x86/vm/vm_version_x86_32.cpp	Sat Jun 21 10:03:31 2008 -0700
     1.2 +++ b/src/cpu/x86/vm/vm_version_x86_32.cpp	Mon Jun 23 14:11:12 2008 -0700
     1.3 @@ -307,6 +307,10 @@
     1.4        // Use it on new AMD cpus starting from Opteron.
     1.5        UseAddressNop = true;
     1.6      }
     1.7 +    if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
     1.8 +      // Use it on new AMD cpus starting from Opteron.
     1.9 +      UseNewLongLShift = true;
    1.10 +    }
    1.11      if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
    1.12        if( supports_sse4a() ) {
    1.13          UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
     2.1 --- a/src/cpu/x86/vm/x86_32.ad	Sat Jun 21 10:03:31 2008 -0700
     2.2 +++ b/src/cpu/x86/vm/x86_32.ad	Mon Jun 23 14:11:12 2008 -0700
     2.3 @@ -4754,6 +4754,33 @@
     2.4    interface(CONST_INTER);
     2.5  %}
     2.6  
     2.7 +operand immI_1() %{
     2.8 +  predicate( n->get_int() == 1 );
     2.9 +  match(ConI);
    2.10 +
    2.11 +  op_cost(0);
    2.12 +  format %{ %}
    2.13 +  interface(CONST_INTER);
    2.14 +%}
    2.15 +
    2.16 +operand immI_2() %{
    2.17 +  predicate( n->get_int() == 2 );
    2.18 +  match(ConI);
    2.19 +
    2.20 +  op_cost(0);
    2.21 +  format %{ %}
    2.22 +  interface(CONST_INTER);
    2.23 +%}
    2.24 +
    2.25 +operand immI_3() %{
    2.26 +  predicate( n->get_int() == 3 );
    2.27 +  match(ConI);
    2.28 +
    2.29 +  op_cost(0);
    2.30 +  format %{ %}
    2.31 +  interface(CONST_INTER);
    2.32 +%}
    2.33 +
    2.34  // Pointer Immediate
    2.35  operand immP() %{
    2.36    match(ConP);
    2.37 @@ -8943,6 +8970,63 @@
    2.38    ins_pipe( ialu_reg_long_mem );
    2.39  %}
    2.40  
    2.41 +// Shift Left Long by 1
    2.42 +instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
    2.43 +  predicate(UseNewLongLShift);
    2.44 +  match(Set dst (LShiftL dst cnt));
    2.45 +  effect(KILL cr);
    2.46 +  ins_cost(100);
    2.47 +  format %{ "ADD    $dst.lo,$dst.lo\n\t"
    2.48 +            "ADC    $dst.hi,$dst.hi" %}
    2.49 +  ins_encode %{
    2.50 +    __ addl($dst$$Register,$dst$$Register);
    2.51 +    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    2.52 +  %}
    2.53 +  ins_pipe( ialu_reg_long );
    2.54 +%}
    2.55 +
    2.56 +// Shift Left Long by 2
    2.57 +instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
    2.58 +  predicate(UseNewLongLShift);
    2.59 +  match(Set dst (LShiftL dst cnt));
    2.60 +  effect(KILL cr);
    2.61 +  ins_cost(100);
    2.62 +  format %{ "ADD    $dst.lo,$dst.lo\n\t"
    2.63 +            "ADC    $dst.hi,$dst.hi\n\t" 
    2.64 +            "ADD    $dst.lo,$dst.lo\n\t"
    2.65 +            "ADC    $dst.hi,$dst.hi" %}
    2.66 +  ins_encode %{
    2.67 +    __ addl($dst$$Register,$dst$$Register);
    2.68 +    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    2.69 +    __ addl($dst$$Register,$dst$$Register);
    2.70 +    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    2.71 +  %}
    2.72 +  ins_pipe( ialu_reg_long );
    2.73 +%}
    2.74 +
    2.75 +// Shift Left Long by 3
    2.76 +instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
    2.77 +  predicate(UseNewLongLShift);
    2.78 +  match(Set dst (LShiftL dst cnt));
    2.79 +  effect(KILL cr);
    2.80 +  ins_cost(100);
    2.81 +  format %{ "ADD    $dst.lo,$dst.lo\n\t"
    2.82 +            "ADC    $dst.hi,$dst.hi\n\t" 
    2.83 +            "ADD    $dst.lo,$dst.lo\n\t"
    2.84 +            "ADC    $dst.hi,$dst.hi\n\t" 
    2.85 +            "ADD    $dst.lo,$dst.lo\n\t"
    2.86 +            "ADC    $dst.hi,$dst.hi" %}
    2.87 +  ins_encode %{
    2.88 +    __ addl($dst$$Register,$dst$$Register);
    2.89 +    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    2.90 +    __ addl($dst$$Register,$dst$$Register);
    2.91 +    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    2.92 +    __ addl($dst$$Register,$dst$$Register);
    2.93 +    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    2.94 +  %}
    2.95 +  ins_pipe( ialu_reg_long );
    2.96 +%}
    2.97 +
    2.98  // Shift Left Long by 1-31
    2.99  instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
   2.100    match(Set dst (LShiftL dst cnt));
     3.1 --- a/src/share/vm/runtime/globals.hpp	Sat Jun 21 10:03:31 2008 -0700
     3.2 +++ b/src/share/vm/runtime/globals.hpp	Mon Jun 23 14:11:12 2008 -0700
     3.3 @@ -946,6 +946,9 @@
     3.4    diagnostic(bool, UseIncDec, true,                                         \
     3.5            "Use INC, DEC instructions on x86")                               \
     3.6                                                                              \
     3.7 +  product(bool, UseNewLongLShift, false,                                    \
     3.8 +          "Use optimized bitwise shift left")                               \
     3.9 +                                                                            \
    3.10    product(bool, UseStoreImmI16, true,                                       \
    3.11            "Use store immediate 16-bits value instruction on x86")           \
    3.12                                                                              \

mercurial