6662967: Optimize I2D conversion on new x86

Wed, 19 Mar 2008 15:33:25 -0700

author
kvn
date
Wed, 19 Mar 2008 15:33:25 -0700
changeset 506
3d62cb85208d
parent 505
b683f557224b
child 507
f705f25597eb

6662967: Optimize I2D conversion on new x86
Summary: Use CVTDQ2PS and CVTDQ2PD to convert integer values to float and double values on new AMD CPUs.
Reviewed-by: sgoldman, never

src/cpu/x86/vm/assembler_x86_32.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/assembler_x86_32.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/assembler_x86_64.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/assembler_x86_64.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86_32.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86_64.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_32.ad file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_64.ad file | annotate | diff | comparison | revisions
src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/x86/vm/assembler_x86_32.cpp	Wed Mar 19 15:14:36 2008 -0700
     1.2 +++ b/src/cpu/x86/vm/assembler_x86_32.cpp	Wed Mar 19 15:33:25 2008 -0700
     1.3 @@ -2672,6 +2672,22 @@
     1.4    emit_sse_operand(dst, src);
     1.5  }
     1.6  
     1.7 +void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { // Emit CVTDQ2PD dst, src (register-to-register form only).
     1.8 +  assert(VM_Version::supports_sse2(), ""); // debug-time check that the CPU reports SSE2
     1.9 +
    1.10 +  emit_byte(0xF3); // prefix byte that is part of this opcode
    1.11 +  emit_byte(0x0F); // two-byte opcode escape
    1.12 +  emit_byte(0xE6); // F3 0F E6 is the CVTDQ2PD encoding
    1.13 +  emit_sse_operand(dst, src); // presumably emits the register operand byte; helper is defined outside this hunk
    1.14 +}
    1.15 +
    1.16 +void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { // Emit CVTDQ2PS dst, src (register-to-register form only).
    1.17 +  assert(VM_Version::supports_sse2(), ""); // debug-time check that the CPU reports SSE2
    1.18 +
    1.19 +  emit_byte(0x0F); // two-byte opcode escape; unlike cvtdq2pd, no prefix byte precedes it
    1.20 +  emit_byte(0x5B); // 0F 5B is the CVTDQ2PS encoding
    1.21 +  emit_sse_operand(dst, src); // presumably emits the register operand byte; helper is defined outside this hunk
    1.22 +}
    1.23  
    1.24  emit_sse_instruction(andps,  sse,  0,    0x54, XMMRegister, XMMRegister);
    1.25  emit_sse_instruction(andpd,  sse2, 0x66, 0x54, XMMRegister, XMMRegister);
     2.1 --- a/src/cpu/x86/vm/assembler_x86_32.hpp	Wed Mar 19 15:14:36 2008 -0700
     2.2 +++ b/src/cpu/x86/vm/assembler_x86_32.hpp	Wed Mar 19 15:33:25 2008 -0700
     2.3 @@ -901,6 +901,8 @@
     2.4    void cvtss2sd(XMMRegister dst, XMMRegister src);
     2.5    void cvtsd2ss(XMMRegister dst, Address src);   // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
     2.6    void cvtsd2ss(XMMRegister dst, XMMRegister src);
     2.7 +  void cvtdq2pd(XMMRegister dst, XMMRegister src);
     2.8 +  void cvtdq2ps(XMMRegister dst, XMMRegister src);
     2.9  
    2.10    void cvtsi2ss(XMMRegister dst, Address src);   // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
    2.11    void cvtsi2ss(XMMRegister dst, Register src);
     3.1 --- a/src/cpu/x86/vm/assembler_x86_64.cpp	Wed Mar 19 15:14:36 2008 -0700
     3.2 +++ b/src/cpu/x86/vm/assembler_x86_64.cpp	Wed Mar 19 15:33:25 2008 -0700
     3.3 @@ -3372,6 +3372,21 @@
     3.4    emit_byte(0xC0 | encode);
     3.5  }
     3.6  
     3.7 +void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { // Emit CVTDQ2PD dst, src (register-to-register form only).
     3.8 +  emit_byte(0xF3); // opcode prefix byte; emitted before anything prefix_and_encode() may emit
     3.9 +  int encode = prefix_and_encode(dst->encoding(), src->encoding()); // NOTE(review): presumably emits a REX prefix when needed and returns the packed reg/rm bits - confirm
    3.10 +  emit_byte(0x0F); // two-byte opcode escape
    3.11 +  emit_byte(0xE6); // F3 0F E6 is the CVTDQ2PD encoding
    3.12 +  emit_byte(0xC0 | encode); // operand byte: 0xC0 sets the two top bits (register-direct form)
    3.13 +}
    3.14 +
    3.15 +void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { // Emit CVTDQ2PS dst, src (register-to-register form only).
    3.16 +  int encode = prefix_and_encode(dst->encoding(), src->encoding()); // NOTE(review): presumably emits a REX prefix when needed and returns the packed reg/rm bits - confirm
    3.17 +  emit_byte(0x0F); // two-byte opcode escape; unlike cvtdq2pd, no 0xF3 prefix byte is emitted
    3.18 +  emit_byte(0x5B); // 0F 5B is the CVTDQ2PS encoding
    3.19 +  emit_byte(0xC0 | encode); // operand byte: 0xC0 sets the two top bits (register-direct form)
    3.20 +}
    3.21 +
    3.22  void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
    3.23    emit_byte(0xF2);
    3.24    int encode = prefix_and_encode(dst->encoding(), src->encoding());
     4.1 --- a/src/cpu/x86/vm/assembler_x86_64.hpp	Wed Mar 19 15:14:36 2008 -0700
     4.2 +++ b/src/cpu/x86/vm/assembler_x86_64.hpp	Wed Mar 19 15:33:25 2008 -0700
     4.3 @@ -922,6 +922,8 @@
     4.4    void cvttsd2siq(Register dst, XMMRegister src); // truncates
     4.5    void cvtss2sd(XMMRegister dst, XMMRegister src);
     4.6    void cvtsd2ss(XMMRegister dst, XMMRegister src);
     4.7 +  void cvtdq2pd(XMMRegister dst, XMMRegister src);
     4.8 +  void cvtdq2ps(XMMRegister dst, XMMRegister src);
     4.9  
    4.10    void pxor(XMMRegister dst, Address src);       // Xor Packed Byte Integer Values
    4.11    void pxor(XMMRegister dst, XMMRegister src);   // Xor Packed Byte Integer Values
     5.1 --- a/src/cpu/x86/vm/vm_version_x86_32.cpp	Wed Mar 19 15:14:36 2008 -0700
     5.2 +++ b/src/cpu/x86/vm/vm_version_x86_32.cpp	Wed Mar 19 15:33:25 2008 -0700
     5.3 @@ -321,6 +321,20 @@
     5.4          UseXmmRegToRegMoveAll = false;
     5.5        }
     5.6      }
     5.7 +    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
     5.8 +      if( supports_sse4a() ) {
     5.9 +        UseXmmI2F = true;
    5.10 +      } else {
    5.11 +        UseXmmI2F = false;
    5.12 +      }
    5.13 +    }
    5.14 +    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
    5.15 +      if( supports_sse4a() ) {
    5.16 +        UseXmmI2D = true;
    5.17 +      } else {
    5.18 +        UseXmmI2D = false;
    5.19 +      }
    5.20 +    }
    5.21    }
    5.22  
    5.23    if( is_intel() ) { // Intel cpus specific settings
     6.1 --- a/src/cpu/x86/vm/vm_version_x86_64.cpp	Wed Mar 19 15:14:36 2008 -0700
     6.2 +++ b/src/cpu/x86/vm/vm_version_x86_64.cpp	Wed Mar 19 15:33:25 2008 -0700
     6.3 @@ -265,6 +265,20 @@
     6.4          UseXmmRegToRegMoveAll = false;
     6.5        }
     6.6      }
     6.7 +    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
     6.8 +      if( supports_sse4a() ) {
     6.9 +        UseXmmI2F = true;
    6.10 +      } else {
    6.11 +        UseXmmI2F = false;
    6.12 +      }
    6.13 +    }
    6.14 +    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
    6.15 +      if( supports_sse4a() ) {
    6.16 +        UseXmmI2D = true;
    6.17 +      } else {
    6.18 +        UseXmmI2D = false;
    6.19 +      }
    6.20 +    }
    6.21    }
    6.22  
    6.23    if( is_intel() ) { // Intel cpus specific settings
     7.1 --- a/src/cpu/x86/vm/x86_32.ad	Wed Mar 19 15:14:36 2008 -0700
     7.2 +++ b/src/cpu/x86/vm/x86_32.ad	Wed Mar 19 15:33:25 2008 -0700
     7.3 @@ -10970,7 +10970,7 @@
     7.4  %}
     7.5  
     7.6  instruct convI2XD_reg(regXD dst, eRegI src) %{
     7.7 -  predicate( UseSSE>=2 );
     7.8 +  predicate( UseSSE>=2 && !UseXmmI2D );
     7.9    match(Set dst (ConvI2D src));
    7.10    format %{ "CVTSI2SD $dst,$src" %}
    7.11    opcode(0xF2, 0x0F, 0x2A);
    7.12 @@ -10987,6 +10987,20 @@
    7.13    ins_pipe( pipe_slow );
    7.14  %}
    7.15  
    7.16 +instruct convXI2XD_reg(regXD dst, eRegI src)
    7.17 +%{
    7.18 +  predicate( UseSSE>=2 && UseXmmI2D ); // complements convI2XD_reg, whose predicate requires !UseXmmI2D
    7.19 +  match(Set dst (ConvI2D src));
    7.20 +
    7.21 +  format %{ "MOVD  $dst,$src\n\t"
    7.22 +            "CVTDQ2PD $dst,$dst\t# i2d" %}
    7.23 +  ins_encode %{
    7.24 +    __ movd($dst$$XMMRegister, $src$$Register); // move the int from the general register into the XMM destination
    7.25 +    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); // convert in place; source and destination are the same XMM register
    7.26 +  %}
    7.27 +  ins_pipe(pipe_slow); // XXX
    7.28 +%}
    7.29 +
    7.30  instruct convI2D_mem(regD dst, memory mem) %{
    7.31    predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
    7.32    match(Set dst (ConvI2D (LoadI mem)));
    7.33 @@ -11062,7 +11076,7 @@
    7.34  
    7.35  // Convert an int to a float in xmm; no rounding step needed.
    7.36  instruct convI2X_reg(regX dst, eRegI src) %{
    7.37 -  predicate(UseSSE>=1);
    7.38 +  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
    7.39    match(Set dst (ConvI2F src));
    7.40    format %{ "CVTSI2SS $dst, $src" %}
    7.41  
    7.42 @@ -11071,6 +11085,20 @@
    7.43    ins_pipe( pipe_slow );
    7.44  %}
    7.45  
    7.46 + instruct convXI2X_reg(regX dst, eRegI src)
    7.47 +%{
    7.48 +  predicate( UseSSE>=2 && UseXmmI2F ); // complements convI2X_reg, which covers UseSSE==1 and the !UseXmmI2F case
    7.49 +  match(Set dst (ConvI2F src));
    7.50 +
    7.51 +  format %{ "MOVD  $dst,$src\n\t"
    7.52 +            "CVTDQ2PS $dst,$dst\t# i2f" %}
    7.53 +  ins_encode %{
    7.54 +    __ movd($dst$$XMMRegister, $src$$Register); // move the int from the general register into the XMM destination
    7.55 +    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); // convert in place; source and destination are the same XMM register
    7.56 +  %}
    7.57 +  ins_pipe(pipe_slow); // XXX
    7.58 +%}
    7.59 +
    7.60  instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
    7.61    match(Set dst (ConvI2L src));
    7.62    effect(KILL cr);
     8.1 --- a/src/cpu/x86/vm/x86_64.ad	Wed Mar 19 15:14:36 2008 -0700
     8.2 +++ b/src/cpu/x86/vm/x86_64.ad	Wed Mar 19 15:33:25 2008 -0700
     8.3 @@ -10098,6 +10098,7 @@
     8.4  
     8.5  instruct convI2F_reg_reg(regF dst, rRegI src)
     8.6  %{
     8.7 +  predicate(!UseXmmI2F);
     8.8    match(Set dst (ConvI2F src));
     8.9  
    8.10    format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
    8.11 @@ -10118,6 +10119,7 @@
    8.12  
    8.13  instruct convI2D_reg_reg(regD dst, rRegI src)
    8.14  %{
    8.15 +  predicate(!UseXmmI2D);
    8.16    match(Set dst (ConvI2D src));
    8.17  
    8.18    format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
    8.19 @@ -10136,6 +10138,34 @@
    8.20    ins_pipe(pipe_slow); // XXX
    8.21  %}
    8.22  
    8.23 +instruct convXI2F_reg(regF dst, rRegI src)
    8.24 +%{
    8.25 +  predicate(UseXmmI2F); // complements convI2F_reg_reg, whose predicate is !UseXmmI2F
    8.26 +  match(Set dst (ConvI2F src));
    8.27 +
    8.28 +  format %{ "movdl $dst, $src\n\t"
    8.29 +            "cvtdq2psl $dst, $dst\t# i2f" %}
    8.30 +  ins_encode %{
    8.31 +    __ movdl($dst$$XMMRegister, $src$$Register); // move the int from the general register into the XMM destination
    8.32 +    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); // convert in place; source and destination are the same XMM register
    8.33 +  %}
    8.34 +  ins_pipe(pipe_slow); // XXX
    8.35 +%}
    8.36 +
    8.37 +instruct convXI2D_reg(regD dst, rRegI src)
    8.38 +%{
    8.39 +  predicate(UseXmmI2D); // complements convI2D_reg_reg, whose predicate is !UseXmmI2D
    8.40 +  match(Set dst (ConvI2D src));
    8.41 +
    8.42 +  format %{ "movdl $dst, $src\n\t"
    8.43 +            "cvtdq2pdl $dst, $dst\t# i2d" %}
    8.44 +  ins_encode %{
    8.45 +    __ movdl($dst$$XMMRegister, $src$$Register); // move the int from the general register into the XMM destination
    8.46 +    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); // convert in place; source and destination are the same XMM register
    8.47 +  %}
    8.48 +  ins_pipe(pipe_slow); // XXX
    8.49 +%}
    8.50 +
    8.51  instruct convL2F_reg_reg(regF dst, rRegL src)
    8.52  %{
    8.53    match(Set dst (ConvL2F src));
     9.1 --- a/src/share/vm/runtime/globals.hpp	Wed Mar 19 15:14:36 2008 -0700
     9.2 +++ b/src/share/vm/runtime/globals.hpp	Wed Mar 19 15:33:25 2008 -0700
     9.3 @@ -949,6 +949,12 @@
     9.4    product(bool, UseXmmRegToRegMoveAll, false,                               \
     9.5            "Copy all XMM register bits when moving value between registers") \
     9.6                                                                              \
     9.7 +  product(bool, UseXmmI2D, false,                                           \
     9.8 +          "Use SSE2 CVTDQ2PD instruction to convert Integer to Double")     \
     9.9 +                                                                            \
    9.10 +  product(bool, UseXmmI2F, false,                                           \
    9.11 +          "Use SSE2 CVTDQ2PS instruction to convert Integer to Float")      \
    9.12 +                                                                            \
    9.13    product(intx, FieldsAllocationStyle, 1,                                   \
    9.14            "0 - type based with oops first, 1 - with oops last")             \
    9.15                                                                              \

mercurial