Wed, 19 Mar 2008 15:33:25 -0700
6662967: Optimize I2D conversion on new x86
Summary: Use CVTDQ2PS and CVTDQ2PD to convert integer values to float and double values on new AMD CPUs.
Reviewed-by: sgoldman, never
1.1 --- a/src/cpu/x86/vm/assembler_x86_32.cpp Wed Mar 19 15:14:36 2008 -0700 1.2 +++ b/src/cpu/x86/vm/assembler_x86_32.cpp Wed Mar 19 15:33:25 2008 -0700 1.3 @@ -2672,6 +2672,22 @@ 1.4 emit_sse_operand(dst, src); 1.5 } 1.6 1.7 +void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { 1.8 + assert(VM_Version::supports_sse2(), ""); 1.9 + 1.10 + emit_byte(0xF3); 1.11 + emit_byte(0x0F); 1.12 + emit_byte(0xE6); 1.13 + emit_sse_operand(dst, src); 1.14 +} 1.15 + 1.16 +void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { 1.17 + assert(VM_Version::supports_sse2(), ""); 1.18 + 1.19 + emit_byte(0x0F); 1.20 + emit_byte(0x5B); 1.21 + emit_sse_operand(dst, src); 1.22 +} 1.23 1.24 emit_sse_instruction(andps, sse, 0, 0x54, XMMRegister, XMMRegister); 1.25 emit_sse_instruction(andpd, sse2, 0x66, 0x54, XMMRegister, XMMRegister);
2.1 --- a/src/cpu/x86/vm/assembler_x86_32.hpp Wed Mar 19 15:14:36 2008 -0700 2.2 +++ b/src/cpu/x86/vm/assembler_x86_32.hpp Wed Mar 19 15:33:25 2008 -0700 2.3 @@ -901,6 +901,8 @@ 2.4 void cvtss2sd(XMMRegister dst, XMMRegister src); 2.5 void cvtsd2ss(XMMRegister dst, Address src); // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value 2.6 void cvtsd2ss(XMMRegister dst, XMMRegister src); 2.7 + void cvtdq2pd(XMMRegister dst, XMMRegister src); 2.8 + void cvtdq2ps(XMMRegister dst, XMMRegister src); 2.9 2.10 void cvtsi2ss(XMMRegister dst, Address src); // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value 2.11 void cvtsi2ss(XMMRegister dst, Register src);
3.1 --- a/src/cpu/x86/vm/assembler_x86_64.cpp Wed Mar 19 15:14:36 2008 -0700 3.2 +++ b/src/cpu/x86/vm/assembler_x86_64.cpp Wed Mar 19 15:33:25 2008 -0700 3.3 @@ -3372,6 +3372,21 @@ 3.4 emit_byte(0xC0 | encode); 3.5 } 3.6 3.7 +void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { 3.8 + emit_byte(0xF3); 3.9 + int encode = prefix_and_encode(dst->encoding(), src->encoding()); 3.10 + emit_byte(0x0F); 3.11 + emit_byte(0xE6); 3.12 + emit_byte(0xC0 | encode); 3.13 +} 3.14 + 3.15 +void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { 3.16 + int encode = prefix_and_encode(dst->encoding(), src->encoding()); 3.17 + emit_byte(0x0F); 3.18 + emit_byte(0x5B); 3.19 + emit_byte(0xC0 | encode); 3.20 +} 3.21 + 3.22 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { 3.23 emit_byte(0xF2); 3.24 int encode = prefix_and_encode(dst->encoding(), src->encoding());
4.1 --- a/src/cpu/x86/vm/assembler_x86_64.hpp Wed Mar 19 15:14:36 2008 -0700 4.2 +++ b/src/cpu/x86/vm/assembler_x86_64.hpp Wed Mar 19 15:33:25 2008 -0700 4.3 @@ -922,6 +922,8 @@ 4.4 void cvttsd2siq(Register dst, XMMRegister src); // truncates 4.5 void cvtss2sd(XMMRegister dst, XMMRegister src); 4.6 void cvtsd2ss(XMMRegister dst, XMMRegister src); 4.7 + void cvtdq2pd(XMMRegister dst, XMMRegister src); 4.8 + void cvtdq2ps(XMMRegister dst, XMMRegister src); 4.9 4.10 void pxor(XMMRegister dst, Address src); // Xor Packed Byte Integer Values 4.11 void pxor(XMMRegister dst, XMMRegister src); // Xor Packed Byte Integer Values
5.1 --- a/src/cpu/x86/vm/vm_version_x86_32.cpp Wed Mar 19 15:14:36 2008 -0700 5.2 +++ b/src/cpu/x86/vm/vm_version_x86_32.cpp Wed Mar 19 15:33:25 2008 -0700 5.3 @@ -321,6 +321,20 @@ 5.4 UseXmmRegToRegMoveAll = false; 5.5 } 5.6 } 5.7 + if( FLAG_IS_DEFAULT(UseXmmI2F) ) { 5.8 + if( supports_sse4a() ) { 5.9 + UseXmmI2F = true; 5.10 + } else { 5.11 + UseXmmI2F = false; 5.12 + } 5.13 + } 5.14 + if( FLAG_IS_DEFAULT(UseXmmI2D) ) { 5.15 + if( supports_sse4a() ) { 5.16 + UseXmmI2D = true; 5.17 + } else { 5.18 + UseXmmI2D = false; 5.19 + } 5.20 + } 5.21 } 5.22 5.23 if( is_intel() ) { // Intel cpus specific settings
6.1 --- a/src/cpu/x86/vm/vm_version_x86_64.cpp Wed Mar 19 15:14:36 2008 -0700 6.2 +++ b/src/cpu/x86/vm/vm_version_x86_64.cpp Wed Mar 19 15:33:25 2008 -0700 6.3 @@ -265,6 +265,20 @@ 6.4 UseXmmRegToRegMoveAll = false; 6.5 } 6.6 } 6.7 + if( FLAG_IS_DEFAULT(UseXmmI2F) ) { 6.8 + if( supports_sse4a() ) { 6.9 + UseXmmI2F = true; 6.10 + } else { 6.11 + UseXmmI2F = false; 6.12 + } 6.13 + } 6.14 + if( FLAG_IS_DEFAULT(UseXmmI2D) ) { 6.15 + if( supports_sse4a() ) { 6.16 + UseXmmI2D = true; 6.17 + } else { 6.18 + UseXmmI2D = false; 6.19 + } 6.20 + } 6.21 } 6.22 6.23 if( is_intel() ) { // Intel cpus specific settings
7.1 --- a/src/cpu/x86/vm/x86_32.ad Wed Mar 19 15:14:36 2008 -0700 7.2 +++ b/src/cpu/x86/vm/x86_32.ad Wed Mar 19 15:33:25 2008 -0700 7.3 @@ -10970,7 +10970,7 @@ 7.4 %} 7.5 7.6 instruct convI2XD_reg(regXD dst, eRegI src) %{ 7.7 - predicate( UseSSE>=2 ); 7.8 + predicate( UseSSE>=2 && !UseXmmI2D ); 7.9 match(Set dst (ConvI2D src)); 7.10 format %{ "CVTSI2SD $dst,$src" %} 7.11 opcode(0xF2, 0x0F, 0x2A); 7.12 @@ -10987,6 +10987,20 @@ 7.13 ins_pipe( pipe_slow ); 7.14 %} 7.15 7.16 +instruct convXI2XD_reg(regXD dst, eRegI src) 7.17 +%{ 7.18 + predicate( UseSSE>=2 && UseXmmI2D ); 7.19 + match(Set dst (ConvI2D src)); 7.20 + 7.21 + format %{ "MOVD $dst,$src\n\t" 7.22 + "CVTDQ2PD $dst,$dst\t# i2d" %} 7.23 + ins_encode %{ 7.24 + __ movd($dst$$XMMRegister, $src$$Register); 7.25 + __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 7.26 + %} 7.27 + ins_pipe(pipe_slow); // XXX 7.28 +%} 7.29 + 7.30 instruct convI2D_mem(regD dst, memory mem) %{ 7.31 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 7.32 match(Set dst (ConvI2D (LoadI mem))); 7.33 @@ -11062,7 +11076,7 @@ 7.34 7.35 // Convert an int to a float in xmm; no rounding step needed. 7.36 instruct convI2X_reg(regX dst, eRegI src) %{ 7.37 - predicate(UseSSE>=1); 7.38 + predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 7.39 match(Set dst (ConvI2F src)); 7.40 format %{ "CVTSI2SS $dst, $src" %} 7.41 7.42 @@ -11071,6 +11085,20 @@ 7.43 ins_pipe( pipe_slow ); 7.44 %} 7.45 7.46 + instruct convXI2X_reg(regX dst, eRegI src) 7.47 +%{ 7.48 + predicate( UseSSE>=2 && UseXmmI2F ); 7.49 + match(Set dst (ConvI2F src)); 7.50 + 7.51 + format %{ "MOVD $dst,$src\n\t" 7.52 + "CVTDQ2PS $dst,$dst\t# i2f" %} 7.53 + ins_encode %{ 7.54 + __ movd($dst$$XMMRegister, $src$$Register); 7.55 + __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 7.56 + %} 7.57 + ins_pipe(pipe_slow); // XXX 7.58 +%} 7.59 + 7.60 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{ 7.61 match(Set dst (ConvI2L src)); 7.62 effect(KILL cr);
8.1 --- a/src/cpu/x86/vm/x86_64.ad Wed Mar 19 15:14:36 2008 -0700 8.2 +++ b/src/cpu/x86/vm/x86_64.ad Wed Mar 19 15:33:25 2008 -0700 8.3 @@ -10098,6 +10098,7 @@ 8.4 8.5 instruct convI2F_reg_reg(regF dst, rRegI src) 8.6 %{ 8.7 + predicate(!UseXmmI2F); 8.8 match(Set dst (ConvI2F src)); 8.9 8.10 format %{ "cvtsi2ssl $dst, $src\t# i2f" %} 8.11 @@ -10118,6 +10119,7 @@ 8.12 8.13 instruct convI2D_reg_reg(regD dst, rRegI src) 8.14 %{ 8.15 + predicate(!UseXmmI2D); 8.16 match(Set dst (ConvI2D src)); 8.17 8.18 format %{ "cvtsi2sdl $dst, $src\t# i2d" %} 8.19 @@ -10136,6 +10138,34 @@ 8.20 ins_pipe(pipe_slow); // XXX 8.21 %} 8.22 8.23 +instruct convXI2F_reg(regF dst, rRegI src) 8.24 +%{ 8.25 + predicate(UseXmmI2F); 8.26 + match(Set dst (ConvI2F src)); 8.27 + 8.28 + format %{ "movdl $dst, $src\n\t" 8.29 + "cvtdq2psl $dst, $dst\t# i2f" %} 8.30 + ins_encode %{ 8.31 + __ movdl($dst$$XMMRegister, $src$$Register); 8.32 + __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 8.33 + %} 8.34 + ins_pipe(pipe_slow); // XXX 8.35 +%} 8.36 + 8.37 +instruct convXI2D_reg(regD dst, rRegI src) 8.38 +%{ 8.39 + predicate(UseXmmI2D); 8.40 + match(Set dst (ConvI2D src)); 8.41 + 8.42 + format %{ "movdl $dst, $src\n\t" 8.43 + "cvtdq2pdl $dst, $dst\t# i2d" %} 8.44 + ins_encode %{ 8.45 + __ movdl($dst$$XMMRegister, $src$$Register); 8.46 + __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 8.47 + %} 8.48 + ins_pipe(pipe_slow); // XXX 8.49 +%} 8.50 + 8.51 instruct convL2F_reg_reg(regF dst, rRegL src) 8.52 %{ 8.53 match(Set dst (ConvL2F src));
9.1 --- a/src/share/vm/runtime/globals.hpp Wed Mar 19 15:14:36 2008 -0700 9.2 +++ b/src/share/vm/runtime/globals.hpp Wed Mar 19 15:33:25 2008 -0700 9.3 @@ -949,6 +949,12 @@ 9.4 product(bool, UseXmmRegToRegMoveAll, false, \ 9.5 "Copy all XMM register bits when moving value between registers") \ 9.6 \ 9.7 + product(bool, UseXmmI2D, false, \ 9.8 + "Use SSE2 CVTDQ2PD instruction to convert Integer to Double") \ 9.9 + \ 9.10 + product(bool, UseXmmI2F, false, \ 9.11 + "Use SSE2 CVTDQ2PS instruction to convert Integer to Float") \ 9.12 + \ 9.13 product(intx, FieldsAllocationStyle, 1, \ 9.14 "0 - type based with oops first, 1 - with oops last") \ 9.15 \