Tue, 20 Dec 2011 00:55:02 -0800
7121648: Use 3-operands SIMD instructions on x86 with AVX
Summary: Use 3-operand SIMD instructions in C2-generated code on machines with AVX.
Reviewed-by: never
1.1 --- a/make/bsd/makefiles/adlc.make Mon Dec 19 14:16:23 2011 -0800 1.2 +++ b/make/bsd/makefiles/adlc.make Tue Dec 20 00:55:02 2011 -0800 1.3 @@ -39,9 +39,16 @@ 1.4 1.5 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 1.6 1.7 -SOURCES.AD = \ 1.8 +ifeq ("${Platform_arch_model}", "${Platform_arch}") 1.9 + SOURCES.AD = \ 1.10 $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 1.11 $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 1.12 +else 1.13 + SOURCES.AD = \ 1.14 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 1.15 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ 1.16 + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 1.17 +endif 1.18 1.19 EXEC = $(OUTDIR)/adlc 1.20
2.1 --- a/make/linux/makefiles/adlc.make Mon Dec 19 14:16:23 2011 -0800 2.2 +++ b/make/linux/makefiles/adlc.make Tue Dec 20 00:55:02 2011 -0800 2.3 @@ -39,9 +39,16 @@ 2.4 2.5 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 2.6 2.7 -SOURCES.AD = \ 2.8 +ifeq ("${Platform_arch_model}", "${Platform_arch}") 2.9 + SOURCES.AD = \ 2.10 $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 2.11 $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 2.12 +else 2.13 + SOURCES.AD = \ 2.14 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 2.15 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ 2.16 + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 2.17 +endif 2.18 2.19 EXEC = $(OUTDIR)/adlc 2.20
3.1 --- a/make/solaris/makefiles/adlc.make Mon Dec 19 14:16:23 2011 -0800 3.2 +++ b/make/solaris/makefiles/adlc.make Tue Dec 20 00:55:02 2011 -0800 3.3 @@ -40,9 +40,16 @@ 3.4 3.5 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 3.6 3.7 -SOURCES.AD = \ 3.8 +ifeq ("${Platform_arch_model}", "${Platform_arch}") 3.9 + SOURCES.AD = \ 3.10 $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 3.11 $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 3.12 +else 3.13 + SOURCES.AD = \ 3.14 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 3.15 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ 3.16 + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 3.17 +endif 3.18 3.19 EXEC = $(OUTDIR)/adlc 3.20
4.1 --- a/make/windows/makefiles/adlc.make Mon Dec 19 14:16:23 2011 -0800 4.2 +++ b/make/windows/makefiles/adlc.make Tue Dec 20 00:55:02 2011 -0800 4.3 @@ -53,6 +53,17 @@ 4.4 /I "$(WorkSpace)\src\os\windows\vm" \ 4.5 /I "$(WorkSpace)\src\cpu\$(Platform_arch)\vm" 4.6 4.7 +!if "$(Platform_arch_model)" == "$(Platform_arch)" 4.8 +SOURCES_AD=\ 4.9 + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ 4.10 + $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad 4.11 +!else 4.12 +SOURCES_AD=\ 4.13 + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ 4.14 + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad \ 4.15 + $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad 4.16 +!endif 4.17 + 4.18 # NOTE! If you add any files here, you must also update GENERATED_NAMES_IN_DIR 4.19 # and ProjectCreatorIDEOptions in projectcreator.make. 4.20 GENERATED_NAMES=\ 4.21 @@ -105,7 +116,6 @@ 4.22 $(ADLC) $(ADLCFLAGS) $(Platform_arch_model).ad 4.23 mv $(GENERATED_NAMES) $(AdlcOutDir)/ 4.24 4.25 -$(Platform_arch_model).ad: $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad 4.26 +$(Platform_arch_model).ad: $(SOURCES_AD) 4.27 rm -f $(Platform_arch_model).ad 4.28 - cat $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ 4.29 - $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad >$(Platform_arch_model).ad 4.30 + cat $(SOURCES_AD) >$(Platform_arch_model).ad
5.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Mon Dec 19 14:16:23 2011 -0800 5.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Tue Dec 20 00:55:02 2011 -0800 5.3 @@ -2932,6 +2932,161 @@ 5.4 emit_operand(dst, src); 5.5 } 5.6 5.7 +// AVX 3-operands non destructive source instructions (encoded with VEX prefix) 5.8 + 5.9 +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 5.10 + assert(VM_Version::supports_avx(), ""); 5.11 + InstructionMark im(this); 5.12 + vex_prefix(dst, nds, src, VEX_SIMD_F2); 5.13 + emit_byte(0x58); 5.14 + emit_operand(dst, src); 5.15 +} 5.16 + 5.17 +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5.18 + assert(VM_Version::supports_avx(), ""); 5.19 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 5.20 + emit_byte(0x58); 5.21 + emit_byte(0xC0 | encode); 5.22 +} 5.23 + 5.24 +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 5.25 + assert(VM_Version::supports_avx(), ""); 5.26 + InstructionMark im(this); 5.27 + vex_prefix(dst, nds, src, VEX_SIMD_F3); 5.28 + emit_byte(0x58); 5.29 + emit_operand(dst, src); 5.30 +} 5.31 + 5.32 +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5.33 + assert(VM_Version::supports_avx(), ""); 5.34 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 5.35 + emit_byte(0x58); 5.36 + emit_byte(0xC0 | encode); 5.37 +} 5.38 + 5.39 +void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) { 5.40 + assert(VM_Version::supports_avx(), ""); 5.41 + InstructionMark im(this); 5.42 + vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector 5.43 + emit_byte(0x54); 5.44 + emit_operand(dst, src); 5.45 +} 5.46 + 5.47 +void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) { 5.48 + assert(VM_Version::supports_avx(), ""); 5.49 + InstructionMark im(this); 5.50 + vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector 5.51 + emit_byte(0x54); 5.52 + emit_operand(dst, src); 5.53 +} 5.54 + 5.55 +void 
Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 5.56 + assert(VM_Version::supports_avx(), ""); 5.57 + InstructionMark im(this); 5.58 + vex_prefix(dst, nds, src, VEX_SIMD_F2); 5.59 + emit_byte(0x5E); 5.60 + emit_operand(dst, src); 5.61 +} 5.62 + 5.63 +void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5.64 + assert(VM_Version::supports_avx(), ""); 5.65 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 5.66 + emit_byte(0x5E); 5.67 + emit_byte(0xC0 | encode); 5.68 +} 5.69 + 5.70 +void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 5.71 + assert(VM_Version::supports_avx(), ""); 5.72 + InstructionMark im(this); 5.73 + vex_prefix(dst, nds, src, VEX_SIMD_F3); 5.74 + emit_byte(0x5E); 5.75 + emit_operand(dst, src); 5.76 +} 5.77 + 5.78 +void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5.79 + assert(VM_Version::supports_avx(), ""); 5.80 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 5.81 + emit_byte(0x5E); 5.82 + emit_byte(0xC0 | encode); 5.83 +} 5.84 + 5.85 +void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 5.86 + assert(VM_Version::supports_avx(), ""); 5.87 + InstructionMark im(this); 5.88 + vex_prefix(dst, nds, src, VEX_SIMD_F2); 5.89 + emit_byte(0x59); 5.90 + emit_operand(dst, src); 5.91 +} 5.92 + 5.93 +void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5.94 + assert(VM_Version::supports_avx(), ""); 5.95 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 5.96 + emit_byte(0x59); 5.97 + emit_byte(0xC0 | encode); 5.98 +} 5.99 + 5.100 +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 5.101 + InstructionMark im(this); 5.102 + vex_prefix(dst, nds, src, VEX_SIMD_F3); 5.103 + emit_byte(0x59); 5.104 + emit_operand(dst, src); 5.105 +} 5.106 + 5.107 +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5.108 + assert(VM_Version::supports_avx(), ""); 
5.109 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 5.110 + emit_byte(0x59); 5.111 + emit_byte(0xC0 | encode); 5.112 +} 5.113 + 5.114 + 5.115 +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { 5.116 + assert(VM_Version::supports_avx(), ""); 5.117 + InstructionMark im(this); 5.118 + vex_prefix(dst, nds, src, VEX_SIMD_F2); 5.119 + emit_byte(0x5C); 5.120 + emit_operand(dst, src); 5.121 +} 5.122 + 5.123 +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5.124 + assert(VM_Version::supports_avx(), ""); 5.125 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 5.126 + emit_byte(0x5C); 5.127 + emit_byte(0xC0 | encode); 5.128 +} 5.129 + 5.130 +void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 5.131 + assert(VM_Version::supports_avx(), ""); 5.132 + InstructionMark im(this); 5.133 + vex_prefix(dst, nds, src, VEX_SIMD_F3); 5.134 + emit_byte(0x5C); 5.135 + emit_operand(dst, src); 5.136 +} 5.137 + 5.138 +void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5.139 + assert(VM_Version::supports_avx(), ""); 5.140 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 5.141 + emit_byte(0x5C); 5.142 + emit_byte(0xC0 | encode); 5.143 +} 5.144 + 5.145 +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) { 5.146 + assert(VM_Version::supports_avx(), ""); 5.147 + InstructionMark im(this); 5.148 + vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector 5.149 + emit_byte(0x57); 5.150 + emit_operand(dst, src); 5.151 +} 5.152 + 5.153 +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { 5.154 + assert(VM_Version::supports_avx(), ""); 5.155 + InstructionMark im(this); 5.156 + vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector 5.157 + emit_byte(0x57); 5.158 + emit_operand(dst, src); 5.159 +} 5.160 + 5.161 + 5.162 #ifndef _LP64 5.163 // 32bit only pieces of the assembler 5.164 5.165 @@ -7235,6 +7390,157 @@ 5.166 } 
5.167 } 5.168 5.169 +void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 5.170 + if (reachable(src)) { 5.171 + Assembler::ucomisd(dst, as_Address(src)); 5.172 + } else { 5.173 + lea(rscratch1, src); 5.174 + Assembler::ucomisd(dst, Address(rscratch1, 0)); 5.175 + } 5.176 +} 5.177 + 5.178 +void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 5.179 + if (reachable(src)) { 5.180 + Assembler::ucomiss(dst, as_Address(src)); 5.181 + } else { 5.182 + lea(rscratch1, src); 5.183 + Assembler::ucomiss(dst, Address(rscratch1, 0)); 5.184 + } 5.185 +} 5.186 + 5.187 +void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 5.188 + // Used in sign-bit flipping with aligned address. 5.189 + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 5.190 + if (reachable(src)) { 5.191 + Assembler::xorpd(dst, as_Address(src)); 5.192 + } else { 5.193 + lea(rscratch1, src); 5.194 + Assembler::xorpd(dst, Address(rscratch1, 0)); 5.195 + } 5.196 +} 5.197 + 5.198 +void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 5.199 + // Used in sign-bit flipping with aligned address. 
5.200 + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 5.201 + if (reachable(src)) { 5.202 + Assembler::xorps(dst, as_Address(src)); 5.203 + } else { 5.204 + lea(rscratch1, src); 5.205 + Assembler::xorps(dst, Address(rscratch1, 0)); 5.206 + } 5.207 +} 5.208 + 5.209 +// AVX 3-operands instructions 5.210 + 5.211 +void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.212 + if (reachable(src)) { 5.213 + vaddsd(dst, nds, as_Address(src)); 5.214 + } else { 5.215 + lea(rscratch1, src); 5.216 + vaddsd(dst, nds, Address(rscratch1, 0)); 5.217 + } 5.218 +} 5.219 + 5.220 +void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.221 + if (reachable(src)) { 5.222 + vaddss(dst, nds, as_Address(src)); 5.223 + } else { 5.224 + lea(rscratch1, src); 5.225 + vaddss(dst, nds, Address(rscratch1, 0)); 5.226 + } 5.227 +} 5.228 + 5.229 +void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.230 + if (reachable(src)) { 5.231 + vandpd(dst, nds, as_Address(src)); 5.232 + } else { 5.233 + lea(rscratch1, src); 5.234 + vandpd(dst, nds, Address(rscratch1, 0)); 5.235 + } 5.236 +} 5.237 + 5.238 +void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.239 + if (reachable(src)) { 5.240 + vandps(dst, nds, as_Address(src)); 5.241 + } else { 5.242 + lea(rscratch1, src); 5.243 + vandps(dst, nds, Address(rscratch1, 0)); 5.244 + } 5.245 +} 5.246 + 5.247 +void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.248 + if (reachable(src)) { 5.249 + vdivsd(dst, nds, as_Address(src)); 5.250 + } else { 5.251 + lea(rscratch1, src); 5.252 + vdivsd(dst, nds, Address(rscratch1, 0)); 5.253 + } 5.254 +} 5.255 + 5.256 +void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.257 + if (reachable(src)) { 5.258 + vdivss(dst, nds, as_Address(src)); 5.259 + } else { 5.260 + 
lea(rscratch1, src); 5.261 + vdivss(dst, nds, Address(rscratch1, 0)); 5.262 + } 5.263 +} 5.264 + 5.265 +void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.266 + if (reachable(src)) { 5.267 + vmulsd(dst, nds, as_Address(src)); 5.268 + } else { 5.269 + lea(rscratch1, src); 5.270 + vmulsd(dst, nds, Address(rscratch1, 0)); 5.271 + } 5.272 +} 5.273 + 5.274 +void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.275 + if (reachable(src)) { 5.276 + vmulss(dst, nds, as_Address(src)); 5.277 + } else { 5.278 + lea(rscratch1, src); 5.279 + vmulss(dst, nds, Address(rscratch1, 0)); 5.280 + } 5.281 +} 5.282 + 5.283 +void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.284 + if (reachable(src)) { 5.285 + vsubsd(dst, nds, as_Address(src)); 5.286 + } else { 5.287 + lea(rscratch1, src); 5.288 + vsubsd(dst, nds, Address(rscratch1, 0)); 5.289 + } 5.290 +} 5.291 + 5.292 +void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.293 + if (reachable(src)) { 5.294 + vsubss(dst, nds, as_Address(src)); 5.295 + } else { 5.296 + lea(rscratch1, src); 5.297 + vsubss(dst, nds, Address(rscratch1, 0)); 5.298 + } 5.299 +} 5.300 + 5.301 +void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.302 + if (reachable(src)) { 5.303 + vxorpd(dst, nds, as_Address(src)); 5.304 + } else { 5.305 + lea(rscratch1, src); 5.306 + vxorpd(dst, nds, Address(rscratch1, 0)); 5.307 + } 5.308 +} 5.309 + 5.310 +void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 5.311 + if (reachable(src)) { 5.312 + vxorps(dst, nds, as_Address(src)); 5.313 + } else { 5.314 + lea(rscratch1, src); 5.315 + vxorps(dst, nds, Address(rscratch1, 0)); 5.316 + } 5.317 +} 5.318 + 5.319 + 5.320 ////////////////////////////////////////////////////////////////////////////////// 5.321 #ifndef SERIALGC 5.322 5.323 @@ -8119,46 +8425,6 @@ 5.324 } 5.325 5.326 5.327 -void 
MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 5.328 - if (reachable(src)) { 5.329 - Assembler::ucomisd(dst, as_Address(src)); 5.330 - } else { 5.331 - lea(rscratch1, src); 5.332 - Assembler::ucomisd(dst, Address(rscratch1, 0)); 5.333 - } 5.334 -} 5.335 - 5.336 -void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 5.337 - if (reachable(src)) { 5.338 - Assembler::ucomiss(dst, as_Address(src)); 5.339 - } else { 5.340 - lea(rscratch1, src); 5.341 - Assembler::ucomiss(dst, Address(rscratch1, 0)); 5.342 - } 5.343 -} 5.344 - 5.345 -void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 5.346 - // Used in sign-bit flipping with aligned address. 5.347 - assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 5.348 - if (reachable(src)) { 5.349 - Assembler::xorpd(dst, as_Address(src)); 5.350 - } else { 5.351 - lea(rscratch1, src); 5.352 - Assembler::xorpd(dst, Address(rscratch1, 0)); 5.353 - } 5.354 -} 5.355 - 5.356 -void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 5.357 - // Used in sign-bit flipping with aligned address. 5.358 - assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 5.359 - if (reachable(src)) { 5.360 - Assembler::xorps(dst, as_Address(src)); 5.361 - } else { 5.362 - lea(rscratch1, src); 5.363 - Assembler::xorps(dst, Address(rscratch1, 0)); 5.364 - } 5.365 -} 5.366 - 5.367 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 5.368 if (VM_Version::supports_cmov()) { 5.369 cmovl(cc, dst, src);
6.1 --- a/src/cpu/x86/vm/assembler_x86.hpp Mon Dec 19 14:16:23 2011 -0800 6.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Tue Dec 20 00:55:02 2011 -0800 6.3 @@ -589,10 +589,21 @@ 6.4 VexSimdPrefix pre, VexOpcode opc, 6.5 bool vex_w, bool vector256); 6.6 6.7 + void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, 6.8 + VexSimdPrefix pre, bool vector256 = false) { 6.9 + vex_prefix(src, nds->encoding(), dst->encoding(), 6.10 + pre, VEX_OPCODE_0F, false, vector256); 6.11 + } 6.12 + 6.13 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, 6.14 VexSimdPrefix pre, VexOpcode opc, 6.15 bool vex_w, bool vector256); 6.16 6.17 + int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, 6.18 + VexSimdPrefix pre, bool vector256 = false) { 6.19 + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), 6.20 + pre, VEX_OPCODE_0F, false, vector256); 6.21 + } 6.22 6.23 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, 6.24 VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, 6.25 @@ -1574,6 +1585,29 @@ 6.26 6.27 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 6.28 6.29 + // AVX 3-operands instructions (encoded with VEX prefix) 6.30 + void vaddsd(XMMRegister dst, XMMRegister nds, Address src); 6.31 + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 6.32 + void vaddss(XMMRegister dst, XMMRegister nds, Address src); 6.33 + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src); 6.34 + void vandpd(XMMRegister dst, XMMRegister nds, Address src); 6.35 + void vandps(XMMRegister dst, XMMRegister nds, Address src); 6.36 + void vdivsd(XMMRegister dst, XMMRegister nds, Address src); 6.37 + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 6.38 + void vdivss(XMMRegister dst, XMMRegister nds, Address src); 6.39 + void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src); 6.40 + void vmulsd(XMMRegister dst, XMMRegister nds, Address src); 6.41 + void 
vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 6.42 + void vmulss(XMMRegister dst, XMMRegister nds, Address src); 6.43 + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src); 6.44 + void vsubsd(XMMRegister dst, XMMRegister nds, Address src); 6.45 + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 6.46 + void vsubss(XMMRegister dst, XMMRegister nds, Address src); 6.47 + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src); 6.48 + void vxorpd(XMMRegister dst, XMMRegister nds, Address src); 6.49 + void vxorps(XMMRegister dst, XMMRegister nds, Address src); 6.50 + 6.51 + 6.52 protected: 6.53 // Next instructions require address alignment 16 bytes SSE mode. 6.54 // They should be called only from corresponding MacroAssembler instructions. 6.55 @@ -2422,6 +2456,53 @@ 6.56 void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } 6.57 void xorps(XMMRegister dst, AddressLiteral src); 6.58 6.59 + // AVX 3-operands instructions 6.60 + 6.61 + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } 6.62 + void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); } 6.63 + void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.64 + 6.65 + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); } 6.66 + void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } 6.67 + void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.68 + 6.69 + void vandpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandpd(dst, nds, src); } 6.70 + void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.71 + 6.72 + void vandps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandps(dst, nds, src); } 6.73 + void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.74 + 6.75 + void vdivsd(XMMRegister dst, 
XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } 6.76 + void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } 6.77 + void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.78 + 6.79 + void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } 6.80 + void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } 6.81 + void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.82 + 6.83 + void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); } 6.84 + void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } 6.85 + void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.86 + 6.87 + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } 6.88 + void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } 6.89 + void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.90 + 6.91 + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } 6.92 + void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } 6.93 + void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.94 + 6.95 + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } 6.96 + void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } 6.97 + void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.98 + 6.99 + void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); } 6.100 + void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.101 + 6.102 + void vxorps(XMMRegister dst, XMMRegister nds, Address src) { 
Assembler::vxorps(dst, nds, src); } 6.103 + void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src); 6.104 + 6.105 + 6.106 // Data 6.107 6.108 void cmov32( Condition cc, Register dst, Address src);
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/src/cpu/x86/vm/x86.ad Tue Dec 20 00:55:02 2011 -0800 7.3 @@ -0,0 +1,777 @@ 7.4 +// 7.5 +// Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 7.6 +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 7.7 +// 7.8 +// This code is free software; you can redistribute it and/or modify it 7.9 +// under the terms of the GNU General Public License version 2 only, as 7.10 +// published by the Free Software Foundation. 7.11 +// 7.12 +// This code is distributed in the hope that it will be useful, but WITHOUT 7.13 +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 7.14 +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 7.15 +// version 2 for more details (a copy is included in the LICENSE file that 7.16 +// accompanied this code). 7.17 +// 7.18 +// You should have received a copy of the GNU General Public License version 7.19 +// 2 along with this work; if not, write to the Free Software Foundation, 7.20 +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 7.21 +// 7.22 +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 7.23 +// or visit www.oracle.com if you need additional information or have any 7.24 +// questions. 7.25 +// 7.26 +// 7.27 + 7.28 +// X86 Common Architecture Description File 7.29 + 7.30 +source %{ 7.31 + // Float masks come from different places depending on platform. 
7.32 +#ifdef _LP64 7.33 + static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 7.34 + static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 7.35 + static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 7.36 + static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 7.37 +#else 7.38 + static address float_signmask() { return (address)float_signmask_pool; } 7.39 + static address float_signflip() { return (address)float_signflip_pool; } 7.40 + static address double_signmask() { return (address)double_signmask_pool; } 7.41 + static address double_signflip() { return (address)double_signflip_pool; } 7.42 +#endif 7.43 +%} 7.44 + 7.45 +// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 7.46 + 7.47 +instruct addF_reg(regF dst, regF src) %{ 7.48 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.49 + match(Set dst (AddF dst src)); 7.50 + 7.51 + format %{ "addss $dst, $src" %} 7.52 + ins_cost(150); 7.53 + ins_encode %{ 7.54 + __ addss($dst$$XMMRegister, $src$$XMMRegister); 7.55 + %} 7.56 + ins_pipe(pipe_slow); 7.57 +%} 7.58 + 7.59 +instruct addF_mem(regF dst, memory src) %{ 7.60 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.61 + match(Set dst (AddF dst (LoadF src))); 7.62 + 7.63 + format %{ "addss $dst, $src" %} 7.64 + ins_cost(150); 7.65 + ins_encode %{ 7.66 + __ addss($dst$$XMMRegister, $src$$Address); 7.67 + %} 7.68 + ins_pipe(pipe_slow); 7.69 +%} 7.70 + 7.71 +instruct addF_imm(regF dst, immF con) %{ 7.72 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.73 + match(Set dst (AddF dst con)); 7.74 + format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 7.75 + ins_cost(150); 7.76 + ins_encode %{ 7.77 + __ addss($dst$$XMMRegister, $constantaddress($con)); 7.78 + %} 7.79 + ins_pipe(pipe_slow); 7.80 +%} 7.81 + 7.82 +instruct vaddF_reg(regF dst, regF src1, regF src2) %{ 7.83 + predicate(UseAVX > 0); 7.84 + match(Set dst (AddF 
src1 src2)); 7.85 + 7.86 + format %{ "vaddss $dst, $src1, $src2" %} 7.87 + ins_cost(150); 7.88 + ins_encode %{ 7.89 + __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 7.90 + %} 7.91 + ins_pipe(pipe_slow); 7.92 +%} 7.93 + 7.94 +instruct vaddF_mem(regF dst, regF src1, memory src2) %{ 7.95 + predicate(UseAVX > 0); 7.96 + match(Set dst (AddF src1 (LoadF src2))); 7.97 + 7.98 + format %{ "vaddss $dst, $src1, $src2" %} 7.99 + ins_cost(150); 7.100 + ins_encode %{ 7.101 + __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 7.102 + %} 7.103 + ins_pipe(pipe_slow); 7.104 +%} 7.105 + 7.106 +instruct vaddF_imm(regF dst, regF src, immF con) %{ 7.107 + predicate(UseAVX > 0); 7.108 + match(Set dst (AddF src con)); 7.109 + 7.110 + format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 7.111 + ins_cost(150); 7.112 + ins_encode %{ 7.113 + __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 7.114 + %} 7.115 + ins_pipe(pipe_slow); 7.116 +%} 7.117 + 7.118 +instruct addD_reg(regD dst, regD src) %{ 7.119 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.120 + match(Set dst (AddD dst src)); 7.121 + 7.122 + format %{ "addsd $dst, $src" %} 7.123 + ins_cost(150); 7.124 + ins_encode %{ 7.125 + __ addsd($dst$$XMMRegister, $src$$XMMRegister); 7.126 + %} 7.127 + ins_pipe(pipe_slow); 7.128 +%} 7.129 + 7.130 +instruct addD_mem(regD dst, memory src) %{ 7.131 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.132 + match(Set dst (AddD dst (LoadD src))); 7.133 + 7.134 + format %{ "addsd $dst, $src" %} 7.135 + ins_cost(150); 7.136 + ins_encode %{ 7.137 + __ addsd($dst$$XMMRegister, $src$$Address); 7.138 + %} 7.139 + ins_pipe(pipe_slow); 7.140 +%} 7.141 + 7.142 +instruct addD_imm(regD dst, immD con) %{ 7.143 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.144 + match(Set dst (AddD dst con)); 7.145 + format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 7.146 + ins_cost(150); 7.147 + ins_encode 
%{ 7.148 + __ addsd($dst$$XMMRegister, $constantaddress($con)); 7.149 + %} 7.150 + ins_pipe(pipe_slow); 7.151 +%} 7.152 + 7.153 +instruct vaddD_reg(regD dst, regD src1, regD src2) %{ 7.154 + predicate(UseAVX > 0); 7.155 + match(Set dst (AddD src1 src2)); 7.156 + 7.157 + format %{ "vaddsd $dst, $src1, $src2" %} 7.158 + ins_cost(150); 7.159 + ins_encode %{ 7.160 + __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 7.161 + %} 7.162 + ins_pipe(pipe_slow); 7.163 +%} 7.164 + 7.165 +instruct vaddD_mem(regD dst, regD src1, memory src2) %{ 7.166 + predicate(UseAVX > 0); 7.167 + match(Set dst (AddD src1 (LoadD src2))); 7.168 + 7.169 + format %{ "vaddsd $dst, $src1, $src2" %} 7.170 + ins_cost(150); 7.171 + ins_encode %{ 7.172 + __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 7.173 + %} 7.174 + ins_pipe(pipe_slow); 7.175 +%} 7.176 + 7.177 +instruct vaddD_imm(regD dst, regD src, immD con) %{ 7.178 + predicate(UseAVX > 0); 7.179 + match(Set dst (AddD src con)); 7.180 + 7.181 + format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 7.182 + ins_cost(150); 7.183 + ins_encode %{ 7.184 + __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 7.185 + %} 7.186 + ins_pipe(pipe_slow); 7.187 +%} 7.188 + 7.189 +instruct subF_reg(regF dst, regF src) %{ 7.190 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.191 + match(Set dst (SubF dst src)); 7.192 + 7.193 + format %{ "subss $dst, $src" %} 7.194 + ins_cost(150); 7.195 + ins_encode %{ 7.196 + __ subss($dst$$XMMRegister, $src$$XMMRegister); 7.197 + %} 7.198 + ins_pipe(pipe_slow); 7.199 +%} 7.200 + 7.201 +instruct subF_mem(regF dst, memory src) %{ 7.202 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.203 + match(Set dst (SubF dst (LoadF src))); 7.204 + 7.205 + format %{ "subss $dst, $src" %} 7.206 + ins_cost(150); 7.207 + ins_encode %{ 7.208 + __ subss($dst$$XMMRegister, $src$$Address); 7.209 + %} 7.210 + ins_pipe(pipe_slow); 7.211 +%} 7.212 + 7.213 
+instruct subF_imm(regF dst, immF con) %{ 7.214 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.215 + match(Set dst (SubF dst con)); 7.216 + format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 7.217 + ins_cost(150); 7.218 + ins_encode %{ 7.219 + __ subss($dst$$XMMRegister, $constantaddress($con)); 7.220 + %} 7.221 + ins_pipe(pipe_slow); 7.222 +%} 7.223 + 7.224 +instruct vsubF_reg(regF dst, regF src1, regF src2) %{ 7.225 + predicate(UseAVX > 0); 7.226 + match(Set dst (SubF src1 src2)); 7.227 + 7.228 + format %{ "vsubss $dst, $src1, $src2" %} 7.229 + ins_cost(150); 7.230 + ins_encode %{ 7.231 + __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 7.232 + %} 7.233 + ins_pipe(pipe_slow); 7.234 +%} 7.235 + 7.236 +instruct vsubF_mem(regF dst, regF src1, memory src2) %{ 7.237 + predicate(UseAVX > 0); 7.238 + match(Set dst (SubF src1 (LoadF src2))); 7.239 + 7.240 + format %{ "vsubss $dst, $src1, $src2" %} 7.241 + ins_cost(150); 7.242 + ins_encode %{ 7.243 + __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 7.244 + %} 7.245 + ins_pipe(pipe_slow); 7.246 +%} 7.247 + 7.248 +instruct vsubF_imm(regF dst, regF src, immF con) %{ 7.249 + predicate(UseAVX > 0); 7.250 + match(Set dst (SubF src con)); 7.251 + 7.252 + format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 7.253 + ins_cost(150); 7.254 + ins_encode %{ 7.255 + __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 7.256 + %} 7.257 + ins_pipe(pipe_slow); 7.258 +%} 7.259 + 7.260 +instruct subD_reg(regD dst, regD src) %{ 7.261 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.262 + match(Set dst (SubD dst src)); 7.263 + 7.264 + format %{ "subsd $dst, $src" %} 7.265 + ins_cost(150); 7.266 + ins_encode %{ 7.267 + __ subsd($dst$$XMMRegister, $src$$XMMRegister); 7.268 + %} 7.269 + ins_pipe(pipe_slow); 7.270 +%} 7.271 + 7.272 +instruct subD_mem(regD dst, memory src) %{ 7.273 + predicate((UseSSE>=2) && (UseAVX == 0)); 
7.274 + match(Set dst (SubD dst (LoadD src))); 7.275 + 7.276 + format %{ "subsd $dst, $src" %} 7.277 + ins_cost(150); 7.278 + ins_encode %{ 7.279 + __ subsd($dst$$XMMRegister, $src$$Address); 7.280 + %} 7.281 + ins_pipe(pipe_slow); 7.282 +%} 7.283 + 7.284 +instruct subD_imm(regD dst, immD con) %{ 7.285 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.286 + match(Set dst (SubD dst con)); 7.287 + format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 7.288 + ins_cost(150); 7.289 + ins_encode %{ 7.290 + __ subsd($dst$$XMMRegister, $constantaddress($con)); 7.291 + %} 7.292 + ins_pipe(pipe_slow); 7.293 +%} 7.294 + 7.295 +instruct vsubD_reg(regD dst, regD src1, regD src2) %{ 7.296 + predicate(UseAVX > 0); 7.297 + match(Set dst (SubD src1 src2)); 7.298 + 7.299 + format %{ "vsubsd $dst, $src1, $src2" %} 7.300 + ins_cost(150); 7.301 + ins_encode %{ 7.302 + __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 7.303 + %} 7.304 + ins_pipe(pipe_slow); 7.305 +%} 7.306 + 7.307 +instruct vsubD_mem(regD dst, regD src1, memory src2) %{ 7.308 + predicate(UseAVX > 0); 7.309 + match(Set dst (SubD src1 (LoadD src2))); 7.310 + 7.311 + format %{ "vsubsd $dst, $src1, $src2" %} 7.312 + ins_cost(150); 7.313 + ins_encode %{ 7.314 + __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 7.315 + %} 7.316 + ins_pipe(pipe_slow); 7.317 +%} 7.318 + 7.319 +instruct vsubD_imm(regD dst, regD src, immD con) %{ 7.320 + predicate(UseAVX > 0); 7.321 + match(Set dst (SubD src con)); 7.322 + 7.323 + format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 7.324 + ins_cost(150); 7.325 + ins_encode %{ 7.326 + __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 7.327 + %} 7.328 + ins_pipe(pipe_slow); 7.329 +%} 7.330 + 7.331 +instruct mulF_reg(regF dst, regF src) %{ 7.332 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.333 + match(Set dst (MulF dst src)); 7.334 + 7.335 + format %{ "mulss $dst, $src" %} 
7.336 + ins_cost(150); 7.337 + ins_encode %{ 7.338 + __ mulss($dst$$XMMRegister, $src$$XMMRegister); 7.339 + %} 7.340 + ins_pipe(pipe_slow); 7.341 +%} 7.342 + 7.343 +instruct mulF_mem(regF dst, memory src) %{ 7.344 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.345 + match(Set dst (MulF dst (LoadF src))); 7.346 + 7.347 + format %{ "mulss $dst, $src" %} 7.348 + ins_cost(150); 7.349 + ins_encode %{ 7.350 + __ mulss($dst$$XMMRegister, $src$$Address); 7.351 + %} 7.352 + ins_pipe(pipe_slow); 7.353 +%} 7.354 + 7.355 +instruct mulF_imm(regF dst, immF con) %{ 7.356 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.357 + match(Set dst (MulF dst con)); 7.358 + format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 7.359 + ins_cost(150); 7.360 + ins_encode %{ 7.361 + __ mulss($dst$$XMMRegister, $constantaddress($con)); 7.362 + %} 7.363 + ins_pipe(pipe_slow); 7.364 +%} 7.365 + 7.366 +instruct vmulF_reg(regF dst, regF src1, regF src2) %{ 7.367 + predicate(UseAVX > 0); 7.368 + match(Set dst (MulF src1 src2)); 7.369 + 7.370 + format %{ "vmulss $dst, $src1, $src2" %} 7.371 + ins_cost(150); 7.372 + ins_encode %{ 7.373 + __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 7.374 + %} 7.375 + ins_pipe(pipe_slow); 7.376 +%} 7.377 + 7.378 +instruct vmulF_mem(regF dst, regF src1, memory src2) %{ 7.379 + predicate(UseAVX > 0); 7.380 + match(Set dst (MulF src1 (LoadF src2))); 7.381 + 7.382 + format %{ "vmulss $dst, $src1, $src2" %} 7.383 + ins_cost(150); 7.384 + ins_encode %{ 7.385 + __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 7.386 + %} 7.387 + ins_pipe(pipe_slow); 7.388 +%} 7.389 + 7.390 +instruct vmulF_imm(regF dst, regF src, immF con) %{ 7.391 + predicate(UseAVX > 0); 7.392 + match(Set dst (MulF src con)); 7.393 + 7.394 + format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 7.395 + ins_cost(150); 7.396 + ins_encode %{ 7.397 + __ vmulss($dst$$XMMRegister, $src$$XMMRegister, 
$constantaddress($con)); 7.398 + %} 7.399 + ins_pipe(pipe_slow); 7.400 +%} 7.401 + 7.402 +instruct mulD_reg(regD dst, regD src) %{ 7.403 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.404 + match(Set dst (MulD dst src)); 7.405 + 7.406 + format %{ "mulsd $dst, $src" %} 7.407 + ins_cost(150); 7.408 + ins_encode %{ 7.409 + __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 7.410 + %} 7.411 + ins_pipe(pipe_slow); 7.412 +%} 7.413 + 7.414 +instruct mulD_mem(regD dst, memory src) %{ 7.415 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.416 + match(Set dst (MulD dst (LoadD src))); 7.417 + 7.418 + format %{ "mulsd $dst, $src" %} 7.419 + ins_cost(150); 7.420 + ins_encode %{ 7.421 + __ mulsd($dst$$XMMRegister, $src$$Address); 7.422 + %} 7.423 + ins_pipe(pipe_slow); 7.424 +%} 7.425 + 7.426 +instruct mulD_imm(regD dst, immD con) %{ 7.427 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.428 + match(Set dst (MulD dst con)); 7.429 + format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 7.430 + ins_cost(150); 7.431 + ins_encode %{ 7.432 + __ mulsd($dst$$XMMRegister, $constantaddress($con)); 7.433 + %} 7.434 + ins_pipe(pipe_slow); 7.435 +%} 7.436 + 7.437 +instruct vmulD_reg(regD dst, regD src1, regD src2) %{ 7.438 + predicate(UseAVX > 0); 7.439 + match(Set dst (MulD src1 src2)); 7.440 + 7.441 + format %{ "vmulsd $dst, $src1, $src2" %} 7.442 + ins_cost(150); 7.443 + ins_encode %{ 7.444 + __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 7.445 + %} 7.446 + ins_pipe(pipe_slow); 7.447 +%} 7.448 + 7.449 +instruct vmulD_mem(regD dst, regD src1, memory src2) %{ 7.450 + predicate(UseAVX > 0); 7.451 + match(Set dst (MulD src1 (LoadD src2))); 7.452 + 7.453 + format %{ "vmulsd $dst, $src1, $src2" %} 7.454 + ins_cost(150); 7.455 + ins_encode %{ 7.456 + __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 7.457 + %} 7.458 + ins_pipe(pipe_slow); 7.459 +%} 7.460 + 7.461 +instruct vmulD_imm(regD dst, regD src, immD con) %{ 7.462 + 
predicate(UseAVX > 0); 7.463 + match(Set dst (MulD src con)); 7.464 + 7.465 + format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 7.466 + ins_cost(150); 7.467 + ins_encode %{ 7.468 + __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 7.469 + %} 7.470 + ins_pipe(pipe_slow); 7.471 +%} 7.472 + 7.473 +instruct divF_reg(regF dst, regF src) %{ 7.474 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.475 + match(Set dst (DivF dst src)); 7.476 + 7.477 + format %{ "divss $dst, $src" %} 7.478 + ins_cost(150); 7.479 + ins_encode %{ 7.480 + __ divss($dst$$XMMRegister, $src$$XMMRegister); 7.481 + %} 7.482 + ins_pipe(pipe_slow); 7.483 +%} 7.484 + 7.485 +instruct divF_mem(regF dst, memory src) %{ 7.486 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.487 + match(Set dst (DivF dst (LoadF src))); 7.488 + 7.489 + format %{ "divss $dst, $src" %} 7.490 + ins_cost(150); 7.491 + ins_encode %{ 7.492 + __ divss($dst$$XMMRegister, $src$$Address); 7.493 + %} 7.494 + ins_pipe(pipe_slow); 7.495 +%} 7.496 + 7.497 +instruct divF_imm(regF dst, immF con) %{ 7.498 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.499 + match(Set dst (DivF dst con)); 7.500 + format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 7.501 + ins_cost(150); 7.502 + ins_encode %{ 7.503 + __ divss($dst$$XMMRegister, $constantaddress($con)); 7.504 + %} 7.505 + ins_pipe(pipe_slow); 7.506 +%} 7.507 + 7.508 +instruct vdivF_reg(regF dst, regF src1, regF src2) %{ 7.509 + predicate(UseAVX > 0); 7.510 + match(Set dst (DivF src1 src2)); 7.511 + 7.512 + format %{ "vdivss $dst, $src1, $src2" %} 7.513 + ins_cost(150); 7.514 + ins_encode %{ 7.515 + __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 7.516 + %} 7.517 + ins_pipe(pipe_slow); 7.518 +%} 7.519 + 7.520 +instruct vdivF_mem(regF dst, regF src1, memory src2) %{ 7.521 + predicate(UseAVX > 0); 7.522 + match(Set dst (DivF src1 (LoadF src2))); 7.523 + 7.524 + format %{ "vdivss $dst, 
$src1, $src2" %} 7.525 + ins_cost(150); 7.526 + ins_encode %{ 7.527 + __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 7.528 + %} 7.529 + ins_pipe(pipe_slow); 7.530 +%} 7.531 + 7.532 +instruct vdivF_imm(regF dst, regF src, immF con) %{ 7.533 + predicate(UseAVX > 0); 7.534 + match(Set dst (DivF src con)); 7.535 + 7.536 + format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 7.537 + ins_cost(150); 7.538 + ins_encode %{ 7.539 + __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 7.540 + %} 7.541 + ins_pipe(pipe_slow); 7.542 +%} 7.543 + 7.544 +instruct divD_reg(regD dst, regD src) %{ 7.545 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.546 + match(Set dst (DivD dst src)); 7.547 + 7.548 + format %{ "divsd $dst, $src" %} 7.549 + ins_cost(150); 7.550 + ins_encode %{ 7.551 + __ divsd($dst$$XMMRegister, $src$$XMMRegister); 7.552 + %} 7.553 + ins_pipe(pipe_slow); 7.554 +%} 7.555 + 7.556 +instruct divD_mem(regD dst, memory src) %{ 7.557 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.558 + match(Set dst (DivD dst (LoadD src))); 7.559 + 7.560 + format %{ "divsd $dst, $src" %} 7.561 + ins_cost(150); 7.562 + ins_encode %{ 7.563 + __ divsd($dst$$XMMRegister, $src$$Address); 7.564 + %} 7.565 + ins_pipe(pipe_slow); 7.566 +%} 7.567 + 7.568 +instruct divD_imm(regD dst, immD con) %{ 7.569 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.570 + match(Set dst (DivD dst con)); 7.571 + format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 7.572 + ins_cost(150); 7.573 + ins_encode %{ 7.574 + __ divsd($dst$$XMMRegister, $constantaddress($con)); 7.575 + %} 7.576 + ins_pipe(pipe_slow); 7.577 +%} 7.578 + 7.579 +instruct vdivD_reg(regD dst, regD src1, regD src2) %{ 7.580 + predicate(UseAVX > 0); 7.581 + match(Set dst (DivD src1 src2)); 7.582 + 7.583 + format %{ "vdivsd $dst, $src1, $src2" %} 7.584 + ins_cost(150); 7.585 + ins_encode %{ 7.586 + __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, 
$src2$$XMMRegister); 7.587 + %} 7.588 + ins_pipe(pipe_slow); 7.589 +%} 7.590 + 7.591 +instruct vdivD_mem(regD dst, regD src1, memory src2) %{ 7.592 + predicate(UseAVX > 0); 7.593 + match(Set dst (DivD src1 (LoadD src2))); 7.594 + 7.595 + format %{ "vdivsd $dst, $src1, $src2" %} 7.596 + ins_cost(150); 7.597 + ins_encode %{ 7.598 + __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 7.599 + %} 7.600 + ins_pipe(pipe_slow); 7.601 +%} 7.602 + 7.603 +instruct vdivD_imm(regD dst, regD src, immD con) %{ 7.604 + predicate(UseAVX > 0); 7.605 + match(Set dst (DivD src con)); 7.606 + 7.607 + format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 7.608 + ins_cost(150); 7.609 + ins_encode %{ 7.610 + __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 7.611 + %} 7.612 + ins_pipe(pipe_slow); 7.613 +%} 7.614 + 7.615 +instruct absF_reg(regF dst) %{ 7.616 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.617 + match(Set dst (AbsF dst)); 7.618 + ins_cost(150); 7.619 + format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 7.620 + ins_encode %{ 7.621 + __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 7.622 + %} 7.623 + ins_pipe(pipe_slow); 7.624 +%} 7.625 + 7.626 +instruct vabsF_reg(regF dst, regF src) %{ 7.627 + predicate(UseAVX > 0); 7.628 + match(Set dst (AbsF src)); 7.629 + ins_cost(150); 7.630 + format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 7.631 + ins_encode %{ 7.632 + __ vandps($dst$$XMMRegister, $src$$XMMRegister, 7.633 + ExternalAddress(float_signmask())); 7.634 + %} 7.635 + ins_pipe(pipe_slow); 7.636 +%} 7.637 + 7.638 +instruct absD_reg(regD dst) %{ 7.639 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.640 + match(Set dst (AbsD dst)); 7.641 + ins_cost(150); 7.642 + format %{ "andpd $dst, [0x7fffffffffffffff]\t" 7.643 + "# abs double by sign masking" %} 7.644 + ins_encode %{ 7.645 + __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 7.646 + 
%} 7.647 + ins_pipe(pipe_slow); 7.648 +%} 7.649 + 7.650 +instruct vabsD_reg(regD dst, regD src) %{ 7.651 + predicate(UseAVX > 0); 7.652 + match(Set dst (AbsD src)); 7.653 + ins_cost(150); 7.654 + format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 7.655 + "# abs double by sign masking" %} 7.656 + ins_encode %{ 7.657 + __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 7.658 + ExternalAddress(double_signmask())); 7.659 + %} 7.660 + ins_pipe(pipe_slow); 7.661 +%} 7.662 + 7.663 +instruct negF_reg(regF dst) %{ 7.664 + predicate((UseSSE>=1) && (UseAVX == 0)); 7.665 + match(Set dst (NegF dst)); 7.666 + ins_cost(150); 7.667 + format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 7.668 + ins_encode %{ 7.669 + __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 7.670 + %} 7.671 + ins_pipe(pipe_slow); 7.672 +%} 7.673 + 7.674 +instruct vnegF_reg(regF dst, regF src) %{ 7.675 + predicate(UseAVX > 0); 7.676 + match(Set dst (NegF src)); 7.677 + ins_cost(150); 7.678 + format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 7.679 + ins_encode %{ 7.680 + __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 7.681 + ExternalAddress(float_signflip())); 7.682 + %} 7.683 + ins_pipe(pipe_slow); 7.684 +%} 7.685 + 7.686 +instruct negD_reg(regD dst) %{ 7.687 + predicate((UseSSE>=2) && (UseAVX == 0)); 7.688 + match(Set dst (NegD dst)); 7.689 + ins_cost(150); 7.690 + format %{ "xorpd $dst, [0x8000000000000000]\t" 7.691 + "# neg double by sign flipping" %} 7.692 + ins_encode %{ 7.693 + __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 7.694 + %} 7.695 + ins_pipe(pipe_slow); 7.696 +%} 7.697 + 7.698 +instruct vnegD_reg(regD dst, regD src) %{ 7.699 + predicate(UseAVX > 0); 7.700 + match(Set dst (NegD src)); 7.701 + ins_cost(150); 7.702 + format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 7.703 + "# neg double by sign flipping" %} 7.704 + ins_encode %{ 7.705 + __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 7.706 + 
ExternalAddress(double_signflip())); 7.707 + %} 7.708 + ins_pipe(pipe_slow); 7.709 +%} 7.710 + 7.711 +instruct sqrtF_reg(regF dst, regF src) %{ 7.712 + predicate(UseSSE>=1); 7.713 + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 7.714 + 7.715 + format %{ "sqrtss $dst, $src" %} 7.716 + ins_cost(150); 7.717 + ins_encode %{ 7.718 + __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 7.719 + %} 7.720 + ins_pipe(pipe_slow); 7.721 +%} 7.722 + 7.723 +instruct sqrtF_mem(regF dst, memory src) %{ 7.724 + predicate(UseSSE>=1); 7.725 + match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 7.726 + 7.727 + format %{ "sqrtss $dst, $src" %} 7.728 + ins_cost(150); 7.729 + ins_encode %{ 7.730 + __ sqrtss($dst$$XMMRegister, $src$$Address); 7.731 + %} 7.732 + ins_pipe(pipe_slow); 7.733 +%} 7.734 + 7.735 +instruct sqrtF_imm(regF dst, immF con) %{ 7.736 + predicate(UseSSE>=1); 7.737 + match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 7.738 + format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 7.739 + ins_cost(150); 7.740 + ins_encode %{ 7.741 + __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 7.742 + %} 7.743 + ins_pipe(pipe_slow); 7.744 +%} 7.745 + 7.746 +instruct sqrtD_reg(regD dst, regD src) %{ 7.747 + predicate(UseSSE>=2); 7.748 + match(Set dst (SqrtD src)); 7.749 + 7.750 + format %{ "sqrtsd $dst, $src" %} 7.751 + ins_cost(150); 7.752 + ins_encode %{ 7.753 + __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 7.754 + %} 7.755 + ins_pipe(pipe_slow); 7.756 +%} 7.757 + 7.758 +instruct sqrtD_mem(regD dst, memory src) %{ 7.759 + predicate(UseSSE>=2); 7.760 + match(Set dst (SqrtD (LoadD src))); 7.761 + 7.762 + format %{ "sqrtsd $dst, $src" %} 7.763 + ins_cost(150); 7.764 + ins_encode %{ 7.765 + __ sqrtsd($dst$$XMMRegister, $src$$Address); 7.766 + %} 7.767 + ins_pipe(pipe_slow); 7.768 +%} 7.769 + 7.770 +instruct sqrtD_imm(regD dst, immD con) %{ 7.771 + predicate(UseSSE>=2); 7.772 + match(Set dst (SqrtD con)); 7.773 + format %{ "sqrtsd $dst, 
[$constantaddress]\t# load from constant table: double=$con" %} 7.774 + ins_cost(150); 7.775 + ins_encode %{ 7.776 + __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 7.777 + %} 7.778 + ins_pipe(pipe_slow); 7.779 +%} 7.780 +
8.1 --- a/src/cpu/x86/vm/x86_32.ad Mon Dec 19 14:16:23 2011 -0800 8.2 +++ b/src/cpu/x86/vm/x86_32.ad Tue Dec 20 00:55:02 2011 -0800 8.3 @@ -1775,7 +1775,7 @@ 8.4 emit_cc(cbuf, $secondary, $cop$$cmpcode); 8.5 %} 8.6 8.7 - enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV 8.8 + enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 8.9 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 8.10 emit_d8(cbuf, op >> 8 ); 8.11 emit_d8(cbuf, op & 255); 8.12 @@ -2063,14 +2063,14 @@ 8.13 $$$emit32$src$$constant; 8.14 %} 8.15 8.16 - enc_class Con32F_as_bits(immF src) %{ // storeF_imm 8.17 + enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm 8.18 // Output Float immediate bits 8.19 jfloat jf = $src$$constant; 8.20 int jf_as_bits = jint_cast( jf ); 8.21 emit_d32(cbuf, jf_as_bits); 8.22 %} 8.23 8.24 - enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm 8.25 + enc_class Con32F_as_bits(immF src) %{ // storeX_imm 8.26 // Output Float immediate bits 8.27 jfloat jf = $src$$constant; 8.28 int jf_as_bits = jint_cast( jf ); 8.29 @@ -2283,7 +2283,7 @@ 8.30 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 8.31 %} 8.32 8.33 - enc_class enc_FP_store(memory mem, regD src) %{ 8.34 + enc_class enc_FPR_store(memory mem, regDPR src) %{ 8.35 // If src is FPR1, we can just FST to store it. 8.36 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 8.37 int reg_encoding = 0x2; // Just store 8.38 @@ -2432,7 +2432,7 @@ 8.39 8.40 // ----------------- Encodings for floating point unit ----------------- 8.41 // May leave result in FPU-TOS or FPU reg depending on opcodes 8.42 - enc_class OpcReg_F (regF src) %{ // FMUL, FDIV 8.43 + enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 8.44 $$$emit8$primary; 8.45 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 8.46 %} 8.47 @@ -2444,17 +2444,17 @@ 8.48 %} 8.49 8.50 // !!!!! 
equivalent to Pop_Reg_F 8.51 - enc_class Pop_Reg_D( regD dst ) %{ 8.52 + enc_class Pop_Reg_DPR( regDPR dst ) %{ 8.53 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 8.54 emit_d8( cbuf, 0xD8+$dst$$reg ); 8.55 %} 8.56 8.57 - enc_class Push_Reg_D( regD dst ) %{ 8.58 + enc_class Push_Reg_DPR( regDPR dst ) %{ 8.59 emit_opcode( cbuf, 0xD9 ); 8.60 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 8.61 %} 8.62 8.63 - enc_class strictfp_bias1( regD dst ) %{ 8.64 + enc_class strictfp_bias1( regDPR dst ) %{ 8.65 emit_opcode( cbuf, 0xDB ); // FLD m80real 8.66 emit_opcode( cbuf, 0x2D ); 8.67 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 8.68 @@ -2462,7 +2462,7 @@ 8.69 emit_opcode( cbuf, 0xC8+$dst$$reg ); 8.70 %} 8.71 8.72 - enc_class strictfp_bias2( regD dst ) %{ 8.73 + enc_class strictfp_bias2( regDPR dst ) %{ 8.74 emit_opcode( cbuf, 0xDB ); // FLD m80real 8.75 emit_opcode( cbuf, 0x2D ); 8.76 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 8.77 @@ -2488,39 +2488,29 @@ 8.78 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 8.79 %} 8.80 8.81 - // Push the float in stackSlot 'src' onto FP-stack 8.82 - enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src] 8.83 - store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp ); 8.84 - %} 8.85 - 8.86 - // Push the double in stackSlot 'src' onto FP-stack 8.87 - enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src] 8.88 - store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp ); 8.89 - %} 8.90 - 8.91 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 8.92 - enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 8.93 + enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 8.94 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 8.95 %} 8.96 8.97 // Same as Pop_Mem_F except for opcode 8.98 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 8.99 - enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 8.100 + enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 
8.101 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 8.102 %} 8.103 8.104 - enc_class Pop_Reg_F( regF dst ) %{ 8.105 + enc_class Pop_Reg_FPR( regFPR dst ) %{ 8.106 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 8.107 emit_d8( cbuf, 0xD8+$dst$$reg ); 8.108 %} 8.109 8.110 - enc_class Push_Reg_F( regF dst ) %{ 8.111 + enc_class Push_Reg_FPR( regFPR dst ) %{ 8.112 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 8.113 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 8.114 %} 8.115 8.116 // Push FPU's float to a stack-slot, and pop FPU-stack 8.117 - enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{ 8.118 + enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 8.119 int pop = 0x02; 8.120 if ($src$$reg != FPR1L_enc) { 8.121 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 8.122 @@ -2531,7 +2521,7 @@ 8.123 %} 8.124 8.125 // Push FPU's double to a stack-slot, and pop FPU-stack 8.126 - enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{ 8.127 + enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 8.128 int pop = 0x02; 8.129 if ($src$$reg != FPR1L_enc) { 8.130 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 8.131 @@ -2542,7 +2532,7 @@ 8.132 %} 8.133 8.134 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack 8.135 - enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{ 8.136 + enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 8.137 int pop = 0xD0 - 1; // -1 since we skip FLD 8.138 if ($src$$reg != FPR1L_enc) { 8.139 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 8.140 @@ -2554,16 +2544,7 @@ 8.141 %} 8.142 8.143 8.144 - enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{ 8.145 - MacroAssembler masm(&cbuf); 8.146 - masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg 8.147 - masm.fmul( $src2$$reg+0); // value at TOS 8.148 - masm.fadd( $src$$reg+0); // value at TOS 8.149 - masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store 8.150 - %} 8.151 - 8.152 - 8.153 - enc_class Push_Reg_Mod_D( regD dst, regD src) %{ 8.154 + enc_class Push_Reg_Mod_DPR( 
regDPR dst, regDPR src) %{ 8.155 // load dst in FPR0 8.156 emit_opcode( cbuf, 0xD9 ); 8.157 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 8.158 @@ -2581,7 +2562,7 @@ 8.159 } 8.160 %} 8.161 8.162 - enc_class Push_ModD_encoding(regXD src0, regXD src1) %{ 8.163 + enc_class Push_ModD_encoding(regD src0, regD src1) %{ 8.164 MacroAssembler _masm(&cbuf); 8.165 __ subptr(rsp, 8); 8.166 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 8.167 @@ -2590,7 +2571,7 @@ 8.168 __ fld_d(Address(rsp, 0)); 8.169 %} 8.170 8.171 - enc_class Push_ModX_encoding(regX src0, regX src1) %{ 8.172 + enc_class Push_ModF_encoding(regF src0, regF src1) %{ 8.173 MacroAssembler _masm(&cbuf); 8.174 __ subptr(rsp, 4); 8.175 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 8.176 @@ -2599,21 +2580,21 @@ 8.177 __ fld_s(Address(rsp, 0)); 8.178 %} 8.179 8.180 - enc_class Push_ResultXD(regXD dst) %{ 8.181 + enc_class Push_ResultD(regD dst) %{ 8.182 MacroAssembler _masm(&cbuf); 8.183 __ fstp_d(Address(rsp, 0)); 8.184 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 8.185 __ addptr(rsp, 8); 8.186 %} 8.187 8.188 - enc_class Push_ResultX(regX dst, immI d8) %{ 8.189 + enc_class Push_ResultF(regF dst, immI d8) %{ 8.190 MacroAssembler _masm(&cbuf); 8.191 __ fstp_s(Address(rsp, 0)); 8.192 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 8.193 __ addptr(rsp, $d8$$constant); 8.194 %} 8.195 8.196 - enc_class Push_SrcXD(regXD src) %{ 8.197 + enc_class Push_SrcD(regD src) %{ 8.198 MacroAssembler _masm(&cbuf); 8.199 __ subptr(rsp, 8); 8.200 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 8.201 @@ -2630,7 +2611,7 @@ 8.202 __ addptr(rsp, 8); 8.203 %} 8.204 8.205 - enc_class push_xmm_to_fpr1(regXD src) %{ 8.206 + enc_class push_xmm_to_fpr1(regD src) %{ 8.207 MacroAssembler _masm(&cbuf); 8.208 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 8.209 __ fld_d(Address(rsp, 0)); 8.210 @@ -2675,10 +2656,7 @@ 8.211 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false); 8.212 %} 8.213 8.214 -// enc_class Pop_Reg_Mod_D( regD dst, regD src) 8.215 -// was 
replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X() 8.216 - 8.217 - enc_class Push_Result_Mod_D( regD src) %{ 8.218 + enc_class Push_Result_Mod_DPR( regDPR src) %{ 8.219 if ($src$$reg != FPR1L_enc) { 8.220 // fincstp 8.221 emit_opcode (cbuf, 0xD9); 8.222 @@ -2707,7 +2685,7 @@ 8.223 emit_opcode( cbuf, 0x05 ); 8.224 %} 8.225 8.226 - enc_class emitModD() %{ 8.227 + enc_class emitModDPR() %{ 8.228 // fprem must be iterative 8.229 // :: loop 8.230 // fprem 8.231 @@ -3587,7 +3565,7 @@ 8.232 // 'zero', store the darned double down as an int, and reset the 8.233 // rounding mode to 'nearest'. The hardware throws an exception which 8.234 // patches up the correct value directly to the stack. 8.235 - enc_class D2I_encoding( regD src ) %{ 8.236 + enc_class DPR2I_encoding( regDPR src ) %{ 8.237 // Flip to round-to-zero mode. We attempted to allow invalid-op 8.238 // exceptions here, so that a NAN or other corner-case value will 8.239 // thrown an exception (but normal values get converted at full speed). 8.240 @@ -3630,7 +3608,7 @@ 8.241 // Carry on here... 8.242 %} 8.243 8.244 - enc_class D2L_encoding( regD src ) %{ 8.245 + enc_class DPR2L_encoding( regDPR src ) %{ 8.246 emit_opcode(cbuf,0xD9); // FLDCW trunc 8.247 emit_opcode(cbuf,0x2D); 8.248 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 8.249 @@ -3672,27 +3650,27 @@ 8.250 // Carry on here... 
8.251 %} 8.252 8.253 - enc_class FMul_ST_reg( eRegF src1 ) %{ 8.254 + enc_class FMul_ST_reg( eRegFPR src1 ) %{ 8.255 // Operand was loaded from memory into fp ST (stack top) 8.256 // FMUL ST,$src /* D8 C8+i */ 8.257 emit_opcode(cbuf, 0xD8); 8.258 emit_opcode(cbuf, 0xC8 + $src1$$reg); 8.259 %} 8.260 8.261 - enc_class FAdd_ST_reg( eRegF src2 ) %{ 8.262 + enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 8.263 // FADDP ST,src2 /* D8 C0+i */ 8.264 emit_opcode(cbuf, 0xD8); 8.265 emit_opcode(cbuf, 0xC0 + $src2$$reg); 8.266 //could use FADDP src2,fpST /* DE C0+i */ 8.267 %} 8.268 8.269 - enc_class FAddP_reg_ST( eRegF src2 ) %{ 8.270 + enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 8.271 // FADDP src2,ST /* DE C0+i */ 8.272 emit_opcode(cbuf, 0xDE); 8.273 emit_opcode(cbuf, 0xC0 + $src2$$reg); 8.274 %} 8.275 8.276 - enc_class subF_divF_encode( eRegF src1, eRegF src2) %{ 8.277 + enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 8.278 // Operand has been loaded into fp ST (stack top) 8.279 // FSUB ST,$src1 8.280 emit_opcode(cbuf, 0xD8); 8.281 @@ -3703,7 +3681,7 @@ 8.282 emit_opcode(cbuf, 0xF0 + $src2$$reg); 8.283 %} 8.284 8.285 - enc_class MulFAddF (eRegF src1, eRegF src2) %{ 8.286 + enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 8.287 // Operand was loaded from memory into fp ST (stack top) 8.288 // FADD ST,$src /* D8 C0+i */ 8.289 emit_opcode(cbuf, 0xD8); 8.290 @@ -3715,7 +3693,7 @@ 8.291 %} 8.292 8.293 8.294 - enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{ 8.295 + enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 8.296 // Operand was loaded from memory into fp ST (stack top) 8.297 // FADD ST,$src /* D8 C0+i */ 8.298 emit_opcode(cbuf, 0xD8); 8.299 @@ -4148,7 +4126,7 @@ 8.300 %} 8.301 8.302 //Double Immediate zero 8.303 -operand immD0() %{ 8.304 +operand immDPR0() %{ 8.305 // Do additional (and counter-intuitive) test against NaN to work around VC++ 8.306 // bug that generates code such that NaNs compare equal to 0.0 8.307 predicate( UseSSE<=1 && n->getd() 
== 0.0 && !g_isnan(n->getd()) ); 8.308 @@ -4160,7 +4138,7 @@ 8.309 %} 8.310 8.311 // Double Immediate one 8.312 -operand immD1() %{ 8.313 +operand immDPR1() %{ 8.314 predicate( UseSSE<=1 && n->getd() == 1.0 ); 8.315 match(ConD); 8.316 8.317 @@ -4170,7 +4148,7 @@ 8.318 %} 8.319 8.320 // Double Immediate 8.321 -operand immD() %{ 8.322 +operand immDPR() %{ 8.323 predicate(UseSSE<=1); 8.324 match(ConD); 8.325 8.326 @@ -4179,7 +4157,7 @@ 8.327 interface(CONST_INTER); 8.328 %} 8.329 8.330 -operand immXD() %{ 8.331 +operand immD() %{ 8.332 predicate(UseSSE>=2); 8.333 match(ConD); 8.334 8.335 @@ -4189,7 +4167,7 @@ 8.336 %} 8.337 8.338 // Double Immediate zero 8.339 -operand immXD0() %{ 8.340 +operand immD0() %{ 8.341 // Do additional (and counter-intuitive) test against NaN to work around VC++ 8.342 // bug that generates code such that NaNs compare equal to 0.0 AND do not 8.343 // compare equal to -0.0. 8.344 @@ -4201,7 +4179,7 @@ 8.345 %} 8.346 8.347 // Float Immediate zero 8.348 -operand immF0() %{ 8.349 +operand immFPR0() %{ 8.350 predicate(UseSSE == 0 && n->getf() == 0.0F); 8.351 match(ConF); 8.352 8.353 @@ -4211,7 +4189,7 @@ 8.354 %} 8.355 8.356 // Float Immediate one 8.357 -operand immF1() %{ 8.358 +operand immFPR1() %{ 8.359 predicate(UseSSE == 0 && n->getf() == 1.0F); 8.360 match(ConF); 8.361 8.362 @@ -4221,7 +4199,7 @@ 8.363 %} 8.364 8.365 // Float Immediate 8.366 -operand immF() %{ 8.367 +operand immFPR() %{ 8.368 predicate( UseSSE == 0 ); 8.369 match(ConF); 8.370 8.371 @@ -4231,7 +4209,7 @@ 8.372 %} 8.373 8.374 // Float Immediate 8.375 -operand immXF() %{ 8.376 +operand immF() %{ 8.377 predicate(UseSSE >= 1); 8.378 match(ConF); 8.379 8.380 @@ -4241,7 +4219,7 @@ 8.381 %} 8.382 8.383 // Float Immediate zero. 
Zero and not -0.0 8.384 -operand immXF0() %{ 8.385 +operand immF0() %{ 8.386 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 8.387 match(ConF); 8.388 8.389 @@ -4617,7 +4595,7 @@ 8.390 %} 8.391 8.392 // Float register operands 8.393 -operand regD() %{ 8.394 +operand regDPR() %{ 8.395 predicate( UseSSE < 2 ); 8.396 constraint(ALLOC_IN_RC(dbl_reg)); 8.397 match(RegD); 8.398 @@ -4627,7 +4605,7 @@ 8.399 interface(REG_INTER); 8.400 %} 8.401 8.402 -operand regDPR1(regD reg) %{ 8.403 +operand regDPR1(regDPR reg) %{ 8.404 predicate( UseSSE < 2 ); 8.405 constraint(ALLOC_IN_RC(dbl_reg0)); 8.406 match(reg); 8.407 @@ -4635,7 +4613,7 @@ 8.408 interface(REG_INTER); 8.409 %} 8.410 8.411 -operand regDPR2(regD reg) %{ 8.412 +operand regDPR2(regDPR reg) %{ 8.413 predicate( UseSSE < 2 ); 8.414 constraint(ALLOC_IN_RC(dbl_reg1)); 8.415 match(reg); 8.416 @@ -4643,7 +4621,7 @@ 8.417 interface(REG_INTER); 8.418 %} 8.419 8.420 -operand regnotDPR1(regD reg) %{ 8.421 +operand regnotDPR1(regDPR reg) %{ 8.422 predicate( UseSSE < 2 ); 8.423 constraint(ALLOC_IN_RC(dbl_notreg0)); 8.424 match(reg); 8.425 @@ -4652,18 +4630,18 @@ 8.426 %} 8.427 8.428 // XMM Double register operands 8.429 -operand regXD() %{ 8.430 +operand regD() %{ 8.431 predicate( UseSSE>=2 ); 8.432 constraint(ALLOC_IN_RC(xdb_reg)); 8.433 match(RegD); 8.434 - match(regXD6); 8.435 - match(regXD7); 8.436 + match(regD6); 8.437 + match(regD7); 8.438 format %{ %} 8.439 interface(REG_INTER); 8.440 %} 8.441 8.442 // XMM6 double register operands 8.443 -operand regXD6(regXD reg) %{ 8.444 +operand regD6(regD reg) %{ 8.445 predicate( UseSSE>=2 ); 8.446 constraint(ALLOC_IN_RC(xdb_reg6)); 8.447 match(reg); 8.448 @@ -4672,7 +4650,7 @@ 8.449 %} 8.450 8.451 // XMM7 double register operands 8.452 -operand regXD7(regXD reg) %{ 8.453 +operand regD7(regD reg) %{ 8.454 predicate( UseSSE>=2 ); 8.455 constraint(ALLOC_IN_RC(xdb_reg7)); 8.456 match(reg); 8.457 @@ -4681,7 +4659,7 @@ 8.458 %} 8.459 8.460 // Float register operands 8.461 -operand 
regF() %{ 8.462 +operand regFPR() %{ 8.463 predicate( UseSSE < 2 ); 8.464 constraint(ALLOC_IN_RC(flt_reg)); 8.465 match(RegF); 8.466 @@ -4691,7 +4669,7 @@ 8.467 %} 8.468 8.469 // Float register operands 8.470 -operand regFPR1(regF reg) %{ 8.471 +operand regFPR1(regFPR reg) %{ 8.472 predicate( UseSSE < 2 ); 8.473 constraint(ALLOC_IN_RC(flt_reg0)); 8.474 match(reg); 8.475 @@ -4700,7 +4678,7 @@ 8.476 %} 8.477 8.478 // XMM register operands 8.479 -operand regX() %{ 8.480 +operand regF() %{ 8.481 predicate( UseSSE>=1 ); 8.482 constraint(ALLOC_IN_RC(xmm_reg)); 8.483 match(RegF); 8.484 @@ -5444,7 +5422,7 @@ 8.485 %} 8.486 8.487 // Conditional move double reg-reg 8.488 -pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{ 8.489 +pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 8.490 single_instruction; 8.491 dst : S4(write); 8.492 src : S3(read); 8.493 @@ -5453,7 +5431,7 @@ 8.494 %} 8.495 8.496 // Float reg-reg operation 8.497 -pipe_class fpu_reg(regD dst) %{ 8.498 +pipe_class fpu_reg(regDPR dst) %{ 8.499 instruction_count(2); 8.500 dst : S3(read); 8.501 DECODE : S0(2); // any 2 decoders 8.502 @@ -5461,7 +5439,7 @@ 8.503 %} 8.504 8.505 // Float reg-reg operation 8.506 -pipe_class fpu_reg_reg(regD dst, regD src) %{ 8.507 +pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 8.508 instruction_count(2); 8.509 dst : S4(write); 8.510 src : S3(read); 8.511 @@ -5470,7 +5448,7 @@ 8.512 %} 8.513 8.514 // Float reg-reg operation 8.515 -pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{ 8.516 +pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 8.517 instruction_count(3); 8.518 dst : S4(write); 8.519 src1 : S3(read); 8.520 @@ -5480,7 +5458,7 @@ 8.521 %} 8.522 8.523 // Float reg-reg operation 8.524 -pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ 8.525 +pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 8.526 instruction_count(4); 8.527 dst : S4(write); 8.528 src1 
: S3(read); 8.529 @@ -5491,7 +5469,7 @@ 8.530 %} 8.531 8.532 // Float reg-reg operation 8.533 -pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{ 8.534 +pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 8.535 instruction_count(4); 8.536 dst : S4(write); 8.537 src1 : S3(read); 8.538 @@ -5504,7 +5482,7 @@ 8.539 %} 8.540 8.541 // Float reg-mem operation 8.542 -pipe_class fpu_reg_mem(regD dst, memory mem) %{ 8.543 +pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 8.544 instruction_count(2); 8.545 dst : S5(write); 8.546 mem : S3(read); 8.547 @@ -5515,7 +5493,7 @@ 8.548 %} 8.549 8.550 // Float reg-mem operation 8.551 -pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{ 8.552 +pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 8.553 instruction_count(3); 8.554 dst : S5(write); 8.555 src1 : S3(read); 8.556 @@ -5527,7 +5505,7 @@ 8.557 %} 8.558 8.559 // Float mem-reg operation 8.560 -pipe_class fpu_mem_reg(memory mem, regD src) %{ 8.561 +pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 8.562 instruction_count(2); 8.563 src : S5(read); 8.564 mem : S3(read); 8.565 @@ -5537,7 +5515,7 @@ 8.566 MEM : S3; // any mem 8.567 %} 8.568 8.569 -pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{ 8.570 +pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 8.571 instruction_count(3); 8.572 src1 : S3(read); 8.573 src2 : S3(read); 8.574 @@ -5548,7 +5526,7 @@ 8.575 MEM : S3; // any mem 8.576 %} 8.577 8.578 -pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{ 8.579 +pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 8.580 instruction_count(3); 8.581 src1 : S3(read); 8.582 src2 : S3(read); 8.583 @@ -5577,7 +5555,7 @@ 8.584 MEM : S3(3); // any mem 8.585 %} 8.586 8.587 -pipe_class fpu_mem_reg_con(memory mem, regD src1) %{ 8.588 +pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 8.589 instruction_count(3); 8.590 src1 : S4(read); 8.591 mem : 
S4(read); 8.592 @@ -5588,7 +5566,7 @@ 8.593 %} 8.594 8.595 // Float load constant 8.596 -pipe_class fpu_reg_con(regD dst) %{ 8.597 +pipe_class fpu_reg_con(regDPR dst) %{ 8.598 instruction_count(2); 8.599 dst : S5(write); 8.600 D0 : S0; // big decoder only for the load 8.601 @@ -5598,7 +5576,7 @@ 8.602 %} 8.603 8.604 // Float load constant 8.605 -pipe_class fpu_reg_reg_con(regD dst, regD src) %{ 8.606 +pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 8.607 instruction_count(3); 8.608 dst : S5(write); 8.609 src : S3(read); 8.610 @@ -6313,7 +6291,7 @@ 8.611 ins_pipe( fpu_reg_mem ); 8.612 %} 8.613 8.614 -instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{ 8.615 +instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 8.616 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 8.617 match(Set dst (LoadL mem)); 8.618 effect(TEMP tmp); 8.619 @@ -6327,7 +6305,7 @@ 8.620 ins_pipe( pipe_slow ); 8.621 %} 8.622 8.623 -instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{ 8.624 +instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 8.625 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 8.626 match(Set dst (LoadL mem)); 8.627 effect(TEMP tmp); 8.628 @@ -6380,7 +6358,7 @@ 8.629 %} 8.630 8.631 // Load Double 8.632 -instruct loadD(regD dst, memory mem) %{ 8.633 +instruct loadDPR(regDPR dst, memory mem) %{ 8.634 predicate(UseSSE<=1); 8.635 match(Set dst (LoadD mem)); 8.636 8.637 @@ -6389,12 +6367,12 @@ 8.638 "FSTP $dst" %} 8.639 opcode(0xDD); /* DD /0 */ 8.640 ins_encode( OpcP, RMopc_Mem(0x00,mem), 8.641 - Pop_Reg_D(dst) ); 8.642 + Pop_Reg_DPR(dst) ); 8.643 ins_pipe( fpu_reg_mem ); 8.644 %} 8.645 8.646 // Load Double to XMM 8.647 -instruct loadXD(regXD dst, memory mem) %{ 8.648 +instruct loadD(regD dst, memory mem) %{ 8.649 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 8.650 match(Set dst (LoadD mem)); 8.651 ins_cost(145); 8.652 @@ -6405,7 +6383,7 @@ 8.653 ins_pipe( pipe_slow ); 8.654 %} 8.655 8.656 
-instruct loadXD_partial(regXD dst, memory mem) %{ 8.657 +instruct loadD_partial(regD dst, memory mem) %{ 8.658 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 8.659 match(Set dst (LoadD mem)); 8.660 ins_cost(145); 8.661 @@ -6418,7 +6396,7 @@ 8.662 8.663 // Load to XMM register (single-precision floating point) 8.664 // MOVSS instruction 8.665 -instruct loadX(regX dst, memory mem) %{ 8.666 +instruct loadF(regF dst, memory mem) %{ 8.667 predicate(UseSSE>=1); 8.668 match(Set dst (LoadF mem)); 8.669 ins_cost(145); 8.670 @@ -6430,7 +6408,7 @@ 8.671 %} 8.672 8.673 // Load Float 8.674 -instruct loadF(regF dst, memory mem) %{ 8.675 +instruct loadFPR(regFPR dst, memory mem) %{ 8.676 predicate(UseSSE==0); 8.677 match(Set dst (LoadF mem)); 8.678 8.679 @@ -6439,12 +6417,12 @@ 8.680 "FSTP $dst" %} 8.681 opcode(0xD9); /* D9 /0 */ 8.682 ins_encode( OpcP, RMopc_Mem(0x00,mem), 8.683 - Pop_Reg_F(dst) ); 8.684 + Pop_Reg_FPR(dst) ); 8.685 ins_pipe( fpu_reg_mem ); 8.686 %} 8.687 8.688 // Load Aligned Packed Byte to XMM register 8.689 -instruct loadA8B(regXD dst, memory mem) %{ 8.690 +instruct loadA8B(regD dst, memory mem) %{ 8.691 predicate(UseSSE>=1); 8.692 match(Set dst (Load8B mem)); 8.693 ins_cost(125); 8.694 @@ -6456,7 +6434,7 @@ 8.695 %} 8.696 8.697 // Load Aligned Packed Short to XMM register 8.698 -instruct loadA4S(regXD dst, memory mem) %{ 8.699 +instruct loadA4S(regD dst, memory mem) %{ 8.700 predicate(UseSSE>=1); 8.701 match(Set dst (Load4S mem)); 8.702 ins_cost(125); 8.703 @@ -6468,7 +6446,7 @@ 8.704 %} 8.705 8.706 // Load Aligned Packed Char to XMM register 8.707 -instruct loadA4C(regXD dst, memory mem) %{ 8.708 +instruct loadA4C(regD dst, memory mem) %{ 8.709 predicate(UseSSE>=1); 8.710 match(Set dst (Load4C mem)); 8.711 ins_cost(125); 8.712 @@ -6480,7 +6458,7 @@ 8.713 %} 8.714 8.715 // Load Aligned Packed Integer to XMM register 8.716 -instruct load2IU(regXD dst, memory mem) %{ 8.717 +instruct load2IU(regD dst, memory mem) %{ 8.718 predicate(UseSSE>=1); 8.719 
match(Set dst (Load2I mem)); 8.720 ins_cost(125); 8.721 @@ -6492,7 +6470,7 @@ 8.722 %} 8.723 8.724 // Load Aligned Packed Single to XMM 8.725 -instruct loadA2F(regXD dst, memory mem) %{ 8.726 +instruct loadA2F(regD dst, memory mem) %{ 8.727 predicate(UseSSE>=1); 8.728 match(Set dst (Load2F mem)); 8.729 ins_cost(145); 8.730 @@ -6606,58 +6584,58 @@ 8.731 ins_pipe( ialu_reg_long ); 8.732 %} 8.733 8.734 +// The instruction usage is guarded by predicate in operand immFPR(). 8.735 +instruct loadConFPR(regFPR dst, immFPR con) %{ 8.736 + match(Set dst con); 8.737 + ins_cost(125); 8.738 + format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 8.739 + "FSTP $dst" %} 8.740 + ins_encode %{ 8.741 + __ fld_s($constantaddress($con)); 8.742 + __ fstp_d($dst$$reg); 8.743 + %} 8.744 + ins_pipe(fpu_reg_con); 8.745 +%} 8.746 + 8.747 +// The instruction usage is guarded by predicate in operand immFPR0(). 8.748 +instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 8.749 + match(Set dst con); 8.750 + ins_cost(125); 8.751 + format %{ "FLDZ ST\n\t" 8.752 + "FSTP $dst" %} 8.753 + ins_encode %{ 8.754 + __ fldz(); 8.755 + __ fstp_d($dst$$reg); 8.756 + %} 8.757 + ins_pipe(fpu_reg_con); 8.758 +%} 8.759 + 8.760 +// The instruction usage is guarded by predicate in operand immFPR1(). 8.761 +instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 8.762 + match(Set dst con); 8.763 + ins_cost(125); 8.764 + format %{ "FLD1 ST\n\t" 8.765 + "FSTP $dst" %} 8.766 + ins_encode %{ 8.767 + __ fld1(); 8.768 + __ fstp_d($dst$$reg); 8.769 + %} 8.770 + ins_pipe(fpu_reg_con); 8.771 +%} 8.772 + 8.773 // The instruction usage is guarded by predicate in operand immF(). 
8.774 instruct loadConF(regF dst, immF con) %{ 8.775 match(Set dst con); 8.776 ins_cost(125); 8.777 - format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 8.778 - "FSTP $dst" %} 8.779 - ins_encode %{ 8.780 - __ fld_s($constantaddress($con)); 8.781 - __ fstp_d($dst$$reg); 8.782 - %} 8.783 - ins_pipe(fpu_reg_con); 8.784 + format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 8.785 + ins_encode %{ 8.786 + __ movflt($dst$$XMMRegister, $constantaddress($con)); 8.787 + %} 8.788 + ins_pipe(pipe_slow); 8.789 %} 8.790 8.791 // The instruction usage is guarded by predicate in operand immF0(). 8.792 -instruct loadConF0(regF dst, immF0 con) %{ 8.793 - match(Set dst con); 8.794 - ins_cost(125); 8.795 - format %{ "FLDZ ST\n\t" 8.796 - "FSTP $dst" %} 8.797 - ins_encode %{ 8.798 - __ fldz(); 8.799 - __ fstp_d($dst$$reg); 8.800 - %} 8.801 - ins_pipe(fpu_reg_con); 8.802 -%} 8.803 - 8.804 -// The instruction usage is guarded by predicate in operand immF1(). 8.805 -instruct loadConF1(regF dst, immF1 con) %{ 8.806 - match(Set dst con); 8.807 - ins_cost(125); 8.808 - format %{ "FLD1 ST\n\t" 8.809 - "FSTP $dst" %} 8.810 - ins_encode %{ 8.811 - __ fld1(); 8.812 - __ fstp_d($dst$$reg); 8.813 - %} 8.814 - ins_pipe(fpu_reg_con); 8.815 -%} 8.816 - 8.817 -// The instruction usage is guarded by predicate in operand immXF(). 8.818 -instruct loadConX(regX dst, immXF con) %{ 8.819 - match(Set dst con); 8.820 - ins_cost(125); 8.821 - format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 8.822 - ins_encode %{ 8.823 - __ movflt($dst$$XMMRegister, $constantaddress($con)); 8.824 - %} 8.825 - ins_pipe(pipe_slow); 8.826 -%} 8.827 - 8.828 -// The instruction usage is guarded by predicate in operand immXF0(). 
8.829 -instruct loadConX0(regX dst, immXF0 src) %{ 8.830 +instruct loadConF0(regF dst, immF0 src) %{ 8.831 match(Set dst src); 8.832 ins_cost(100); 8.833 format %{ "XORPS $dst,$dst\t# float 0.0" %} 8.834 @@ -6667,61 +6645,61 @@ 8.835 ins_pipe(pipe_slow); 8.836 %} 8.837 8.838 +// The instruction usage is guarded by predicate in operand immDPR(). 8.839 +instruct loadConDPR(regDPR dst, immDPR con) %{ 8.840 + match(Set dst con); 8.841 + ins_cost(125); 8.842 + 8.843 + format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 8.844 + "FSTP $dst" %} 8.845 + ins_encode %{ 8.846 + __ fld_d($constantaddress($con)); 8.847 + __ fstp_d($dst$$reg); 8.848 + %} 8.849 + ins_pipe(fpu_reg_con); 8.850 +%} 8.851 + 8.852 +// The instruction usage is guarded by predicate in operand immDPR0(). 8.853 +instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 8.854 + match(Set dst con); 8.855 + ins_cost(125); 8.856 + 8.857 + format %{ "FLDZ ST\n\t" 8.858 + "FSTP $dst" %} 8.859 + ins_encode %{ 8.860 + __ fldz(); 8.861 + __ fstp_d($dst$$reg); 8.862 + %} 8.863 + ins_pipe(fpu_reg_con); 8.864 +%} 8.865 + 8.866 +// The instruction usage is guarded by predicate in operand immDPR1(). 8.867 +instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 8.868 + match(Set dst con); 8.869 + ins_cost(125); 8.870 + 8.871 + format %{ "FLD1 ST\n\t" 8.872 + "FSTP $dst" %} 8.873 + ins_encode %{ 8.874 + __ fld1(); 8.875 + __ fstp_d($dst$$reg); 8.876 + %} 8.877 + ins_pipe(fpu_reg_con); 8.878 +%} 8.879 + 8.880 // The instruction usage is guarded by predicate in operand immD(). 
8.881 instruct loadConD(regD dst, immD con) %{ 8.882 match(Set dst con); 8.883 ins_cost(125); 8.884 - 8.885 - format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 8.886 - "FSTP $dst" %} 8.887 - ins_encode %{ 8.888 - __ fld_d($constantaddress($con)); 8.889 - __ fstp_d($dst$$reg); 8.890 - %} 8.891 - ins_pipe(fpu_reg_con); 8.892 + format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 8.893 + ins_encode %{ 8.894 + __ movdbl($dst$$XMMRegister, $constantaddress($con)); 8.895 + %} 8.896 + ins_pipe(pipe_slow); 8.897 %} 8.898 8.899 // The instruction usage is guarded by predicate in operand immD0(). 8.900 -instruct loadConD0(regD dst, immD0 con) %{ 8.901 - match(Set dst con); 8.902 - ins_cost(125); 8.903 - 8.904 - format %{ "FLDZ ST\n\t" 8.905 - "FSTP $dst" %} 8.906 - ins_encode %{ 8.907 - __ fldz(); 8.908 - __ fstp_d($dst$$reg); 8.909 - %} 8.910 - ins_pipe(fpu_reg_con); 8.911 -%} 8.912 - 8.913 -// The instruction usage is guarded by predicate in operand immD1(). 8.914 -instruct loadConD1(regD dst, immD1 con) %{ 8.915 - match(Set dst con); 8.916 - ins_cost(125); 8.917 - 8.918 - format %{ "FLD1 ST\n\t" 8.919 - "FSTP $dst" %} 8.920 - ins_encode %{ 8.921 - __ fld1(); 8.922 - __ fstp_d($dst$$reg); 8.923 - %} 8.924 - ins_pipe(fpu_reg_con); 8.925 -%} 8.926 - 8.927 -// The instruction usage is guarded by predicate in operand immXD(). 8.928 -instruct loadConXD(regXD dst, immXD con) %{ 8.929 - match(Set dst con); 8.930 - ins_cost(125); 8.931 - format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 8.932 - ins_encode %{ 8.933 - __ movdbl($dst$$XMMRegister, $constantaddress($con)); 8.934 - %} 8.935 - ins_pipe(pipe_slow); 8.936 -%} 8.937 - 8.938 -// The instruction usage is guarded by predicate in operand immXD0(). 
8.939 -instruct loadConXD0(regXD dst, immXD0 src) %{ 8.940 +instruct loadConD0(regD dst, immD0 src) %{ 8.941 match(Set dst src); 8.942 ins_cost(100); 8.943 format %{ "XORPD $dst,$dst\t# double 0.0" %} 8.944 @@ -6765,7 +6743,7 @@ 8.945 %} 8.946 8.947 // Load Stack Slot 8.948 -instruct loadSSF(regF dst, stackSlotF src) %{ 8.949 +instruct loadSSF(regFPR dst, stackSlotF src) %{ 8.950 match(Set dst src); 8.951 ins_cost(125); 8.952 8.953 @@ -6773,12 +6751,12 @@ 8.954 "FSTP $dst" %} 8.955 opcode(0xD9); /* D9 /0, FLD m32real */ 8.956 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 8.957 - Pop_Reg_F(dst) ); 8.958 + Pop_Reg_FPR(dst) ); 8.959 ins_pipe( fpu_reg_mem ); 8.960 %} 8.961 8.962 // Load Stack Slot 8.963 -instruct loadSSD(regD dst, stackSlotD src) %{ 8.964 +instruct loadSSD(regDPR dst, stackSlotD src) %{ 8.965 match(Set dst src); 8.966 ins_cost(125); 8.967 8.968 @@ -6786,7 +6764,7 @@ 8.969 "FSTP $dst" %} 8.970 opcode(0xDD); /* DD /0, FLD m64real */ 8.971 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 8.972 - Pop_Reg_D(dst) ); 8.973 + Pop_Reg_DPR(dst) ); 8.974 ins_pipe( fpu_reg_mem ); 8.975 %} 8.976 8.977 @@ -7021,7 +6999,7 @@ 8.978 ins_pipe( fpu_reg_mem ); 8.979 %} 8.980 8.981 -instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{ 8.982 +instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 8.983 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 8.984 match(Set mem (StoreL mem src)); 8.985 effect( TEMP tmp, KILL cr ); 8.986 @@ -7037,7 +7015,7 @@ 8.987 ins_pipe( pipe_slow ); 8.988 %} 8.989 8.990 -instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{ 8.991 +instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 8.992 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 8.993 match(Set mem (StoreL mem src)); 8.994 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 8.995 @@ -7115,7 +7093,7 @@ 8.996 %} 8.997 8.998 // Store 
Aligned Packed Byte XMM register to memory 8.999 -instruct storeA8B(memory mem, regXD src) %{ 8.1000 +instruct storeA8B(memory mem, regD src) %{ 8.1001 predicate(UseSSE>=1); 8.1002 match(Set mem (Store8B mem src)); 8.1003 ins_cost(145); 8.1004 @@ -7127,7 +7105,7 @@ 8.1005 %} 8.1006 8.1007 // Store Aligned Packed Char/Short XMM register to memory 8.1008 -instruct storeA4C(memory mem, regXD src) %{ 8.1009 +instruct storeA4C(memory mem, regD src) %{ 8.1010 predicate(UseSSE>=1); 8.1011 match(Set mem (Store4C mem src)); 8.1012 ins_cost(145); 8.1013 @@ -7139,7 +7117,7 @@ 8.1014 %} 8.1015 8.1016 // Store Aligned Packed Integer XMM register to memory 8.1017 -instruct storeA2I(memory mem, regXD src) %{ 8.1018 +instruct storeA2I(memory mem, regD src) %{ 8.1019 predicate(UseSSE>=1); 8.1020 match(Set mem (Store2I mem src)); 8.1021 ins_cost(145); 8.1022 @@ -7162,32 +7140,32 @@ 8.1023 %} 8.1024 8.1025 // Store Double 8.1026 -instruct storeD( memory mem, regDPR1 src) %{ 8.1027 +instruct storeDPR( memory mem, regDPR1 src) %{ 8.1028 predicate(UseSSE<=1); 8.1029 match(Set mem (StoreD mem src)); 8.1030 8.1031 ins_cost(100); 8.1032 format %{ "FST_D $mem,$src" %} 8.1033 opcode(0xDD); /* DD /2 */ 8.1034 - ins_encode( enc_FP_store(mem,src) ); 8.1035 + ins_encode( enc_FPR_store(mem,src) ); 8.1036 ins_pipe( fpu_mem_reg ); 8.1037 %} 8.1038 8.1039 // Store double does rounding on x86 8.1040 -instruct storeD_rounded( memory mem, regDPR1 src) %{ 8.1041 +instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 8.1042 predicate(UseSSE<=1); 8.1043 match(Set mem (StoreD mem (RoundDouble src))); 8.1044 8.1045 ins_cost(100); 8.1046 format %{ "FST_D $mem,$src\t# round" %} 8.1047 opcode(0xDD); /* DD /2 */ 8.1048 - ins_encode( enc_FP_store(mem,src) ); 8.1049 + ins_encode( enc_FPR_store(mem,src) ); 8.1050 ins_pipe( fpu_mem_reg ); 8.1051 %} 8.1052 8.1053 // Store XMM register to memory (double-precision floating points) 8.1054 // MOVSD instruction 8.1055 -instruct storeXD(memory mem, regXD src) %{ 8.1056 
+instruct storeD(memory mem, regD src) %{ 8.1057 predicate(UseSSE>=2); 8.1058 match(Set mem (StoreD mem src)); 8.1059 ins_cost(95); 8.1060 @@ -7200,7 +7178,7 @@ 8.1061 8.1062 // Store XMM register to memory (single-precision floating point) 8.1063 // MOVSS instruction 8.1064 -instruct storeX(memory mem, regX src) %{ 8.1065 +instruct storeF(memory mem, regF src) %{ 8.1066 predicate(UseSSE>=1); 8.1067 match(Set mem (StoreF mem src)); 8.1068 ins_cost(95); 8.1069 @@ -7212,7 +7190,7 @@ 8.1070 %} 8.1071 8.1072 // Store Aligned Packed Single Float XMM register to memory 8.1073 -instruct storeA2F(memory mem, regXD src) %{ 8.1074 +instruct storeA2F(memory mem, regD src) %{ 8.1075 predicate(UseSSE>=1); 8.1076 match(Set mem (Store2F mem src)); 8.1077 ins_cost(145); 8.1078 @@ -7224,42 +7202,54 @@ 8.1079 %} 8.1080 8.1081 // Store Float 8.1082 -instruct storeF( memory mem, regFPR1 src) %{ 8.1083 +instruct storeFPR( memory mem, regFPR1 src) %{ 8.1084 predicate(UseSSE==0); 8.1085 match(Set mem (StoreF mem src)); 8.1086 8.1087 ins_cost(100); 8.1088 format %{ "FST_S $mem,$src" %} 8.1089 opcode(0xD9); /* D9 /2 */ 8.1090 - ins_encode( enc_FP_store(mem,src) ); 8.1091 + ins_encode( enc_FPR_store(mem,src) ); 8.1092 ins_pipe( fpu_mem_reg ); 8.1093 %} 8.1094 8.1095 // Store Float does rounding on x86 8.1096 -instruct storeF_rounded( memory mem, regFPR1 src) %{ 8.1097 +instruct storeFPR_rounded( memory mem, regFPR1 src) %{ 8.1098 predicate(UseSSE==0); 8.1099 match(Set mem (StoreF mem (RoundFloat src))); 8.1100 8.1101 ins_cost(100); 8.1102 format %{ "FST_S $mem,$src\t# round" %} 8.1103 opcode(0xD9); /* D9 /2 */ 8.1104 - ins_encode( enc_FP_store(mem,src) ); 8.1105 + ins_encode( enc_FPR_store(mem,src) ); 8.1106 ins_pipe( fpu_mem_reg ); 8.1107 %} 8.1108 8.1109 // Store Float does rounding on x86 8.1110 -instruct storeF_Drounded( memory mem, regDPR1 src) %{ 8.1111 +instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ 8.1112 predicate(UseSSE<=1); 8.1113 match(Set mem (StoreF mem (ConvD2F 
src))); 8.1114 8.1115 ins_cost(100); 8.1116 format %{ "FST_S $mem,$src\t# D-round" %} 8.1117 opcode(0xD9); /* D9 /2 */ 8.1118 - ins_encode( enc_FP_store(mem,src) ); 8.1119 + ins_encode( enc_FPR_store(mem,src) ); 8.1120 ins_pipe( fpu_mem_reg ); 8.1121 %} 8.1122 8.1123 // Store immediate Float value (it is faster than store from FPU register) 8.1124 +// The instruction usage is guarded by predicate in operand immFPR(). 8.1125 +instruct storeFPR_imm( memory mem, immFPR src) %{ 8.1126 + match(Set mem (StoreF mem src)); 8.1127 + 8.1128 + ins_cost(50); 8.1129 + format %{ "MOV $mem,$src\t# store float" %} 8.1130 + opcode(0xC7); /* C7 /0 */ 8.1131 + ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 8.1132 + ins_pipe( ialu_mem_imm ); 8.1133 +%} 8.1134 + 8.1135 +// Store immediate Float value (it is faster than store from XMM register) 8.1136 // The instruction usage is guarded by predicate in operand immF(). 8.1137 instruct storeF_imm( memory mem, immF src) %{ 8.1138 match(Set mem (StoreF mem src)); 8.1139 @@ -7271,18 +7261,6 @@ 8.1140 ins_pipe( ialu_mem_imm ); 8.1141 %} 8.1142 8.1143 -// Store immediate Float value (it is faster than store from XMM register) 8.1144 -// The instruction usage is guarded by predicate in operand immXF(). 
8.1145 -instruct storeX_imm( memory mem, immXF src) %{ 8.1146 - match(Set mem (StoreF mem src)); 8.1147 - 8.1148 - ins_cost(50); 8.1149 - format %{ "MOV $mem,$src\t# store float" %} 8.1150 - opcode(0xC7); /* C7 /0 */ 8.1151 - ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src )); 8.1152 - ins_pipe( ialu_mem_imm ); 8.1153 -%} 8.1154 - 8.1155 // Store Integer to stack slot 8.1156 instruct storeSSI(stackSlotI dst, eRegI src) %{ 8.1157 match(Set dst src); 8.1158 @@ -7577,29 +7555,29 @@ 8.1159 //%} 8.1160 8.1161 // Conditional move 8.1162 -instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{ 8.1163 +instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 8.1164 predicate(UseSSE<=1); 8.1165 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 8.1166 ins_cost(200); 8.1167 format %{ "FCMOV$cop $dst,$src\t# double" %} 8.1168 opcode(0xDA); 8.1169 - ins_encode( enc_cmov_d(cop,src) ); 8.1170 - ins_pipe( pipe_cmovD_reg ); 8.1171 + ins_encode( enc_cmov_dpr(cop,src) ); 8.1172 + ins_pipe( pipe_cmovDPR_reg ); 8.1173 %} 8.1174 8.1175 // Conditional move 8.1176 -instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{ 8.1177 +instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 8.1178 predicate(UseSSE==0); 8.1179 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 8.1180 ins_cost(200); 8.1181 format %{ "FCMOV$cop $dst,$src\t# float" %} 8.1182 opcode(0xDA); 8.1183 - ins_encode( enc_cmov_d(cop,src) ); 8.1184 - ins_pipe( pipe_cmovD_reg ); 8.1185 + ins_encode( enc_cmov_dpr(cop,src) ); 8.1186 + ins_pipe( pipe_cmovDPR_reg ); 8.1187 %} 8.1188 8.1189 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 
8.1190 -instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 8.1191 +instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 8.1192 predicate(UseSSE<=1); 8.1193 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 8.1194 ins_cost(200); 8.1195 @@ -7607,12 +7585,12 @@ 8.1196 "MOV $dst,$src\t# double\n" 8.1197 "skip:" %} 8.1198 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 8.1199 - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) ); 8.1200 - ins_pipe( pipe_cmovD_reg ); 8.1201 + ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 8.1202 + ins_pipe( pipe_cmovDPR_reg ); 8.1203 %} 8.1204 8.1205 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 8.1206 -instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 8.1207 +instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 8.1208 predicate(UseSSE==0); 8.1209 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 8.1210 ins_cost(200); 8.1211 @@ -7620,12 +7598,12 @@ 8.1212 "MOV $dst,$src\t# float\n" 8.1213 "skip:" %} 8.1214 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 8.1215 - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) ); 8.1216 - ins_pipe( pipe_cmovD_reg ); 8.1217 + ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 8.1218 + ins_pipe( pipe_cmovDPR_reg ); 8.1219 %} 8.1220 8.1221 // No CMOVE with SSE/SSE2 8.1222 -instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{ 8.1223 +instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 8.1224 predicate (UseSSE>=1); 8.1225 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 8.1226 ins_cost(200); 8.1227 @@ -7643,7 +7621,7 @@ 8.1228 %} 8.1229 8.1230 // No CMOVE with SSE/SSE2 8.1231 -instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{ 8.1232 +instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 8.1233 predicate 
(UseSSE>=2); 8.1234 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 8.1235 ins_cost(200); 8.1236 @@ -7661,7 +7639,7 @@ 8.1237 %} 8.1238 8.1239 // unsigned version 8.1240 -instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{ 8.1241 +instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 8.1242 predicate (UseSSE>=1); 8.1243 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 8.1244 ins_cost(200); 8.1245 @@ -7678,17 +7656,17 @@ 8.1246 ins_pipe( pipe_slow ); 8.1247 %} 8.1248 8.1249 -instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{ 8.1250 +instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 8.1251 predicate (UseSSE>=1); 8.1252 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 8.1253 ins_cost(200); 8.1254 expand %{ 8.1255 - fcmovX_regU(cop, cr, dst, src); 8.1256 + fcmovF_regU(cop, cr, dst, src); 8.1257 %} 8.1258 %} 8.1259 8.1260 // unsigned version 8.1261 -instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{ 8.1262 +instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 8.1263 predicate (UseSSE>=2); 8.1264 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 8.1265 ins_cost(200); 8.1266 @@ -7705,12 +7683,12 @@ 8.1267 ins_pipe( pipe_slow ); 8.1268 %} 8.1269 8.1270 -instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{ 8.1271 +instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 8.1272 predicate (UseSSE>=2); 8.1273 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 8.1274 ins_cost(200); 8.1275 expand %{ 8.1276 - fcmovXD_regU(cop, cr, dst, src); 8.1277 + fcmovD_regU(cop, cr, dst, src); 8.1278 %} 8.1279 %} 8.1280 8.1281 @@ -7940,7 +7918,7 @@ 8.1282 ins_pipe( fpu_reg_mem ); 8.1283 %} 8.1284 8.1285 -instruct loadLX_Locked(stackSlotL dst, memory mem, regXD tmp) %{ 8.1286 +instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{ 8.1287 predicate(UseSSE>=2); 8.1288 
match(Set dst (LoadLLocked mem)); 8.1289 effect(TEMP tmp); 8.1290 @@ -7954,7 +7932,7 @@ 8.1291 ins_pipe( pipe_slow ); 8.1292 %} 8.1293 8.1294 -instruct loadLX_reg_Locked(eRegL dst, memory mem, regXD tmp) %{ 8.1295 +instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{ 8.1296 predicate(UseSSE>=2); 8.1297 match(Set dst (LoadLLocked mem)); 8.1298 effect(TEMP tmp); 8.1299 @@ -9551,7 +9529,7 @@ 8.1300 // Compare & branch 8.1301 8.1302 // P6 version of float compare, sets condition codes in EFLAGS 8.1303 -instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 8.1304 +instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 8.1305 predicate(VM_Version::supports_cmov() && UseSSE <=1); 8.1306 match(Set cr (CmpD src1 src2)); 8.1307 effect(KILL rax); 8.1308 @@ -9563,26 +9541,26 @@ 8.1309 "SAHF\n" 8.1310 "exit:\tNOP // avoid branch to branch" %} 8.1311 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 8.1312 - ins_encode( Push_Reg_D(src1), 8.1313 + ins_encode( Push_Reg_DPR(src1), 8.1314 OpcP, RegOpc(src2), 8.1315 cmpF_P6_fixup ); 8.1316 ins_pipe( pipe_slow ); 8.1317 %} 8.1318 8.1319 -instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{ 8.1320 +instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 8.1321 predicate(VM_Version::supports_cmov() && UseSSE <=1); 8.1322 match(Set cr (CmpD src1 src2)); 8.1323 ins_cost(150); 8.1324 format %{ "FLD $src1\n\t" 8.1325 "FUCOMIP ST,$src2 // P6 instruction" %} 8.1326 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 8.1327 - ins_encode( Push_Reg_D(src1), 8.1328 + ins_encode( Push_Reg_DPR(src1), 8.1329 OpcP, RegOpc(src2)); 8.1330 ins_pipe( pipe_slow ); 8.1331 %} 8.1332 8.1333 // Compare & branch 8.1334 -instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 8.1335 +instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 8.1336 predicate(UseSSE<=1); 8.1337 match(Set cr (CmpD src1 src2)); 8.1338 effect(KILL rax); 8.1339 @@ -9595,42 +9573,42 @@ 8.1340 
"MOV AH,1\t# unordered treat as LT\n" 8.1341 "flags:\tSAHF" %} 8.1342 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 8.1343 - ins_encode( Push_Reg_D(src1), 8.1344 + ins_encode( Push_Reg_DPR(src1), 8.1345 OpcP, RegOpc(src2), 8.1346 fpu_flags); 8.1347 ins_pipe( pipe_slow ); 8.1348 %} 8.1349 8.1350 // Compare vs zero into -1,0,1 8.1351 -instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{ 8.1352 +instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 8.1353 predicate(UseSSE<=1); 8.1354 match(Set dst (CmpD3 src1 zero)); 8.1355 effect(KILL cr, KILL rax); 8.1356 ins_cost(280); 8.1357 format %{ "FTSTD $dst,$src1" %} 8.1358 opcode(0xE4, 0xD9); 8.1359 - ins_encode( Push_Reg_D(src1), 8.1360 + ins_encode( Push_Reg_DPR(src1), 8.1361 OpcS, OpcP, PopFPU, 8.1362 CmpF_Result(dst)); 8.1363 ins_pipe( pipe_slow ); 8.1364 %} 8.1365 8.1366 // Compare into -1,0,1 8.1367 -instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{ 8.1368 +instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 8.1369 predicate(UseSSE<=1); 8.1370 match(Set dst (CmpD3 src1 src2)); 8.1371 effect(KILL cr, KILL rax); 8.1372 ins_cost(300); 8.1373 format %{ "FCMPD $dst,$src1,$src2" %} 8.1374 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 8.1375 - ins_encode( Push_Reg_D(src1), 8.1376 + ins_encode( Push_Reg_DPR(src1), 8.1377 OpcP, RegOpc(src2), 8.1378 CmpF_Result(dst)); 8.1379 ins_pipe( pipe_slow ); 8.1380 %} 8.1381 8.1382 // float compare and set condition codes in EFLAGS by XMM regs 8.1383 -instruct cmpXD_cc(eFlagsRegU cr, regXD src1, regXD src2) %{ 8.1384 +instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 8.1385 predicate(UseSSE>=2); 8.1386 match(Set cr (CmpD src1 src2)); 8.1387 ins_cost(145); 8.1388 @@ -9647,7 +9625,7 @@ 8.1389 ins_pipe( pipe_slow ); 8.1390 %} 8.1391 8.1392 -instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD src1, regXD src2) %{ 8.1393 +instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) 
%{ 8.1394 predicate(UseSSE>=2); 8.1395 match(Set cr (CmpD src1 src2)); 8.1396 ins_cost(100); 8.1397 @@ -9659,7 +9637,7 @@ 8.1398 %} 8.1399 8.1400 // float compare and set condition codes in EFLAGS by XMM regs 8.1401 -instruct cmpXD_ccmem(eFlagsRegU cr, regXD src1, memory src2) %{ 8.1402 +instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 8.1403 predicate(UseSSE>=2); 8.1404 match(Set cr (CmpD src1 (LoadD src2))); 8.1405 ins_cost(145); 8.1406 @@ -9676,7 +9654,7 @@ 8.1407 ins_pipe( pipe_slow ); 8.1408 %} 8.1409 8.1410 -instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD src1, memory src2) %{ 8.1411 +instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 8.1412 predicate(UseSSE>=2); 8.1413 match(Set cr (CmpD src1 (LoadD src2))); 8.1414 ins_cost(100); 8.1415 @@ -9688,7 +9666,7 @@ 8.1416 %} 8.1417 8.1418 // Compare into -1,0,1 in XMM 8.1419 -instruct cmpXD_reg(xRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{ 8.1420 +instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 8.1421 predicate(UseSSE>=2); 8.1422 match(Set dst (CmpD3 src1 src2)); 8.1423 effect(KILL cr); 8.1424 @@ -9708,7 +9686,7 @@ 8.1425 %} 8.1426 8.1427 // Compare into -1,0,1 in XMM and memory 8.1428 -instruct cmpXD_regmem(xRegI dst, regXD src1, memory src2, eFlagsReg cr) %{ 8.1429 +instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 8.1430 predicate(UseSSE>=2); 8.1431 match(Set dst (CmpD3 src1 (LoadD src2))); 8.1432 effect(KILL cr); 8.1433 @@ -9728,7 +9706,7 @@ 8.1434 %} 8.1435 8.1436 8.1437 -instruct subD_reg(regD dst, regD src) %{ 8.1438 +instruct subDPR_reg(regDPR dst, regDPR src) %{ 8.1439 predicate (UseSSE <=1); 8.1440 match(Set dst (SubD dst src)); 8.1441 8.1442 @@ -9736,12 +9714,12 @@ 8.1443 "DSUBp $dst,ST" %} 8.1444 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 8.1445 ins_cost(150); 8.1446 - ins_encode( Push_Reg_D(src), 8.1447 + ins_encode( Push_Reg_DPR(src), 8.1448 OpcP, RegOpc(dst) ); 8.1449 ins_pipe( fpu_reg_reg ); 8.1450 %} 8.1451 8.1452 -instruct 
subD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 8.1453 +instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 8.1454 predicate (UseSSE <=1); 8.1455 match(Set dst (RoundDouble (SubD src1 src2))); 8.1456 ins_cost(250); 8.1457 @@ -9750,13 +9728,13 @@ 8.1458 "DSUB ST,$src1\n\t" 8.1459 "FSTP_D $dst\t# D-round" %} 8.1460 opcode(0xD8, 0x5); 8.1461 - ins_encode( Push_Reg_D(src2), 8.1462 - OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 8.1463 + ins_encode( Push_Reg_DPR(src2), 8.1464 + OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 8.1465 ins_pipe( fpu_mem_reg_reg ); 8.1466 %} 8.1467 8.1468 8.1469 -instruct subD_reg_mem(regD dst, memory src) %{ 8.1470 +instruct subDPR_reg_mem(regDPR dst, memory src) %{ 8.1471 predicate (UseSSE <=1); 8.1472 match(Set dst (SubD dst (LoadD src))); 8.1473 ins_cost(150); 8.1474 @@ -9769,7 +9747,7 @@ 8.1475 ins_pipe( fpu_reg_mem ); 8.1476 %} 8.1477 8.1478 -instruct absD_reg(regDPR1 dst, regDPR1 src) %{ 8.1479 +instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 8.1480 predicate (UseSSE<=1); 8.1481 match(Set dst (AbsD src)); 8.1482 ins_cost(100); 8.1483 @@ -9779,19 +9757,7 @@ 8.1484 ins_pipe( fpu_reg_reg ); 8.1485 %} 8.1486 8.1487 -instruct absXD_reg( regXD dst ) %{ 8.1488 - predicate(UseSSE>=2); 8.1489 - match(Set dst (AbsD dst)); 8.1490 - ins_cost(150); 8.1491 - format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %} 8.1492 - ins_encode %{ 8.1493 - __ andpd($dst$$XMMRegister, 8.1494 - ExternalAddress((address)double_signmask_pool)); 8.1495 - %} 8.1496 - ins_pipe( pipe_slow ); 8.1497 -%} 8.1498 - 8.1499 -instruct negD_reg(regDPR1 dst, regDPR1 src) %{ 8.1500 +instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 8.1501 predicate(UseSSE<=1); 8.1502 match(Set dst (NegD src)); 8.1503 ins_cost(100); 8.1504 @@ -9801,19 +9767,7 @@ 8.1505 ins_pipe( fpu_reg_reg ); 8.1506 %} 8.1507 8.1508 -instruct negXD_reg( regXD dst ) %{ 8.1509 - predicate(UseSSE>=2); 8.1510 - match(Set dst (NegD dst)); 8.1511 - ins_cost(150); 8.1512 - format %{ "XORPD 
$dst,[0x8000000000000000]\t# CHS D by sign flipping" %} 8.1513 - ins_encode %{ 8.1514 - __ xorpd($dst$$XMMRegister, 8.1515 - ExternalAddress((address)double_signflip_pool)); 8.1516 - %} 8.1517 - ins_pipe( pipe_slow ); 8.1518 -%} 8.1519 - 8.1520 -instruct addD_reg(regD dst, regD src) %{ 8.1521 +instruct addDPR_reg(regDPR dst, regDPR src) %{ 8.1522 predicate(UseSSE<=1); 8.1523 match(Set dst (AddD dst src)); 8.1524 format %{ "FLD $src\n\t" 8.1525 @@ -9821,13 +9775,13 @@ 8.1526 size(4); 8.1527 ins_cost(150); 8.1528 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 8.1529 - ins_encode( Push_Reg_D(src), 8.1530 + ins_encode( Push_Reg_DPR(src), 8.1531 OpcP, RegOpc(dst) ); 8.1532 ins_pipe( fpu_reg_reg ); 8.1533 %} 8.1534 8.1535 8.1536 -instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 8.1537 +instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 8.1538 predicate(UseSSE<=1); 8.1539 match(Set dst (RoundDouble (AddD src1 src2))); 8.1540 ins_cost(250); 8.1541 @@ -9836,13 +9790,13 @@ 8.1542 "DADD ST,$src1\n\t" 8.1543 "FSTP_D $dst\t# D-round" %} 8.1544 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 8.1545 - ins_encode( Push_Reg_D(src2), 8.1546 - OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 8.1547 + ins_encode( Push_Reg_DPR(src2), 8.1548 + OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 8.1549 ins_pipe( fpu_mem_reg_reg ); 8.1550 %} 8.1551 8.1552 8.1553 -instruct addD_reg_mem(regD dst, memory src) %{ 8.1554 +instruct addDPR_reg_mem(regDPR dst, memory src) %{ 8.1555 predicate(UseSSE<=1); 8.1556 match(Set dst (AddD dst (LoadD src))); 8.1557 ins_cost(150); 8.1558 @@ -9856,7 +9810,7 @@ 8.1559 %} 8.1560 8.1561 // add-to-memory 8.1562 -instruct addD_mem_reg(memory dst, regD src) %{ 8.1563 +instruct addDPR_mem_reg(memory dst, regDPR src) %{ 8.1564 predicate(UseSSE<=1); 8.1565 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 8.1566 ins_cost(150); 8.1567 @@ -9872,7 +9826,7 @@ 8.1568 ins_pipe( fpu_reg_mem ); 8.1569 %} 8.1570 8.1571 -instruct addD_reg_imm1(regD dst, immD1 
con) %{ 8.1572 +instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 8.1573 predicate(UseSSE<=1); 8.1574 match(Set dst (AddD dst con)); 8.1575 ins_cost(125); 8.1576 @@ -9885,7 +9839,7 @@ 8.1577 ins_pipe(fpu_reg); 8.1578 %} 8.1579 8.1580 -instruct addD_reg_imm(regD dst, immD con) %{ 8.1581 +instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 8.1582 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 8.1583 match(Set dst (AddD dst con)); 8.1584 ins_cost(200); 8.1585 @@ -9898,7 +9852,7 @@ 8.1586 ins_pipe(fpu_reg_mem); 8.1587 %} 8.1588 8.1589 -instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{ 8.1590 +instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 8.1591 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 8.1592 match(Set dst (RoundDouble (AddD src con))); 8.1593 ins_cost(200); 8.1594 @@ -9913,143 +9867,14 @@ 8.1595 ins_pipe(fpu_mem_reg_con); 8.1596 %} 8.1597 8.1598 -// Add two double precision floating point values in xmm 8.1599 -instruct addXD_reg(regXD dst, regXD src) %{ 8.1600 - predicate(UseSSE>=2); 8.1601 - match(Set dst (AddD dst src)); 8.1602 - format %{ "ADDSD $dst,$src" %} 8.1603 - ins_encode %{ 8.1604 - __ addsd($dst$$XMMRegister, $src$$XMMRegister); 8.1605 - %} 8.1606 - ins_pipe( pipe_slow ); 8.1607 -%} 8.1608 - 8.1609 -instruct addXD_imm(regXD dst, immXD con) %{ 8.1610 - predicate(UseSSE>=2); 8.1611 - match(Set dst (AddD dst con)); 8.1612 - format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 8.1613 - ins_encode %{ 8.1614 - __ addsd($dst$$XMMRegister, $constantaddress($con)); 8.1615 - %} 8.1616 - ins_pipe(pipe_slow); 8.1617 -%} 8.1618 - 8.1619 -instruct addXD_mem(regXD dst, memory mem) %{ 8.1620 - predicate(UseSSE>=2); 8.1621 - match(Set dst (AddD dst (LoadD mem))); 8.1622 - format %{ "ADDSD $dst,$mem" %} 8.1623 - ins_encode %{ 8.1624 - __ addsd($dst$$XMMRegister, $mem$$Address); 8.1625 
- %} 8.1626 - ins_pipe( pipe_slow ); 8.1627 -%} 8.1628 - 8.1629 -// Sub two double precision floating point values in xmm 8.1630 -instruct subXD_reg(regXD dst, regXD src) %{ 8.1631 - predicate(UseSSE>=2); 8.1632 - match(Set dst (SubD dst src)); 8.1633 - ins_cost(150); 8.1634 - format %{ "SUBSD $dst,$src" %} 8.1635 - ins_encode %{ 8.1636 - __ subsd($dst$$XMMRegister, $src$$XMMRegister); 8.1637 - %} 8.1638 - ins_pipe( pipe_slow ); 8.1639 -%} 8.1640 - 8.1641 -instruct subXD_imm(regXD dst, immXD con) %{ 8.1642 - predicate(UseSSE>=2); 8.1643 - match(Set dst (SubD dst con)); 8.1644 - ins_cost(150); 8.1645 - format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 8.1646 - ins_encode %{ 8.1647 - __ subsd($dst$$XMMRegister, $constantaddress($con)); 8.1648 - %} 8.1649 - ins_pipe(pipe_slow); 8.1650 -%} 8.1651 - 8.1652 -instruct subXD_mem(regXD dst, memory mem) %{ 8.1653 - predicate(UseSSE>=2); 8.1654 - match(Set dst (SubD dst (LoadD mem))); 8.1655 - ins_cost(150); 8.1656 - format %{ "SUBSD $dst,$mem" %} 8.1657 - ins_encode %{ 8.1658 - __ subsd($dst$$XMMRegister, $mem$$Address); 8.1659 - %} 8.1660 - ins_pipe( pipe_slow ); 8.1661 -%} 8.1662 - 8.1663 -// Mul two double precision floating point values in xmm 8.1664 -instruct mulXD_reg(regXD dst, regXD src) %{ 8.1665 - predicate(UseSSE>=2); 8.1666 - match(Set dst (MulD dst src)); 8.1667 - format %{ "MULSD $dst,$src" %} 8.1668 - ins_encode %{ 8.1669 - __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 8.1670 - %} 8.1671 - ins_pipe( pipe_slow ); 8.1672 -%} 8.1673 - 8.1674 -instruct mulXD_imm(regXD dst, immXD con) %{ 8.1675 - predicate(UseSSE>=2); 8.1676 - match(Set dst (MulD dst con)); 8.1677 - format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 8.1678 - ins_encode %{ 8.1679 - __ mulsd($dst$$XMMRegister, $constantaddress($con)); 8.1680 - %} 8.1681 - ins_pipe(pipe_slow); 8.1682 -%} 8.1683 - 8.1684 -instruct mulXD_mem(regXD dst, memory mem) %{ 8.1685 - predicate(UseSSE>=2); 
8.1686 - match(Set dst (MulD dst (LoadD mem))); 8.1687 - format %{ "MULSD $dst,$mem" %} 8.1688 - ins_encode %{ 8.1689 - __ mulsd($dst$$XMMRegister, $mem$$Address); 8.1690 - %} 8.1691 - ins_pipe( pipe_slow ); 8.1692 -%} 8.1693 - 8.1694 -// Div two double precision floating point values in xmm 8.1695 -instruct divXD_reg(regXD dst, regXD src) %{ 8.1696 - predicate(UseSSE>=2); 8.1697 - match(Set dst (DivD dst src)); 8.1698 - format %{ "DIVSD $dst,$src" %} 8.1699 - opcode(0xF2, 0x0F, 0x5E); 8.1700 - ins_encode %{ 8.1701 - __ divsd($dst$$XMMRegister, $src$$XMMRegister); 8.1702 - %} 8.1703 - ins_pipe( pipe_slow ); 8.1704 -%} 8.1705 - 8.1706 -instruct divXD_imm(regXD dst, immXD con) %{ 8.1707 - predicate(UseSSE>=2); 8.1708 - match(Set dst (DivD dst con)); 8.1709 - format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 8.1710 - ins_encode %{ 8.1711 - __ divsd($dst$$XMMRegister, $constantaddress($con)); 8.1712 - %} 8.1713 - ins_pipe(pipe_slow); 8.1714 -%} 8.1715 - 8.1716 -instruct divXD_mem(regXD dst, memory mem) %{ 8.1717 - predicate(UseSSE>=2); 8.1718 - match(Set dst (DivD dst (LoadD mem))); 8.1719 - format %{ "DIVSD $dst,$mem" %} 8.1720 - ins_encode %{ 8.1721 - __ divsd($dst$$XMMRegister, $mem$$Address); 8.1722 - %} 8.1723 - ins_pipe( pipe_slow ); 8.1724 -%} 8.1725 - 8.1726 - 8.1727 -instruct mulD_reg(regD dst, regD src) %{ 8.1728 +instruct mulDPR_reg(regDPR dst, regDPR src) %{ 8.1729 predicate(UseSSE<=1); 8.1730 match(Set dst (MulD dst src)); 8.1731 format %{ "FLD $src\n\t" 8.1732 "DMULp $dst,ST" %} 8.1733 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 8.1734 ins_cost(150); 8.1735 - ins_encode( Push_Reg_D(src), 8.1736 + ins_encode( Push_Reg_DPR(src), 8.1737 OpcP, RegOpc(dst) ); 8.1738 ins_pipe( fpu_reg_reg ); 8.1739 %} 8.1740 @@ -10062,7 +9887,7 @@ 8.1741 // multiply scaled arg1 by arg2 8.1742 // rescale product by 2^(15360) 8.1743 // 8.1744 -instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{ 8.1745 +instruct strictfp_mulDPR_reg(regDPR1 
dst, regnotDPR1 src) %{ 8.1746 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 8.1747 match(Set dst (MulD dst src)); 8.1748 ins_cost(1); // Select this instruction for all strict FP double multiplies 8.1749 @@ -10075,13 +9900,13 @@ 8.1750 "DMULp $dst,ST\n\t" %} 8.1751 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 8.1752 ins_encode( strictfp_bias1(dst), 8.1753 - Push_Reg_D(src), 8.1754 + Push_Reg_DPR(src), 8.1755 OpcP, RegOpc(dst), 8.1756 strictfp_bias2(dst) ); 8.1757 ins_pipe( fpu_reg_reg ); 8.1758 %} 8.1759 8.1760 -instruct mulD_reg_imm(regD dst, immD con) %{ 8.1761 +instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 8.1762 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 8.1763 match(Set dst (MulD dst con)); 8.1764 ins_cost(200); 8.1765 @@ -10095,7 +9920,7 @@ 8.1766 %} 8.1767 8.1768 8.1769 -instruct mulD_reg_mem(regD dst, memory src) %{ 8.1770 +instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 8.1771 predicate( UseSSE<=1 ); 8.1772 match(Set dst (MulD dst (LoadD src))); 8.1773 ins_cost(200); 8.1774 @@ -10109,7 +9934,7 @@ 8.1775 8.1776 // 8.1777 // Cisc-alternate to reg-reg multiply 8.1778 -instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{ 8.1779 +instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 8.1780 predicate( UseSSE<=1 ); 8.1781 match(Set dst (MulD src (LoadD mem))); 8.1782 ins_cost(250); 8.1783 @@ -10118,17 +9943,17 @@ 8.1784 "FSTP_D $dst" %} 8.1785 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 8.1786 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 8.1787 - OpcReg_F(src), 8.1788 - Pop_Reg_D(dst) ); 8.1789 + OpcReg_FPR(src), 8.1790 + Pop_Reg_DPR(dst) ); 8.1791 ins_pipe( fpu_reg_reg_mem ); 8.1792 %} 8.1793 8.1794 8.1795 -// MACRO3 -- addD a mulD 8.1796 +// MACRO3 -- addDPR a mulDPR 8.1797 // This instruction is a '2-address' instruction in that the result goes 8.1798 // back to src2. 
This eliminates a move from the macro; possibly the 8.1799 // register allocator will have to add it back (and maybe not). 8.1800 -instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{ 8.1801 +instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 8.1802 predicate( UseSSE<=1 ); 8.1803 match(Set src2 (AddD (MulD src0 src1) src2)); 8.1804 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 8.1805 @@ -10136,29 +9961,29 @@ 8.1806 "DADDp $src2,ST" %} 8.1807 ins_cost(250); 8.1808 opcode(0xDD); /* LoadD DD /0 */ 8.1809 - ins_encode( Push_Reg_F(src0), 8.1810 + ins_encode( Push_Reg_FPR(src0), 8.1811 FMul_ST_reg(src1), 8.1812 FAddP_reg_ST(src2) ); 8.1813 ins_pipe( fpu_reg_reg_reg ); 8.1814 %} 8.1815 8.1816 8.1817 -// MACRO3 -- subD a mulD 8.1818 -instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{ 8.1819 +// MACRO3 -- subDPR a mulDPR 8.1820 +instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 8.1821 predicate( UseSSE<=1 ); 8.1822 match(Set src2 (SubD (MulD src0 src1) src2)); 8.1823 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 8.1824 "DMUL ST,$src1\n\t" 8.1825 "DSUBRp $src2,ST" %} 8.1826 ins_cost(250); 8.1827 - ins_encode( Push_Reg_F(src0), 8.1828 + ins_encode( Push_Reg_FPR(src0), 8.1829 FMul_ST_reg(src1), 8.1830 Opcode(0xDE), Opc_plus(0xE0,src2)); 8.1831 ins_pipe( fpu_reg_reg_reg ); 8.1832 %} 8.1833 8.1834 8.1835 -instruct divD_reg(regD dst, regD src) %{ 8.1836 +instruct divDPR_reg(regDPR dst, regDPR src) %{ 8.1837 predicate( UseSSE<=1 ); 8.1838 match(Set dst (DivD dst src)); 8.1839 8.1840 @@ -10166,7 +9991,7 @@ 8.1841 "FDIVp $dst,ST" %} 8.1842 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 8.1843 ins_cost(150); 8.1844 - ins_encode( Push_Reg_D(src), 8.1845 + ins_encode( Push_Reg_DPR(src), 8.1846 OpcP, RegOpc(dst) ); 8.1847 ins_pipe( fpu_reg_reg ); 8.1848 %} 8.1849 @@ -10179,7 +10004,7 @@ 8.1850 // divide scaled dividend by divisor 8.1851 // rescale quotient by 2^(15360) 8.1852 // 8.1853 -instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{ 
8.1854 +instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 8.1855 predicate (UseSSE<=1); 8.1856 match(Set dst (DivD dst src)); 8.1857 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 8.1858 @@ -10193,13 +10018,13 @@ 8.1859 "DMULp $dst,ST\n\t" %} 8.1860 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 8.1861 ins_encode( strictfp_bias1(dst), 8.1862 - Push_Reg_D(src), 8.1863 + Push_Reg_DPR(src), 8.1864 OpcP, RegOpc(dst), 8.1865 strictfp_bias2(dst) ); 8.1866 ins_pipe( fpu_reg_reg ); 8.1867 %} 8.1868 8.1869 -instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 8.1870 +instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 8.1871 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 8.1872 match(Set dst (RoundDouble (DivD src1 src2))); 8.1873 8.1874 @@ -10207,27 +10032,27 @@ 8.1875 "FDIV ST,$src2\n\t" 8.1876 "FSTP_D $dst\t# D-round" %} 8.1877 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 8.1878 - ins_encode( Push_Reg_D(src1), 8.1879 - OpcP, RegOpc(src2), Pop_Mem_D(dst) ); 8.1880 + ins_encode( Push_Reg_DPR(src1), 8.1881 + OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 8.1882 ins_pipe( fpu_mem_reg_reg ); 8.1883 %} 8.1884 8.1885 8.1886 -instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{ 8.1887 +instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 8.1888 predicate(UseSSE<=1); 8.1889 match(Set dst (ModD dst src)); 8.1890 - effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 8.1891 + effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 8.1892 8.1893 format %{ "DMOD $dst,$src" %} 8.1894 ins_cost(250); 8.1895 - ins_encode(Push_Reg_Mod_D(dst, src), 8.1896 - emitModD(), 8.1897 - Push_Result_Mod_D(src), 8.1898 - Pop_Reg_D(dst)); 8.1899 - ins_pipe( pipe_slow ); 8.1900 -%} 8.1901 - 8.1902 -instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{ 8.1903 + 
ins_encode(Push_Reg_Mod_DPR(dst, src), 8.1904 + emitModDPR(), 8.1905 + Push_Result_Mod_DPR(src), 8.1906 + Pop_Reg_DPR(dst)); 8.1907 + ins_pipe( pipe_slow ); 8.1908 +%} 8.1909 + 8.1910 +instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 8.1911 predicate(UseSSE>=2); 8.1912 match(Set dst (ModD src0 src1)); 8.1913 effect(KILL rax, KILL cr); 8.1914 @@ -10248,11 +10073,11 @@ 8.1915 "\tFSTP ST0\t # Restore FPU Stack" 8.1916 %} 8.1917 ins_cost(250); 8.1918 - ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU); 8.1919 - ins_pipe( pipe_slow ); 8.1920 -%} 8.1921 - 8.1922 -instruct sinD_reg(regDPR1 dst, regDPR1 src) %{ 8.1923 + ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 8.1924 + ins_pipe( pipe_slow ); 8.1925 +%} 8.1926 + 8.1927 +instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 8.1928 predicate (UseSSE<=1); 8.1929 match(Set dst (SinD src)); 8.1930 ins_cost(1800); 8.1931 @@ -10262,18 +10087,18 @@ 8.1932 ins_pipe( pipe_slow ); 8.1933 %} 8.1934 8.1935 -instruct sinXD_reg(regXD dst, eFlagsReg cr) %{ 8.1936 +instruct sinD_reg(regD dst, eFlagsReg cr) %{ 8.1937 predicate (UseSSE>=2); 8.1938 match(Set dst (SinD dst)); 8.1939 - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 8.1940 + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 8.1941 ins_cost(1800); 8.1942 format %{ "DSIN $dst" %} 8.1943 opcode(0xD9, 0xFE); 8.1944 - ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 8.1945 - ins_pipe( pipe_slow ); 8.1946 -%} 8.1947 - 8.1948 -instruct cosD_reg(regDPR1 dst, regDPR1 src) %{ 8.1949 + ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 8.1950 + ins_pipe( pipe_slow ); 8.1951 +%} 8.1952 + 8.1953 +instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 8.1954 predicate (UseSSE<=1); 8.1955 match(Set dst (CosD src)); 8.1956 ins_cost(1800); 8.1957 @@ -10283,18 +10108,18 @@ 8.1958 ins_pipe( pipe_slow ); 8.1959 %} 8.1960 8.1961 -instruct 
cosXD_reg(regXD dst, eFlagsReg cr) %{ 8.1962 +instruct cosD_reg(regD dst, eFlagsReg cr) %{ 8.1963 predicate (UseSSE>=2); 8.1964 match(Set dst (CosD dst)); 8.1965 - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 8.1966 + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 8.1967 ins_cost(1800); 8.1968 format %{ "DCOS $dst" %} 8.1969 opcode(0xD9, 0xFF); 8.1970 - ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 8.1971 - ins_pipe( pipe_slow ); 8.1972 -%} 8.1973 - 8.1974 -instruct tanD_reg(regDPR1 dst, regDPR1 src) %{ 8.1975 + ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 8.1976 + ins_pipe( pipe_slow ); 8.1977 +%} 8.1978 + 8.1979 +instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 8.1980 predicate (UseSSE<=1); 8.1981 match(Set dst(TanD src)); 8.1982 format %{ "DTAN $dst" %} 8.1983 @@ -10303,50 +10128,50 @@ 8.1984 ins_pipe( pipe_slow ); 8.1985 %} 8.1986 8.1987 -instruct tanXD_reg(regXD dst, eFlagsReg cr) %{ 8.1988 +instruct tanD_reg(regD dst, eFlagsReg cr) %{ 8.1989 predicate (UseSSE>=2); 8.1990 match(Set dst(TanD dst)); 8.1991 - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 8.1992 + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 8.1993 format %{ "DTAN $dst" %} 8.1994 - ins_encode( Push_SrcXD(dst), 8.1995 + ins_encode( Push_SrcD(dst), 8.1996 Opcode(0xD9), Opcode(0xF2), // fptan 8.1997 Opcode(0xDD), Opcode(0xD8), // fstp st 8.1998 - Push_ResultXD(dst) ); 8.1999 - ins_pipe( pipe_slow ); 8.2000 -%} 8.2001 - 8.2002 -instruct atanD_reg(regD dst, regD src) %{ 8.2003 + Push_ResultD(dst) ); 8.2004 + ins_pipe( pipe_slow ); 8.2005 +%} 8.2006 + 8.2007 +instruct atanDPR_reg(regDPR dst, regDPR src) %{ 8.2008 predicate (UseSSE<=1); 8.2009 match(Set dst(AtanD dst src)); 8.2010 format %{ "DATA $dst,$src" %} 8.2011 opcode(0xD9, 0xF3); 8.2012 - ins_encode( Push_Reg_D(src), 8.2013 + ins_encode( Push_Reg_DPR(src), 8.2014 OpcP, OpcS, RegOpc(dst) ); 8.2015 ins_pipe( pipe_slow ); 8.2016 %} 8.2017 
8.2018 -instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 8.2019 +instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 8.2020 predicate (UseSSE>=2); 8.2021 match(Set dst(AtanD dst src)); 8.2022 - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 8.2023 + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 8.2024 format %{ "DATA $dst,$src" %} 8.2025 opcode(0xD9, 0xF3); 8.2026 - ins_encode( Push_SrcXD(src), 8.2027 - OpcP, OpcS, Push_ResultXD(dst) ); 8.2028 - ins_pipe( pipe_slow ); 8.2029 -%} 8.2030 - 8.2031 -instruct sqrtD_reg(regD dst, regD src) %{ 8.2032 + ins_encode( Push_SrcD(src), 8.2033 + OpcP, OpcS, Push_ResultD(dst) ); 8.2034 + ins_pipe( pipe_slow ); 8.2035 +%} 8.2036 + 8.2037 +instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 8.2038 predicate (UseSSE<=1); 8.2039 match(Set dst (SqrtD src)); 8.2040 format %{ "DSQRT $dst,$src" %} 8.2041 opcode(0xFA, 0xD9); 8.2042 - ins_encode( Push_Reg_D(src), 8.2043 - OpcS, OpcP, Pop_Reg_D(dst) ); 8.2044 - ins_pipe( pipe_slow ); 8.2045 -%} 8.2046 - 8.2047 -instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 8.2048 + ins_encode( Push_Reg_DPR(src), 8.2049 + OpcS, OpcP, Pop_Reg_DPR(dst) ); 8.2050 + ins_pipe( pipe_slow ); 8.2051 +%} 8.2052 + 8.2053 +instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 8.2054 predicate (UseSSE<=1); 8.2055 match(Set Y (PowD X Y)); // Raise X to the Yth power 8.2056 effect(KILL rax, KILL rbx, KILL rcx); 8.2057 @@ -10375,14 +10200,14 @@ 8.2058 "ADD ESP,8" 8.2059 %} 8.2060 ins_encode( push_stack_temp_qword, 8.2061 - Push_Reg_D(X), 8.2062 + Push_Reg_DPR(X), 8.2063 Opcode(0xD9), Opcode(0xF1), // fyl2x 8.2064 pow_exp_core_encoding, 8.2065 pop_stack_temp_qword); 8.2066 ins_pipe( pipe_slow ); 8.2067 %} 8.2068 8.2069 -instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ 8.2070 +instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, 
eBXRegI rbx, eCXRegI rcx ) %{ 8.2071 predicate (UseSSE>=2); 8.2072 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 8.2073 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); 8.2074 @@ -10420,12 +10245,12 @@ 8.2075 push_xmm_to_fpr1(src0), 8.2076 Opcode(0xD9), Opcode(0xF1), // fyl2x 8.2077 pow_exp_core_encoding, 8.2078 - Push_ResultXD(dst) ); 8.2079 - ins_pipe( pipe_slow ); 8.2080 -%} 8.2081 - 8.2082 - 8.2083 -instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 8.2084 + Push_ResultD(dst) ); 8.2085 + ins_pipe( pipe_slow ); 8.2086 +%} 8.2087 + 8.2088 + 8.2089 +instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 8.2090 predicate (UseSSE<=1); 8.2091 match(Set dpr1 (ExpD dpr1)); 8.2092 effect(KILL rax, KILL rbx, KILL rcx); 8.2093 @@ -10461,7 +10286,7 @@ 8.2094 ins_pipe( pipe_slow ); 8.2095 %} 8.2096 8.2097 -instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 8.2098 +instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 8.2099 predicate (UseSSE>=2); 8.2100 match(Set dst (ExpD src)); 8.2101 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); 8.2102 @@ -10492,17 +10317,17 @@ 8.2103 "MOVSD $dst,[ESP]\n\t" 8.2104 "ADD ESP,8" 8.2105 %} 8.2106 - ins_encode( Push_SrcXD(src), 8.2107 + ins_encode( Push_SrcD(src), 8.2108 Opcode(0xD9), Opcode(0xEA), // fldl2e 8.2109 Opcode(0xDE), Opcode(0xC9), // fmulp 8.2110 pow_exp_core_encoding, 8.2111 - Push_ResultXD(dst) ); 8.2112 - ins_pipe( pipe_slow ); 8.2113 -%} 8.2114 - 8.2115 - 8.2116 - 8.2117 -instruct log10D_reg(regDPR1 dst, regDPR1 src) %{ 8.2118 + Push_ResultD(dst) ); 8.2119 + ins_pipe( pipe_slow ); 8.2120 +%} 8.2121 + 8.2122 + 8.2123 + 8.2124 +instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 8.2125 predicate (UseSSE<=1); 8.2126 // The source Double operand on FPU stack 8.2127 match(Set dst (Log10D src)); 8.2128 @@ -10520,7 +10345,7 @@ 8.2129 ins_pipe( pipe_slow ); 8.2130 %} 8.2131 8.2132 
-instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 8.2133 +instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 8.2134 predicate (UseSSE>=2); 8.2135 effect(KILL cr); 8.2136 match(Set dst (Log10D src)); 8.2137 @@ -10530,14 +10355,14 @@ 8.2138 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 8.2139 %} 8.2140 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 8.2141 - Push_SrcXD(src), 8.2142 + Push_SrcD(src), 8.2143 Opcode(0xD9), Opcode(0xF1), // fyl2x 8.2144 - Push_ResultXD(dst)); 8.2145 - 8.2146 - ins_pipe( pipe_slow ); 8.2147 -%} 8.2148 - 8.2149 -instruct logD_reg(regDPR1 dst, regDPR1 src) %{ 8.2150 + Push_ResultD(dst)); 8.2151 + 8.2152 + ins_pipe( pipe_slow ); 8.2153 +%} 8.2154 + 8.2155 +instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ 8.2156 predicate (UseSSE<=1); 8.2157 // The source Double operand on FPU stack 8.2158 match(Set dst (LogD src)); 8.2159 @@ -10555,7 +10380,7 @@ 8.2160 ins_pipe( pipe_slow ); 8.2161 %} 8.2162 8.2163 -instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 8.2164 +instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ 8.2165 predicate (UseSSE>=2); 8.2166 effect(KILL cr); 8.2167 // The source and result Double operands in XMM registers 8.2168 @@ -10566,9 +10391,9 @@ 8.2169 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 8.2170 %} 8.2171 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 8.2172 - Push_SrcXD(src), 8.2173 + Push_SrcD(src), 8.2174 Opcode(0xD9), Opcode(0xF1), // fyl2x 8.2175 - Push_ResultXD(dst)); 8.2176 + Push_ResultD(dst)); 8.2177 ins_pipe( pipe_slow ); 8.2178 %} 8.2179 8.2180 @@ -10589,7 +10414,7 @@ 8.2181 // exit: 8.2182 8.2183 // P6 version of float compare, sets condition codes in EFLAGS 8.2184 -instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 8.2185 +instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 8.2186 predicate(VM_Version::supports_cmov() && UseSSE == 0); 8.2187 match(Set cr (CmpF src1 src2)); 8.2188 effect(KILL rax); 8.2189 @@ -10601,27 +10426,27 @@ 8.2190 "SAHF\n" 8.2191 
"exit:\tNOP // avoid branch to branch" %} 8.2192 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 8.2193 - ins_encode( Push_Reg_D(src1), 8.2194 + ins_encode( Push_Reg_DPR(src1), 8.2195 OpcP, RegOpc(src2), 8.2196 cmpF_P6_fixup ); 8.2197 ins_pipe( pipe_slow ); 8.2198 %} 8.2199 8.2200 -instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{ 8.2201 +instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 8.2202 predicate(VM_Version::supports_cmov() && UseSSE == 0); 8.2203 match(Set cr (CmpF src1 src2)); 8.2204 ins_cost(100); 8.2205 format %{ "FLD $src1\n\t" 8.2206 "FUCOMIP ST,$src2 // P6 instruction" %} 8.2207 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 8.2208 - ins_encode( Push_Reg_D(src1), 8.2209 + ins_encode( Push_Reg_DPR(src1), 8.2210 OpcP, RegOpc(src2)); 8.2211 ins_pipe( pipe_slow ); 8.2212 %} 8.2213 8.2214 8.2215 // Compare & branch 8.2216 -instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 8.2217 +instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 8.2218 predicate(UseSSE == 0); 8.2219 match(Set cr (CmpF src1 src2)); 8.2220 effect(KILL rax); 8.2221 @@ -10634,42 +10459,42 @@ 8.2222 "MOV AH,1\t# unordered treat as LT\n" 8.2223 "flags:\tSAHF" %} 8.2224 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 8.2225 - ins_encode( Push_Reg_D(src1), 8.2226 + ins_encode( Push_Reg_DPR(src1), 8.2227 OpcP, RegOpc(src2), 8.2228 fpu_flags); 8.2229 ins_pipe( pipe_slow ); 8.2230 %} 8.2231 8.2232 // Compare vs zero into -1,0,1 8.2233 -instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{ 8.2234 +instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 8.2235 predicate(UseSSE == 0); 8.2236 match(Set dst (CmpF3 src1 zero)); 8.2237 effect(KILL cr, KILL rax); 8.2238 ins_cost(280); 8.2239 format %{ "FTSTF $dst,$src1" %} 8.2240 opcode(0xE4, 0xD9); 8.2241 - ins_encode( Push_Reg_D(src1), 8.2242 + ins_encode( Push_Reg_DPR(src1), 8.2243 OpcS, OpcP, PopFPU, 8.2244 CmpF_Result(dst)); 8.2245 
ins_pipe( pipe_slow ); 8.2246 %} 8.2247 8.2248 // Compare into -1,0,1 8.2249 -instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 8.2250 +instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 8.2251 predicate(UseSSE == 0); 8.2252 match(Set dst (CmpF3 src1 src2)); 8.2253 effect(KILL cr, KILL rax); 8.2254 ins_cost(300); 8.2255 format %{ "FCMPF $dst,$src1,$src2" %} 8.2256 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 8.2257 - ins_encode( Push_Reg_D(src1), 8.2258 + ins_encode( Push_Reg_DPR(src1), 8.2259 OpcP, RegOpc(src2), 8.2260 CmpF_Result(dst)); 8.2261 ins_pipe( pipe_slow ); 8.2262 %} 8.2263 8.2264 // float compare and set condition codes in EFLAGS by XMM regs 8.2265 -instruct cmpX_cc(eFlagsRegU cr, regX src1, regX src2) %{ 8.2266 +instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 8.2267 predicate(UseSSE>=1); 8.2268 match(Set cr (CmpF src1 src2)); 8.2269 ins_cost(145); 8.2270 @@ -10686,7 +10511,7 @@ 8.2271 ins_pipe( pipe_slow ); 8.2272 %} 8.2273 8.2274 -instruct cmpX_ccCF(eFlagsRegUCF cr, regX src1, regX src2) %{ 8.2275 +instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 8.2276 predicate(UseSSE>=1); 8.2277 match(Set cr (CmpF src1 src2)); 8.2278 ins_cost(100); 8.2279 @@ -10698,7 +10523,7 @@ 8.2280 %} 8.2281 8.2282 // float compare and set condition codes in EFLAGS by XMM regs 8.2283 -instruct cmpX_ccmem(eFlagsRegU cr, regX src1, memory src2) %{ 8.2284 +instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 8.2285 predicate(UseSSE>=1); 8.2286 match(Set cr (CmpF src1 (LoadF src2))); 8.2287 ins_cost(165); 8.2288 @@ -10715,7 +10540,7 @@ 8.2289 ins_pipe( pipe_slow ); 8.2290 %} 8.2291 8.2292 -instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX src1, memory src2) %{ 8.2293 +instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 8.2294 predicate(UseSSE>=1); 8.2295 match(Set cr (CmpF src1 (LoadF src2))); 8.2296 ins_cost(100); 8.2297 @@ -10727,7 +10552,7 @@ 8.2298 %} 8.2299 8.2300 // Compare 
into -1,0,1 in XMM 8.2301 -instruct cmpX_reg(xRegI dst, regX src1, regX src2, eFlagsReg cr) %{ 8.2302 +instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 8.2303 predicate(UseSSE>=1); 8.2304 match(Set dst (CmpF3 src1 src2)); 8.2305 effect(KILL cr); 8.2306 @@ -10747,7 +10572,7 @@ 8.2307 %} 8.2308 8.2309 // Compare into -1,0,1 in XMM and memory 8.2310 -instruct cmpX_regmem(xRegI dst, regX src1, memory src2, eFlagsReg cr) %{ 8.2311 +instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 8.2312 predicate(UseSSE>=1); 8.2313 match(Set dst (CmpF3 src1 (LoadF src2))); 8.2314 effect(KILL cr); 8.2315 @@ -10767,230 +10592,57 @@ 8.2316 %} 8.2317 8.2318 // Spill to obtain 24-bit precision 8.2319 -instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{ 8.2320 +instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 8.2321 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2322 match(Set dst (SubF src1 src2)); 8.2323 8.2324 format %{ "FSUB $dst,$src1 - $src2" %} 8.2325 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 8.2326 - ins_encode( Push_Reg_F(src1), 8.2327 - OpcReg_F(src2), 8.2328 - Pop_Mem_F(dst) ); 8.2329 + ins_encode( Push_Reg_FPR(src1), 8.2330 + OpcReg_FPR(src2), 8.2331 + Pop_Mem_FPR(dst) ); 8.2332 ins_pipe( fpu_mem_reg_reg ); 8.2333 %} 8.2334 // 8.2335 // This instruction does not round to 24-bits 8.2336 -instruct subF_reg(regF dst, regF src) %{ 8.2337 +instruct subFPR_reg(regFPR dst, regFPR src) %{ 8.2338 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2339 match(Set dst (SubF dst src)); 8.2340 8.2341 format %{ "FSUB $dst,$src" %} 8.2342 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 8.2343 - ins_encode( Push_Reg_F(src), 8.2344 + ins_encode( Push_Reg_FPR(src), 8.2345 OpcP, RegOpc(dst) ); 8.2346 ins_pipe( fpu_reg_reg ); 8.2347 %} 8.2348 8.2349 // Spill to obtain 24-bit precision 8.2350 -instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{ 8.2351 +instruct 
addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 8.2352 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2353 match(Set dst (AddF src1 src2)); 8.2354 8.2355 format %{ "FADD $dst,$src1,$src2" %} 8.2356 opcode(0xD8, 0x0); /* D8 C0+i */ 8.2357 - ins_encode( Push_Reg_F(src2), 8.2358 - OpcReg_F(src1), 8.2359 - Pop_Mem_F(dst) ); 8.2360 + ins_encode( Push_Reg_FPR(src2), 8.2361 + OpcReg_FPR(src1), 8.2362 + Pop_Mem_FPR(dst) ); 8.2363 ins_pipe( fpu_mem_reg_reg ); 8.2364 %} 8.2365 // 8.2366 // This instruction does not round to 24-bits 8.2367 -instruct addF_reg(regF dst, regF src) %{ 8.2368 +instruct addFPR_reg(regFPR dst, regFPR src) %{ 8.2369 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2370 match(Set dst (AddF dst src)); 8.2371 8.2372 format %{ "FLD $src\n\t" 8.2373 "FADDp $dst,ST" %} 8.2374 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 8.2375 - ins_encode( Push_Reg_F(src), 8.2376 + ins_encode( Push_Reg_FPR(src), 8.2377 OpcP, RegOpc(dst) ); 8.2378 ins_pipe( fpu_reg_reg ); 8.2379 %} 8.2380 8.2381 -// Add two single precision floating point values in xmm 8.2382 -instruct addX_reg(regX dst, regX src) %{ 8.2383 - predicate(UseSSE>=1); 8.2384 - match(Set dst (AddF dst src)); 8.2385 - format %{ "ADDSS $dst,$src" %} 8.2386 - ins_encode %{ 8.2387 - __ addss($dst$$XMMRegister, $src$$XMMRegister); 8.2388 - %} 8.2389 - ins_pipe( pipe_slow ); 8.2390 -%} 8.2391 - 8.2392 -instruct addX_imm(regX dst, immXF con) %{ 8.2393 - predicate(UseSSE>=1); 8.2394 - match(Set dst (AddF dst con)); 8.2395 - format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 8.2396 - ins_encode %{ 8.2397 - __ addss($dst$$XMMRegister, $constantaddress($con)); 8.2398 - %} 8.2399 - ins_pipe(pipe_slow); 8.2400 -%} 8.2401 - 8.2402 -instruct addX_mem(regX dst, memory mem) %{ 8.2403 - predicate(UseSSE>=1); 8.2404 - match(Set dst (AddF dst (LoadF mem))); 8.2405 - format %{ "ADDSS $dst,$mem" %} 8.2406 - ins_encode %{ 8.2407 - __ 
addss($dst$$XMMRegister, $mem$$Address); 8.2408 - %} 8.2409 - ins_pipe( pipe_slow ); 8.2410 -%} 8.2411 - 8.2412 -// Subtract two single precision floating point values in xmm 8.2413 -instruct subX_reg(regX dst, regX src) %{ 8.2414 - predicate(UseSSE>=1); 8.2415 - match(Set dst (SubF dst src)); 8.2416 - ins_cost(150); 8.2417 - format %{ "SUBSS $dst,$src" %} 8.2418 - ins_encode %{ 8.2419 - __ subss($dst$$XMMRegister, $src$$XMMRegister); 8.2420 - %} 8.2421 - ins_pipe( pipe_slow ); 8.2422 -%} 8.2423 - 8.2424 -instruct subX_imm(regX dst, immXF con) %{ 8.2425 - predicate(UseSSE>=1); 8.2426 - match(Set dst (SubF dst con)); 8.2427 - ins_cost(150); 8.2428 - format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 8.2429 - ins_encode %{ 8.2430 - __ subss($dst$$XMMRegister, $constantaddress($con)); 8.2431 - %} 8.2432 - ins_pipe(pipe_slow); 8.2433 -%} 8.2434 - 8.2435 -instruct subX_mem(regX dst, memory mem) %{ 8.2436 - predicate(UseSSE>=1); 8.2437 - match(Set dst (SubF dst (LoadF mem))); 8.2438 - ins_cost(150); 8.2439 - format %{ "SUBSS $dst,$mem" %} 8.2440 - ins_encode %{ 8.2441 - __ subss($dst$$XMMRegister, $mem$$Address); 8.2442 - %} 8.2443 - ins_pipe( pipe_slow ); 8.2444 -%} 8.2445 - 8.2446 -// Multiply two single precision floating point values in xmm 8.2447 -instruct mulX_reg(regX dst, regX src) %{ 8.2448 - predicate(UseSSE>=1); 8.2449 - match(Set dst (MulF dst src)); 8.2450 - format %{ "MULSS $dst,$src" %} 8.2451 - ins_encode %{ 8.2452 - __ mulss($dst$$XMMRegister, $src$$XMMRegister); 8.2453 - %} 8.2454 - ins_pipe( pipe_slow ); 8.2455 -%} 8.2456 - 8.2457 -instruct mulX_imm(regX dst, immXF con) %{ 8.2458 - predicate(UseSSE>=1); 8.2459 - match(Set dst (MulF dst con)); 8.2460 - format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 8.2461 - ins_encode %{ 8.2462 - __ mulss($dst$$XMMRegister, $constantaddress($con)); 8.2463 - %} 8.2464 - ins_pipe(pipe_slow); 8.2465 -%} 8.2466 - 8.2467 -instruct mulX_mem(regX dst, 
memory mem) %{ 8.2468 - predicate(UseSSE>=1); 8.2469 - match(Set dst (MulF dst (LoadF mem))); 8.2470 - format %{ "MULSS $dst,$mem" %} 8.2471 - ins_encode %{ 8.2472 - __ mulss($dst$$XMMRegister, $mem$$Address); 8.2473 - %} 8.2474 - ins_pipe( pipe_slow ); 8.2475 -%} 8.2476 - 8.2477 -// Divide two single precision floating point values in xmm 8.2478 -instruct divX_reg(regX dst, regX src) %{ 8.2479 - predicate(UseSSE>=1); 8.2480 - match(Set dst (DivF dst src)); 8.2481 - format %{ "DIVSS $dst,$src" %} 8.2482 - ins_encode %{ 8.2483 - __ divss($dst$$XMMRegister, $src$$XMMRegister); 8.2484 - %} 8.2485 - ins_pipe( pipe_slow ); 8.2486 -%} 8.2487 - 8.2488 -instruct divX_imm(regX dst, immXF con) %{ 8.2489 - predicate(UseSSE>=1); 8.2490 - match(Set dst (DivF dst con)); 8.2491 - format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 8.2492 - ins_encode %{ 8.2493 - __ divss($dst$$XMMRegister, $constantaddress($con)); 8.2494 - %} 8.2495 - ins_pipe(pipe_slow); 8.2496 -%} 8.2497 - 8.2498 -instruct divX_mem(regX dst, memory mem) %{ 8.2499 - predicate(UseSSE>=1); 8.2500 - match(Set dst (DivF dst (LoadF mem))); 8.2501 - format %{ "DIVSS $dst,$mem" %} 8.2502 - ins_encode %{ 8.2503 - __ divss($dst$$XMMRegister, $mem$$Address); 8.2504 - %} 8.2505 - ins_pipe( pipe_slow ); 8.2506 -%} 8.2507 - 8.2508 -// Get the square root of a single precision floating point values in xmm 8.2509 -instruct sqrtX_reg(regX dst, regX src) %{ 8.2510 - predicate(UseSSE>=1); 8.2511 - match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 8.2512 - ins_cost(150); 8.2513 - format %{ "SQRTSS $dst,$src" %} 8.2514 - ins_encode %{ 8.2515 - __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 8.2516 - %} 8.2517 - ins_pipe( pipe_slow ); 8.2518 -%} 8.2519 - 8.2520 -instruct sqrtX_mem(regX dst, memory mem) %{ 8.2521 - predicate(UseSSE>=1); 8.2522 - match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem))))); 8.2523 - ins_cost(150); 8.2524 - format %{ "SQRTSS $dst,$mem" %} 8.2525 - ins_encode %{ 8.2526 - __ 
sqrtss($dst$$XMMRegister, $mem$$Address); 8.2527 - %} 8.2528 - ins_pipe( pipe_slow ); 8.2529 -%} 8.2530 - 8.2531 -// Get the square root of a double precision floating point values in xmm 8.2532 -instruct sqrtXD_reg(regXD dst, regXD src) %{ 8.2533 - predicate(UseSSE>=2); 8.2534 - match(Set dst (SqrtD src)); 8.2535 - ins_cost(150); 8.2536 - format %{ "SQRTSD $dst,$src" %} 8.2537 - ins_encode %{ 8.2538 - __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 8.2539 - %} 8.2540 - ins_pipe( pipe_slow ); 8.2541 -%} 8.2542 - 8.2543 -instruct sqrtXD_mem(regXD dst, memory mem) %{ 8.2544 - predicate(UseSSE>=2); 8.2545 - match(Set dst (SqrtD (LoadD mem))); 8.2546 - ins_cost(150); 8.2547 - format %{ "SQRTSD $dst,$mem" %} 8.2548 - ins_encode %{ 8.2549 - __ sqrtsd($dst$$XMMRegister, $mem$$Address); 8.2550 - %} 8.2551 - ins_pipe( pipe_slow ); 8.2552 -%} 8.2553 - 8.2554 -instruct absF_reg(regFPR1 dst, regFPR1 src) %{ 8.2555 +instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 8.2556 predicate(UseSSE==0); 8.2557 match(Set dst (AbsF src)); 8.2558 ins_cost(100); 8.2559 @@ -11000,19 +10652,7 @@ 8.2560 ins_pipe( fpu_reg_reg ); 8.2561 %} 8.2562 8.2563 -instruct absX_reg(regX dst ) %{ 8.2564 - predicate(UseSSE>=1); 8.2565 - match(Set dst (AbsF dst)); 8.2566 - ins_cost(150); 8.2567 - format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %} 8.2568 - ins_encode %{ 8.2569 - __ andps($dst$$XMMRegister, 8.2570 - ExternalAddress((address)float_signmask_pool)); 8.2571 - %} 8.2572 - ins_pipe( pipe_slow ); 8.2573 -%} 8.2574 - 8.2575 -instruct negF_reg(regFPR1 dst, regFPR1 src) %{ 8.2576 +instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 8.2577 predicate(UseSSE==0); 8.2578 match(Set dst (NegF src)); 8.2579 ins_cost(100); 8.2580 @@ -11022,21 +10662,9 @@ 8.2581 ins_pipe( fpu_reg_reg ); 8.2582 %} 8.2583 8.2584 -instruct negX_reg( regX dst ) %{ 8.2585 - predicate(UseSSE>=1); 8.2586 - match(Set dst (NegF dst)); 8.2587 - ins_cost(150); 8.2588 - format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" 
%} 8.2589 - ins_encode %{ 8.2590 - __ xorps($dst$$XMMRegister, 8.2591 - ExternalAddress((address)float_signflip_pool)); 8.2592 - %} 8.2593 - ins_pipe( pipe_slow ); 8.2594 -%} 8.2595 - 8.2596 -// Cisc-alternate to addF_reg 8.2597 +// Cisc-alternate to addFPR_reg 8.2598 // Spill to obtain 24-bit precision 8.2599 -instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 8.2600 +instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 8.2601 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2602 match(Set dst (AddF src1 (LoadF src2))); 8.2603 8.2604 @@ -11045,14 +10673,14 @@ 8.2605 "FSTP_S $dst" %} 8.2606 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 8.2607 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 8.2608 - OpcReg_F(src1), 8.2609 - Pop_Mem_F(dst) ); 8.2610 + OpcReg_FPR(src1), 8.2611 + Pop_Mem_FPR(dst) ); 8.2612 ins_pipe( fpu_mem_reg_mem ); 8.2613 %} 8.2614 // 8.2615 -// Cisc-alternate to addF_reg 8.2616 +// Cisc-alternate to addFPR_reg 8.2617 // This instruction does not round to 24-bits 8.2618 -instruct addF_reg_mem(regF dst, memory src) %{ 8.2619 +instruct addFPR_reg_mem(regFPR dst, memory src) %{ 8.2620 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2621 match(Set dst (AddF dst (LoadF src))); 8.2622 8.2623 @@ -11065,21 +10693,21 @@ 8.2624 8.2625 // // Following two instructions for _222_mpegaudio 8.2626 // Spill to obtain 24-bit precision 8.2627 -instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{ 8.2628 +instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 8.2629 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2630 match(Set dst (AddF src1 src2)); 8.2631 8.2632 format %{ "FADD $dst,$src1,$src2" %} 8.2633 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 8.2634 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 8.2635 - OpcReg_F(src2), 8.2636 - Pop_Mem_F(dst) ); 8.2637 + OpcReg_FPR(src2), 8.2638 + Pop_Mem_FPR(dst) ); 
8.2639 ins_pipe( fpu_mem_reg_mem ); 8.2640 %} 8.2641 8.2642 // Cisc-spill variant 8.2643 // Spill to obtain 24-bit precision 8.2644 -instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 8.2645 +instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 8.2646 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2647 match(Set dst (AddF src1 (LoadF src2))); 8.2648 8.2649 @@ -11088,12 +10716,12 @@ 8.2650 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 8.2651 set_instruction_start, 8.2652 OpcP, RMopc_Mem(secondary,src1), 8.2653 - Pop_Mem_F(dst) ); 8.2654 + Pop_Mem_FPR(dst) ); 8.2655 ins_pipe( fpu_mem_mem_mem ); 8.2656 %} 8.2657 8.2658 // Spill to obtain 24-bit precision 8.2659 -instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 8.2660 +instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 8.2661 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2662 match(Set dst (AddF src1 src2)); 8.2663 8.2664 @@ -11102,13 +10730,13 @@ 8.2665 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 8.2666 set_instruction_start, 8.2667 OpcP, RMopc_Mem(secondary,src1), 8.2668 - Pop_Mem_F(dst) ); 8.2669 + Pop_Mem_FPR(dst) ); 8.2670 ins_pipe( fpu_mem_mem_mem ); 8.2671 %} 8.2672 8.2673 8.2674 // Spill to obtain 24-bit precision 8.2675 -instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 8.2676 +instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 8.2677 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2678 match(Set dst (AddF src con)); 8.2679 format %{ "FLD $src\n\t" 8.2680 @@ -11123,7 +10751,7 @@ 8.2681 %} 8.2682 // 8.2683 // This instruction does not round to 24-bits 8.2684 -instruct addF_reg_imm(regF dst, regF src, immF con) %{ 8.2685 +instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 8.2686 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2687 match(Set dst (AddF src con)); 8.2688 format %{ "FLD $src\n\t" 
8.2689 @@ -11138,7 +10766,7 @@ 8.2690 %} 8.2691 8.2692 // Spill to obtain 24-bit precision 8.2693 -instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{ 8.2694 +instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 8.2695 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2696 match(Set dst (MulF src1 src2)); 8.2697 8.2698 @@ -11146,14 +10774,14 @@ 8.2699 "FMUL $src2\n\t" 8.2700 "FSTP_S $dst" %} 8.2701 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 8.2702 - ins_encode( Push_Reg_F(src1), 8.2703 - OpcReg_F(src2), 8.2704 - Pop_Mem_F(dst) ); 8.2705 + ins_encode( Push_Reg_FPR(src1), 8.2706 + OpcReg_FPR(src2), 8.2707 + Pop_Mem_FPR(dst) ); 8.2708 ins_pipe( fpu_mem_reg_reg ); 8.2709 %} 8.2710 // 8.2711 // This instruction does not round to 24-bits 8.2712 -instruct mulF_reg(regF dst, regF src1, regF src2) %{ 8.2713 +instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 8.2714 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2715 match(Set dst (MulF src1 src2)); 8.2716 8.2717 @@ -11161,16 +10789,16 @@ 8.2718 "FMUL $src2\n\t" 8.2719 "FSTP_S $dst" %} 8.2720 opcode(0xD8, 0x1); /* D8 C8+i */ 8.2721 - ins_encode( Push_Reg_F(src2), 8.2722 - OpcReg_F(src1), 8.2723 - Pop_Reg_F(dst) ); 8.2724 + ins_encode( Push_Reg_FPR(src2), 8.2725 + OpcReg_FPR(src1), 8.2726 + Pop_Reg_FPR(dst) ); 8.2727 ins_pipe( fpu_reg_reg_reg ); 8.2728 %} 8.2729 8.2730 8.2731 // Spill to obtain 24-bit precision 8.2732 // Cisc-alternate to reg-reg multiply 8.2733 -instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 8.2734 +instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 8.2735 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2736 match(Set dst (MulF src1 (LoadF src2))); 8.2737 8.2738 @@ -11179,27 +10807,27 @@ 8.2739 "FSTP_S $dst" %} 8.2740 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 8.2741 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 8.2742 - 
OpcReg_F(src1), 8.2743 - Pop_Mem_F(dst) ); 8.2744 + OpcReg_FPR(src1), 8.2745 + Pop_Mem_FPR(dst) ); 8.2746 ins_pipe( fpu_mem_reg_mem ); 8.2747 %} 8.2748 // 8.2749 // This instruction does not round to 24-bits 8.2750 // Cisc-alternate to reg-reg multiply 8.2751 -instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 8.2752 +instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 8.2753 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2754 match(Set dst (MulF src1 (LoadF src2))); 8.2755 8.2756 format %{ "FMUL $dst,$src1,$src2" %} 8.2757 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 8.2758 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 8.2759 - OpcReg_F(src1), 8.2760 - Pop_Reg_F(dst) ); 8.2761 + OpcReg_FPR(src1), 8.2762 + Pop_Reg_FPR(dst) ); 8.2763 ins_pipe( fpu_reg_reg_mem ); 8.2764 %} 8.2765 8.2766 // Spill to obtain 24-bit precision 8.2767 -instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 8.2768 +instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 8.2769 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2770 match(Set dst (MulF src1 src2)); 8.2771 8.2772 @@ -11208,12 +10836,12 @@ 8.2773 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 8.2774 set_instruction_start, 8.2775 OpcP, RMopc_Mem(secondary,src1), 8.2776 - Pop_Mem_F(dst) ); 8.2777 + Pop_Mem_FPR(dst) ); 8.2778 ins_pipe( fpu_mem_mem_mem ); 8.2779 %} 8.2780 8.2781 // Spill to obtain 24-bit precision 8.2782 -instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 8.2783 +instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 8.2784 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2785 match(Set dst (MulF src con)); 8.2786 8.2787 @@ -11229,7 +10857,7 @@ 8.2788 %} 8.2789 // 8.2790 // This instruction does not round to 24-bits 8.2791 -instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 8.2792 +instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 8.2793 
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2794 match(Set dst (MulF src con)); 8.2795 8.2796 @@ -11246,9 +10874,9 @@ 8.2797 8.2798 8.2799 // 8.2800 -// MACRO1 -- subsume unshared load into mulF 8.2801 +// MACRO1 -- subsume unshared load into mulFPR 8.2802 // This instruction does not round to 24-bits 8.2803 -instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{ 8.2804 +instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 8.2805 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2806 match(Set dst (MulF (LoadF mem1) src)); 8.2807 8.2808 @@ -11257,36 +10885,36 @@ 8.2809 "FSTP $dst" %} 8.2810 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 8.2811 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 8.2812 - OpcReg_F(src), 8.2813 - Pop_Reg_F(dst) ); 8.2814 + OpcReg_FPR(src), 8.2815 + Pop_Reg_FPR(dst) ); 8.2816 ins_pipe( fpu_reg_reg_mem ); 8.2817 %} 8.2818 // 8.2819 -// MACRO2 -- addF a mulF which subsumed an unshared load 8.2820 +// MACRO2 -- addFPR a mulFPR which subsumed an unshared load 8.2821 // This instruction does not round to 24-bits 8.2822 -instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{ 8.2823 +instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 8.2824 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2825 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 8.2826 ins_cost(95); 8.2827 8.2828 format %{ "FLD $mem1 ===MACRO2===\n\t" 8.2829 - "FMUL ST,$src1 subsume mulF left load\n\t" 8.2830 + "FMUL ST,$src1 subsume mulFPR left load\n\t" 8.2831 "FADD ST,$src2\n\t" 8.2832 "FSTP $dst" %} 8.2833 opcode(0xD9); /* LoadF D9 /0 */ 8.2834 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 8.2835 FMul_ST_reg(src1), 8.2836 FAdd_ST_reg(src2), 8.2837 - Pop_Reg_F(dst) ); 8.2838 + Pop_Reg_FPR(dst) ); 8.2839 ins_pipe( fpu_reg_mem_reg_reg ); 8.2840 %} 8.2841 8.2842 -// MACRO3 -- addF a mulF 8.2843 +// MACRO3 -- addFPR a 
mulFPR 8.2844 // This instruction does not round to 24-bits. It is a '2-address' 8.2845 // instruction in that the result goes back to src2. This eliminates 8.2846 // a move from the macro; possibly the register allocator will have 8.2847 // to add it back (and maybe not). 8.2848 -instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{ 8.2849 +instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 8.2850 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2851 match(Set src2 (AddF (MulF src0 src1) src2)); 8.2852 8.2853 @@ -11294,15 +10922,15 @@ 8.2854 "FMUL ST,$src1\n\t" 8.2855 "FADDP $src2,ST" %} 8.2856 opcode(0xD9); /* LoadF D9 /0 */ 8.2857 - ins_encode( Push_Reg_F(src0), 8.2858 + ins_encode( Push_Reg_FPR(src0), 8.2859 FMul_ST_reg(src1), 8.2860 FAddP_reg_ST(src2) ); 8.2861 ins_pipe( fpu_reg_reg_reg ); 8.2862 %} 8.2863 8.2864 -// MACRO4 -- divF subF 8.2865 +// MACRO4 -- divFPR subFPR 8.2866 // This instruction does not round to 24-bits 8.2867 -instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{ 8.2868 +instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 8.2869 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2870 match(Set dst (DivF (SubF src2 src1) src3)); 8.2871 8.2872 @@ -11311,67 +10939,67 @@ 8.2873 "FDIV ST,$src3\n\t" 8.2874 "FSTP $dst" %} 8.2875 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 8.2876 - ins_encode( Push_Reg_F(src2), 8.2877 - subF_divF_encode(src1,src3), 8.2878 - Pop_Reg_F(dst) ); 8.2879 + ins_encode( Push_Reg_FPR(src2), 8.2880 + subFPR_divFPR_encode(src1,src3), 8.2881 + Pop_Reg_FPR(dst) ); 8.2882 ins_pipe( fpu_reg_reg_reg_reg ); 8.2883 %} 8.2884 8.2885 // Spill to obtain 24-bit precision 8.2886 -instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{ 8.2887 +instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 8.2888 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2889 match(Set dst (DivF src1 src2)); 8.2890 
8.2891 format %{ "FDIV $dst,$src1,$src2" %} 8.2892 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 8.2893 - ins_encode( Push_Reg_F(src1), 8.2894 - OpcReg_F(src2), 8.2895 - Pop_Mem_F(dst) ); 8.2896 + ins_encode( Push_Reg_FPR(src1), 8.2897 + OpcReg_FPR(src2), 8.2898 + Pop_Mem_FPR(dst) ); 8.2899 ins_pipe( fpu_mem_reg_reg ); 8.2900 %} 8.2901 // 8.2902 // This instruction does not round to 24-bits 8.2903 -instruct divF_reg(regF dst, regF src) %{ 8.2904 +instruct divFPR_reg(regFPR dst, regFPR src) %{ 8.2905 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.2906 match(Set dst (DivF dst src)); 8.2907 8.2908 format %{ "FDIV $dst,$src" %} 8.2909 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 8.2910 - ins_encode( Push_Reg_F(src), 8.2911 + ins_encode( Push_Reg_FPR(src), 8.2912 OpcP, RegOpc(dst) ); 8.2913 ins_pipe( fpu_reg_reg ); 8.2914 %} 8.2915 8.2916 8.2917 // Spill to obtain 24-bit precision 8.2918 -instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 8.2919 +instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 8.2920 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.2921 match(Set dst (ModF src1 src2)); 8.2922 - effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 8.2923 + effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 8.2924 8.2925 format %{ "FMOD $dst,$src1,$src2" %} 8.2926 - ins_encode( Push_Reg_Mod_D(src1, src2), 8.2927 - emitModD(), 8.2928 - Push_Result_Mod_D(src2), 8.2929 - Pop_Mem_F(dst)); 8.2930 + ins_encode( Push_Reg_Mod_DPR(src1, src2), 8.2931 + emitModDPR(), 8.2932 + Push_Result_Mod_DPR(src2), 8.2933 + Pop_Mem_FPR(dst)); 8.2934 ins_pipe( pipe_slow ); 8.2935 %} 8.2936 // 8.2937 // This instruction does not round to 24-bits 8.2938 -instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{ 8.2939 +instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 8.2940 predicate( UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); 8.2941 match(Set dst (ModF dst src)); 8.2942 - effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 8.2943 + effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 8.2944 8.2945 format %{ "FMOD $dst,$src" %} 8.2946 - ins_encode(Push_Reg_Mod_D(dst, src), 8.2947 - emitModD(), 8.2948 - Push_Result_Mod_D(src), 8.2949 - Pop_Reg_F(dst)); 8.2950 - ins_pipe( pipe_slow ); 8.2951 -%} 8.2952 - 8.2953 -instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{ 8.2954 + ins_encode(Push_Reg_Mod_DPR(dst, src), 8.2955 + emitModDPR(), 8.2956 + Push_Result_Mod_DPR(src), 8.2957 + Pop_Reg_FPR(dst)); 8.2958 + ins_pipe( pipe_slow ); 8.2959 +%} 8.2960 + 8.2961 +instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 8.2962 predicate(UseSSE>=1); 8.2963 match(Set dst (ModF src0 src1)); 8.2964 effect(KILL rax, KILL cr); 8.2965 @@ -11391,7 +11019,7 @@ 8.2966 "\tFSTP ST0\t # Restore FPU Stack" 8.2967 %} 8.2968 ins_cost(250); 8.2969 - ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU); 8.2970 + ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 8.2971 ins_pipe( pipe_slow ); 8.2972 %} 8.2973 8.2974 @@ -11399,26 +11027,26 @@ 8.2975 //----------Arithmetic Conversion Instructions--------------------------------- 8.2976 // The conversions operations are all Alpha sorted. Please keep it that way! 
8.2977 8.2978 -instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{ 8.2979 +instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 8.2980 predicate(UseSSE==0); 8.2981 match(Set dst (RoundFloat src)); 8.2982 ins_cost(125); 8.2983 format %{ "FST_S $dst,$src\t# F-round" %} 8.2984 - ins_encode( Pop_Mem_Reg_F(dst, src) ); 8.2985 + ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 8.2986 ins_pipe( fpu_mem_reg ); 8.2987 %} 8.2988 8.2989 -instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{ 8.2990 +instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 8.2991 predicate(UseSSE<=1); 8.2992 match(Set dst (RoundDouble src)); 8.2993 ins_cost(125); 8.2994 format %{ "FST_D $dst,$src\t# D-round" %} 8.2995 - ins_encode( Pop_Mem_Reg_D(dst, src) ); 8.2996 + ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 8.2997 ins_pipe( fpu_mem_reg ); 8.2998 %} 8.2999 8.3000 // Force rounding to 24-bit precision and 6-bit exponent 8.3001 -instruct convD2F_reg(stackSlotF dst, regD src) %{ 8.3002 +instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 8.3003 predicate(UseSSE==0); 8.3004 match(Set dst (ConvD2F src)); 8.3005 format %{ "FST_S $dst,$src\t# F-round" %} 8.3006 @@ -11428,7 +11056,7 @@ 8.3007 %} 8.3008 8.3009 // Force rounding to 24-bit precision and 6-bit exponent 8.3010 -instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{ 8.3011 +instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 8.3012 predicate(UseSSE==1); 8.3013 match(Set dst (ConvD2F src)); 8.3014 effect( KILL cr ); 8.3015 @@ -11451,7 +11079,7 @@ 8.3016 %} 8.3017 8.3018 // Force rounding double precision to single precision 8.3019 -instruct convXD2X_reg(regX dst, regXD src) %{ 8.3020 +instruct convD2F_reg(regF dst, regD src) %{ 8.3021 predicate(UseSSE>=2); 8.3022 match(Set dst (ConvD2F src)); 8.3023 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 8.3024 @@ -11461,15 +11089,15 @@ 8.3025 ins_pipe( pipe_slow ); 8.3026 %} 8.3027 8.3028 -instruct convF2D_reg_reg(regD dst, regF src) %{ 8.3029 +instruct 
convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 8.3030 predicate(UseSSE==0); 8.3031 match(Set dst (ConvF2D src)); 8.3032 format %{ "FST_S $dst,$src\t# D-round" %} 8.3033 - ins_encode( Pop_Reg_Reg_D(dst, src)); 8.3034 + ins_encode( Pop_Reg_Reg_DPR(dst, src)); 8.3035 ins_pipe( fpu_reg_reg ); 8.3036 %} 8.3037 8.3038 -instruct convF2D_reg(stackSlotD dst, regF src) %{ 8.3039 +instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 8.3040 predicate(UseSSE==1); 8.3041 match(Set dst (ConvF2D src)); 8.3042 format %{ "FST_D $dst,$src\t# D-round" %} 8.3043 @@ -11478,7 +11106,7 @@ 8.3044 %} 8.3045 %} 8.3046 8.3047 -instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{ 8.3048 +instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 8.3049 predicate(UseSSE==1); 8.3050 match(Set dst (ConvF2D src)); 8.3051 effect( KILL cr ); 8.3052 @@ -11497,7 +11125,7 @@ 8.3053 ins_pipe( pipe_slow ); 8.3054 %} 8.3055 8.3056 -instruct convX2XD_reg(regXD dst, regX src) %{ 8.3057 +instruct convF2D_reg(regD dst, regF src) %{ 8.3058 predicate(UseSSE>=2); 8.3059 match(Set dst (ConvF2D src)); 8.3060 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 8.3061 @@ -11508,7 +11136,7 @@ 8.3062 %} 8.3063 8.3064 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 8.3065 -instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 8.3066 +instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 8.3067 predicate(UseSSE<=1); 8.3068 match(Set dst (ConvD2I src)); 8.3069 effect( KILL tmp, KILL cr ); 8.3070 @@ -11523,12 +11151,12 @@ 8.3071 "FLD_D $src\n\t" 8.3072 "CALL d2i_wrapper\n" 8.3073 "fast:" %} 8.3074 - ins_encode( Push_Reg_D(src), D2I_encoding(src) ); 8.3075 + ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 8.3076 ins_pipe( pipe_slow ); 8.3077 %} 8.3078 8.3079 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
8.3080 -instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{ 8.3081 +instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 8.3082 predicate(UseSSE>=2); 8.3083 match(Set dst (ConvD2I src)); 8.3084 effect( KILL tmp, KILL cr ); 8.3085 @@ -11556,7 +11184,7 @@ 8.3086 ins_pipe( pipe_slow ); 8.3087 %} 8.3088 8.3089 -instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 8.3090 +instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 8.3091 predicate(UseSSE<=1); 8.3092 match(Set dst (ConvD2L src)); 8.3093 effect( KILL cr ); 8.3094 @@ -11574,12 +11202,12 @@ 8.3095 "FLD $src\n\t" 8.3096 "CALL d2l_wrapper\n" 8.3097 "fast:" %} 8.3098 - ins_encode( Push_Reg_D(src), D2L_encoding(src) ); 8.3099 + ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 8.3100 ins_pipe( pipe_slow ); 8.3101 %} 8.3102 8.3103 // XMM lacks a float/double->long conversion, so use the old FPU stack. 8.3104 -instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{ 8.3105 +instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 8.3106 predicate (UseSSE>=2); 8.3107 match(Set dst (ConvD2L src)); 8.3108 effect( KILL cr ); 8.3109 @@ -11637,7 +11265,7 @@ 8.3110 // rounding mode to 'nearest'. The hardware stores a flag value down 8.3111 // if we would overflow or converted a NAN; we check for this and 8.3112 // and go the slow path if needed. 
8.3113 -instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 8.3114 +instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 8.3115 predicate(UseSSE==0); 8.3116 match(Set dst (ConvF2I src)); 8.3117 effect( KILL tmp, KILL cr ); 8.3118 @@ -11652,13 +11280,13 @@ 8.3119 "FLD $src\n\t" 8.3120 "CALL d2i_wrapper\n" 8.3121 "fast:" %} 8.3122 - // D2I_encoding works for F2I 8.3123 - ins_encode( Push_Reg_F(src), D2I_encoding(src) ); 8.3124 + // DPR2I_encoding works for FPR2I 8.3125 + ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 8.3126 ins_pipe( pipe_slow ); 8.3127 %} 8.3128 8.3129 // Convert a float in xmm to an int reg. 8.3130 -instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{ 8.3131 +instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 8.3132 predicate(UseSSE>=1); 8.3133 match(Set dst (ConvF2I src)); 8.3134 effect( KILL tmp, KILL cr ); 8.3135 @@ -11686,7 +11314,7 @@ 8.3136 ins_pipe( pipe_slow ); 8.3137 %} 8.3138 8.3139 -instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 8.3140 +instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 8.3141 predicate(UseSSE==0); 8.3142 match(Set dst (ConvF2L src)); 8.3143 effect( KILL cr ); 8.3144 @@ -11704,13 +11332,13 @@ 8.3145 "FLD $src\n\t" 8.3146 "CALL d2l_wrapper\n" 8.3147 "fast:" %} 8.3148 - // D2L_encoding works for F2L 8.3149 - ins_encode( Push_Reg_F(src), D2L_encoding(src) ); 8.3150 + // DPR2L_encoding works for FPR2L 8.3151 + ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 8.3152 ins_pipe( pipe_slow ); 8.3153 %} 8.3154 8.3155 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
8.3156 -instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{ 8.3157 +instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 8.3158 predicate (UseSSE>=1); 8.3159 match(Set dst (ConvF2L src)); 8.3160 effect( KILL cr ); 8.3161 @@ -11762,17 +11390,17 @@ 8.3162 ins_pipe( pipe_slow ); 8.3163 %} 8.3164 8.3165 -instruct convI2D_reg(regD dst, stackSlotI src) %{ 8.3166 +instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 8.3167 predicate( UseSSE<=1 ); 8.3168 match(Set dst (ConvI2D src)); 8.3169 format %{ "FILD $src\n\t" 8.3170 "FSTP $dst" %} 8.3171 opcode(0xDB, 0x0); /* DB /0 */ 8.3172 - ins_encode(Push_Mem_I(src), Pop_Reg_D(dst)); 8.3173 + ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 8.3174 ins_pipe( fpu_reg_mem ); 8.3175 %} 8.3176 8.3177 -instruct convI2XD_reg(regXD dst, eRegI src) %{ 8.3178 +instruct convI2D_reg(regD dst, eRegI src) %{ 8.3179 predicate( UseSSE>=2 && !UseXmmI2D ); 8.3180 match(Set dst (ConvI2D src)); 8.3181 format %{ "CVTSI2SD $dst,$src" %} 8.3182 @@ -11782,7 +11410,7 @@ 8.3183 ins_pipe( pipe_slow ); 8.3184 %} 8.3185 8.3186 -instruct convI2XD_mem(regXD dst, memory mem) %{ 8.3187 +instruct convI2D_mem(regD dst, memory mem) %{ 8.3188 predicate( UseSSE>=2 ); 8.3189 match(Set dst (ConvI2D (LoadI mem))); 8.3190 format %{ "CVTSI2SD $dst,$mem" %} 8.3191 @@ -11792,7 +11420,7 @@ 8.3192 ins_pipe( pipe_slow ); 8.3193 %} 8.3194 8.3195 -instruct convXI2XD_reg(regXD dst, eRegI src) 8.3196 +instruct convXI2D_reg(regD dst, eRegI src) 8.3197 %{ 8.3198 predicate( UseSSE>=2 && UseXmmI2D ); 8.3199 match(Set dst (ConvI2D src)); 8.3200 @@ -11806,31 +11434,31 @@ 8.3201 ins_pipe(pipe_slow); // XXX 8.3202 %} 8.3203 8.3204 -instruct convI2D_mem(regD dst, memory mem) %{ 8.3205 +instruct convI2DPR_mem(regDPR dst, memory mem) %{ 8.3206 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 8.3207 match(Set dst (ConvI2D (LoadI mem))); 8.3208 format %{ "FILD $mem\n\t" 8.3209 "FSTP $dst" %} 8.3210 opcode(0xDB); /* DB /0 */ 8.3211 ins_encode( 
OpcP, RMopc_Mem(0x00,mem), 8.3212 - Pop_Reg_D(dst)); 8.3213 + Pop_Reg_DPR(dst)); 8.3214 ins_pipe( fpu_reg_mem ); 8.3215 %} 8.3216 8.3217 // Convert a byte to a float; no rounding step needed. 8.3218 -instruct conv24I2F_reg(regF dst, stackSlotI src) %{ 8.3219 +instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 8.3220 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 8.3221 match(Set dst (ConvI2F src)); 8.3222 format %{ "FILD $src\n\t" 8.3223 "FSTP $dst" %} 8.3224 8.3225 opcode(0xDB, 0x0); /* DB /0 */ 8.3226 - ins_encode(Push_Mem_I(src), Pop_Reg_F(dst)); 8.3227 + ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 8.3228 ins_pipe( fpu_reg_mem ); 8.3229 %} 8.3230 8.3231 // In 24-bit mode, force exponent rounding by storing back out 8.3232 -instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{ 8.3233 +instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 8.3234 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.3235 match(Set dst (ConvI2F src)); 8.3236 ins_cost(200); 8.3237 @@ -11838,12 +11466,12 @@ 8.3238 "FSTP_S $dst" %} 8.3239 opcode(0xDB, 0x0); /* DB /0 */ 8.3240 ins_encode( Push_Mem_I(src), 8.3241 - Pop_Mem_F(dst)); 8.3242 + Pop_Mem_FPR(dst)); 8.3243 ins_pipe( fpu_mem_mem ); 8.3244 %} 8.3245 8.3246 // In 24-bit mode, force exponent rounding by storing back out 8.3247 -instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{ 8.3248 +instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 8.3249 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 8.3250 match(Set dst (ConvI2F (LoadI mem))); 8.3251 ins_cost(200); 8.3252 @@ -11851,36 +11479,36 @@ 8.3253 "FSTP_S $dst" %} 8.3254 opcode(0xDB); /* DB /0 */ 8.3255 ins_encode( OpcP, RMopc_Mem(0x00,mem), 8.3256 - Pop_Mem_F(dst)); 8.3257 + Pop_Mem_FPR(dst)); 8.3258 ins_pipe( fpu_mem_mem ); 8.3259 %} 8.3260 8.3261 // This instruction does not round to 24-bits 8.3262 -instruct convI2F_reg(regF dst, stackSlotI src) 
%{ 8.3263 +instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 8.3264 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.3265 match(Set dst (ConvI2F src)); 8.3266 format %{ "FILD $src\n\t" 8.3267 "FSTP $dst" %} 8.3268 opcode(0xDB, 0x0); /* DB /0 */ 8.3269 ins_encode( Push_Mem_I(src), 8.3270 - Pop_Reg_F(dst)); 8.3271 + Pop_Reg_FPR(dst)); 8.3272 ins_pipe( fpu_reg_mem ); 8.3273 %} 8.3274 8.3275 // This instruction does not round to 24-bits 8.3276 -instruct convI2F_mem(regF dst, memory mem) %{ 8.3277 +instruct convI2FPR_mem(regFPR dst, memory mem) %{ 8.3278 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 8.3279 match(Set dst (ConvI2F (LoadI mem))); 8.3280 format %{ "FILD $mem\n\t" 8.3281 "FSTP $dst" %} 8.3282 opcode(0xDB); /* DB /0 */ 8.3283 ins_encode( OpcP, RMopc_Mem(0x00,mem), 8.3284 - Pop_Reg_F(dst)); 8.3285 + Pop_Reg_FPR(dst)); 8.3286 ins_pipe( fpu_reg_mem ); 8.3287 %} 8.3288 8.3289 // Convert an int to a float in xmm; no rounding step needed. 
8.3290 -instruct convI2X_reg(regX dst, eRegI src) %{ 8.3291 +instruct convI2F_reg(regF dst, eRegI src) %{ 8.3292 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 8.3293 match(Set dst (ConvI2F src)); 8.3294 format %{ "CVTSI2SS $dst, $src" %} 8.3295 @@ -11890,7 +11518,7 @@ 8.3296 ins_pipe( pipe_slow ); 8.3297 %} 8.3298 8.3299 - instruct convXI2X_reg(regX dst, eRegI src) 8.3300 + instruct convXI2F_reg(regF dst, eRegI src) 8.3301 %{ 8.3302 predicate( UseSSE>=2 && UseXmmI2F ); 8.3303 match(Set dst (ConvI2F src)); 8.3304 @@ -11939,7 +11567,7 @@ 8.3305 ins_pipe( ialu_reg_reg_long ); 8.3306 %} 8.3307 8.3308 -instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 8.3309 +instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 8.3310 predicate (UseSSE<=1); 8.3311 match(Set dst (ConvL2D src)); 8.3312 effect( KILL cr ); 8.3313 @@ -11949,11 +11577,11 @@ 8.3314 "ADD ESP,8\n\t" 8.3315 "FSTP_D $dst\t# D-round" %} 8.3316 opcode(0xDF, 0x5); /* DF /5 */ 8.3317 - ins_encode(convert_long_double(src), Pop_Mem_D(dst)); 8.3318 - ins_pipe( pipe_slow ); 8.3319 -%} 8.3320 - 8.3321 -instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{ 8.3322 + ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 8.3323 + ins_pipe( pipe_slow ); 8.3324 +%} 8.3325 + 8.3326 +instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 8.3327 predicate (UseSSE>=2); 8.3328 match(Set dst (ConvL2D src)); 8.3329 effect( KILL cr ); 8.3330 @@ -11964,11 +11592,11 @@ 8.3331 "MOVSD $dst,[ESP]\n\t" 8.3332 "ADD ESP,8" %} 8.3333 opcode(0xDF, 0x5); /* DF /5 */ 8.3334 - ins_encode(convert_long_double2(src), Push_ResultXD(dst)); 8.3335 - ins_pipe( pipe_slow ); 8.3336 -%} 8.3337 - 8.3338 -instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{ 8.3339 + ins_encode(convert_long_double2(src), Push_ResultD(dst)); 8.3340 + ins_pipe( pipe_slow ); 8.3341 +%} 8.3342 + 8.3343 +instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 8.3344 predicate (UseSSE>=1); 8.3345 match(Set dst (ConvL2F 
src)); 8.3346 effect( KILL cr ); 8.3347 @@ -11979,11 +11607,11 @@ 8.3348 "MOVSS $dst,[ESP]\n\t" 8.3349 "ADD ESP,8" %} 8.3350 opcode(0xDF, 0x5); /* DF /5 */ 8.3351 - ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8)); 8.3352 - ins_pipe( pipe_slow ); 8.3353 -%} 8.3354 - 8.3355 -instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 8.3356 + ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 8.3357 + ins_pipe( pipe_slow ); 8.3358 +%} 8.3359 + 8.3360 +instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 8.3361 match(Set dst (ConvL2F src)); 8.3362 effect( KILL cr ); 8.3363 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 8.3364 @@ -11992,7 +11620,7 @@ 8.3365 "ADD ESP,8\n\t" 8.3366 "FSTP_S $dst\t# F-round" %} 8.3367 opcode(0xDF, 0x5); /* DF /5 */ 8.3368 - ins_encode(convert_long_double(src), Pop_Mem_F(dst)); 8.3369 + ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 8.3370 ins_pipe( pipe_slow ); 8.3371 %} 8.3372 8.3373 @@ -12016,18 +11644,18 @@ 8.3374 ins_pipe( ialu_reg_mem ); 8.3375 %} 8.3376 8.3377 -instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ 8.3378 +instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 8.3379 predicate(UseSSE==0); 8.3380 match(Set dst (MoveF2I src)); 8.3381 effect( DEF dst, USE src ); 8.3382 8.3383 ins_cost(125); 8.3384 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 8.3385 - ins_encode( Pop_Mem_Reg_F(dst, src) ); 8.3386 + ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 8.3387 ins_pipe( fpu_mem_reg ); 8.3388 %} 8.3389 8.3390 -instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{ 8.3391 +instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 8.3392 predicate(UseSSE>=1); 8.3393 match(Set dst (MoveF2I src)); 8.3394 effect( DEF dst, USE src ); 8.3395 @@ -12040,7 +11668,7 @@ 8.3396 ins_pipe( pipe_slow ); 8.3397 %} 8.3398 8.3399 -instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{ 8.3400 +instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{ 8.3401 
predicate(UseSSE>=2); 8.3402 match(Set dst (MoveF2I src)); 8.3403 effect( DEF dst, USE src ); 8.3404 @@ -12065,7 +11693,7 @@ 8.3405 %} 8.3406 8.3407 8.3408 -instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ 8.3409 +instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 8.3410 predicate(UseSSE==0); 8.3411 match(Set dst (MoveI2F src)); 8.3412 effect(DEF dst, USE src); 8.3413 @@ -12075,11 +11703,11 @@ 8.3414 "FSTP $dst\t# MoveI2F_stack_reg" %} 8.3415 opcode(0xD9); /* D9 /0, FLD m32real */ 8.3416 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 8.3417 - Pop_Reg_F(dst) ); 8.3418 + Pop_Reg_FPR(dst) ); 8.3419 ins_pipe( fpu_reg_mem ); 8.3420 %} 8.3421 8.3422 -instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{ 8.3423 +instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 8.3424 predicate(UseSSE>=1); 8.3425 match(Set dst (MoveI2F src)); 8.3426 effect( DEF dst, USE src ); 8.3427 @@ -12092,7 +11720,7 @@ 8.3428 ins_pipe( pipe_slow ); 8.3429 %} 8.3430 8.3431 -instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{ 8.3432 +instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{ 8.3433 predicate(UseSSE>=2); 8.3434 match(Set dst (MoveI2F src)); 8.3435 effect( DEF dst, USE src ); 8.3436 @@ -12117,18 +11745,18 @@ 8.3437 ins_pipe( ialu_mem_long_reg ); 8.3438 %} 8.3439 8.3440 -instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ 8.3441 +instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 8.3442 predicate(UseSSE<=1); 8.3443 match(Set dst (MoveD2L src)); 8.3444 effect(DEF dst, USE src); 8.3445 8.3446 ins_cost(125); 8.3447 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 8.3448 - ins_encode( Pop_Mem_Reg_D(dst, src) ); 8.3449 + ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 8.3450 ins_pipe( fpu_mem_reg ); 8.3451 %} 8.3452 8.3453 -instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{ 8.3454 +instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 8.3455 predicate(UseSSE>=2); 8.3456 match(Set dst (MoveD2L src)); 8.3457 effect(DEF dst, USE src); 8.3458 @@ 
-12140,7 +11768,7 @@ 8.3459 ins_pipe( pipe_slow ); 8.3460 %} 8.3461 8.3462 -instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{ 8.3463 +instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 8.3464 predicate(UseSSE>=2); 8.3465 match(Set dst (MoveD2L src)); 8.3466 effect(DEF dst, USE src, TEMP tmp); 8.3467 @@ -12169,7 +11797,7 @@ 8.3468 %} 8.3469 8.3470 8.3471 -instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ 8.3472 +instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 8.3473 predicate(UseSSE<=1); 8.3474 match(Set dst (MoveL2D src)); 8.3475 effect(DEF dst, USE src); 8.3476 @@ -12179,12 +11807,12 @@ 8.3477 "FSTP $dst\t# MoveL2D_stack_reg" %} 8.3478 opcode(0xDD); /* DD /0, FLD m64real */ 8.3479 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 8.3480 - Pop_Reg_D(dst) ); 8.3481 + Pop_Reg_DPR(dst) ); 8.3482 ins_pipe( fpu_reg_mem ); 8.3483 %} 8.3484 8.3485 8.3486 -instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{ 8.3487 +instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 8.3488 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 8.3489 match(Set dst (MoveL2D src)); 8.3490 effect(DEF dst, USE src); 8.3491 @@ -12197,7 +11825,7 @@ 8.3492 ins_pipe( pipe_slow ); 8.3493 %} 8.3494 8.3495 -instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{ 8.3496 +instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 8.3497 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 8.3498 match(Set dst (MoveL2D src)); 8.3499 effect(DEF dst, USE src); 8.3500 @@ -12210,7 +11838,7 @@ 8.3501 ins_pipe( pipe_slow ); 8.3502 %} 8.3503 8.3504 -instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{ 8.3505 +instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 8.3506 predicate(UseSSE>=2); 8.3507 match(Set dst (MoveL2D src)); 8.3508 effect(TEMP dst, USE src, TEMP tmp); 8.3509 @@ -12227,7 +11855,7 @@ 8.3510 %} 8.3511 8.3512 // Replicate scalar to packed byte (1 byte) values in xmm 8.3513 -instruct Repl8B_reg(regXD dst, 
regXD src) %{ 8.3514 +instruct Repl8B_reg(regD dst, regD src) %{ 8.3515 predicate(UseSSE>=2); 8.3516 match(Set dst (Replicate8B src)); 8.3517 format %{ "MOVDQA $dst,$src\n\t" 8.3518 @@ -12244,7 +11872,7 @@ 8.3519 %} 8.3520 8.3521 // Replicate scalar to packed byte (1 byte) values in xmm 8.3522 -instruct Repl8B_eRegI(regXD dst, eRegI src) %{ 8.3523 +instruct Repl8B_eRegI(regD dst, eRegI src) %{ 8.3524 predicate(UseSSE>=2); 8.3525 match(Set dst (Replicate8B src)); 8.3526 format %{ "MOVD $dst,$src\n\t" 8.3527 @@ -12259,7 +11887,7 @@ 8.3528 %} 8.3529 8.3530 // Replicate scalar zero to packed byte (1 byte) values in xmm 8.3531 -instruct Repl8B_immI0(regXD dst, immI0 zero) %{ 8.3532 +instruct Repl8B_immI0(regD dst, immI0 zero) %{ 8.3533 predicate(UseSSE>=2); 8.3534 match(Set dst (Replicate8B zero)); 8.3535 format %{ "PXOR $dst,$dst\t! replicate8B" %} 8.3536 @@ -12270,7 +11898,7 @@ 8.3537 %} 8.3538 8.3539 // Replicate scalar to packed shore (2 byte) values in xmm 8.3540 -instruct Repl4S_reg(regXD dst, regXD src) %{ 8.3541 +instruct Repl4S_reg(regD dst, regD src) %{ 8.3542 predicate(UseSSE>=2); 8.3543 match(Set dst (Replicate4S src)); 8.3544 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} 8.3545 @@ -12281,7 +11909,7 @@ 8.3546 %} 8.3547 8.3548 // Replicate scalar to packed shore (2 byte) values in xmm 8.3549 -instruct Repl4S_eRegI(regXD dst, eRegI src) %{ 8.3550 +instruct Repl4S_eRegI(regD dst, eRegI src) %{ 8.3551 predicate(UseSSE>=2); 8.3552 match(Set dst (Replicate4S src)); 8.3553 format %{ "MOVD $dst,$src\n\t" 8.3554 @@ -12294,7 +11922,7 @@ 8.3555 %} 8.3556 8.3557 // Replicate scalar zero to packed short (2 byte) values in xmm 8.3558 -instruct Repl4S_immI0(regXD dst, immI0 zero) %{ 8.3559 +instruct Repl4S_immI0(regD dst, immI0 zero) %{ 8.3560 predicate(UseSSE>=2); 8.3561 match(Set dst (Replicate4S zero)); 8.3562 format %{ "PXOR $dst,$dst\t! 
replicate4S" %} 8.3563 @@ -12305,7 +11933,7 @@ 8.3564 %} 8.3565 8.3566 // Replicate scalar to packed char (2 byte) values in xmm 8.3567 -instruct Repl4C_reg(regXD dst, regXD src) %{ 8.3568 +instruct Repl4C_reg(regD dst, regD src) %{ 8.3569 predicate(UseSSE>=2); 8.3570 match(Set dst (Replicate4C src)); 8.3571 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} 8.3572 @@ -12316,7 +11944,7 @@ 8.3573 %} 8.3574 8.3575 // Replicate scalar to packed char (2 byte) values in xmm 8.3576 -instruct Repl4C_eRegI(regXD dst, eRegI src) %{ 8.3577 +instruct Repl4C_eRegI(regD dst, eRegI src) %{ 8.3578 predicate(UseSSE>=2); 8.3579 match(Set dst (Replicate4C src)); 8.3580 format %{ "MOVD $dst,$src\n\t" 8.3581 @@ -12329,7 +11957,7 @@ 8.3582 %} 8.3583 8.3584 // Replicate scalar zero to packed char (2 byte) values in xmm 8.3585 -instruct Repl4C_immI0(regXD dst, immI0 zero) %{ 8.3586 +instruct Repl4C_immI0(regD dst, immI0 zero) %{ 8.3587 predicate(UseSSE>=2); 8.3588 match(Set dst (Replicate4C zero)); 8.3589 format %{ "PXOR $dst,$dst\t! replicate4C" %} 8.3590 @@ -12340,7 +11968,7 @@ 8.3591 %} 8.3592 8.3593 // Replicate scalar to packed integer (4 byte) values in xmm 8.3594 -instruct Repl2I_reg(regXD dst, regXD src) %{ 8.3595 +instruct Repl2I_reg(regD dst, regD src) %{ 8.3596 predicate(UseSSE>=2); 8.3597 match(Set dst (Replicate2I src)); 8.3598 format %{ "PSHUFD $dst,$src,0x00\t! 
replicate2I" %} 8.3599 @@ -12351,7 +11979,7 @@ 8.3600 %} 8.3601 8.3602 // Replicate scalar to packed integer (4 byte) values in xmm 8.3603 -instruct Repl2I_eRegI(regXD dst, eRegI src) %{ 8.3604 +instruct Repl2I_eRegI(regD dst, eRegI src) %{ 8.3605 predicate(UseSSE>=2); 8.3606 match(Set dst (Replicate2I src)); 8.3607 format %{ "MOVD $dst,$src\n\t" 8.3608 @@ -12364,7 +11992,7 @@ 8.3609 %} 8.3610 8.3611 // Replicate scalar zero to packed integer (2 byte) values in xmm 8.3612 -instruct Repl2I_immI0(regXD dst, immI0 zero) %{ 8.3613 +instruct Repl2I_immI0(regD dst, immI0 zero) %{ 8.3614 predicate(UseSSE>=2); 8.3615 match(Set dst (Replicate2I zero)); 8.3616 format %{ "PXOR $dst,$dst\t! replicate2I" %} 8.3617 @@ -12375,7 +12003,7 @@ 8.3618 %} 8.3619 8.3620 // Replicate scalar to packed single precision floating point values in xmm 8.3621 -instruct Repl2F_reg(regXD dst, regXD src) %{ 8.3622 +instruct Repl2F_reg(regD dst, regD src) %{ 8.3623 predicate(UseSSE>=2); 8.3624 match(Set dst (Replicate2F src)); 8.3625 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 8.3626 @@ -12386,7 +12014,7 @@ 8.3627 %} 8.3628 8.3629 // Replicate scalar to packed single precision floating point values in xmm 8.3630 -instruct Repl2F_regX(regXD dst, regX src) %{ 8.3631 +instruct Repl2F_regF(regD dst, regF src) %{ 8.3632 predicate(UseSSE>=2); 8.3633 match(Set dst (Replicate2F src)); 8.3634 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 8.3635 @@ -12397,7 +12025,7 @@ 8.3636 %} 8.3637 8.3638 // Replicate scalar to packed single precision floating point values in xmm 8.3639 -instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ 8.3640 +instruct Repl2F_immF0(regD dst, immF0 zero) %{ 8.3641 predicate(UseSSE>=2); 8.3642 match(Set dst (Replicate2F zero)); 8.3643 format %{ "PXOR $dst,$dst\t! 
replicate2F" %} 8.3644 @@ -12423,7 +12051,7 @@ 8.3645 %} 8.3646 8.3647 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 8.3648 - eAXRegI result, regXD tmp1, eFlagsReg cr) %{ 8.3649 + eAXRegI result, regD tmp1, eFlagsReg cr) %{ 8.3650 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 8.3651 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 8.3652 8.3653 @@ -12438,7 +12066,7 @@ 8.3654 8.3655 // fast string equals 8.3656 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 8.3657 - regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 8.3658 + regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 8.3659 match(Set result (StrEquals (Binary str1 str2) cnt)); 8.3660 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 8.3661 8.3662 @@ -12453,7 +12081,7 @@ 8.3663 8.3664 // fast search of substring with known size. 8.3665 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 8.3666 - eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 8.3667 + eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 8.3668 predicate(UseSSE42Intrinsics); 8.3669 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 8.3670 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 8.3671 @@ -12480,7 +12108,7 @@ 8.3672 %} 8.3673 8.3674 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 8.3675 - eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{ 8.3676 + eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 8.3677 predicate(UseSSE42Intrinsics); 8.3678 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 8.3679 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 8.3680 @@ -12497,7 +12125,7 @@ 8.3681 8.3682 // fast array equals 
8.3683 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 8.3684 - regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 8.3685 + regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 8.3686 %{ 8.3687 match(Set result (AryEq ary1 ary2)); 8.3688 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 8.3689 @@ -13323,40 +12951,40 @@ 8.3690 %} 8.3691 8.3692 // Compare 2 longs and CMOVE doubles 8.3693 -instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 8.3694 +instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ 8.3695 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 8.3696 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 8.3697 ins_cost(200); 8.3698 expand %{ 8.3699 - fcmovD_regS(cmp,flags,dst,src); 8.3700 + fcmovDPR_regS(cmp,flags,dst,src); 8.3701 %} 8.3702 %} 8.3703 8.3704 // Compare 2 longs and CMOVE doubles 8.3705 -instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{ 8.3706 +instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 8.3707 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 8.3708 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 8.3709 ins_cost(200); 8.3710 expand %{ 8.3711 - fcmovXD_regS(cmp,flags,dst,src); 8.3712 - %} 8.3713 -%} 8.3714 - 8.3715 -instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ 8.3716 + fcmovD_regS(cmp,flags,dst,src); 8.3717 + %} 8.3718 +%} 8.3719 + 8.3720 +instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ 8.3721 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || 
_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 8.3722 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 8.3723 ins_cost(200); 8.3724 expand %{ 8.3725 - fcmovF_regS(cmp,flags,dst,src); 8.3726 - %} 8.3727 -%} 8.3728 - 8.3729 -instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{ 8.3730 + fcmovFPR_regS(cmp,flags,dst,src); 8.3731 + %} 8.3732 +%} 8.3733 + 8.3734 +instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ 8.3735 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 8.3736 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 8.3737 ins_cost(200); 8.3738 expand %{ 8.3739 - fcmovX_regS(cmp,flags,dst,src); 8.3740 + fcmovF_regS(cmp,flags,dst,src); 8.3741 %} 8.3742 %} 8.3743 8.3744 @@ -13451,40 +13079,40 @@ 8.3745 %} 8.3746 8.3747 // Compare 2 longs and CMOVE doubles 8.3748 -instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 8.3749 +instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ 8.3750 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 8.3751 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 8.3752 ins_cost(200); 8.3753 expand %{ 8.3754 - fcmovD_regS(cmp,flags,dst,src); 8.3755 + fcmovDPR_regS(cmp,flags,dst,src); 8.3756 %} 8.3757 %} 8.3758 8.3759 // Compare 2 longs and CMOVE doubles 8.3760 -instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{ 8.3761 +instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 8.3762 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 8.3763 match(Set dst (CMoveD 
(Binary cmp flags) (Binary dst src))); 8.3764 ins_cost(200); 8.3765 expand %{ 8.3766 - fcmovXD_regS(cmp,flags,dst,src); 8.3767 - %} 8.3768 -%} 8.3769 - 8.3770 -instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 8.3771 + fcmovD_regS(cmp,flags,dst,src); 8.3772 + %} 8.3773 +%} 8.3774 + 8.3775 +instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ 8.3776 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 8.3777 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 8.3778 ins_cost(200); 8.3779 expand %{ 8.3780 - fcmovF_regS(cmp,flags,dst,src); 8.3781 - %} 8.3782 -%} 8.3783 - 8.3784 -instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{ 8.3785 + fcmovFPR_regS(cmp,flags,dst,src); 8.3786 + %} 8.3787 +%} 8.3788 + 8.3789 +instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 8.3790 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 8.3791 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 8.3792 ins_cost(200); 8.3793 expand %{ 8.3794 - fcmovX_regS(cmp,flags,dst,src); 8.3795 + fcmovF_regS(cmp,flags,dst,src); 8.3796 %} 8.3797 %} 8.3798 8.3799 @@ -13584,41 +13212,41 @@ 8.3800 %} 8.3801 8.3802 // Compare 2 longs and CMOVE doubles 8.3803 -instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ 8.3804 +instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{ 8.3805 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 8.3806 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 8.3807 ins_cost(200); 8.3808 expand %{ 8.3809 - 
fcmovD_regS(cmp,flags,dst,src); 8.3810 + fcmovDPR_regS(cmp,flags,dst,src); 8.3811 %} 8.3812 %} 8.3813 8.3814 // Compare 2 longs and CMOVE doubles 8.3815 -instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{ 8.3816 +instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ 8.3817 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 8.3818 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 8.3819 ins_cost(200); 8.3820 expand %{ 8.3821 - fcmovXD_regS(cmp,flags,dst,src); 8.3822 - %} 8.3823 -%} 8.3824 - 8.3825 -instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ 8.3826 + fcmovD_regS(cmp,flags,dst,src); 8.3827 + %} 8.3828 +%} 8.3829 + 8.3830 +instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{ 8.3831 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 8.3832 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 8.3833 ins_cost(200); 8.3834 expand %{ 8.3835 - fcmovF_regS(cmp,flags,dst,src); 8.3836 - %} 8.3837 -%} 8.3838 - 8.3839 - 8.3840 -instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{ 8.3841 + fcmovFPR_regS(cmp,flags,dst,src); 8.3842 + %} 8.3843 +%} 8.3844 + 8.3845 + 8.3846 +instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ 8.3847 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 8.3848 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 8.3849 ins_cost(200); 8.3850 expand %{ 8.3851 - fcmovX_regS(cmp,flags,dst,src); 8.3852 + fcmovF_regS(cmp,flags,dst,src); 8.3853 %} 
8.3854 %} 8.3855
9.1 --- a/src/cpu/x86/vm/x86_64.ad Mon Dec 19 14:16:23 2011 -0800 9.2 +++ b/src/cpu/x86/vm/x86_64.ad Tue Dec 20 00:55:02 2011 -0800 9.3 @@ -9873,396 +9873,6 @@ 9.4 ins_pipe(pipe_slow); 9.5 %} 9.6 9.7 -instruct addF_reg(regF dst, regF src) 9.8 -%{ 9.9 - match(Set dst (AddF dst src)); 9.10 - 9.11 - format %{ "addss $dst, $src" %} 9.12 - ins_cost(150); // XXX 9.13 - ins_encode %{ 9.14 - __ addss($dst$$XMMRegister, $src$$XMMRegister); 9.15 - %} 9.16 - ins_pipe(pipe_slow); 9.17 -%} 9.18 - 9.19 -instruct addF_mem(regF dst, memory src) 9.20 -%{ 9.21 - match(Set dst (AddF dst (LoadF src))); 9.22 - 9.23 - format %{ "addss $dst, $src" %} 9.24 - ins_cost(150); // XXX 9.25 - ins_encode %{ 9.26 - __ addss($dst$$XMMRegister, $src$$Address); 9.27 - %} 9.28 - ins_pipe(pipe_slow); 9.29 -%} 9.30 - 9.31 -instruct addF_imm(regF dst, immF con) %{ 9.32 - match(Set dst (AddF dst con)); 9.33 - format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 9.34 - ins_cost(150); // XXX 9.35 - ins_encode %{ 9.36 - __ addss($dst$$XMMRegister, $constantaddress($con)); 9.37 - %} 9.38 - ins_pipe(pipe_slow); 9.39 -%} 9.40 - 9.41 -instruct addD_reg(regD dst, regD src) 9.42 -%{ 9.43 - match(Set dst (AddD dst src)); 9.44 - 9.45 - format %{ "addsd $dst, $src" %} 9.46 - ins_cost(150); // XXX 9.47 - ins_encode %{ 9.48 - __ addsd($dst$$XMMRegister, $src$$XMMRegister); 9.49 - %} 9.50 - ins_pipe(pipe_slow); 9.51 -%} 9.52 - 9.53 -instruct addD_mem(regD dst, memory src) 9.54 -%{ 9.55 - match(Set dst (AddD dst (LoadD src))); 9.56 - 9.57 - format %{ "addsd $dst, $src" %} 9.58 - ins_cost(150); // XXX 9.59 - ins_encode %{ 9.60 - __ addsd($dst$$XMMRegister, $src$$Address); 9.61 - %} 9.62 - ins_pipe(pipe_slow); 9.63 -%} 9.64 - 9.65 -instruct addD_imm(regD dst, immD con) %{ 9.66 - match(Set dst (AddD dst con)); 9.67 - format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 9.68 - ins_cost(150); // XXX 9.69 - ins_encode %{ 9.70 - __ addsd($dst$$XMMRegister, 
$constantaddress($con)); 9.71 - %} 9.72 - ins_pipe(pipe_slow); 9.73 -%} 9.74 - 9.75 -instruct subF_reg(regF dst, regF src) 9.76 -%{ 9.77 - match(Set dst (SubF dst src)); 9.78 - 9.79 - format %{ "subss $dst, $src" %} 9.80 - ins_cost(150); // XXX 9.81 - ins_encode %{ 9.82 - __ subss($dst$$XMMRegister, $src$$XMMRegister); 9.83 - %} 9.84 - ins_pipe(pipe_slow); 9.85 -%} 9.86 - 9.87 -instruct subF_mem(regF dst, memory src) 9.88 -%{ 9.89 - match(Set dst (SubF dst (LoadF src))); 9.90 - 9.91 - format %{ "subss $dst, $src" %} 9.92 - ins_cost(150); // XXX 9.93 - ins_encode %{ 9.94 - __ subss($dst$$XMMRegister, $src$$Address); 9.95 - %} 9.96 - ins_pipe(pipe_slow); 9.97 -%} 9.98 - 9.99 -instruct subF_imm(regF dst, immF con) %{ 9.100 - match(Set dst (SubF dst con)); 9.101 - format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 9.102 - ins_cost(150); // XXX 9.103 - ins_encode %{ 9.104 - __ subss($dst$$XMMRegister, $constantaddress($con)); 9.105 - %} 9.106 - ins_pipe(pipe_slow); 9.107 -%} 9.108 - 9.109 -instruct subD_reg(regD dst, regD src) 9.110 -%{ 9.111 - match(Set dst (SubD dst src)); 9.112 - 9.113 - format %{ "subsd $dst, $src" %} 9.114 - ins_cost(150); // XXX 9.115 - ins_encode %{ 9.116 - __ subsd($dst$$XMMRegister, $src$$XMMRegister); 9.117 - %} 9.118 - ins_pipe(pipe_slow); 9.119 -%} 9.120 - 9.121 -instruct subD_mem(regD dst, memory src) 9.122 -%{ 9.123 - match(Set dst (SubD dst (LoadD src))); 9.124 - 9.125 - format %{ "subsd $dst, $src" %} 9.126 - ins_cost(150); // XXX 9.127 - ins_encode %{ 9.128 - __ subsd($dst$$XMMRegister, $src$$Address); 9.129 - %} 9.130 - ins_pipe(pipe_slow); 9.131 -%} 9.132 - 9.133 -instruct subD_imm(regD dst, immD con) %{ 9.134 - match(Set dst (SubD dst con)); 9.135 - format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 9.136 - ins_cost(150); // XXX 9.137 - ins_encode %{ 9.138 - __ subsd($dst$$XMMRegister, $constantaddress($con)); 9.139 - %} 9.140 - ins_pipe(pipe_slow); 9.141 -%} 
9.142 - 9.143 -instruct mulF_reg(regF dst, regF src) 9.144 -%{ 9.145 - match(Set dst (MulF dst src)); 9.146 - 9.147 - format %{ "mulss $dst, $src" %} 9.148 - ins_cost(150); // XXX 9.149 - ins_encode %{ 9.150 - __ mulss($dst$$XMMRegister, $src$$XMMRegister); 9.151 - %} 9.152 - ins_pipe(pipe_slow); 9.153 -%} 9.154 - 9.155 -instruct mulF_mem(regF dst, memory src) 9.156 -%{ 9.157 - match(Set dst (MulF dst (LoadF src))); 9.158 - 9.159 - format %{ "mulss $dst, $src" %} 9.160 - ins_cost(150); // XXX 9.161 - ins_encode %{ 9.162 - __ mulss($dst$$XMMRegister, $src$$Address); 9.163 - %} 9.164 - ins_pipe(pipe_slow); 9.165 -%} 9.166 - 9.167 -instruct mulF_imm(regF dst, immF con) %{ 9.168 - match(Set dst (MulF dst con)); 9.169 - format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 9.170 - ins_cost(150); // XXX 9.171 - ins_encode %{ 9.172 - __ mulss($dst$$XMMRegister, $constantaddress($con)); 9.173 - %} 9.174 - ins_pipe(pipe_slow); 9.175 -%} 9.176 - 9.177 -instruct mulD_reg(regD dst, regD src) 9.178 -%{ 9.179 - match(Set dst (MulD dst src)); 9.180 - 9.181 - format %{ "mulsd $dst, $src" %} 9.182 - ins_cost(150); // XXX 9.183 - ins_encode %{ 9.184 - __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 9.185 - %} 9.186 - ins_pipe(pipe_slow); 9.187 -%} 9.188 - 9.189 -instruct mulD_mem(regD dst, memory src) 9.190 -%{ 9.191 - match(Set dst (MulD dst (LoadD src))); 9.192 - 9.193 - format %{ "mulsd $dst, $src" %} 9.194 - ins_cost(150); // XXX 9.195 - ins_encode %{ 9.196 - __ mulsd($dst$$XMMRegister, $src$$Address); 9.197 - %} 9.198 - ins_pipe(pipe_slow); 9.199 -%} 9.200 - 9.201 -instruct mulD_imm(regD dst, immD con) %{ 9.202 - match(Set dst (MulD dst con)); 9.203 - format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 9.204 - ins_cost(150); // XXX 9.205 - ins_encode %{ 9.206 - __ mulsd($dst$$XMMRegister, $constantaddress($con)); 9.207 - %} 9.208 - ins_pipe(pipe_slow); 9.209 -%} 9.210 - 9.211 -instruct divF_reg(regF dst, regF 
src) 9.212 -%{ 9.213 - match(Set dst (DivF dst src)); 9.214 - 9.215 - format %{ "divss $dst, $src" %} 9.216 - ins_cost(150); // XXX 9.217 - ins_encode %{ 9.218 - __ divss($dst$$XMMRegister, $src$$XMMRegister); 9.219 - %} 9.220 - ins_pipe(pipe_slow); 9.221 -%} 9.222 - 9.223 -instruct divF_mem(regF dst, memory src) 9.224 -%{ 9.225 - match(Set dst (DivF dst (LoadF src))); 9.226 - 9.227 - format %{ "divss $dst, $src" %} 9.228 - ins_cost(150); // XXX 9.229 - ins_encode %{ 9.230 - __ divss($dst$$XMMRegister, $src$$Address); 9.231 - %} 9.232 - ins_pipe(pipe_slow); 9.233 -%} 9.234 - 9.235 -instruct divF_imm(regF dst, immF con) %{ 9.236 - match(Set dst (DivF dst con)); 9.237 - format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 9.238 - ins_cost(150); // XXX 9.239 - ins_encode %{ 9.240 - __ divss($dst$$XMMRegister, $constantaddress($con)); 9.241 - %} 9.242 - ins_pipe(pipe_slow); 9.243 -%} 9.244 - 9.245 -instruct divD_reg(regD dst, regD src) 9.246 -%{ 9.247 - match(Set dst (DivD dst src)); 9.248 - 9.249 - format %{ "divsd $dst, $src" %} 9.250 - ins_cost(150); // XXX 9.251 - ins_encode %{ 9.252 - __ divsd($dst$$XMMRegister, $src$$XMMRegister); 9.253 - %} 9.254 - ins_pipe(pipe_slow); 9.255 -%} 9.256 - 9.257 -instruct divD_mem(regD dst, memory src) 9.258 -%{ 9.259 - match(Set dst (DivD dst (LoadD src))); 9.260 - 9.261 - format %{ "divsd $dst, $src" %} 9.262 - ins_cost(150); // XXX 9.263 - ins_encode %{ 9.264 - __ divsd($dst$$XMMRegister, $src$$Address); 9.265 - %} 9.266 - ins_pipe(pipe_slow); 9.267 -%} 9.268 - 9.269 -instruct divD_imm(regD dst, immD con) %{ 9.270 - match(Set dst (DivD dst con)); 9.271 - format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 9.272 - ins_cost(150); // XXX 9.273 - ins_encode %{ 9.274 - __ divsd($dst$$XMMRegister, $constantaddress($con)); 9.275 - %} 9.276 - ins_pipe(pipe_slow); 9.277 -%} 9.278 - 9.279 -instruct sqrtF_reg(regF dst, regF src) 9.280 -%{ 9.281 - match(Set dst (ConvD2F 
(SqrtD (ConvF2D src)))); 9.282 - 9.283 - format %{ "sqrtss $dst, $src" %} 9.284 - ins_cost(150); // XXX 9.285 - ins_encode %{ 9.286 - __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 9.287 - %} 9.288 - ins_pipe(pipe_slow); 9.289 -%} 9.290 - 9.291 -instruct sqrtF_mem(regF dst, memory src) 9.292 -%{ 9.293 - match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 9.294 - 9.295 - format %{ "sqrtss $dst, $src" %} 9.296 - ins_cost(150); // XXX 9.297 - ins_encode %{ 9.298 - __ sqrtss($dst$$XMMRegister, $src$$Address); 9.299 - %} 9.300 - ins_pipe(pipe_slow); 9.301 -%} 9.302 - 9.303 -instruct sqrtF_imm(regF dst, immF con) %{ 9.304 - match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 9.305 - format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 9.306 - ins_cost(150); // XXX 9.307 - ins_encode %{ 9.308 - __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 9.309 - %} 9.310 - ins_pipe(pipe_slow); 9.311 -%} 9.312 - 9.313 -instruct sqrtD_reg(regD dst, regD src) 9.314 -%{ 9.315 - match(Set dst (SqrtD src)); 9.316 - 9.317 - format %{ "sqrtsd $dst, $src" %} 9.318 - ins_cost(150); // XXX 9.319 - ins_encode %{ 9.320 - __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 9.321 - %} 9.322 - ins_pipe(pipe_slow); 9.323 -%} 9.324 - 9.325 -instruct sqrtD_mem(regD dst, memory src) 9.326 -%{ 9.327 - match(Set dst (SqrtD (LoadD src))); 9.328 - 9.329 - format %{ "sqrtsd $dst, $src" %} 9.330 - ins_cost(150); // XXX 9.331 - ins_encode %{ 9.332 - __ sqrtsd($dst$$XMMRegister, $src$$Address); 9.333 - %} 9.334 - ins_pipe(pipe_slow); 9.335 -%} 9.336 - 9.337 -instruct sqrtD_imm(regD dst, immD con) %{ 9.338 - match(Set dst (SqrtD con)); 9.339 - format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 9.340 - ins_cost(150); // XXX 9.341 - ins_encode %{ 9.342 - __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 9.343 - %} 9.344 - ins_pipe(pipe_slow); 9.345 -%} 9.346 - 9.347 -instruct absF_reg(regF dst) 9.348 -%{ 9.349 - match(Set dst (AbsF 
dst)); 9.350 - ins_cost(150); // XXX 9.351 - format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 9.352 - ins_encode %{ 9.353 - __ andps($dst$$XMMRegister, 9.354 - ExternalAddress((address) StubRoutines::x86::float_sign_mask())); 9.355 - %} 9.356 - ins_pipe(pipe_slow); 9.357 -%} 9.358 - 9.359 -instruct absD_reg(regD dst) 9.360 -%{ 9.361 - match(Set dst (AbsD dst)); 9.362 - ins_cost(150); // XXX 9.363 - format %{ "andpd $dst, [0x7fffffffffffffff]\t" 9.364 - "# abs double by sign masking" %} 9.365 - ins_encode %{ 9.366 - __ andpd($dst$$XMMRegister, 9.367 - ExternalAddress((address) StubRoutines::x86::double_sign_mask())); 9.368 - %} 9.369 - ins_pipe(pipe_slow); 9.370 -%} 9.371 - 9.372 -instruct negF_reg(regF dst) 9.373 -%{ 9.374 - match(Set dst (NegF dst)); 9.375 - ins_cost(150); // XXX 9.376 - format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 9.377 - ins_encode %{ 9.378 - __ xorps($dst$$XMMRegister, 9.379 - ExternalAddress((address) StubRoutines::x86::float_sign_flip())); 9.380 - %} 9.381 - ins_pipe(pipe_slow); 9.382 -%} 9.383 - 9.384 -instruct negD_reg(regD dst) 9.385 -%{ 9.386 - match(Set dst (NegD dst)); 9.387 - ins_cost(150); // XXX 9.388 - format %{ "xorpd $dst, [0x8000000000000000]\t" 9.389 - "# neg double by sign flipping" %} 9.390 - ins_encode %{ 9.391 - __ xorpd($dst$$XMMRegister, 9.392 - ExternalAddress((address) StubRoutines::x86::double_sign_flip())); 9.393 - %} 9.394 - ins_pipe(pipe_slow); 9.395 -%} 9.396 - 9.397 // -----------Trig and Trancendental Instructions------------------------------ 9.398 instruct cosD_reg(regD dst) %{ 9.399 match(Set dst (CosD dst));
10.1 --- a/src/share/vm/opto/matcher.cpp Mon Dec 19 14:16:23 2011 -0800 10.2 +++ b/src/share/vm/opto/matcher.cpp Tue Dec 20 00:55:02 2011 -0800 10.3 @@ -1365,31 +1365,36 @@ 10.4 10.5 const Type *t = m->bottom_type(); 10.6 10.7 - if( t->singleton() ) { 10.8 + if (t->singleton()) { 10.9 // Never force constants into registers. Allow them to match as 10.10 // constants or registers. Copies of the same value will share 10.11 // the same register. See find_shared_node. 10.12 return false; 10.13 } else { // Not a constant 10.14 // Stop recursion if they have different Controls. 10.15 - // Slot 0 of constants is not really a Control. 10.16 - if( control && m->in(0) && control != m->in(0) ) { 10.17 + Node* m_control = m->in(0); 10.18 + // Control of load's memory can post-dominate load's control. 10.19 + // So use it since load can't float above its memory. 10.20 + Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : NULL; 10.21 + if (control && m_control && control != m_control && control != mem_control) { 10.22 10.23 // Actually, we can live with the most conservative control we 10.24 // find, if it post-dominates the others. This allows us to 10.25 // pick up load/op/store trees where the load can float a little 10.26 // above the store. 10.27 Node *x = control; 10.28 - const uint max_scan = 6; // Arbitrary scan cutoff 10.29 + const uint max_scan = 6; // Arbitrary scan cutoff 10.30 uint j; 10.31 - for( j=0; j<max_scan; j++ ) { 10.32 - if( x->is_Region() ) // Bail out at merge points 10.33 + for (j=0; j<max_scan; j++) { 10.34 + if (x->is_Region()) // Bail out at merge points 10.35 return true; 10.36 x = x->in(0); 10.37 - if( x == m->in(0) ) // Does 'control' post-dominate 10.38 + if (x == m_control) // Does 'control' post-dominate 10.39 break; // m->in(0)? If so, we can use it 10.40 + if (x == mem_control) // Does 'control' post-dominate 10.41 + break; // mem_control?
If so, we can use it 10.42 } 10.43 - if( j == max_scan ) // No post-domination before scan end? 10.44 + if (j == max_scan) // No post-domination before scan end? 10.45 return true; // Then break the match tree up 10.46 } 10.47 if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) {