7121648: Use 3-operands SIMD instructions on x86 with AVX

author:    kvn
date:      Tue, 20 Dec 2011 00:55:02 -0800
changeset: 3390:65149e74c706
parent:    3389:669f6a7d5b70
child:     3391:069ab3f976d3

7121648: Use 3-operands SIMD instructions on x86 with AVX
Summary: Use 3-operands SIMD instructions in C2 generated code for machines with AVX.
Reviewed-by: never
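
The practical win: SSE arithmetic is destructive (dst doubles as the first source), so C2 frequently has to insert a register copy to keep a value live across the operation. The VEX-encoded AVX forms take a separate non-destructive source (nds), making that copy unnecessary. A minimal sketch of the difference in emitted code, using the assembler API added below (illustrative registers, not taken from this changeset):

    // UseAVX == 0: compute xmm1 + xmm2 without clobbering xmm1
    __ movflt(xmm0, xmm1);        // extra copy; addss overwrites its dst
    __ addss(xmm0, xmm2);         // xmm0 = xmm0 + xmm2
    // UseAVX > 0: 3-operand form, xmm1 stays intact
    __ vaddss(xmm0, xmm1, xmm2);  // xmm0 = xmm1 + xmm2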

make/bsd/makefiles/adlc.make
make/linux/makefiles/adlc.make
make/solaris/makefiles/adlc.make
make/windows/makefiles/adlc.make
src/cpu/x86/vm/assembler_x86.cpp
src/cpu/x86/vm/assembler_x86.hpp
src/cpu/x86/vm/x86.ad
src/cpu/x86/vm/x86_32.ad
src/cpu/x86/vm/x86_64.ad
src/share/vm/opto/matcher.cpp
     1.1 --- a/make/bsd/makefiles/adlc.make	Mon Dec 19 14:16:23 2011 -0800
     1.2 +++ b/make/bsd/makefiles/adlc.make	Tue Dec 20 00:55:02 2011 -0800
     1.3 @@ -39,9 +39,16 @@
     1.4  
     1.5  SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 
     1.6  
     1.7 -SOURCES.AD = \
     1.8 +ifeq ("${Platform_arch_model}", "${Platform_arch}")
     1.9 +  SOURCES.AD = \
    1.10    $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
    1.11    $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
    1.12 +else
    1.13 +  SOURCES.AD = \
    1.14 +  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
    1.15 +  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
    1.16 +  $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
    1.17 +endif
    1.18  
    1.19  EXEC	= $(OUTDIR)/adlc
    1.20  
     2.1 --- a/make/linux/makefiles/adlc.make	Mon Dec 19 14:16:23 2011 -0800
     2.2 +++ b/make/linux/makefiles/adlc.make	Tue Dec 20 00:55:02 2011 -0800
     2.3 @@ -39,9 +39,16 @@
     2.4  
     2.5  SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 
     2.6  
     2.7 -SOURCES.AD = \
     2.8 +ifeq ("${Platform_arch_model}", "${Platform_arch}")
     2.9 +  SOURCES.AD = \
    2.10    $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
    2.11    $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
    2.12 +else
    2.13 +  SOURCES.AD = \
    2.14 +  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
    2.15 +  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
    2.16 +  $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
    2.17 +endif
    2.18  
    2.19  EXEC	= $(OUTDIR)/adlc
    2.20  
     3.1 --- a/make/solaris/makefiles/adlc.make	Mon Dec 19 14:16:23 2011 -0800
     3.2 +++ b/make/solaris/makefiles/adlc.make	Tue Dec 20 00:55:02 2011 -0800
     3.3 @@ -40,9 +40,16 @@
     3.4  
     3.5  SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 
     3.6  
     3.7 -SOURCES.AD = \
     3.8 +ifeq ("${Platform_arch_model}", "${Platform_arch}")
     3.9 +  SOURCES.AD = \
    3.10    $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
    3.11    $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
    3.12 +else
    3.13 +  SOURCES.AD = \
    3.14 +  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \
    3.15 +  $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \
    3.16 +  $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad)
    3.17 +endif
    3.18  
    3.19  EXEC	= $(OUTDIR)/adlc
    3.20  
     4.1 --- a/make/windows/makefiles/adlc.make	Mon Dec 19 14:16:23 2011 -0800
     4.2 +++ b/make/windows/makefiles/adlc.make	Tue Dec 20 00:55:02 2011 -0800
     4.3 @@ -53,6 +53,17 @@
     4.4    /I "$(WorkSpace)\src\os\windows\vm" \
     4.5    /I "$(WorkSpace)\src\cpu\$(Platform_arch)\vm"
     4.6  
     4.7 +!if "$(Platform_arch_model)" == "$(Platform_arch)"
     4.8 +SOURCES_AD=\
     4.9 +  $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \
    4.10 +  $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
    4.11 +!else
    4.12 +SOURCES_AD=\
    4.13 +  $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \
    4.14 +  $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad \
    4.15 +  $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
    4.16 +!endif
    4.17 +
    4.18  # NOTE! If you add any files here, you must also update GENERATED_NAMES_IN_DIR
    4.19  # and ProjectCreatorIDEOptions in projectcreator.make. 
    4.20  GENERATED_NAMES=\
    4.21 @@ -105,7 +116,6 @@
    4.22  	$(ADLC) $(ADLCFLAGS) $(Platform_arch_model).ad
    4.23  	mv $(GENERATED_NAMES) $(AdlcOutDir)/
    4.24  
    4.25 -$(Platform_arch_model).ad: $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad
    4.26 +$(Platform_arch_model).ad: $(SOURCES_AD)
    4.27  	rm -f $(Platform_arch_model).ad
    4.28 -	cat $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad  \
    4.29 -	    $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad >$(Platform_arch_model).ad
    4.30 +	cat $(SOURCES_AD) >$(Platform_arch_model).ad
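
A note on the four makefile hunks above: on x86 the two variables differ (Platform_arch is "x86", Platform_arch_model is "x86_32" or "x86_64"), so the new branch fires and splices the shared x86.ad between the model-specific file and the OS file; on platforms where the two names coincide, the original two-file list is kept. For a 64-bit Linux build the concatenation works out to roughly (a sketch, altsrc wrapping and full path prefixes omitted):

    SOURCES.AD = .../cpu/x86/vm/x86_64.ad \
                 .../cpu/x86/vm/x86.ad \
                 .../os_cpu/linux_x86/vm/linux_x86_64.ad
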
     5.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Dec 19 14:16:23 2011 -0800
     5.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Tue Dec 20 00:55:02 2011 -0800
     5.3 @@ -2932,6 +2932,161 @@
     5.4    emit_operand(dst, src);
     5.5  }
     5.6  
      5.7 +// AVX 3-operand, non-destructive source instructions (encoded with VEX prefix)
     5.8 +
     5.9 +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
    5.10 +  assert(VM_Version::supports_avx(), "");
    5.11 +  InstructionMark im(this);
    5.12 +  vex_prefix(dst, nds, src, VEX_SIMD_F2);
    5.13 +  emit_byte(0x58);
    5.14 +  emit_operand(dst, src);
    5.15 +}
    5.16 +
    5.17 +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    5.18 +  assert(VM_Version::supports_avx(), "");
    5.19 +  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
    5.20 +  emit_byte(0x58);
    5.21 +  emit_byte(0xC0 | encode);
    5.22 +}
    5.23 +
    5.24 +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
    5.25 +  assert(VM_Version::supports_avx(), "");
    5.26 +  InstructionMark im(this);
    5.27 +  vex_prefix(dst, nds, src, VEX_SIMD_F3);
    5.28 +  emit_byte(0x58);
    5.29 +  emit_operand(dst, src);
    5.30 +}
    5.31 +
    5.32 +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    5.33 +  assert(VM_Version::supports_avx(), "");
    5.34 +  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
    5.35 +  emit_byte(0x58);
    5.36 +  emit_byte(0xC0 | encode);
    5.37 +}
    5.38 +
    5.39 +void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
    5.40 +  assert(VM_Version::supports_avx(), "");
    5.41 +  InstructionMark im(this);
    5.42 +  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
    5.43 +  emit_byte(0x54);
    5.44 +  emit_operand(dst, src);
    5.45 +}
    5.46 +
    5.47 +void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
    5.48 +  assert(VM_Version::supports_avx(), "");
    5.49 +  InstructionMark im(this);
    5.50 +  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
    5.51 +  emit_byte(0x54);
    5.52 +  emit_operand(dst, src);
    5.53 +}
    5.54 +
    5.55 +void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
    5.56 +  assert(VM_Version::supports_avx(), "");
    5.57 +  InstructionMark im(this);
    5.58 +  vex_prefix(dst, nds, src, VEX_SIMD_F2);
    5.59 +  emit_byte(0x5E);
    5.60 +  emit_operand(dst, src);
    5.61 +}
    5.62 +
    5.63 +void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    5.64 +  assert(VM_Version::supports_avx(), "");
    5.65 +  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
    5.66 +  emit_byte(0x5E);
    5.67 +  emit_byte(0xC0 | encode);
    5.68 +}
    5.69 +
    5.70 +void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
    5.71 +  assert(VM_Version::supports_avx(), "");
    5.72 +  InstructionMark im(this);
    5.73 +  vex_prefix(dst, nds, src, VEX_SIMD_F3);
    5.74 +  emit_byte(0x5E);
    5.75 +  emit_operand(dst, src);
    5.76 +}
    5.77 +
    5.78 +void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    5.79 +  assert(VM_Version::supports_avx(), "");
    5.80 +  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
    5.81 +  emit_byte(0x5E);
    5.82 +  emit_byte(0xC0 | encode);
    5.83 +}
    5.84 +
    5.85 +void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
    5.86 +  assert(VM_Version::supports_avx(), "");
    5.87 +  InstructionMark im(this);
    5.88 +  vex_prefix(dst, nds, src, VEX_SIMD_F2);
    5.89 +  emit_byte(0x59);
    5.90 +  emit_operand(dst, src);
    5.91 +}
    5.92 +
    5.93 +void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
    5.94 +  assert(VM_Version::supports_avx(), "");
    5.95 +  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
    5.96 +  emit_byte(0x59);
    5.97 +  emit_byte(0xC0 | encode);
    5.98 +}
    5.99 +
    5.100 +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
          +  assert(VM_Version::supports_avx(), "");
    5.101 +  InstructionMark im(this);
   5.102 +  vex_prefix(dst, nds, src, VEX_SIMD_F3);
   5.103 +  emit_byte(0x59);
   5.104 +  emit_operand(dst, src);
   5.105 +}
   5.106 +
   5.107 +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   5.108 +  assert(VM_Version::supports_avx(), "");
   5.109 +  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
   5.110 +  emit_byte(0x59);
   5.111 +  emit_byte(0xC0 | encode);
   5.112 +}
   5.113 +
   5.114 +
   5.115 +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
   5.116 +  assert(VM_Version::supports_avx(), "");
   5.117 +  InstructionMark im(this);
   5.118 +  vex_prefix(dst, nds, src, VEX_SIMD_F2);
   5.119 +  emit_byte(0x5C);
   5.120 +  emit_operand(dst, src);
   5.121 +}
   5.122 +
   5.123 +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   5.124 +  assert(VM_Version::supports_avx(), "");
   5.125 +  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
   5.126 +  emit_byte(0x5C);
   5.127 +  emit_byte(0xC0 | encode);
   5.128 +}
   5.129 +
   5.130 +void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
   5.131 +  assert(VM_Version::supports_avx(), "");
   5.132 +  InstructionMark im(this);
   5.133 +  vex_prefix(dst, nds, src, VEX_SIMD_F3);
   5.134 +  emit_byte(0x5C);
   5.135 +  emit_operand(dst, src);
   5.136 +}
   5.137 +
   5.138 +void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   5.139 +  assert(VM_Version::supports_avx(), "");
   5.140 +  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
   5.141 +  emit_byte(0x5C);
   5.142 +  emit_byte(0xC0 | encode);
   5.143 +}
   5.144 +
   5.145 +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
   5.146 +  assert(VM_Version::supports_avx(), "");
   5.147 +  InstructionMark im(this);
   5.148 +  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
   5.149 +  emit_byte(0x57);
   5.150 +  emit_operand(dst, src);
   5.151 +}
   5.152 +
   5.153 +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
   5.154 +  assert(VM_Version::supports_avx(), "");
   5.155 +  InstructionMark im(this);
   5.156 +  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
   5.157 +  emit_byte(0x57);
   5.158 +  emit_operand(dst, src);
   5.159 +}
   5.160 +
   5.161 +
   5.162  #ifndef _LP64
   5.163  // 32bit only pieces of the assembler
   5.164  
   5.165 @@ -7235,6 +7390,157 @@
   5.166    }
   5.167  }
   5.168  
   5.169 +void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
   5.170 +  if (reachable(src)) {
   5.171 +    Assembler::ucomisd(dst, as_Address(src));
   5.172 +  } else {
   5.173 +    lea(rscratch1, src);
   5.174 +    Assembler::ucomisd(dst, Address(rscratch1, 0));
   5.175 +  }
   5.176 +}
   5.177 +
   5.178 +void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
   5.179 +  if (reachable(src)) {
   5.180 +    Assembler::ucomiss(dst, as_Address(src));
   5.181 +  } else {
   5.182 +    lea(rscratch1, src);
   5.183 +    Assembler::ucomiss(dst, Address(rscratch1, 0));
   5.184 +  }
   5.185 +}
   5.186 +
   5.187 +void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
   5.188 +  // Used in sign-bit flipping with aligned address.
   5.189 +  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   5.190 +  if (reachable(src)) {
   5.191 +    Assembler::xorpd(dst, as_Address(src));
   5.192 +  } else {
   5.193 +    lea(rscratch1, src);
   5.194 +    Assembler::xorpd(dst, Address(rscratch1, 0));
   5.195 +  }
   5.196 +}
   5.197 +
   5.198 +void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
   5.199 +  // Used in sign-bit flipping with aligned address.
   5.200 +  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   5.201 +  if (reachable(src)) {
   5.202 +    Assembler::xorps(dst, as_Address(src));
   5.203 +  } else {
   5.204 +    lea(rscratch1, src);
   5.205 +    Assembler::xorps(dst, Address(rscratch1, 0));
   5.206 +  }
   5.207 +}
   5.208 +
    5.209 +// AVX 3-operand instructions
   5.210 +
   5.211 +void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.212 +  if (reachable(src)) {
   5.213 +    vaddsd(dst, nds, as_Address(src));
   5.214 +  } else {
   5.215 +    lea(rscratch1, src);
   5.216 +    vaddsd(dst, nds, Address(rscratch1, 0));
   5.217 +  }
   5.218 +}
   5.219 +
   5.220 +void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.221 +  if (reachable(src)) {
   5.222 +    vaddss(dst, nds, as_Address(src));
   5.223 +  } else {
   5.224 +    lea(rscratch1, src);
   5.225 +    vaddss(dst, nds, Address(rscratch1, 0));
   5.226 +  }
   5.227 +}
   5.228 +
   5.229 +void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.230 +  if (reachable(src)) {
   5.231 +    vandpd(dst, nds, as_Address(src));
   5.232 +  } else {
   5.233 +    lea(rscratch1, src);
   5.234 +    vandpd(dst, nds, Address(rscratch1, 0));
   5.235 +  }
   5.236 +}
   5.237 +
   5.238 +void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.239 +  if (reachable(src)) {
   5.240 +    vandps(dst, nds, as_Address(src));
   5.241 +  } else {
   5.242 +    lea(rscratch1, src);
   5.243 +    vandps(dst, nds, Address(rscratch1, 0));
   5.244 +  }
   5.245 +}
   5.246 +
   5.247 +void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.248 +  if (reachable(src)) {
   5.249 +    vdivsd(dst, nds, as_Address(src));
   5.250 +  } else {
   5.251 +    lea(rscratch1, src);
   5.252 +    vdivsd(dst, nds, Address(rscratch1, 0));
   5.253 +  }
   5.254 +}
   5.255 +
   5.256 +void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.257 +  if (reachable(src)) {
   5.258 +    vdivss(dst, nds, as_Address(src));
   5.259 +  } else {
   5.260 +    lea(rscratch1, src);
   5.261 +    vdivss(dst, nds, Address(rscratch1, 0));
   5.262 +  }
   5.263 +}
   5.264 +
   5.265 +void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.266 +  if (reachable(src)) {
   5.267 +    vmulsd(dst, nds, as_Address(src));
   5.268 +  } else {
   5.269 +    lea(rscratch1, src);
   5.270 +    vmulsd(dst, nds, Address(rscratch1, 0));
   5.271 +  }
   5.272 +}
   5.273 +
   5.274 +void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.275 +  if (reachable(src)) {
   5.276 +    vmulss(dst, nds, as_Address(src));
   5.277 +  } else {
   5.278 +    lea(rscratch1, src);
   5.279 +    vmulss(dst, nds, Address(rscratch1, 0));
   5.280 +  }
   5.281 +}
   5.282 +
   5.283 +void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.284 +  if (reachable(src)) {
   5.285 +    vsubsd(dst, nds, as_Address(src));
   5.286 +  } else {
   5.287 +    lea(rscratch1, src);
   5.288 +    vsubsd(dst, nds, Address(rscratch1, 0));
   5.289 +  }
   5.290 +}
   5.291 +
   5.292 +void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.293 +  if (reachable(src)) {
   5.294 +    vsubss(dst, nds, as_Address(src));
   5.295 +  } else {
   5.296 +    lea(rscratch1, src);
   5.297 +    vsubss(dst, nds, Address(rscratch1, 0));
   5.298 +  }
   5.299 +}
   5.300 +
   5.301 +void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.302 +  if (reachable(src)) {
   5.303 +    vxorpd(dst, nds, as_Address(src));
   5.304 +  } else {
   5.305 +    lea(rscratch1, src);
   5.306 +    vxorpd(dst, nds, Address(rscratch1, 0));
   5.307 +  }
   5.308 +}
   5.309 +
   5.310 +void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
   5.311 +  if (reachable(src)) {
   5.312 +    vxorps(dst, nds, as_Address(src));
   5.313 +  } else {
   5.314 +    lea(rscratch1, src);
   5.315 +    vxorps(dst, nds, Address(rscratch1, 0));
   5.316 +  }
   5.317 +}
   5.318 +
   5.319 +
   5.320  //////////////////////////////////////////////////////////////////////////////////
   5.321  #ifndef SERIALGC
   5.322  
   5.323 @@ -8119,46 +8425,6 @@
   5.324  }
   5.325  
   5.326  
   5.327 -void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
   5.328 -  if (reachable(src)) {
   5.329 -    Assembler::ucomisd(dst, as_Address(src));
   5.330 -  } else {
   5.331 -    lea(rscratch1, src);
   5.332 -    Assembler::ucomisd(dst, Address(rscratch1, 0));
   5.333 -  }
   5.334 -}
   5.335 -
   5.336 -void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
   5.337 -  if (reachable(src)) {
   5.338 -    Assembler::ucomiss(dst, as_Address(src));
   5.339 -  } else {
   5.340 -    lea(rscratch1, src);
   5.341 -    Assembler::ucomiss(dst, Address(rscratch1, 0));
   5.342 -  }
   5.343 -}
   5.344 -
   5.345 -void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
   5.346 -  // Used in sign-bit flipping with aligned address.
   5.347 -  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   5.348 -  if (reachable(src)) {
   5.349 -    Assembler::xorpd(dst, as_Address(src));
   5.350 -  } else {
   5.351 -    lea(rscratch1, src);
   5.352 -    Assembler::xorpd(dst, Address(rscratch1, 0));
   5.353 -  }
   5.354 -}
   5.355 -
   5.356 -void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
   5.357 -  // Used in sign-bit flipping with aligned address.
   5.358 -  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
   5.359 -  if (reachable(src)) {
   5.360 -    Assembler::xorps(dst, as_Address(src));
   5.361 -  } else {
   5.362 -    lea(rscratch1, src);
   5.363 -    Assembler::xorps(dst, Address(rscratch1, 0));
   5.364 -  }
   5.365 -}
   5.366 -
   5.367  void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
   5.368    if (VM_Version::supports_cmov()) {
   5.369      cmovl(cc, dst, src);
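
All of the new MacroAssembler wrappers above share one shape: a RIP-reachable literal address is used directly, anything else is first materialized into rscratch1. That lets a match rule hand an ExternalAddress straight to a 3-operand instruction, as the sign-mask rules in the new x86.ad below do. A sketch of such a call (mirroring vabsD_reg, 64-bit stub address assumed):

    // xmm0 = xmm1 with the sign bit cleared (abs double)
    __ vandpd(xmm0, xmm1, ExternalAddress(StubRoutines::x86::double_sign_mask()));
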
     6.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Mon Dec 19 14:16:23 2011 -0800
     6.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Tue Dec 20 00:55:02 2011 -0800
     6.3 @@ -589,10 +589,21 @@
     6.4                    VexSimdPrefix pre, VexOpcode opc,
     6.5                    bool vex_w, bool vector256);
     6.6  
     6.7 +  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
     6.8 +                  VexSimdPrefix pre, bool vector256 = false) {
     6.9 +     vex_prefix(src, nds->encoding(), dst->encoding(),
    6.10 +                pre, VEX_OPCODE_0F, false, vector256);
    6.11 +  }
    6.12 +
    6.13    int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
    6.14                               VexSimdPrefix pre, VexOpcode opc,
    6.15                               bool vex_w, bool vector256);
    6.16  
    6.17 +  int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
    6.18 +                             VexSimdPrefix pre, bool vector256 = false) {
    6.19 +     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
    6.20 +                                  pre, VEX_OPCODE_0F, false, vector256);
    6.21 +  }
    6.22  
    6.23    void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
    6.24                     VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
    6.25 @@ -1574,6 +1585,29 @@
    6.26  
    6.27    void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
    6.28  
     6.29 +  // AVX 3-operand instructions (encoded with VEX prefix)
    6.30 +  void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
    6.31 +  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
    6.32 +  void vaddss(XMMRegister dst, XMMRegister nds, Address src);
    6.33 +  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
    6.34 +  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
    6.35 +  void vandps(XMMRegister dst, XMMRegister nds, Address src);
    6.36 +  void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
    6.37 +  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
    6.38 +  void vdivss(XMMRegister dst, XMMRegister nds, Address src);
    6.39 +  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
    6.40 +  void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
    6.41 +  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
    6.42 +  void vmulss(XMMRegister dst, XMMRegister nds, Address src);
    6.43 +  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
    6.44 +  void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
    6.45 +  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
    6.46 +  void vsubss(XMMRegister dst, XMMRegister nds, Address src);
    6.47 +  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
    6.48 +  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
    6.49 +  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
    6.50 +
    6.51 +
    6.52   protected:
    6.53    // Next instructions require address alignment 16 bytes SSE mode.
    6.54    // They should be called only from corresponding MacroAssembler instructions.
    6.55 @@ -2422,6 +2456,53 @@
    6.56    void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
    6.57    void xorps(XMMRegister dst, AddressLiteral src);
    6.58  
     6.59 +  // AVX 3-operand instructions
    6.60 +
    6.61 +  void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
    6.62 +  void vaddsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddsd(dst, nds, src); }
    6.63 +  void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.64 +
    6.65 +  void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
    6.66 +  void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
    6.67 +  void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.68 +
    6.69 +  void vandpd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandpd(dst, nds, src); }
    6.70 +  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.71 +
    6.72 +  void vandps(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandps(dst, nds, src); }
    6.73 +  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.74 +
    6.75 +  void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
    6.76 +  void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
    6.77 +  void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.78 +
    6.79 +  void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
    6.80 +  void vdivss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivss(dst, nds, src); }
    6.81 +  void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.82 +
    6.83 +  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
    6.84 +  void vmulsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulsd(dst, nds, src); }
    6.85 +  void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.86 +
    6.87 +  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
    6.88 +  void vmulss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vmulss(dst, nds, src); }
    6.89 +  void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.90 +
    6.91 +  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
    6.92 +  void vsubsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubsd(dst, nds, src); }
    6.93 +  void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.94 +
    6.95 +  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
    6.96 +  void vsubss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubss(dst, nds, src); }
    6.97 +  void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
    6.98 +
    6.99 +  void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
   6.100 +  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
   6.101 +
   6.102 +  void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
   6.103 +  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
   6.104 +
   6.105 +
   6.106    // Data
   6.107  
   6.108    void cmov32( Condition cc, Register dst, Address  src);
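
The two vex_prefix helpers added at the top of this file default every caller to the 0F-map, VEX.W=0, 128-bit encoding, which is all the scalar and 128-bit instructions in this change need. For orientation, a hedged sketch (not HotSpot code) of how the 2-byte VEX prefix those parameters select packs its fields, with the non-destructive source going into the inverted vvvv bits:

    #include <stdint.h>
    // Returns the two prefix bytes as 0xC5 followed by the field byte.
    static inline uint16_t vex2(int rex_r, int nds_enc, int vector256, int pp) {
      uint8_t b = (uint8_t)(((rex_r ? 0 : 1) << 7)     // ~R (inverted REX.R)
                          | ((~nds_enc & 0xF) << 3)    // ~vvvv: inverted nds register
                          | ((vector256 ? 1 : 0) << 2) // L: 0 = scalar/128-bit
                          | (pp & 0x3));               // pp: 0=none, 1=66, 2=F3, 3=F2
      return (uint16_t)((0xC5 << 8) | b);
    }
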
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/src/cpu/x86/vm/x86.ad	Tue Dec 20 00:55:02 2011 -0800
     7.3 @@ -0,0 +1,777 @@
     7.4 +//
     7.5 +// Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
     7.6 +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     7.7 +//
     7.8 +// This code is free software; you can redistribute it and/or modify it
     7.9 +// under the terms of the GNU General Public License version 2 only, as
    7.10 +// published by the Free Software Foundation.
    7.11 +//
    7.12 +// This code is distributed in the hope that it will be useful, but WITHOUT
    7.13 +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    7.14 +// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    7.15 +// version 2 for more details (a copy is included in the LICENSE file that
    7.16 +// accompanied this code).
    7.17 +//
    7.18 +// You should have received a copy of the GNU General Public License version
    7.19 +// 2 along with this work; if not, write to the Free Software Foundation,
    7.20 +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    7.21 +//
    7.22 +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    7.23 +// or visit www.oracle.com if you need additional information or have any
    7.24 +// questions.
    7.25 +//
    7.26 +//
    7.27 +
    7.28 +// X86 Common Architecture Description File
    7.29 +
    7.30 +source %{
    7.31 +  // Float masks come from different places depending on platform.
    7.32 +#ifdef _LP64
    7.33 +  static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
    7.34 +  static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
    7.35 +  static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
    7.36 +  static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
    7.37 +#else
    7.38 +  static address float_signmask()  { return (address)float_signmask_pool; }
    7.39 +  static address float_signflip()  { return (address)float_signflip_pool; }
    7.40 +  static address double_signmask() { return (address)double_signmask_pool; }
    7.41 +  static address double_signflip() { return (address)double_signflip_pool; }
    7.42 +#endif
    7.43 +%}
    7.44 +
    7.45 +// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
    7.46 +
    7.47 +instruct addF_reg(regF dst, regF src) %{
    7.48 +  predicate((UseSSE>=1) && (UseAVX == 0));
    7.49 +  match(Set dst (AddF dst src));
    7.50 +
    7.51 +  format %{ "addss   $dst, $src" %}
    7.52 +  ins_cost(150);
    7.53 +  ins_encode %{
    7.54 +    __ addss($dst$$XMMRegister, $src$$XMMRegister);
    7.55 +  %}
    7.56 +  ins_pipe(pipe_slow);
    7.57 +%}
    7.58 +
    7.59 +instruct addF_mem(regF dst, memory src) %{
    7.60 +  predicate((UseSSE>=1) && (UseAVX == 0));
    7.61 +  match(Set dst (AddF dst (LoadF src)));
    7.62 +
    7.63 +  format %{ "addss   $dst, $src" %}
    7.64 +  ins_cost(150);
    7.65 +  ins_encode %{
    7.66 +    __ addss($dst$$XMMRegister, $src$$Address);
    7.67 +  %}
    7.68 +  ins_pipe(pipe_slow);
    7.69 +%}
    7.70 +
    7.71 +instruct addF_imm(regF dst, immF con) %{
    7.72 +  predicate((UseSSE>=1) && (UseAVX == 0));
    7.73 +  match(Set dst (AddF dst con));
    7.74 +  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
    7.75 +  ins_cost(150);
    7.76 +  ins_encode %{
    7.77 +    __ addss($dst$$XMMRegister, $constantaddress($con));
    7.78 +  %}
    7.79 +  ins_pipe(pipe_slow);
    7.80 +%}
    7.81 +
    7.82 +instruct vaddF_reg(regF dst, regF src1, regF src2) %{
    7.83 +  predicate(UseAVX > 0);
    7.84 +  match(Set dst (AddF src1 src2));
    7.85 +
    7.86 +  format %{ "vaddss  $dst, $src1, $src2" %}
    7.87 +  ins_cost(150);
    7.88 +  ins_encode %{
    7.89 +    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
    7.90 +  %}
    7.91 +  ins_pipe(pipe_slow);
    7.92 +%}
    7.93 +
    7.94 +instruct vaddF_mem(regF dst, regF src1, memory src2) %{
    7.95 +  predicate(UseAVX > 0);
    7.96 +  match(Set dst (AddF src1 (LoadF src2)));
    7.97 +
    7.98 +  format %{ "vaddss  $dst, $src1, $src2" %}
    7.99 +  ins_cost(150);
   7.100 +  ins_encode %{
   7.101 +    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   7.102 +  %}
   7.103 +  ins_pipe(pipe_slow);
   7.104 +%}
   7.105 +
   7.106 +instruct vaddF_imm(regF dst, regF src, immF con) %{
   7.107 +  predicate(UseAVX > 0);
   7.108 +  match(Set dst (AddF src con));
   7.109 +
   7.110 +  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
   7.111 +  ins_cost(150);
   7.112 +  ins_encode %{
   7.113 +    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   7.114 +  %}
   7.115 +  ins_pipe(pipe_slow);
   7.116 +%}
   7.117 +
   7.118 +instruct addD_reg(regD dst, regD src) %{
   7.119 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.120 +  match(Set dst (AddD dst src));
   7.121 +
   7.122 +  format %{ "addsd   $dst, $src" %}
   7.123 +  ins_cost(150);
   7.124 +  ins_encode %{
   7.125 +    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
   7.126 +  %}
   7.127 +  ins_pipe(pipe_slow);
   7.128 +%}
   7.129 +
   7.130 +instruct addD_mem(regD dst, memory src) %{
   7.131 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.132 +  match(Set dst (AddD dst (LoadD src)));
   7.133 +
   7.134 +  format %{ "addsd   $dst, $src" %}
   7.135 +  ins_cost(150);
   7.136 +  ins_encode %{
   7.137 +    __ addsd($dst$$XMMRegister, $src$$Address);
   7.138 +  %}
   7.139 +  ins_pipe(pipe_slow);
   7.140 +%}
   7.141 +
   7.142 +instruct addD_imm(regD dst, immD con) %{
   7.143 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.144 +  match(Set dst (AddD dst con));
   7.145 +  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   7.146 +  ins_cost(150);
   7.147 +  ins_encode %{
   7.148 +    __ addsd($dst$$XMMRegister, $constantaddress($con));
   7.149 +  %}
   7.150 +  ins_pipe(pipe_slow);
   7.151 +%}
   7.152 +
   7.153 +instruct vaddD_reg(regD dst, regD src1, regD src2) %{
   7.154 +  predicate(UseAVX > 0);
   7.155 +  match(Set dst (AddD src1 src2));
   7.156 +
   7.157 +  format %{ "vaddsd  $dst, $src1, $src2" %}
   7.158 +  ins_cost(150);
   7.159 +  ins_encode %{
   7.160 +    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   7.161 +  %}
   7.162 +  ins_pipe(pipe_slow);
   7.163 +%}
   7.164 +
   7.165 +instruct vaddD_mem(regD dst, regD src1, memory src2) %{
   7.166 +  predicate(UseAVX > 0);
   7.167 +  match(Set dst (AddD src1 (LoadD src2)));
   7.168 +
   7.169 +  format %{ "vaddsd  $dst, $src1, $src2" %}
   7.170 +  ins_cost(150);
   7.171 +  ins_encode %{
   7.172 +    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   7.173 +  %}
   7.174 +  ins_pipe(pipe_slow);
   7.175 +%}
   7.176 +
   7.177 +instruct vaddD_imm(regD dst, regD src, immD con) %{
   7.178 +  predicate(UseAVX > 0);
   7.179 +  match(Set dst (AddD src con));
   7.180 +
   7.181 +  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
   7.182 +  ins_cost(150);
   7.183 +  ins_encode %{
   7.184 +    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   7.185 +  %}
   7.186 +  ins_pipe(pipe_slow);
   7.187 +%}
   7.188 +
   7.189 +instruct subF_reg(regF dst, regF src) %{
   7.190 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.191 +  match(Set dst (SubF dst src));
   7.192 +
   7.193 +  format %{ "subss   $dst, $src" %}
   7.194 +  ins_cost(150);
   7.195 +  ins_encode %{
   7.196 +    __ subss($dst$$XMMRegister, $src$$XMMRegister);
   7.197 +  %}
   7.198 +  ins_pipe(pipe_slow);
   7.199 +%}
   7.200 +
   7.201 +instruct subF_mem(regF dst, memory src) %{
   7.202 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.203 +  match(Set dst (SubF dst (LoadF src)));
   7.204 +
   7.205 +  format %{ "subss   $dst, $src" %}
   7.206 +  ins_cost(150);
   7.207 +  ins_encode %{
   7.208 +    __ subss($dst$$XMMRegister, $src$$Address);
   7.209 +  %}
   7.210 +  ins_pipe(pipe_slow);
   7.211 +%}
   7.212 +
   7.213 +instruct subF_imm(regF dst, immF con) %{
   7.214 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.215 +  match(Set dst (SubF dst con));
   7.216 +  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   7.217 +  ins_cost(150);
   7.218 +  ins_encode %{
   7.219 +    __ subss($dst$$XMMRegister, $constantaddress($con));
   7.220 +  %}
   7.221 +  ins_pipe(pipe_slow);
   7.222 +%}
   7.223 +
   7.224 +instruct vsubF_reg(regF dst, regF src1, regF src2) %{
   7.225 +  predicate(UseAVX > 0);
   7.226 +  match(Set dst (SubF src1 src2));
   7.227 +
   7.228 +  format %{ "vsubss  $dst, $src1, $src2" %}
   7.229 +  ins_cost(150);
   7.230 +  ins_encode %{
   7.231 +    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   7.232 +  %}
   7.233 +  ins_pipe(pipe_slow);
   7.234 +%}
   7.235 +
   7.236 +instruct vsubF_mem(regF dst, regF src1, memory src2) %{
   7.237 +  predicate(UseAVX > 0);
   7.238 +  match(Set dst (SubF src1 (LoadF src2)));
   7.239 +
   7.240 +  format %{ "vsubss  $dst, $src1, $src2" %}
   7.241 +  ins_cost(150);
   7.242 +  ins_encode %{
   7.243 +    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   7.244 +  %}
   7.245 +  ins_pipe(pipe_slow);
   7.246 +%}
   7.247 +
   7.248 +instruct vsubF_imm(regF dst, regF src, immF con) %{
   7.249 +  predicate(UseAVX > 0);
   7.250 +  match(Set dst (SubF src con));
   7.251 +
   7.252 +  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
   7.253 +  ins_cost(150);
   7.254 +  ins_encode %{
   7.255 +    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   7.256 +  %}
   7.257 +  ins_pipe(pipe_slow);
   7.258 +%}
   7.259 +
   7.260 +instruct subD_reg(regD dst, regD src) %{
   7.261 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.262 +  match(Set dst (SubD dst src));
   7.263 +
   7.264 +  format %{ "subsd   $dst, $src" %}
   7.265 +  ins_cost(150);
   7.266 +  ins_encode %{
   7.267 +    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
   7.268 +  %}
   7.269 +  ins_pipe(pipe_slow);
   7.270 +%}
   7.271 +
   7.272 +instruct subD_mem(regD dst, memory src) %{
   7.273 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.274 +  match(Set dst (SubD dst (LoadD src)));
   7.275 +
   7.276 +  format %{ "subsd   $dst, $src" %}
   7.277 +  ins_cost(150);
   7.278 +  ins_encode %{
   7.279 +    __ subsd($dst$$XMMRegister, $src$$Address);
   7.280 +  %}
   7.281 +  ins_pipe(pipe_slow);
   7.282 +%}
   7.283 +
   7.284 +instruct subD_imm(regD dst, immD con) %{
   7.285 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.286 +  match(Set dst (SubD dst con));
   7.287 +  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   7.288 +  ins_cost(150);
   7.289 +  ins_encode %{
   7.290 +    __ subsd($dst$$XMMRegister, $constantaddress($con));
   7.291 +  %}
   7.292 +  ins_pipe(pipe_slow);
   7.293 +%}
   7.294 +
   7.295 +instruct vsubD_reg(regD dst, regD src1, regD src2) %{
   7.296 +  predicate(UseAVX > 0);
   7.297 +  match(Set dst (SubD src1 src2));
   7.298 +
   7.299 +  format %{ "vsubsd  $dst, $src1, $src2" %}
   7.300 +  ins_cost(150);
   7.301 +  ins_encode %{
   7.302 +    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   7.303 +  %}
   7.304 +  ins_pipe(pipe_slow);
   7.305 +%}
   7.306 +
   7.307 +instruct vsubD_mem(regD dst, regD src1, memory src2) %{
   7.308 +  predicate(UseAVX > 0);
   7.309 +  match(Set dst (SubD src1 (LoadD src2)));
   7.310 +
   7.311 +  format %{ "vsubsd  $dst, $src1, $src2" %}
   7.312 +  ins_cost(150);
   7.313 +  ins_encode %{
   7.314 +    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   7.315 +  %}
   7.316 +  ins_pipe(pipe_slow);
   7.317 +%}
   7.318 +
   7.319 +instruct vsubD_imm(regD dst, regD src, immD con) %{
   7.320 +  predicate(UseAVX > 0);
   7.321 +  match(Set dst (SubD src con));
   7.322 +
   7.323 +  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
   7.324 +  ins_cost(150);
   7.325 +  ins_encode %{
   7.326 +    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   7.327 +  %}
   7.328 +  ins_pipe(pipe_slow);
   7.329 +%}
   7.330 +
   7.331 +instruct mulF_reg(regF dst, regF src) %{
   7.332 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.333 +  match(Set dst (MulF dst src));
   7.334 +
   7.335 +  format %{ "mulss   $dst, $src" %}
   7.336 +  ins_cost(150);
   7.337 +  ins_encode %{
   7.338 +    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
   7.339 +  %}
   7.340 +  ins_pipe(pipe_slow);
   7.341 +%}
   7.342 +
   7.343 +instruct mulF_mem(regF dst, memory src) %{
   7.344 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.345 +  match(Set dst (MulF dst (LoadF src)));
   7.346 +
   7.347 +  format %{ "mulss   $dst, $src" %}
   7.348 +  ins_cost(150);
   7.349 +  ins_encode %{
   7.350 +    __ mulss($dst$$XMMRegister, $src$$Address);
   7.351 +  %}
   7.352 +  ins_pipe(pipe_slow);
   7.353 +%}
   7.354 +
   7.355 +instruct mulF_imm(regF dst, immF con) %{
   7.356 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.357 +  match(Set dst (MulF dst con));
   7.358 +  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   7.359 +  ins_cost(150);
   7.360 +  ins_encode %{
   7.361 +    __ mulss($dst$$XMMRegister, $constantaddress($con));
   7.362 +  %}
   7.363 +  ins_pipe(pipe_slow);
   7.364 +%}
   7.365 +
   7.366 +instruct vmulF_reg(regF dst, regF src1, regF src2) %{
   7.367 +  predicate(UseAVX > 0);
   7.368 +  match(Set dst (MulF src1 src2));
   7.369 +
   7.370 +  format %{ "vmulss  $dst, $src1, $src2" %}
   7.371 +  ins_cost(150);
   7.372 +  ins_encode %{
   7.373 +    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   7.374 +  %}
   7.375 +  ins_pipe(pipe_slow);
   7.376 +%}
   7.377 +
   7.378 +instruct vmulF_mem(regF dst, regF src1, memory src2) %{
   7.379 +  predicate(UseAVX > 0);
   7.380 +  match(Set dst (MulF src1 (LoadF src2)));
   7.381 +
   7.382 +  format %{ "vmulss  $dst, $src1, $src2" %}
   7.383 +  ins_cost(150);
   7.384 +  ins_encode %{
   7.385 +    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   7.386 +  %}
   7.387 +  ins_pipe(pipe_slow);
   7.388 +%}
   7.389 +
   7.390 +instruct vmulF_imm(regF dst, regF src, immF con) %{
   7.391 +  predicate(UseAVX > 0);
   7.392 +  match(Set dst (MulF src con));
   7.393 +
   7.394 +  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
   7.395 +  ins_cost(150);
   7.396 +  ins_encode %{
   7.397 +    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   7.398 +  %}
   7.399 +  ins_pipe(pipe_slow);
   7.400 +%}
   7.401 +
   7.402 +instruct mulD_reg(regD dst, regD src) %{
   7.403 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.404 +  match(Set dst (MulD dst src));
   7.405 +
   7.406 +  format %{ "mulsd   $dst, $src" %}
   7.407 +  ins_cost(150);
   7.408 +  ins_encode %{
   7.409 +    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
   7.410 +  %}
   7.411 +  ins_pipe(pipe_slow);
   7.412 +%}
   7.413 +
   7.414 +instruct mulD_mem(regD dst, memory src) %{
   7.415 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.416 +  match(Set dst (MulD dst (LoadD src)));
   7.417 +
   7.418 +  format %{ "mulsd   $dst, $src" %}
   7.419 +  ins_cost(150);
   7.420 +  ins_encode %{
   7.421 +    __ mulsd($dst$$XMMRegister, $src$$Address);
   7.422 +  %}
   7.423 +  ins_pipe(pipe_slow);
   7.424 +%}
   7.425 +
   7.426 +instruct mulD_imm(regD dst, immD con) %{
   7.427 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.428 +  match(Set dst (MulD dst con));
   7.429 +  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   7.430 +  ins_cost(150);
   7.431 +  ins_encode %{
   7.432 +    __ mulsd($dst$$XMMRegister, $constantaddress($con));
   7.433 +  %}
   7.434 +  ins_pipe(pipe_slow);
   7.435 +%}
   7.436 +
   7.437 +instruct vmulD_reg(regD dst, regD src1, regD src2) %{
   7.438 +  predicate(UseAVX > 0);
   7.439 +  match(Set dst (MulD src1 src2));
   7.440 +
   7.441 +  format %{ "vmulsd  $dst, $src1, $src2" %}
   7.442 +  ins_cost(150);
   7.443 +  ins_encode %{
   7.444 +    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   7.445 +  %}
   7.446 +  ins_pipe(pipe_slow);
   7.447 +%}
   7.448 +
   7.449 +instruct vmulD_mem(regD dst, regD src1, memory src2) %{
   7.450 +  predicate(UseAVX > 0);
   7.451 +  match(Set dst (MulD src1 (LoadD src2)));
   7.452 +
   7.453 +  format %{ "vmulsd  $dst, $src1, $src2" %}
   7.454 +  ins_cost(150);
   7.455 +  ins_encode %{
   7.456 +    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   7.457 +  %}
   7.458 +  ins_pipe(pipe_slow);
   7.459 +%}
   7.460 +
   7.461 +instruct vmulD_imm(regD dst, regD src, immD con) %{
   7.462 +  predicate(UseAVX > 0);
   7.463 +  match(Set dst (MulD src con));
   7.464 +
   7.465 +  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
   7.466 +  ins_cost(150);
   7.467 +  ins_encode %{
   7.468 +    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   7.469 +  %}
   7.470 +  ins_pipe(pipe_slow);
   7.471 +%}
   7.472 +
   7.473 +instruct divF_reg(regF dst, regF src) %{
   7.474 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.475 +  match(Set dst (DivF dst src));
   7.476 +
   7.477 +  format %{ "divss   $dst, $src" %}
   7.478 +  ins_cost(150);
   7.479 +  ins_encode %{
   7.480 +    __ divss($dst$$XMMRegister, $src$$XMMRegister);
   7.481 +  %}
   7.482 +  ins_pipe(pipe_slow);
   7.483 +%}
   7.484 +
   7.485 +instruct divF_mem(regF dst, memory src) %{
   7.486 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.487 +  match(Set dst (DivF dst (LoadF src)));
   7.488 +
   7.489 +  format %{ "divss   $dst, $src" %}
   7.490 +  ins_cost(150);
   7.491 +  ins_encode %{
   7.492 +    __ divss($dst$$XMMRegister, $src$$Address);
   7.493 +  %}
   7.494 +  ins_pipe(pipe_slow);
   7.495 +%}
   7.496 +
   7.497 +instruct divF_imm(regF dst, immF con) %{
   7.498 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.499 +  match(Set dst (DivF dst con));
   7.500 +  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   7.501 +  ins_cost(150);
   7.502 +  ins_encode %{
   7.503 +    __ divss($dst$$XMMRegister, $constantaddress($con));
   7.504 +  %}
   7.505 +  ins_pipe(pipe_slow);
   7.506 +%}
   7.507 +
   7.508 +instruct vdivF_reg(regF dst, regF src1, regF src2) %{
   7.509 +  predicate(UseAVX > 0);
   7.510 +  match(Set dst (DivF src1 src2));
   7.511 +
   7.512 +  format %{ "vdivss  $dst, $src1, $src2" %}
   7.513 +  ins_cost(150);
   7.514 +  ins_encode %{
   7.515 +    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   7.516 +  %}
   7.517 +  ins_pipe(pipe_slow);
   7.518 +%}
   7.519 +
   7.520 +instruct vdivF_mem(regF dst, regF src1, memory src2) %{
   7.521 +  predicate(UseAVX > 0);
   7.522 +  match(Set dst (DivF src1 (LoadF src2)));
   7.523 +
   7.524 +  format %{ "vdivss  $dst, $src1, $src2" %}
   7.525 +  ins_cost(150);
   7.526 +  ins_encode %{
   7.527 +    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   7.528 +  %}
   7.529 +  ins_pipe(pipe_slow);
   7.530 +%}
   7.531 +
   7.532 +instruct vdivF_imm(regF dst, regF src, immF con) %{
   7.533 +  predicate(UseAVX > 0);
   7.534 +  match(Set dst (DivF src con));
   7.535 +
   7.536 +  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
   7.537 +  ins_cost(150);
   7.538 +  ins_encode %{
   7.539 +    __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   7.540 +  %}
   7.541 +  ins_pipe(pipe_slow);
   7.542 +%}
   7.543 +
   7.544 +instruct divD_reg(regD dst, regD src) %{
   7.545 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.546 +  match(Set dst (DivD dst src));
   7.547 +
   7.548 +  format %{ "divsd   $dst, $src" %}
   7.549 +  ins_cost(150);
   7.550 +  ins_encode %{
   7.551 +    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
   7.552 +  %}
   7.553 +  ins_pipe(pipe_slow);
   7.554 +%}
   7.555 +
   7.556 +instruct divD_mem(regD dst, memory src) %{
   7.557 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.558 +  match(Set dst (DivD dst (LoadD src)));
   7.559 +
   7.560 +  format %{ "divsd   $dst, $src" %}
   7.561 +  ins_cost(150);
   7.562 +  ins_encode %{
   7.563 +    __ divsd($dst$$XMMRegister, $src$$Address);
   7.564 +  %}
   7.565 +  ins_pipe(pipe_slow);
   7.566 +%}
   7.567 +
   7.568 +instruct divD_imm(regD dst, immD con) %{
   7.569 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.570 +  match(Set dst (DivD dst con));
   7.571 +  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   7.572 +  ins_cost(150);
   7.573 +  ins_encode %{
   7.574 +    __ divsd($dst$$XMMRegister, $constantaddress($con));
   7.575 +  %}
   7.576 +  ins_pipe(pipe_slow);
   7.577 +%}
   7.578 +
   7.579 +instruct vdivD_reg(regD dst, regD src1, regD src2) %{
   7.580 +  predicate(UseAVX > 0);
   7.581 +  match(Set dst (DivD src1 src2));
   7.582 +
   7.583 +  format %{ "vdivsd  $dst, $src1, $src2" %}
   7.584 +  ins_cost(150);
   7.585 +  ins_encode %{
   7.586 +    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
   7.587 +  %}
   7.588 +  ins_pipe(pipe_slow);
   7.589 +%}
   7.590 +
   7.591 +instruct vdivD_mem(regD dst, regD src1, memory src2) %{
   7.592 +  predicate(UseAVX > 0);
   7.593 +  match(Set dst (DivD src1 (LoadD src2)));
   7.594 +
   7.595 +  format %{ "vdivsd  $dst, $src1, $src2" %}
   7.596 +  ins_cost(150);
   7.597 +  ins_encode %{
   7.598 +    __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
   7.599 +  %}
   7.600 +  ins_pipe(pipe_slow);
   7.601 +%}
   7.602 +
   7.603 +instruct vdivD_imm(regD dst, regD src, immD con) %{
   7.604 +  predicate(UseAVX > 0);
   7.605 +  match(Set dst (DivD src con));
   7.606 +
   7.607 +  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
   7.608 +  ins_cost(150);
   7.609 +  ins_encode %{
   7.610 +    __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
   7.611 +  %}
   7.612 +  ins_pipe(pipe_slow);
   7.613 +%}
   7.614 +
   7.615 +instruct absF_reg(regF dst) %{
   7.616 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.617 +  match(Set dst (AbsF dst));
   7.618 +  ins_cost(150);
   7.619 +  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
   7.620 +  ins_encode %{
   7.621 +    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
   7.622 +  %}
   7.623 +  ins_pipe(pipe_slow);
   7.624 +%}
   7.625 +
   7.626 +instruct vabsF_reg(regF dst, regF src) %{
   7.627 +  predicate(UseAVX > 0);
   7.628 +  match(Set dst (AbsF src));
   7.629 +  ins_cost(150);
   7.630 +  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
   7.631 +  ins_encode %{
   7.632 +    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
   7.633 +              ExternalAddress(float_signmask()));
   7.634 +  %}
   7.635 +  ins_pipe(pipe_slow);
   7.636 +%}
   7.637 +
   7.638 +instruct absD_reg(regD dst) %{
   7.639 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.640 +  match(Set dst (AbsD dst));
   7.641 +  ins_cost(150);
   7.642 +  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
   7.643 +            "# abs double by sign masking" %}
   7.644 +  ins_encode %{
   7.645 +    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
   7.646 +  %}
   7.647 +  ins_pipe(pipe_slow);
   7.648 +%}
   7.649 +
   7.650 +instruct vabsD_reg(regD dst, regD src) %{
   7.651 +  predicate(UseAVX > 0);
   7.652 +  match(Set dst (AbsD src));
   7.653 +  ins_cost(150);
   7.654 +  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
   7.655 +            "# abs double by sign masking" %}
   7.656 +  ins_encode %{
   7.657 +    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
   7.658 +              ExternalAddress(double_signmask()));
   7.659 +  %}
   7.660 +  ins_pipe(pipe_slow);
   7.661 +%}
   7.662 +
   7.663 +instruct negF_reg(regF dst) %{
   7.664 +  predicate((UseSSE>=1) && (UseAVX == 0));
   7.665 +  match(Set dst (NegF dst));
   7.666 +  ins_cost(150);
   7.667 +  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
   7.668 +  ins_encode %{
   7.669 +    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
   7.670 +  %}
   7.671 +  ins_pipe(pipe_slow);
   7.672 +%}
   7.673 +
   7.674 +instruct vnegF_reg(regF dst, regF src) %{
   7.675 +  predicate(UseAVX > 0);
   7.676 +  match(Set dst (NegF src));
   7.677 +  ins_cost(150);
   7.678 +  format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
   7.679 +  ins_encode %{
   7.680 +    __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
   7.681 +              ExternalAddress(float_signflip()));
   7.682 +  %}
   7.683 +  ins_pipe(pipe_slow);
   7.684 +%}
   7.685 +
   7.686 +instruct negD_reg(regD dst) %{
   7.687 +  predicate((UseSSE>=2) && (UseAVX == 0));
   7.688 +  match(Set dst (NegD dst));
   7.689 +  ins_cost(150);
   7.690 +  format %{ "xorpd   $dst, [0x8000000000000000]\t"
   7.691 +            "# neg double by sign flipping" %}
   7.692 +  ins_encode %{
   7.693 +    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
   7.694 +  %}
   7.695 +  ins_pipe(pipe_slow);
   7.696 +%}
   7.697 +
   7.698 +instruct vnegD_reg(regD dst, regD src) %{
   7.699 +  predicate(UseAVX > 0);
   7.700 +  match(Set dst (NegD src));
   7.701 +  ins_cost(150);
   7.702 +  format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
   7.703 +            "# neg double by sign flipping" %}
   7.704 +  ins_encode %{
   7.705 +    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
   7.706 +              ExternalAddress(double_signflip()));
   7.707 +  %}
   7.708 +  ins_pipe(pipe_slow);
   7.709 +%}
   7.710 +
   7.711 +instruct sqrtF_reg(regF dst, regF src) %{
   7.712 +  predicate(UseSSE>=1);
   7.713 +  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
   7.714 +
   7.715 +  format %{ "sqrtss  $dst, $src" %}
   7.716 +  ins_cost(150);
   7.717 +  ins_encode %{
   7.718 +    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
   7.719 +  %}
   7.720 +  ins_pipe(pipe_slow);
   7.721 +%}
   7.722 +
   7.723 +instruct sqrtF_mem(regF dst, memory src) %{
   7.724 +  predicate(UseSSE>=1);
   7.725 +  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
   7.726 +
   7.727 +  format %{ "sqrtss  $dst, $src" %}
   7.728 +  ins_cost(150);
   7.729 +  ins_encode %{
   7.730 +    __ sqrtss($dst$$XMMRegister, $src$$Address);
   7.731 +  %}
   7.732 +  ins_pipe(pipe_slow);
   7.733 +%}
   7.734 +
   7.735 +instruct sqrtF_imm(regF dst, immF con) %{
   7.736 +  predicate(UseSSE>=1);
   7.737 +  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
   7.738 +  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   7.739 +  ins_cost(150);
   7.740 +  ins_encode %{
   7.741 +    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
   7.742 +  %}
   7.743 +  ins_pipe(pipe_slow);
   7.744 +%}
   7.745 +
   7.746 +instruct sqrtD_reg(regD dst, regD src) %{
   7.747 +  predicate(UseSSE>=2);
   7.748 +  match(Set dst (SqrtD src));
   7.749 +
   7.750 +  format %{ "sqrtsd  $dst, $src" %}
   7.751 +  ins_cost(150);
   7.752 +  ins_encode %{
   7.753 +    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
   7.754 +  %}
   7.755 +  ins_pipe(pipe_slow);
   7.756 +%}
   7.757 +
   7.758 +instruct sqrtD_mem(regD dst, memory src) %{
   7.759 +  predicate(UseSSE>=2);
   7.760 +  match(Set dst (SqrtD (LoadD src)));
   7.761 +
   7.762 +  format %{ "sqrtsd  $dst, $src" %}
   7.763 +  ins_cost(150);
   7.764 +  ins_encode %{
   7.765 +    __ sqrtsd($dst$$XMMRegister, $src$$Address);
   7.766 +  %}
   7.767 +  ins_pipe(pipe_slow);
   7.768 +%}
   7.769 +
   7.770 +instruct sqrtD_imm(regD dst, immD con) %{
   7.771 +  predicate(UseSSE>=2);
   7.772 +  match(Set dst (SqrtD con));
   7.773 +  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   7.774 +  ins_cost(150);
   7.775 +  ins_encode %{
   7.776 +    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
   7.777 +  %}
   7.778 +  ins_pipe(pipe_slow);
   7.779 +%}
   7.780 +
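One asymmetry worth noting in the file above: the sqrt rules carry no UseAVX split. sqrtss/sqrtsd already read one register and write another, so there is no destructive-destination copy to avoid. The abs/neg rules, by contrast, needed the new 3-operand vandps/vandpd/vxorps/vxorpd variants to drop the dst == src constraint; side by side, the calls look like this (a sketch mirroring the negF ins_encode bodies, float_signflip as defined in the source block):

    // UseAVX == 0 (negF_reg): flipped in place, dst must equal src
    __ xorps(xmm0, ExternalAddress(float_signflip()));
    // UseAVX > 0 (vnegF_reg): src may differ from dst
    __ vxorps(xmm0, xmm1, ExternalAddress(float_signflip()));
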
     8.1 --- a/src/cpu/x86/vm/x86_32.ad	Mon Dec 19 14:16:23 2011 -0800
     8.2 +++ b/src/cpu/x86/vm/x86_32.ad	Tue Dec 20 00:55:02 2011 -0800
     8.3 @@ -1775,7 +1775,7 @@
     8.4      emit_cc(cbuf, $secondary, $cop$$cmpcode);
     8.5    %}
     8.6  
     8.7 -  enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
     8.8 +  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
     8.9      int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    8.10      emit_d8(cbuf, op >> 8 );
    8.11      emit_d8(cbuf, op & 255);
    8.12 @@ -2063,14 +2063,14 @@
    8.13      $$$emit32$src$$constant;
    8.14    %}
    8.15  
    8.16 -  enc_class Con32F_as_bits(immF src) %{        // storeF_imm
    8.17 +  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    8.18      // Output Float immediate bits
    8.19      jfloat jf = $src$$constant;
    8.20      int    jf_as_bits = jint_cast( jf );
    8.21      emit_d32(cbuf, jf_as_bits);
    8.22    %}
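
Note: Con32FPR_as_bits (and Con32F_as_bits below) store a float immediate by emitting its raw IEEE-754 bit pattern as a 32-bit integer. A small C++ sketch of what the jint_cast step amounts to (memcpy-based type punning; hypothetical helper name):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static int32_t jint_cast_sketch(float f) {
      int32_t bits;
      std::memcpy(&bits, &f, sizeof bits);   // reinterpret the float's bits
      return bits;
    }

    int main() {
      assert(jint_cast_sketch(1.0f) == 0x3F800000);  // IEEE-754 single 1.0
      return 0;
    }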
    8.23  
    8.24 -  enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
    8.25 +  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    8.26      // Output Float immediate bits
    8.27      jfloat jf = $src$$constant;
    8.28      int    jf_as_bits = jint_cast( jf );
    8.29 @@ -2283,7 +2283,7 @@
    8.30      emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    8.31    %}
    8.32  
    8.33 -  enc_class enc_FP_store(memory mem, regD src) %{
    8.34 +  enc_class enc_FPR_store(memory mem, regDPR src) %{
    8.35      // If src is FPR1, we can just FST to store it.
    8.36      // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    8.37      int reg_encoding = 0x2; // Just store
    8.38 @@ -2432,7 +2432,7 @@
    8.39  
    8.40    // ----------------- Encodings for floating point unit -----------------
    8.41    // May leave result in FPU-TOS or FPU reg depending on opcodes
    8.42 -  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
    8.43 +  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    8.44      $$$emit8$primary;
    8.45      emit_rm(cbuf, 0x3, $secondary, $src$$reg );
    8.46    %}
    8.47 @@ -2444,17 +2444,17 @@
    8.48    %}
    8.49  
    8.50    // !!!!! equivalent to Pop_Reg_FPR
    8.51 -  enc_class Pop_Reg_D( regD dst ) %{
    8.52 +  enc_class Pop_Reg_DPR( regDPR dst ) %{
    8.53      emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    8.54      emit_d8( cbuf, 0xD8+$dst$$reg );
    8.55    %}
    8.56  
    8.57 -  enc_class Push_Reg_D( regD dst ) %{
    8.58 +  enc_class Push_Reg_DPR( regDPR dst ) %{
    8.59      emit_opcode( cbuf, 0xD9 );
    8.60      emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
    8.61    %}
    8.62  
    8.63 -  enc_class strictfp_bias1( regD dst ) %{
    8.64 +  enc_class strictfp_bias1( regDPR dst ) %{
    8.65      emit_opcode( cbuf, 0xDB );           // FLD m80real
    8.66      emit_opcode( cbuf, 0x2D );
    8.67      emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    8.68 @@ -2462,7 +2462,7 @@
    8.69      emit_opcode( cbuf, 0xC8+$dst$$reg );
    8.70    %}
    8.71  
    8.72 -  enc_class strictfp_bias2( regD dst ) %{
    8.73 +  enc_class strictfp_bias2( regDPR dst ) %{
    8.74      emit_opcode( cbuf, 0xDB );           // FLD m80real
    8.75      emit_opcode( cbuf, 0x2D );
    8.76      emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    8.77 @@ -2488,39 +2488,29 @@
    8.78      store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
    8.79    %}
    8.80  
    8.81 -  // Push the float in stackSlot 'src' onto FP-stack
    8.82 -  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
    8.83 -    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
    8.84 -  %}
    8.85 -
    8.86 -  // Push the double in stackSlot 'src' onto FP-stack
    8.87 -  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
    8.88 -    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
    8.89 -  %}
    8.90 -
    8.91    // Push FPU's TOS float to a stack-slot, and pop FPU-stack
    8.92 -  enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    8.93 +  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    8.94      store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
    8.95    %}
    8.96  
    8.97    // Same as Pop_Mem_FPR except for opcode
    8.98    // Push FPU's TOS double to a stack-slot, and pop FPU-stack
    8.99 -  enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
   8.100 +  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
   8.101      store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
   8.102    %}
   8.103  
   8.104 -  enc_class Pop_Reg_F( regF dst ) %{
   8.105 +  enc_class Pop_Reg_FPR( regFPR dst ) %{
   8.106      emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
   8.107      emit_d8( cbuf, 0xD8+$dst$$reg );
   8.108    %}
   8.109  
   8.110 -  enc_class Push_Reg_F( regF dst ) %{
   8.111 +  enc_class Push_Reg_FPR( regFPR dst ) %{
   8.112      emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
   8.113      emit_d8( cbuf, 0xC0-1+$dst$$reg );
   8.114    %}
   8.115  
   8.116    // Push FPU's float to a stack-slot, and pop FPU-stack
   8.117 -  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
   8.118 +  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
   8.119      int pop = 0x02;
   8.120      if ($src$$reg != FPR1L_enc) {
   8.121        emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
   8.122 @@ -2531,7 +2521,7 @@
   8.123    %}
   8.124  
   8.125    // Push FPU's double to a stack-slot, and pop FPU-stack
   8.126 -  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
   8.127 +  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
   8.128      int pop = 0x02;
   8.129      if ($src$$reg != FPR1L_enc) {
   8.130        emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
   8.131 @@ -2542,7 +2532,7 @@
   8.132    %}
   8.133  
   8.134    // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
   8.135 -  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
   8.136 +  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
   8.137      int pop = 0xD0 - 1; // -1 since we skip FLD
   8.138      if ($src$$reg != FPR1L_enc) {
   8.139        emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
   8.140 @@ -2554,16 +2544,7 @@
   8.141    %}
   8.142  
   8.143  
   8.144 -  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
   8.145 -    MacroAssembler masm(&cbuf);
   8.146 -    masm.fld_s(  $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
   8.147 -    masm.fmul(   $src2$$reg+0);   // value at TOS
   8.148 -    masm.fadd(   $src$$reg+0);    // value at TOS
   8.149 -    masm.fstp_d( $dst$$reg+0);    // value at TOS, popped off after store
   8.150 -  %}
   8.151 -
   8.152 -
   8.153 -  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
   8.154 +  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
   8.155      // load dst in FPR0
   8.156      emit_opcode( cbuf, 0xD9 );
   8.157      emit_d8( cbuf, 0xC0-1+$dst$$reg );
   8.158 @@ -2581,7 +2562,7 @@
   8.159      }
   8.160    %}
   8.161  
   8.162 -  enc_class Push_ModD_encoding(regXD src0, regXD src1) %{
   8.163 +  enc_class Push_ModD_encoding(regD src0, regD src1) %{
   8.164      MacroAssembler _masm(&cbuf);
   8.165      __ subptr(rsp, 8);
   8.166      __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
   8.167 @@ -2590,7 +2571,7 @@
   8.168      __ fld_d(Address(rsp, 0));
   8.169    %}
   8.170  
   8.171 -  enc_class Push_ModX_encoding(regX src0, regX src1) %{
   8.172 +  enc_class Push_ModF_encoding(regF src0, regF src1) %{
   8.173      MacroAssembler _masm(&cbuf);
   8.174      __ subptr(rsp, 4);
   8.175      __ movflt(Address(rsp, 0), $src1$$XMMRegister);
   8.176 @@ -2599,21 +2580,21 @@
   8.177      __ fld_s(Address(rsp, 0));
   8.178    %}
   8.179  
   8.180 -  enc_class Push_ResultXD(regXD dst) %{
   8.181 +  enc_class Push_ResultD(regD dst) %{
   8.182      MacroAssembler _masm(&cbuf);
   8.183      __ fstp_d(Address(rsp, 0));
   8.184      __ movdbl($dst$$XMMRegister, Address(rsp, 0));
   8.185      __ addptr(rsp, 8);
   8.186    %}
   8.187  
   8.188 -  enc_class Push_ResultX(regX dst, immI d8) %{
   8.189 +  enc_class Push_ResultF(regF dst, immI d8) %{
   8.190      MacroAssembler _masm(&cbuf);
   8.191      __ fstp_s(Address(rsp, 0));
   8.192      __ movflt($dst$$XMMRegister, Address(rsp, 0));
   8.193      __ addptr(rsp, $d8$$constant);
   8.194    %}
   8.195  
   8.196 -  enc_class Push_SrcXD(regXD src) %{
   8.197 +  enc_class Push_SrcD(regD src) %{
   8.198      MacroAssembler _masm(&cbuf);
   8.199      __ subptr(rsp, 8);
   8.200      __ movdbl(Address(rsp, 0), $src$$XMMRegister);
   8.201 @@ -2630,7 +2611,7 @@
   8.202      __ addptr(rsp, 8);
   8.203    %}
   8.204  
   8.205 -  enc_class push_xmm_to_fpr1(regXD src) %{
   8.206 +  enc_class push_xmm_to_fpr1(regD src) %{
   8.207      MacroAssembler _masm(&cbuf);
   8.208      __ movdbl(Address(rsp, 0), $src$$XMMRegister);
   8.209      __ fld_d(Address(rsp, 0));
   8.210 @@ -2675,10 +2656,7 @@
   8.211      encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
   8.212    %}
   8.213  
   8.214 -//   enc_class Pop_Reg_Mod_D( regD dst, regD src)
   8.215 -//   was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
   8.216 -
   8.217 -  enc_class Push_Result_Mod_D( regD src) %{
   8.218 +  enc_class Push_Result_Mod_DPR( regDPR src) %{
   8.219      if ($src$$reg != FPR1L_enc) {
   8.220        // fincstp
   8.221        emit_opcode (cbuf, 0xD9);
   8.222 @@ -2707,7 +2685,7 @@
   8.223      emit_opcode( cbuf, 0x05 );
   8.224    %}
   8.225  
   8.226 -  enc_class emitModD() %{
   8.227 +  enc_class emitModDPR() %{
   8.228      // fprem must be iterative
   8.229      // :: loop
   8.230      // fprem
   8.231 @@ -3587,7 +3565,7 @@
   8.232    // 'zero', store the darned double down as an int, and reset the
   8.233    // rounding mode to 'nearest'.  The hardware throws an exception which
   8.234    // patches up the correct value directly to the stack.
   8.235 -  enc_class D2I_encoding( regD src ) %{
   8.236 +  enc_class DPR2I_encoding( regDPR src ) %{
   8.237      // Flip to round-to-zero mode.  We attempted to allow invalid-op
    8.238      // exceptions here, so that a NaN or other corner-case value will
    8.239      // throw an exception (but normal values get converted at full speed).
   8.240 @@ -3630,7 +3608,7 @@
   8.241      // Carry on here...
   8.242    %}
   8.243  
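Note: the control-word dance in DPR2I_encoding above and DPR2L_encoding below implements Java's round-toward-zero narrowing conversion. A portable C++ sketch of the same mode switch (assumes <cfenv> rounding-mode support; the encodings here poke the x87 control word directly):

    #include <cassert>
    #include <cfenv>
    #include <cmath>

    int main() {
      volatile double d = -2.9;          // volatile: keep the compiler honest
      std::fesetround(FE_TOWARDZERO);    // like FLDCW addr_fpu_cntrl_wrd_trunc()
      assert(std::lrint(d) == -2);       // truncates instead of round-to-nearest
      std::fesetround(FE_TONEAREST);     // like the restoring FLDCW afterwards
      return 0;
    }
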
   8.244 -  enc_class D2L_encoding( regD src ) %{
   8.245 +  enc_class DPR2L_encoding( regDPR src ) %{
   8.246      emit_opcode(cbuf,0xD9);            // FLDCW  trunc
   8.247      emit_opcode(cbuf,0x2D);
   8.248      emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
   8.249 @@ -3672,27 +3650,27 @@
   8.250      // Carry on here...
   8.251    %}
   8.252  
   8.253 -  enc_class FMul_ST_reg( eRegF src1 ) %{
   8.254 +  enc_class FMul_ST_reg( eRegFPR src1 ) %{
   8.255      // Operand was loaded from memory into fp ST (stack top)
   8.256      // FMUL   ST,$src  /* D8 C8+i */
   8.257      emit_opcode(cbuf, 0xD8);
   8.258      emit_opcode(cbuf, 0xC8 + $src1$$reg);
   8.259    %}
   8.260  
   8.261 -  enc_class FAdd_ST_reg( eRegF src2 ) %{
   8.262 +  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
   8.263      // FADDP  ST,src2  /* D8 C0+i */
   8.264      emit_opcode(cbuf, 0xD8);
   8.265      emit_opcode(cbuf, 0xC0 + $src2$$reg);
   8.266      //could use FADDP  src2,fpST  /* DE C0+i */
   8.267    %}
   8.268  
   8.269 -  enc_class FAddP_reg_ST( eRegF src2 ) %{
   8.270 +  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
   8.271      // FADDP  src2,ST  /* DE C0+i */
   8.272      emit_opcode(cbuf, 0xDE);
   8.273      emit_opcode(cbuf, 0xC0 + $src2$$reg);
   8.274    %}
   8.275  
   8.276 -  enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
   8.277 +  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
   8.278      // Operand has been loaded into fp ST (stack top)
   8.279        // FSUB   ST,$src1
   8.280        emit_opcode(cbuf, 0xD8);
   8.281 @@ -3703,7 +3681,7 @@
   8.282        emit_opcode(cbuf, 0xF0 + $src2$$reg);
   8.283    %}
   8.284  
   8.285 -  enc_class MulFAddF (eRegF src1, eRegF src2) %{
   8.286 +  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
   8.287      // Operand was loaded from memory into fp ST (stack top)
   8.288      // FADD   ST,$src  /* D8 C0+i */
   8.289      emit_opcode(cbuf, 0xD8);
   8.290 @@ -3715,7 +3693,7 @@
   8.291    %}
   8.292  
   8.293  
   8.294 -  enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
   8.295 +  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
   8.296      // Operand was loaded from memory into fp ST (stack top)
   8.297      // FADD   ST,$src  /* D8 C0+i */
   8.298      emit_opcode(cbuf, 0xD8);
   8.299 @@ -4148,7 +4126,7 @@
   8.300  %}
   8.301  
   8.302  //Double Immediate zero
   8.303 -operand immD0() %{
   8.304 +operand immDPR0() %{
    8.305    // Do an additional (and counter-intuitive) test against NaN to work around a VC++
    8.306    // bug that generates code such that NaNs compare equal to 0.0
   8.307    predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
   8.308 @@ -4160,7 +4138,7 @@
   8.309  %}
   8.310  
   8.311  // Double Immediate one
   8.312 -operand immD1() %{
   8.313 +operand immDPR1() %{
   8.314    predicate( UseSSE<=1 && n->getd() == 1.0 );
   8.315    match(ConD);
   8.316  
   8.317 @@ -4170,7 +4148,7 @@
   8.318  %}
   8.319  
   8.320  // Double Immediate
   8.321 -operand immD() %{
   8.322 +operand immDPR() %{
   8.323    predicate(UseSSE<=1);
   8.324    match(ConD);
   8.325  
   8.326 @@ -4179,7 +4157,7 @@
   8.327    interface(CONST_INTER);
   8.328  %}
   8.329  
   8.330 -operand immXD() %{
   8.331 +operand immD() %{
   8.332    predicate(UseSSE>=2);
   8.333    match(ConD);
   8.334  
   8.335 @@ -4189,7 +4167,7 @@
   8.336  %}
   8.337  
   8.338  // Double Immediate zero
   8.339 -operand immXD0() %{
   8.340 +operand immD0() %{
    8.341    // Do an additional (and counter-intuitive) test against NaN to work around a VC++
   8.342    // bug that generates code such that NaNs compare equal to 0.0 AND do not
   8.343    // compare equal to -0.0.
   8.344 @@ -4201,7 +4179,7 @@
   8.345  %}
   8.346  
   8.347  // Float Immediate zero
   8.348 -operand immF0() %{
   8.349 +operand immFPR0() %{
   8.350    predicate(UseSSE == 0 && n->getf() == 0.0F);
   8.351    match(ConF);
   8.352  
   8.353 @@ -4211,7 +4189,7 @@
   8.354  %}
   8.355  
   8.356  // Float Immediate one
   8.357 -operand immF1() %{
   8.358 +operand immFPR1() %{
   8.359    predicate(UseSSE == 0 && n->getf() == 1.0F);
   8.360    match(ConF);
   8.361  
   8.362 @@ -4221,7 +4199,7 @@
   8.363  %}
   8.364  
   8.365  // Float Immediate
   8.366 -operand immF() %{
   8.367 +operand immFPR() %{
   8.368    predicate( UseSSE == 0 );
   8.369    match(ConF);
   8.370  
   8.371 @@ -4231,7 +4209,7 @@
   8.372  %}
   8.373  
   8.374  // Float Immediate
   8.375 -operand immXF() %{
   8.376 +operand immF() %{
   8.377    predicate(UseSSE >= 1);
   8.378    match(ConF);
   8.379  
   8.380 @@ -4241,7 +4219,7 @@
   8.381  %}
   8.382  
   8.383  // Float Immediate zero.  Zero and not -0.0
   8.384 -operand immXF0() %{
   8.385 +operand immF0() %{
   8.386    predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
   8.387    match(ConF);
   8.388  
   8.389 @@ -4617,7 +4595,7 @@
   8.390  %}
   8.391  
   8.392  // Float register operands
   8.393 -operand regD() %{
   8.394 +operand regDPR() %{
   8.395    predicate( UseSSE < 2 );
   8.396    constraint(ALLOC_IN_RC(dbl_reg));
   8.397    match(RegD);
   8.398 @@ -4627,7 +4605,7 @@
   8.399    interface(REG_INTER);
   8.400  %}
   8.401  
   8.402 -operand regDPR1(regD reg) %{
   8.403 +operand regDPR1(regDPR reg) %{
   8.404    predicate( UseSSE < 2 );
   8.405    constraint(ALLOC_IN_RC(dbl_reg0));
   8.406    match(reg);
   8.407 @@ -4635,7 +4613,7 @@
   8.408    interface(REG_INTER);
   8.409  %}
   8.410  
   8.411 -operand regDPR2(regD reg) %{
   8.412 +operand regDPR2(regDPR reg) %{
   8.413    predicate( UseSSE < 2 );
   8.414    constraint(ALLOC_IN_RC(dbl_reg1));
   8.415    match(reg);
   8.416 @@ -4643,7 +4621,7 @@
   8.417    interface(REG_INTER);
   8.418  %}
   8.419  
   8.420 -operand regnotDPR1(regD reg) %{
   8.421 +operand regnotDPR1(regDPR reg) %{
   8.422    predicate( UseSSE < 2 );
   8.423    constraint(ALLOC_IN_RC(dbl_notreg0));
   8.424    match(reg);
   8.425 @@ -4652,18 +4630,18 @@
   8.426  %}
   8.427  
   8.428  // XMM Double register operands
   8.429 -operand regXD() %{
   8.430 +operand regD() %{
   8.431    predicate( UseSSE>=2 );
   8.432    constraint(ALLOC_IN_RC(xdb_reg));
   8.433    match(RegD);
   8.434 -  match(regXD6);
   8.435 -  match(regXD7);
   8.436 +  match(regD6);
   8.437 +  match(regD7);
   8.438    format %{ %}
   8.439    interface(REG_INTER);
   8.440  %}
   8.441  
   8.442  // XMM6 double register operands
   8.443 -operand regXD6(regXD reg) %{
   8.444 +operand regD6(regD reg) %{
   8.445    predicate( UseSSE>=2 );
   8.446    constraint(ALLOC_IN_RC(xdb_reg6));
   8.447    match(reg);
   8.448 @@ -4672,7 +4650,7 @@
   8.449  %}
   8.450  
   8.451  // XMM7 double register operands
   8.452 -operand regXD7(regXD reg) %{
   8.453 +operand regD7(regD reg) %{
   8.454    predicate( UseSSE>=2 );
   8.455    constraint(ALLOC_IN_RC(xdb_reg7));
   8.456    match(reg);
   8.457 @@ -4681,7 +4659,7 @@
   8.458  %}
   8.459  
   8.460  // Float register operands
   8.461 -operand regF() %{
   8.462 +operand regFPR() %{
   8.463    predicate( UseSSE < 2 );
   8.464    constraint(ALLOC_IN_RC(flt_reg));
   8.465    match(RegF);
   8.466 @@ -4691,7 +4669,7 @@
   8.467  %}
   8.468  
   8.469  // Float register operands
   8.470 -operand regFPR1(regF reg) %{
   8.471 +operand regFPR1(regFPR reg) %{
   8.472    predicate( UseSSE < 2 );
   8.473    constraint(ALLOC_IN_RC(flt_reg0));
   8.474    match(reg);
   8.475 @@ -4700,7 +4678,7 @@
   8.476  %}
   8.477  
   8.478  // XMM register operands
   8.479 -operand regX() %{
   8.480 +operand regF() %{
   8.481    predicate( UseSSE>=1 );
   8.482    constraint(ALLOC_IN_RC(xmm_reg));
   8.483    match(RegF);
   8.484 @@ -5444,7 +5422,7 @@
   8.485  %}
   8.486  
   8.487  // Conditional move double reg-reg
   8.488 -pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
   8.489 +pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
   8.490      single_instruction;
   8.491      dst    : S4(write);
   8.492      src    : S3(read);
   8.493 @@ -5453,7 +5431,7 @@
   8.494  %}
   8.495  
   8.496  // Float reg-reg operation
   8.497 -pipe_class fpu_reg(regD dst) %{
   8.498 +pipe_class fpu_reg(regDPR dst) %{
   8.499      instruction_count(2);
   8.500      dst    : S3(read);
   8.501      DECODE : S0(2);     // any 2 decoders
   8.502 @@ -5461,7 +5439,7 @@
   8.503  %}
   8.504  
   8.505  // Float reg-reg operation
   8.506 -pipe_class fpu_reg_reg(regD dst, regD src) %{
   8.507 +pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
   8.508      instruction_count(2);
   8.509      dst    : S4(write);
   8.510      src    : S3(read);
   8.511 @@ -5470,7 +5448,7 @@
   8.512  %}
   8.513  
   8.514  // Float reg-reg operation
   8.515 -pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
   8.516 +pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
   8.517      instruction_count(3);
   8.518      dst    : S4(write);
   8.519      src1   : S3(read);
   8.520 @@ -5480,7 +5458,7 @@
   8.521  %}
   8.522  
   8.523  // Float reg-reg operation
   8.524 -pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
   8.525 +pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
   8.526      instruction_count(4);
   8.527      dst    : S4(write);
   8.528      src1   : S3(read);
   8.529 @@ -5491,7 +5469,7 @@
   8.530  %}
   8.531  
   8.532  // Float reg-reg operation
   8.533 -pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
   8.534 +pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
   8.535      instruction_count(4);
   8.536      dst    : S4(write);
   8.537      src1   : S3(read);
   8.538 @@ -5504,7 +5482,7 @@
   8.539  %}
   8.540  
   8.541  // Float reg-mem operation
   8.542 -pipe_class fpu_reg_mem(regD dst, memory mem) %{
   8.543 +pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
   8.544      instruction_count(2);
   8.545      dst    : S5(write);
   8.546      mem    : S3(read);
   8.547 @@ -5515,7 +5493,7 @@
   8.548  %}
   8.549  
   8.550  // Float reg-mem operation
   8.551 -pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
   8.552 +pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
   8.553      instruction_count(3);
   8.554      dst    : S5(write);
   8.555      src1   : S3(read);
   8.556 @@ -5527,7 +5505,7 @@
   8.557  %}
   8.558  
   8.559  // Float mem-reg operation
   8.560 -pipe_class fpu_mem_reg(memory mem, regD src) %{
   8.561 +pipe_class fpu_mem_reg(memory mem, regDPR src) %{
   8.562      instruction_count(2);
   8.563      src    : S5(read);
   8.564      mem    : S3(read);
   8.565 @@ -5537,7 +5515,7 @@
   8.566      MEM    : S3;        // any mem
   8.567  %}
   8.568  
   8.569 -pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
   8.570 +pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
   8.571      instruction_count(3);
   8.572      src1   : S3(read);
   8.573      src2   : S3(read);
   8.574 @@ -5548,7 +5526,7 @@
   8.575      MEM    : S3;        // any mem
   8.576  %}
   8.577  
   8.578 -pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
   8.579 +pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
   8.580      instruction_count(3);
   8.581      src1   : S3(read);
   8.582      src2   : S3(read);
   8.583 @@ -5577,7 +5555,7 @@
   8.584      MEM    : S3(3);     // any mem
   8.585  %}
   8.586  
   8.587 -pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
   8.588 +pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
   8.589      instruction_count(3);
   8.590      src1   : S4(read);
   8.591      mem    : S4(read);
   8.592 @@ -5588,7 +5566,7 @@
   8.593  %}
   8.594  
   8.595  // Float load constant
   8.596 -pipe_class fpu_reg_con(regD dst) %{
   8.597 +pipe_class fpu_reg_con(regDPR dst) %{
   8.598      instruction_count(2);
   8.599      dst    : S5(write);
   8.600      D0     : S0;        // big decoder only for the load
   8.601 @@ -5598,7 +5576,7 @@
   8.602  %}
   8.603  
   8.604  // Float load constant
   8.605 -pipe_class fpu_reg_reg_con(regD dst, regD src) %{
   8.606 +pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
   8.607      instruction_count(3);
   8.608      dst    : S5(write);
   8.609      src    : S3(read);
   8.610 @@ -6313,7 +6291,7 @@
   8.611    ins_pipe( fpu_reg_mem );
   8.612  %}
   8.613  
   8.614 -instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
   8.615 +instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
   8.616    predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
   8.617    match(Set dst (LoadL mem));
   8.618    effect(TEMP tmp);
   8.619 @@ -6327,7 +6305,7 @@
   8.620    ins_pipe( pipe_slow );
   8.621  %}
   8.622  
   8.623 -instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
   8.624 +instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
   8.625    predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
   8.626    match(Set dst (LoadL mem));
   8.627    effect(TEMP tmp);
   8.628 @@ -6380,7 +6358,7 @@
   8.629  %}
   8.630  
   8.631  // Load Double
   8.632 -instruct loadD(regD dst, memory mem) %{
   8.633 +instruct loadDPR(regDPR dst, memory mem) %{
   8.634    predicate(UseSSE<=1);
   8.635    match(Set dst (LoadD mem));
   8.636  
   8.637 @@ -6389,12 +6367,12 @@
   8.638              "FSTP   $dst" %}
   8.639    opcode(0xDD);               /* DD /0 */
   8.640    ins_encode( OpcP, RMopc_Mem(0x00,mem),
   8.641 -              Pop_Reg_D(dst) );
   8.642 +              Pop_Reg_DPR(dst) );
   8.643    ins_pipe( fpu_reg_mem );
   8.644  %}
   8.645  
   8.646  // Load Double to XMM
   8.647 -instruct loadXD(regXD dst, memory mem) %{
   8.648 +instruct loadD(regD dst, memory mem) %{
   8.649    predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
   8.650    match(Set dst (LoadD mem));
   8.651    ins_cost(145);
   8.652 @@ -6405,7 +6383,7 @@
   8.653    ins_pipe( pipe_slow );
   8.654  %}
   8.655  
   8.656 -instruct loadXD_partial(regXD dst, memory mem) %{
   8.657 +instruct loadD_partial(regD dst, memory mem) %{
   8.658    predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
   8.659    match(Set dst (LoadD mem));
   8.660    ins_cost(145);
   8.661 @@ -6418,7 +6396,7 @@
   8.662  
   8.663  // Load to XMM register (single-precision floating point)
   8.664  // MOVSS instruction
   8.665 -instruct loadX(regX dst, memory mem) %{
   8.666 +instruct loadF(regF dst, memory mem) %{
   8.667    predicate(UseSSE>=1);
   8.668    match(Set dst (LoadF mem));
   8.669    ins_cost(145);
   8.670 @@ -6430,7 +6408,7 @@
   8.671  %}
   8.672  
   8.673  // Load Float
   8.674 -instruct loadF(regF dst, memory mem) %{
   8.675 +instruct loadFPR(regFPR dst, memory mem) %{
   8.676    predicate(UseSSE==0);
   8.677    match(Set dst (LoadF mem));
   8.678  
   8.679 @@ -6439,12 +6417,12 @@
   8.680              "FSTP   $dst" %}
   8.681    opcode(0xD9);               /* D9 /0 */
   8.682    ins_encode( OpcP, RMopc_Mem(0x00,mem),
   8.683 -              Pop_Reg_F(dst) );
   8.684 +              Pop_Reg_FPR(dst) );
   8.685    ins_pipe( fpu_reg_mem );
   8.686  %}
   8.687  
   8.688  // Load Aligned Packed Byte to XMM register
   8.689 -instruct loadA8B(regXD dst, memory mem) %{
   8.690 +instruct loadA8B(regD dst, memory mem) %{
   8.691    predicate(UseSSE>=1);
   8.692    match(Set dst (Load8B mem));
   8.693    ins_cost(125);
   8.694 @@ -6456,7 +6434,7 @@
   8.695  %}
   8.696  
   8.697  // Load Aligned Packed Short to XMM register
   8.698 -instruct loadA4S(regXD dst, memory mem) %{
   8.699 +instruct loadA4S(regD dst, memory mem) %{
   8.700    predicate(UseSSE>=1);
   8.701    match(Set dst (Load4S mem));
   8.702    ins_cost(125);
   8.703 @@ -6468,7 +6446,7 @@
   8.704  %}
   8.705  
   8.706  // Load Aligned Packed Char to XMM register
   8.707 -instruct loadA4C(regXD dst, memory mem) %{
   8.708 +instruct loadA4C(regD dst, memory mem) %{
   8.709    predicate(UseSSE>=1);
   8.710    match(Set dst (Load4C mem));
   8.711    ins_cost(125);
   8.712 @@ -6480,7 +6458,7 @@
   8.713  %}
   8.714  
   8.715  // Load Aligned Packed Integer to XMM register
   8.716 -instruct load2IU(regXD dst, memory mem) %{
   8.717 +instruct load2IU(regD dst, memory mem) %{
   8.718    predicate(UseSSE>=1);
   8.719    match(Set dst (Load2I mem));
   8.720    ins_cost(125);
   8.721 @@ -6492,7 +6470,7 @@
   8.722  %}
   8.723  
   8.724  // Load Aligned Packed Single to XMM
   8.725 -instruct loadA2F(regXD dst, memory mem) %{
   8.726 +instruct loadA2F(regD dst, memory mem) %{
   8.727    predicate(UseSSE>=1);
   8.728    match(Set dst (Load2F mem));
   8.729    ins_cost(145);
   8.730 @@ -6606,58 +6584,58 @@
   8.731    ins_pipe( ialu_reg_long );
   8.732  %}
   8.733  
   8.734 +// The instruction usage is guarded by predicate in operand immFPR().
   8.735 +instruct loadConFPR(regFPR dst, immFPR con) %{
   8.736 +  match(Set dst con);
   8.737 +  ins_cost(125);
   8.738 +  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
   8.739 +            "FSTP   $dst" %}
   8.740 +  ins_encode %{
   8.741 +    __ fld_s($constantaddress($con));
   8.742 +    __ fstp_d($dst$$reg);
   8.743 +  %}
   8.744 +  ins_pipe(fpu_reg_con);
   8.745 +%}
   8.746 +
   8.747 +// The instruction usage is guarded by predicate in operand immFPR0().
   8.748 +instruct loadConFPR0(regFPR dst, immFPR0 con) %{
   8.749 +  match(Set dst con);
   8.750 +  ins_cost(125);
   8.751 +  format %{ "FLDZ   ST\n\t"
   8.752 +            "FSTP   $dst" %}
   8.753 +  ins_encode %{
   8.754 +    __ fldz();
   8.755 +    __ fstp_d($dst$$reg);
   8.756 +  %}
   8.757 +  ins_pipe(fpu_reg_con);
   8.758 +%}
   8.759 +
   8.760 +// The instruction usage is guarded by predicate in operand immFPR1().
   8.761 +instruct loadConFPR1(regFPR dst, immFPR1 con) %{
   8.762 +  match(Set dst con);
   8.763 +  ins_cost(125);
   8.764 +  format %{ "FLD1   ST\n\t"
   8.765 +            "FSTP   $dst" %}
   8.766 +  ins_encode %{
   8.767 +    __ fld1();
   8.768 +    __ fstp_d($dst$$reg);
   8.769 +  %}
   8.770 +  ins_pipe(fpu_reg_con);
   8.771 +%}
   8.772 +
   8.773  // The instruction usage is guarded by predicate in operand immF().
   8.774  instruct loadConF(regF dst, immF con) %{
   8.775    match(Set dst con);
   8.776    ins_cost(125);
   8.777 -  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
   8.778 -            "FSTP   $dst" %}
   8.779 -  ins_encode %{
   8.780 -    __ fld_s($constantaddress($con));
   8.781 -    __ fstp_d($dst$$reg);
   8.782 -  %}
   8.783 -  ins_pipe(fpu_reg_con);
   8.784 +  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
   8.785 +  ins_encode %{
   8.786 +    __ movflt($dst$$XMMRegister, $constantaddress($con));
   8.787 +  %}
   8.788 +  ins_pipe(pipe_slow);
   8.789  %}
   8.790  
   8.791  // The instruction usage is guarded by predicate in operand immF0().
   8.792 -instruct loadConF0(regF dst, immF0 con) %{
   8.793 -  match(Set dst con);
   8.794 -  ins_cost(125);
   8.795 -  format %{ "FLDZ   ST\n\t"
   8.796 -            "FSTP   $dst" %}
   8.797 -  ins_encode %{
   8.798 -    __ fldz();
   8.799 -    __ fstp_d($dst$$reg);
   8.800 -  %}
   8.801 -  ins_pipe(fpu_reg_con);
   8.802 -%}
   8.803 -
   8.804 -// The instruction usage is guarded by predicate in operand immF1().
   8.805 -instruct loadConF1(regF dst, immF1 con) %{
   8.806 -  match(Set dst con);
   8.807 -  ins_cost(125);
   8.808 -  format %{ "FLD1   ST\n\t"
   8.809 -            "FSTP   $dst" %}
   8.810 -  ins_encode %{
   8.811 -    __ fld1();
   8.812 -    __ fstp_d($dst$$reg);
   8.813 -  %}
   8.814 -  ins_pipe(fpu_reg_con);
   8.815 -%}
   8.816 -
   8.817 -// The instruction usage is guarded by predicate in operand immXF().
   8.818 -instruct loadConX(regX dst, immXF con) %{
   8.819 -  match(Set dst con);
   8.820 -  ins_cost(125);
   8.821 -  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
   8.822 -  ins_encode %{
   8.823 -    __ movflt($dst$$XMMRegister, $constantaddress($con));
   8.824 -  %}
   8.825 -  ins_pipe(pipe_slow);
   8.826 -%}
   8.827 -
   8.828 -// The instruction usage is guarded by predicate in operand immXF0().
   8.829 -instruct loadConX0(regX dst, immXF0 src) %{
   8.830 +instruct loadConF0(regF dst, immF0 src) %{
   8.831    match(Set dst src);
   8.832    ins_cost(100);
   8.833    format %{ "XORPS  $dst,$dst\t# float 0.0" %}
   8.834 @@ -6667,61 +6645,61 @@
   8.835    ins_pipe(pipe_slow);
   8.836  %}
   8.837  
   8.838 +// The instruction usage is guarded by predicate in operand immDPR().
   8.839 +instruct loadConDPR(regDPR dst, immDPR con) %{
   8.840 +  match(Set dst con);
   8.841 +  ins_cost(125);
   8.842 +
   8.843 +  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
   8.844 +            "FSTP   $dst" %}
   8.845 +  ins_encode %{
   8.846 +    __ fld_d($constantaddress($con));
   8.847 +    __ fstp_d($dst$$reg);
   8.848 +  %}
   8.849 +  ins_pipe(fpu_reg_con);
   8.850 +%}
   8.851 +
   8.852 +// The instruction usage is guarded by predicate in operand immDPR0().
   8.853 +instruct loadConDPR0(regDPR dst, immDPR0 con) %{
   8.854 +  match(Set dst con);
   8.855 +  ins_cost(125);
   8.856 +
   8.857 +  format %{ "FLDZ   ST\n\t"
   8.858 +            "FSTP   $dst" %}
   8.859 +  ins_encode %{
   8.860 +    __ fldz();
   8.861 +    __ fstp_d($dst$$reg);
   8.862 +  %}
   8.863 +  ins_pipe(fpu_reg_con);
   8.864 +%}
   8.865 +
   8.866 +// The instruction usage is guarded by predicate in operand immDPR1().
   8.867 +instruct loadConDPR1(regDPR dst, immDPR1 con) %{
   8.868 +  match(Set dst con);
   8.869 +  ins_cost(125);
   8.870 +
   8.871 +  format %{ "FLD1   ST\n\t"
   8.872 +            "FSTP   $dst" %}
   8.873 +  ins_encode %{
   8.874 +    __ fld1();
   8.875 +    __ fstp_d($dst$$reg);
   8.876 +  %}
   8.877 +  ins_pipe(fpu_reg_con);
   8.878 +%}
   8.879 +
   8.880  // The instruction usage is guarded by predicate in operand immD().
   8.881  instruct loadConD(regD dst, immD con) %{
   8.882    match(Set dst con);
   8.883    ins_cost(125);
   8.884 -
   8.885 -  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
   8.886 -            "FSTP   $dst" %}
   8.887 -  ins_encode %{
   8.888 -    __ fld_d($constantaddress($con));
   8.889 -    __ fstp_d($dst$$reg);
   8.890 -  %}
   8.891 -  ins_pipe(fpu_reg_con);
   8.892 +  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
   8.893 +  ins_encode %{
   8.894 +    __ movdbl($dst$$XMMRegister, $constantaddress($con));
   8.895 +  %}
   8.896 +  ins_pipe(pipe_slow);
   8.897  %}
   8.898  
   8.899  // The instruction usage is guarded by predicate in operand immD0().
   8.900 -instruct loadConD0(regD dst, immD0 con) %{
   8.901 -  match(Set dst con);
   8.902 -  ins_cost(125);
   8.903 -
   8.904 -  format %{ "FLDZ   ST\n\t"
   8.905 -            "FSTP   $dst" %}
   8.906 -  ins_encode %{
   8.907 -    __ fldz();
   8.908 -    __ fstp_d($dst$$reg);
   8.909 -  %}
   8.910 -  ins_pipe(fpu_reg_con);
   8.911 -%}
   8.912 -
   8.913 -// The instruction usage is guarded by predicate in operand immD1().
   8.914 -instruct loadConD1(regD dst, immD1 con) %{
   8.915 -  match(Set dst con);
   8.916 -  ins_cost(125);
   8.917 -
   8.918 -  format %{ "FLD1   ST\n\t"
   8.919 -            "FSTP   $dst" %}
   8.920 -  ins_encode %{
   8.921 -    __ fld1();
   8.922 -    __ fstp_d($dst$$reg);
   8.923 -  %}
   8.924 -  ins_pipe(fpu_reg_con);
   8.925 -%}
   8.926 -
   8.927 -// The instruction usage is guarded by predicate in operand immXD().
   8.928 -instruct loadConXD(regXD dst, immXD con) %{
   8.929 -  match(Set dst con);
   8.930 -  ins_cost(125);
   8.931 -  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
   8.932 -  ins_encode %{
   8.933 -    __ movdbl($dst$$XMMRegister, $constantaddress($con));
   8.934 -  %}
   8.935 -  ins_pipe(pipe_slow);
   8.936 -%}
   8.937 -
   8.938 -// The instruction usage is guarded by predicate in operand immXD0().
   8.939 -instruct loadConXD0(regXD dst, immXD0 src) %{
   8.940 +instruct loadConD0(regD dst, immD0 src) %{
   8.941    match(Set dst src);
   8.942    ins_cost(100);
   8.943    format %{ "XORPD  $dst,$dst\t# double 0.0" %}
   8.944 @@ -6765,7 +6743,7 @@
   8.945  %}
   8.946  
   8.947  // Load Stack Slot
   8.948 -instruct loadSSF(regF dst, stackSlotF src) %{
   8.949 +instruct loadSSF(regFPR dst, stackSlotF src) %{
   8.950    match(Set dst src);
   8.951    ins_cost(125);
   8.952  
   8.953 @@ -6773,12 +6751,12 @@
   8.954              "FSTP   $dst" %}
   8.955    opcode(0xD9);               /* D9 /0, FLD m32real */
   8.956    ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
   8.957 -              Pop_Reg_F(dst) );
   8.958 +              Pop_Reg_FPR(dst) );
   8.959    ins_pipe( fpu_reg_mem );
   8.960  %}
   8.961  
   8.962  // Load Stack Slot
   8.963 -instruct loadSSD(regD dst, stackSlotD src) %{
   8.964 +instruct loadSSD(regDPR dst, stackSlotD src) %{
   8.965    match(Set dst src);
   8.966    ins_cost(125);
   8.967  
   8.968 @@ -6786,7 +6764,7 @@
   8.969              "FSTP   $dst" %}
   8.970    opcode(0xDD);               /* DD /0, FLD m64real */
   8.971    ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
   8.972 -              Pop_Reg_D(dst) );
   8.973 +              Pop_Reg_DPR(dst) );
   8.974    ins_pipe( fpu_reg_mem );
   8.975  %}
   8.976  
   8.977 @@ -7021,7 +6999,7 @@
   8.978    ins_pipe( fpu_reg_mem );
   8.979  %}
   8.980  
   8.981 -instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
   8.982 +instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
   8.983    predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
   8.984    match(Set mem (StoreL mem src));
   8.985    effect( TEMP tmp, KILL cr );
   8.986 @@ -7037,7 +7015,7 @@
   8.987    ins_pipe( pipe_slow );
   8.988  %}
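
Note: storeLX_volatile obtains an atomic 64-bit store on 32-bit x86 by bouncing the long through an XMM register, since one movdbl is a single 8-byte memory access. A sketch of the guarantee it provides, with std::atomic as the portable stand-in (illustration only, not the HotSpot mechanism):

    #include <atomic>
    #include <cassert>

    int main() {
      std::atomic<long long> v{0};          // 8-byte value, must never tear
      v.store(0x0102030405060708LL);        // one indivisible 64-bit store
      assert(v.load() == 0x0102030405060708LL);
      return 0;
    }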
   8.989  
   8.990 -instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
   8.991 +instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
   8.992    predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
   8.993    match(Set mem (StoreL mem src));
   8.994    effect( TEMP tmp2 , TEMP tmp, KILL cr );
   8.995 @@ -7115,7 +7093,7 @@
   8.996  %}
   8.997  
   8.998  // Store Aligned Packed Byte XMM register to memory
   8.999 -instruct storeA8B(memory mem, regXD src) %{
  8.1000 +instruct storeA8B(memory mem, regD src) %{
  8.1001    predicate(UseSSE>=1);
  8.1002    match(Set mem (Store8B mem src));
  8.1003    ins_cost(145);
  8.1004 @@ -7127,7 +7105,7 @@
  8.1005  %}
  8.1006  
  8.1007  // Store Aligned Packed Char/Short XMM register to memory
  8.1008 -instruct storeA4C(memory mem, regXD src) %{
  8.1009 +instruct storeA4C(memory mem, regD src) %{
  8.1010    predicate(UseSSE>=1);
  8.1011    match(Set mem (Store4C mem src));
  8.1012    ins_cost(145);
  8.1013 @@ -7139,7 +7117,7 @@
  8.1014  %}
  8.1015  
  8.1016  // Store Aligned Packed Integer XMM register to memory
  8.1017 -instruct storeA2I(memory mem, regXD src) %{
  8.1018 +instruct storeA2I(memory mem, regD src) %{
  8.1019    predicate(UseSSE>=1);
  8.1020    match(Set mem (Store2I mem src));
  8.1021    ins_cost(145);
  8.1022 @@ -7162,32 +7140,32 @@
  8.1023  %}
  8.1024  
  8.1025  // Store Double
  8.1026 -instruct storeD( memory mem, regDPR1 src) %{
  8.1027 +instruct storeDPR( memory mem, regDPR1 src) %{
  8.1028    predicate(UseSSE<=1);
  8.1029    match(Set mem (StoreD mem src));
  8.1030  
  8.1031    ins_cost(100);
  8.1032    format %{ "FST_D  $mem,$src" %}
  8.1033    opcode(0xDD);       /* DD /2 */
  8.1034 -  ins_encode( enc_FP_store(mem,src) );
  8.1035 +  ins_encode( enc_FPR_store(mem,src) );
  8.1036    ins_pipe( fpu_mem_reg );
  8.1037  %}
  8.1038  
  8.1039  // Store double does rounding on x86
  8.1040 -instruct storeD_rounded( memory mem, regDPR1 src) %{
  8.1041 +instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  8.1042    predicate(UseSSE<=1);
  8.1043    match(Set mem (StoreD mem (RoundDouble src)));
  8.1044  
  8.1045    ins_cost(100);
  8.1046    format %{ "FST_D  $mem,$src\t# round" %}
  8.1047    opcode(0xDD);       /* DD /2 */
  8.1048 -  ins_encode( enc_FP_store(mem,src) );
  8.1049 +  ins_encode( enc_FPR_store(mem,src) );
  8.1050    ins_pipe( fpu_mem_reg );
  8.1051  %}
  8.1052  
  8.1053  // Store XMM register to memory (double-precision floating points)
  8.1054  // MOVSD instruction
  8.1055 -instruct storeXD(memory mem, regXD src) %{
  8.1056 +instruct storeD(memory mem, regD src) %{
  8.1057    predicate(UseSSE>=2);
  8.1058    match(Set mem (StoreD mem src));
  8.1059    ins_cost(95);
  8.1060 @@ -7200,7 +7178,7 @@
  8.1061  
  8.1062  // Store XMM register to memory (single-precision floating point)
  8.1063  // MOVSS instruction
  8.1064 -instruct storeX(memory mem, regX src) %{
  8.1065 +instruct storeF(memory mem, regF src) %{
  8.1066    predicate(UseSSE>=1);
  8.1067    match(Set mem (StoreF mem src));
  8.1068    ins_cost(95);
  8.1069 @@ -7212,7 +7190,7 @@
  8.1070  %}
  8.1071  
  8.1072  // Store Aligned Packed Single Float XMM register to memory
  8.1073 -instruct storeA2F(memory mem, regXD src) %{
  8.1074 +instruct storeA2F(memory mem, regD src) %{
  8.1075    predicate(UseSSE>=1);
  8.1076    match(Set mem (Store2F mem src));
  8.1077    ins_cost(145);
  8.1078 @@ -7224,42 +7202,54 @@
  8.1079  %}
  8.1080  
  8.1081  // Store Float
  8.1082 -instruct storeF( memory mem, regFPR1 src) %{
  8.1083 +instruct storeFPR( memory mem, regFPR1 src) %{
  8.1084    predicate(UseSSE==0);
  8.1085    match(Set mem (StoreF mem src));
  8.1086  
  8.1087    ins_cost(100);
  8.1088    format %{ "FST_S  $mem,$src" %}
  8.1089    opcode(0xD9);       /* D9 /2 */
  8.1090 -  ins_encode( enc_FP_store(mem,src) );
  8.1091 +  ins_encode( enc_FPR_store(mem,src) );
  8.1092    ins_pipe( fpu_mem_reg );
  8.1093  %}
  8.1094  
  8.1095  // Store Float does rounding on x86
  8.1096 -instruct storeF_rounded( memory mem, regFPR1 src) %{
  8.1097 +instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  8.1098    predicate(UseSSE==0);
  8.1099    match(Set mem (StoreF mem (RoundFloat src)));
  8.1100  
  8.1101    ins_cost(100);
  8.1102    format %{ "FST_S  $mem,$src\t# round" %}
  8.1103    opcode(0xD9);       /* D9 /2 */
  8.1104 -  ins_encode( enc_FP_store(mem,src) );
  8.1105 +  ins_encode( enc_FPR_store(mem,src) );
  8.1106    ins_pipe( fpu_mem_reg );
  8.1107  %}
  8.1108  
  8.1109  // Store Float does rounding on x86
  8.1110 -instruct storeF_Drounded( memory mem, regDPR1 src) %{
  8.1111 +instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  8.1112    predicate(UseSSE<=1);
  8.1113    match(Set mem (StoreF mem (ConvD2F src)));
  8.1114  
  8.1115    ins_cost(100);
  8.1116    format %{ "FST_S  $mem,$src\t# D-round" %}
  8.1117    opcode(0xD9);       /* D9 /2 */
  8.1118 -  ins_encode( enc_FP_store(mem,src) );
  8.1119 +  ins_encode( enc_FPR_store(mem,src) );
  8.1120    ins_pipe( fpu_mem_reg );
  8.1121  %}
  8.1122  
  8.1123  // Store immediate Float value (it is faster than store from FPU register)
  8.1124 +// The instruction usage is guarded by predicate in operand immFPR().
  8.1125 +instruct storeFPR_imm( memory mem, immFPR src) %{
  8.1126 +  match(Set mem (StoreF mem src));
  8.1127 +
  8.1128 +  ins_cost(50);
  8.1129 +  format %{ "MOV    $mem,$src\t# store float" %}
  8.1130 +  opcode(0xC7);               /* C7 /0 */
  8.1131 +  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
  8.1132 +  ins_pipe( ialu_mem_imm );
  8.1133 +%}
  8.1134 +
  8.1135 +// Store immediate Float value (it is faster than store from XMM register)
  8.1136  // The instruction usage is guarded by predicate in operand immF().
  8.1137  instruct storeF_imm( memory mem, immF src) %{
  8.1138    match(Set mem (StoreF mem src));
  8.1139 @@ -7271,18 +7261,6 @@
  8.1140    ins_pipe( ialu_mem_imm );
  8.1141  %}
  8.1142  
  8.1143 -// Store immediate Float value (it is faster than store from XMM register)
  8.1144 -// The instruction usage is guarded by predicate in operand immXF().
  8.1145 -instruct storeX_imm( memory mem, immXF src) %{
  8.1146 -  match(Set mem (StoreF mem src));
  8.1147 -
  8.1148 -  ins_cost(50);
  8.1149 -  format %{ "MOV    $mem,$src\t# store float" %}
  8.1150 -  opcode(0xC7);               /* C7 /0 */
  8.1151 -  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32XF_as_bits( src ));
  8.1152 -  ins_pipe( ialu_mem_imm );
  8.1153 -%}
  8.1154 -
  8.1155  // Store Integer to stack slot
  8.1156  instruct storeSSI(stackSlotI dst, eRegI src) %{
  8.1157    match(Set dst src);
  8.1158 @@ -7577,29 +7555,29 @@
  8.1159  //%}
  8.1160  
  8.1161  // Conditional move
  8.1162 -instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
  8.1163 +instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  8.1164    predicate(UseSSE<=1);
  8.1165    match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  8.1166    ins_cost(200);
  8.1167    format %{ "FCMOV$cop $dst,$src\t# double" %}
  8.1168    opcode(0xDA);
  8.1169 -  ins_encode( enc_cmov_d(cop,src) );
  8.1170 -  ins_pipe( pipe_cmovD_reg );
  8.1171 +  ins_encode( enc_cmov_dpr(cop,src) );
  8.1172 +  ins_pipe( pipe_cmovDPR_reg );
  8.1173  %}
  8.1174  
  8.1175  // Conditional move
  8.1176 -instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
  8.1177 +instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  8.1178    predicate(UseSSE==0);
  8.1179    match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  8.1180    ins_cost(200);
  8.1181    format %{ "FCMOV$cop $dst,$src\t# float" %}
  8.1182    opcode(0xDA);
  8.1183 -  ins_encode( enc_cmov_d(cop,src) );
  8.1184 -  ins_pipe( pipe_cmovD_reg );
  8.1185 +  ins_encode( enc_cmov_dpr(cop,src) );
  8.1186 +  ins_pipe( pipe_cmovDPR_reg );
  8.1187  %}
  8.1188  
  8.1189  // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
  8.1190 -instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  8.1191 +instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  8.1192    predicate(UseSSE<=1);
  8.1193    match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  8.1194    ins_cost(200);
  8.1195 @@ -7607,12 +7585,12 @@
  8.1196              "MOV    $dst,$src\t# double\n"
  8.1197        "skip:" %}
  8.1198    opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  8.1199 -  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
  8.1200 -  ins_pipe( pipe_cmovD_reg );
  8.1201 +  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  8.1202 +  ins_pipe( pipe_cmovDPR_reg );
  8.1203  %}
  8.1204  
  8.1205  // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
  8.1206 -instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  8.1207 +instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  8.1208    predicate(UseSSE==0);
  8.1209    match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  8.1210    ins_cost(200);
  8.1211 @@ -7620,12 +7598,12 @@
  8.1212              "MOV    $dst,$src\t# float\n"
  8.1213        "skip:" %}
  8.1214    opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  8.1215 -  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
  8.1216 -  ins_pipe( pipe_cmovD_reg );
  8.1217 +  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  8.1218 +  ins_pipe( pipe_cmovDPR_reg );
  8.1219  %}
  8.1220  
  8.1221  // No CMOVE with SSE/SSE2
  8.1222 -instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
  8.1223 +instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  8.1224    predicate (UseSSE>=1);
  8.1225    match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  8.1226    ins_cost(200);
  8.1227 @@ -7643,7 +7621,7 @@
  8.1228  %}
  8.1229  
  8.1230  // No CMOVE with SSE/SSE2
  8.1231 -instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
  8.1232 +instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  8.1233    predicate (UseSSE>=2);
  8.1234    match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  8.1235    ins_cost(200);
  8.1236 @@ -7661,7 +7639,7 @@
  8.1237  %}
  8.1238  
  8.1239  // unsigned version
  8.1240 -instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
  8.1241 +instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  8.1242    predicate (UseSSE>=1);
  8.1243    match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  8.1244    ins_cost(200);
  8.1245 @@ -7678,17 +7656,17 @@
  8.1246    ins_pipe( pipe_slow );
  8.1247  %}
  8.1248  
  8.1249 -instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
  8.1250 +instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  8.1251    predicate (UseSSE>=1);
  8.1252    match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  8.1253    ins_cost(200);
  8.1254    expand %{
  8.1255 -    fcmovX_regU(cop, cr, dst, src);
  8.1256 +    fcmovF_regU(cop, cr, dst, src);
  8.1257    %}
  8.1258  %}
  8.1259  
  8.1260  // unsigned version
  8.1261 -instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
  8.1262 +instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  8.1263    predicate (UseSSE>=2);
  8.1264    match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  8.1265    ins_cost(200);
  8.1266 @@ -7705,12 +7683,12 @@
  8.1267    ins_pipe( pipe_slow );
  8.1268  %}
  8.1269  
  8.1270 -instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
  8.1271 +instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  8.1272    predicate (UseSSE>=2);
  8.1273    match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  8.1274    ins_cost(200);
  8.1275    expand %{
  8.1276 -    fcmovXD_regU(cop, cr, dst, src);
  8.1277 +    fcmovD_regU(cop, cr, dst, src);
  8.1278    %}
  8.1279  %}
  8.1280  
  8.1281 @@ -7940,7 +7918,7 @@
  8.1282    ins_pipe( fpu_reg_mem );
  8.1283  %}
  8.1284  
  8.1285 -instruct loadLX_Locked(stackSlotL dst, memory mem, regXD tmp) %{
  8.1286 +instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
  8.1287    predicate(UseSSE>=2);
  8.1288    match(Set dst (LoadLLocked mem));
  8.1289    effect(TEMP tmp);
  8.1290 @@ -7954,7 +7932,7 @@
  8.1291    ins_pipe( pipe_slow );
  8.1292  %}
  8.1293  
  8.1294 -instruct loadLX_reg_Locked(eRegL dst, memory mem, regXD tmp) %{
  8.1295 +instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
  8.1296    predicate(UseSSE>=2);
  8.1297    match(Set dst (LoadLLocked mem));
  8.1298    effect(TEMP tmp);
  8.1299 @@ -9551,7 +9529,7 @@
  8.1300  // Compare & branch
  8.1301  
  8.1302  // P6 version of float compare, sets condition codes in EFLAGS
  8.1303 -instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  8.1304 +instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  8.1305    predicate(VM_Version::supports_cmov() && UseSSE <=1);
  8.1306    match(Set cr (CmpD src1 src2));
  8.1307    effect(KILL rax);
  8.1308 @@ -9563,26 +9541,26 @@
  8.1309              "SAHF\n"
  8.1310       "exit:\tNOP               // avoid branch to branch" %}
  8.1311    opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  8.1312 -  ins_encode( Push_Reg_D(src1),
  8.1313 +  ins_encode( Push_Reg_DPR(src1),
  8.1314                OpcP, RegOpc(src2),
  8.1315                cmpF_P6_fixup );
  8.1316    ins_pipe( pipe_slow );
  8.1317  %}
  8.1318  
  8.1319 -instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
  8.1320 +instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  8.1321    predicate(VM_Version::supports_cmov() && UseSSE <=1);
  8.1322    match(Set cr (CmpD src1 src2));
  8.1323    ins_cost(150);
  8.1324    format %{ "FLD    $src1\n\t"
  8.1325              "FUCOMIP ST,$src2  // P6 instruction" %}
  8.1326    opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  8.1327 -  ins_encode( Push_Reg_D(src1),
  8.1328 +  ins_encode( Push_Reg_DPR(src1),
  8.1329                OpcP, RegOpc(src2));
  8.1330    ins_pipe( pipe_slow );
  8.1331  %}
  8.1332  
  8.1333  // Compare & branch
  8.1334 -instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  8.1335 +instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  8.1336    predicate(UseSSE<=1);
  8.1337    match(Set cr (CmpD src1 src2));
  8.1338    effect(KILL rax);
  8.1339 @@ -9595,42 +9573,42 @@
  8.1340              "MOV    AH,1\t# unordered treat as LT\n"
  8.1341      "flags:\tSAHF" %}
  8.1342    opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  8.1343 -  ins_encode( Push_Reg_D(src1),
  8.1344 +  ins_encode( Push_Reg_DPR(src1),
  8.1345                OpcP, RegOpc(src2),
  8.1346                fpu_flags);
  8.1347    ins_pipe( pipe_slow );
  8.1348  %}
  8.1349  
  8.1350  // Compare vs zero into -1,0,1
  8.1351 -instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
  8.1352 +instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  8.1353    predicate(UseSSE<=1);
  8.1354    match(Set dst (CmpD3 src1 zero));
  8.1355    effect(KILL cr, KILL rax);
  8.1356    ins_cost(280);
  8.1357    format %{ "FTSTD  $dst,$src1" %}
  8.1358    opcode(0xE4, 0xD9);
  8.1359 -  ins_encode( Push_Reg_D(src1),
  8.1360 +  ins_encode( Push_Reg_DPR(src1),
  8.1361                OpcS, OpcP, PopFPU,
  8.1362                CmpF_Result(dst));
  8.1363    ins_pipe( pipe_slow );
  8.1364  %}
  8.1365  
  8.1366  // Compare into -1,0,1
  8.1367 -instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
  8.1368 +instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  8.1369    predicate(UseSSE<=1);
  8.1370    match(Set dst (CmpD3 src1 src2));
  8.1371    effect(KILL cr, KILL rax);
  8.1372    ins_cost(300);
  8.1373    format %{ "FCMPD  $dst,$src1,$src2" %}
  8.1374    opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  8.1375 -  ins_encode( Push_Reg_D(src1),
  8.1376 +  ins_encode( Push_Reg_DPR(src1),
  8.1377                OpcP, RegOpc(src2),
  8.1378                CmpF_Result(dst));
  8.1379    ins_pipe( pipe_slow );
  8.1380  %}
  8.1381  
  8.1382  // float compare and set condition codes in EFLAGS by XMM regs
  8.1383 -instruct cmpXD_cc(eFlagsRegU cr, regXD src1, regXD src2) %{
  8.1384 +instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  8.1385    predicate(UseSSE>=2);
  8.1386    match(Set cr (CmpD src1 src2));
  8.1387    ins_cost(145);
  8.1388 @@ -9647,7 +9625,7 @@
  8.1389    ins_pipe( pipe_slow );
  8.1390  %}
  8.1391  
  8.1392 -instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD src1, regXD src2) %{
  8.1393 +instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  8.1394    predicate(UseSSE>=2);
  8.1395    match(Set cr (CmpD src1 src2));
  8.1396    ins_cost(100);
  8.1397 @@ -9659,7 +9637,7 @@
  8.1398  %}
  8.1399  
  8.1400  // float compare and set condition codes in EFLAGS by XMM regs
  8.1401 -instruct cmpXD_ccmem(eFlagsRegU cr, regXD src1, memory src2) %{
  8.1402 +instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  8.1403    predicate(UseSSE>=2);
  8.1404    match(Set cr (CmpD src1 (LoadD src2)));
  8.1405    ins_cost(145);
  8.1406 @@ -9676,7 +9654,7 @@
  8.1407    ins_pipe( pipe_slow );
  8.1408  %}
  8.1409  
  8.1410 -instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD src1, memory src2) %{
  8.1411 +instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  8.1412    predicate(UseSSE>=2);
  8.1413    match(Set cr (CmpD src1 (LoadD src2)));
  8.1414    ins_cost(100);
  8.1415 @@ -9688,7 +9666,7 @@
  8.1416  %}
  8.1417  
  8.1418  // Compare into -1,0,1 in XMM
  8.1419 -instruct cmpXD_reg(xRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
  8.1420 +instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  8.1421    predicate(UseSSE>=2);
  8.1422    match(Set dst (CmpD3 src1 src2));
  8.1423    effect(KILL cr);
  8.1424 @@ -9708,7 +9686,7 @@
  8.1425  %}
  8.1426  
  8.1427  // Compare into -1,0,1 in XMM and memory
  8.1428 -instruct cmpXD_regmem(xRegI dst, regXD src1, memory src2, eFlagsReg cr) %{
  8.1429 +instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  8.1430    predicate(UseSSE>=2);
  8.1431    match(Set dst (CmpD3 src1 (LoadD src2)));
  8.1432    effect(KILL cr);
  8.1433 @@ -9728,7 +9706,7 @@
  8.1434  %}
  8.1435  
  8.1436  
  8.1437 -instruct subD_reg(regD dst, regD src) %{
  8.1438 +instruct subDPR_reg(regDPR dst, regDPR src) %{
  8.1439    predicate (UseSSE <=1);
  8.1440    match(Set dst (SubD dst src));
  8.1441  
  8.1442 @@ -9736,12 +9714,12 @@
  8.1443              "DSUBp  $dst,ST" %}
  8.1444    opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  8.1445    ins_cost(150);
  8.1446 -  ins_encode( Push_Reg_D(src),
  8.1447 +  ins_encode( Push_Reg_DPR(src),
  8.1448                OpcP, RegOpc(dst) );
  8.1449    ins_pipe( fpu_reg_reg );
  8.1450  %}
  8.1451  
  8.1452 -instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  8.1453 +instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  8.1454    predicate (UseSSE <=1);
  8.1455    match(Set dst (RoundDouble (SubD src1 src2)));
  8.1456    ins_cost(250);
  8.1457 @@ -9750,13 +9728,13 @@
  8.1458              "DSUB   ST,$src1\n\t"
  8.1459              "FSTP_D $dst\t# D-round" %}
  8.1460    opcode(0xD8, 0x5);
  8.1461 -  ins_encode( Push_Reg_D(src2),
  8.1462 -              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  8.1463 +  ins_encode( Push_Reg_DPR(src2),
  8.1464 +              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  8.1465    ins_pipe( fpu_mem_reg_reg );
  8.1466  %}
  8.1467  
  8.1468  
  8.1469 -instruct subD_reg_mem(regD dst, memory src) %{
  8.1470 +instruct subDPR_reg_mem(regDPR dst, memory src) %{
  8.1471    predicate (UseSSE <=1);
  8.1472    match(Set dst (SubD dst (LoadD src)));
  8.1473    ins_cost(150);
  8.1474 @@ -9769,7 +9747,7 @@
  8.1475    ins_pipe( fpu_reg_mem );
  8.1476  %}
  8.1477  
  8.1478 -instruct absD_reg(regDPR1 dst, regDPR1 src) %{
  8.1479 +instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  8.1480    predicate (UseSSE<=1);
  8.1481    match(Set dst (AbsD src));
  8.1482    ins_cost(100);
  8.1483 @@ -9779,19 +9757,7 @@
  8.1484    ins_pipe( fpu_reg_reg );
  8.1485  %}
  8.1486  
  8.1487 -instruct absXD_reg( regXD dst ) %{
  8.1488 -  predicate(UseSSE>=2);
  8.1489 -  match(Set dst (AbsD dst));
  8.1490 -  ins_cost(150);
  8.1491 -  format %{ "ANDPD  $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
  8.1492 -  ins_encode %{
  8.1493 -    __ andpd($dst$$XMMRegister,
  8.1494 -             ExternalAddress((address)double_signmask_pool));
  8.1495 -  %}
  8.1496 -  ins_pipe( pipe_slow );
  8.1497 -%}
  8.1498 -
  8.1499 -instruct negD_reg(regDPR1 dst, regDPR1 src) %{
  8.1500 +instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  8.1501    predicate(UseSSE<=1);
  8.1502    match(Set dst (NegD src));
  8.1503    ins_cost(100);
  8.1504 @@ -9801,19 +9767,7 @@
  8.1505    ins_pipe( fpu_reg_reg );
  8.1506  %}
  8.1507  
  8.1508 -instruct negXD_reg( regXD dst ) %{
  8.1509 -  predicate(UseSSE>=2);
  8.1510 -  match(Set dst (NegD dst));
  8.1511 -  ins_cost(150);
  8.1512 -  format %{ "XORPD  $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
  8.1513 -  ins_encode %{
  8.1514 -    __ xorpd($dst$$XMMRegister,
  8.1515 -             ExternalAddress((address)double_signflip_pool));
  8.1516 -  %}
  8.1517 -  ins_pipe( pipe_slow );
  8.1518 -%}
  8.1519 -
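The absXD_reg/negXD_reg rules deleted above implement AbsD/NegD as ANDPD/XORPD against sign-mask constants; like the other XMM rules in this diff they move to the new shared x86.ad, where each can gain an AVX companion whose destination is decoupled from the input. A sketch of the neg case follows; the instruct name, the UseAVX predicate spelling, and in particular a vxorpd overload taking an AddressLiteral are assumptions, with double_signflip_pool simply reused from the removed lines:

instruct vnegD_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  format %{ "VXORPD $dst,$src,[0x8000000000000000]\t# CHS D by sign flipping" %}
  ins_encode %{
    // assumed 3-operand form: vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src)
    __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress((address)double_signflip_pool));
  %}
  ins_pipe( pipe_slow );
%}

Because the removed 2-operand XORPD rule had to match (NegD dst), C2 needed the input in the destination register; the 3-operand form lets the allocator keep src live past the negate.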
  8.1520 -instruct addD_reg(regD dst, regD src) %{
  8.1521 +instruct addDPR_reg(regDPR dst, regDPR src) %{
  8.1522    predicate(UseSSE<=1);
  8.1523    match(Set dst (AddD dst src));
  8.1524    format %{ "FLD    $src\n\t"
  8.1525 @@ -9821,13 +9775,13 @@
  8.1526    size(4);
  8.1527    ins_cost(150);
  8.1528    opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  8.1529 -  ins_encode( Push_Reg_D(src),
  8.1530 +  ins_encode( Push_Reg_DPR(src),
  8.1531                OpcP, RegOpc(dst) );
  8.1532    ins_pipe( fpu_reg_reg );
  8.1533  %}
  8.1534  
  8.1535  
  8.1536 -instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  8.1537 +instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  8.1538    predicate(UseSSE<=1);
  8.1539    match(Set dst (RoundDouble (AddD src1 src2)));
  8.1540    ins_cost(250);
  8.1541 @@ -9836,13 +9790,13 @@
  8.1542              "DADD   ST,$src1\n\t"
  8.1543              "FSTP_D $dst\t# D-round" %}
  8.1544    opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  8.1545 -  ins_encode( Push_Reg_D(src2),
  8.1546 -              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  8.1547 +  ins_encode( Push_Reg_DPR(src2),
  8.1548 +              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  8.1549    ins_pipe( fpu_mem_reg_reg );
  8.1550  %}
  8.1551  
  8.1552  
  8.1553 -instruct addD_reg_mem(regD dst, memory src) %{
  8.1554 +instruct addDPR_reg_mem(regDPR dst, memory src) %{
  8.1555    predicate(UseSSE<=1);
  8.1556    match(Set dst (AddD dst (LoadD src)));
  8.1557    ins_cost(150);
  8.1558 @@ -9856,7 +9810,7 @@
  8.1559  %}
  8.1560  
  8.1561  // add-to-memory
  8.1562 -instruct addD_mem_reg(memory dst, regD src) %{
  8.1563 +instruct addDPR_mem_reg(memory dst, regDPR src) %{
  8.1564    predicate(UseSSE<=1);
  8.1565    match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  8.1566    ins_cost(150);
  8.1567 @@ -9872,7 +9826,7 @@
  8.1568    ins_pipe( fpu_reg_mem );
  8.1569  %}
  8.1570  
  8.1571 -instruct addD_reg_imm1(regD dst, immD1 con) %{
  8.1572 +instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  8.1573    predicate(UseSSE<=1);
  8.1574    match(Set dst (AddD dst con));
  8.1575    ins_cost(125);
  8.1576 @@ -9885,7 +9839,7 @@
  8.1577    ins_pipe(fpu_reg);
  8.1578  %}
  8.1579  
  8.1580 -instruct addD_reg_imm(regD dst, immD con) %{
  8.1581 +instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  8.1582    predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  8.1583    match(Set dst (AddD dst con));
  8.1584    ins_cost(200);
  8.1585 @@ -9898,7 +9852,7 @@
  8.1586    ins_pipe(fpu_reg_mem);
  8.1587  %}
  8.1588  
  8.1589 -instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
  8.1590 +instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  8.1591    predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  8.1592    match(Set dst (RoundDouble (AddD src con)));
  8.1593    ins_cost(200);
  8.1594 @@ -9913,143 +9867,14 @@
  8.1595    ins_pipe(fpu_mem_reg_con);
  8.1596  %}
  8.1597  
  8.1598 -// Add two double precision floating point values in xmm
  8.1599 -instruct addXD_reg(regXD dst, regXD src) %{
  8.1600 -  predicate(UseSSE>=2);
  8.1601 -  match(Set dst (AddD dst src));
  8.1602 -  format %{ "ADDSD  $dst,$src" %}
  8.1603 -  ins_encode %{
  8.1604 -    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  8.1605 -  %}
  8.1606 -  ins_pipe( pipe_slow );
  8.1607 -%}
  8.1608 -
  8.1609 -instruct addXD_imm(regXD dst, immXD con) %{
  8.1610 -  predicate(UseSSE>=2);
  8.1611 -  match(Set dst (AddD dst con));
  8.1612 -  format %{ "ADDSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  8.1613 -  ins_encode %{
  8.1614 -    __ addsd($dst$$XMMRegister, $constantaddress($con));
  8.1615 -  %}
  8.1616 -  ins_pipe(pipe_slow);
  8.1617 -%}
  8.1618 -
  8.1619 -instruct addXD_mem(regXD dst, memory mem) %{
  8.1620 -  predicate(UseSSE>=2);
  8.1621 -  match(Set dst (AddD dst (LoadD mem)));
  8.1622 -  format %{ "ADDSD  $dst,$mem" %}
  8.1623 -  ins_encode %{
  8.1624 -    __ addsd($dst$$XMMRegister, $mem$$Address);
  8.1625 -  %}
  8.1626 -  ins_pipe( pipe_slow );
  8.1627 -%}
  8.1628 -
  8.1629 -// Sub two double precision floating point values in xmm
  8.1630 -instruct subXD_reg(regXD dst, regXD src) %{
  8.1631 -  predicate(UseSSE>=2);
  8.1632 -  match(Set dst (SubD dst src));
  8.1633 -  ins_cost(150);
  8.1634 -  format %{ "SUBSD  $dst,$src" %}
  8.1635 -  ins_encode %{
  8.1636 -    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  8.1637 -  %}
  8.1638 -  ins_pipe( pipe_slow );
  8.1639 -%}
  8.1640 -
  8.1641 -instruct subXD_imm(regXD dst, immXD con) %{
  8.1642 -  predicate(UseSSE>=2);
  8.1643 -  match(Set dst (SubD dst con));
  8.1644 -  ins_cost(150);
  8.1645 -  format %{ "SUBSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  8.1646 -  ins_encode %{
  8.1647 -    __ subsd($dst$$XMMRegister, $constantaddress($con));
  8.1648 -  %}
  8.1649 -  ins_pipe(pipe_slow);
  8.1650 -%}
  8.1651 -
  8.1652 -instruct subXD_mem(regXD dst, memory mem) %{
  8.1653 -  predicate(UseSSE>=2);
  8.1654 -  match(Set dst (SubD dst (LoadD mem)));
  8.1655 -  ins_cost(150);
  8.1656 -  format %{ "SUBSD  $dst,$mem" %}
  8.1657 -  ins_encode %{
  8.1658 -    __ subsd($dst$$XMMRegister, $mem$$Address);
  8.1659 -  %}
  8.1660 -  ins_pipe( pipe_slow );
  8.1661 -%}
  8.1662 -
  8.1663 -// Mul two double precision floating point values in xmm
  8.1664 -instruct mulXD_reg(regXD dst, regXD src) %{
  8.1665 -  predicate(UseSSE>=2);
  8.1666 -  match(Set dst (MulD dst src));
  8.1667 -  format %{ "MULSD  $dst,$src" %}
  8.1668 -  ins_encode %{
  8.1669 -    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  8.1670 -  %}
  8.1671 -  ins_pipe( pipe_slow );
  8.1672 -%}
  8.1673 -
  8.1674 -instruct mulXD_imm(regXD dst, immXD con) %{
  8.1675 -  predicate(UseSSE>=2);
  8.1676 -  match(Set dst (MulD dst con));
  8.1677 -  format %{ "MULSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  8.1678 -  ins_encode %{
  8.1679 -    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  8.1680 -  %}
  8.1681 -  ins_pipe(pipe_slow);
  8.1682 -%}
  8.1683 -
  8.1684 -instruct mulXD_mem(regXD dst, memory mem) %{
  8.1685 -  predicate(UseSSE>=2);
  8.1686 -  match(Set dst (MulD dst (LoadD mem)));
  8.1687 -  format %{ "MULSD  $dst,$mem" %}
  8.1688 -  ins_encode %{
  8.1689 -    __ mulsd($dst$$XMMRegister, $mem$$Address);
  8.1690 -  %}
  8.1691 -  ins_pipe( pipe_slow );
  8.1692 -%}
  8.1693 -
  8.1694 -// Div two double precision floating point values in xmm
  8.1695 -instruct divXD_reg(regXD dst, regXD src) %{
  8.1696 -  predicate(UseSSE>=2);
  8.1697 -  match(Set dst (DivD dst src));
  8.1698 -  format %{ "DIVSD  $dst,$src" %}
  8.1699 -  opcode(0xF2, 0x0F, 0x5E);
  8.1700 -  ins_encode %{
  8.1701 -    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
  8.1702 -  %}
  8.1703 -  ins_pipe( pipe_slow );
  8.1704 -%}
  8.1705 -
  8.1706 -instruct divXD_imm(regXD dst, immXD con) %{
  8.1707 -  predicate(UseSSE>=2);
  8.1708 -  match(Set dst (DivD dst con));
  8.1709 -  format %{ "DIVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  8.1710 -  ins_encode %{
  8.1711 -    __ divsd($dst$$XMMRegister, $constantaddress($con));
  8.1712 -  %}
  8.1713 -  ins_pipe(pipe_slow);
  8.1714 -%}
  8.1715 -
  8.1716 -instruct divXD_mem(regXD dst, memory mem) %{
  8.1717 -  predicate(UseSSE>=2);
  8.1718 -  match(Set dst (DivD dst (LoadD mem)));
  8.1719 -  format %{ "DIVSD  $dst,$mem" %}
  8.1720 -  ins_encode %{
  8.1721 -    __ divsd($dst$$XMMRegister, $mem$$Address);
  8.1722 -  %}
  8.1723 -  ins_pipe( pipe_slow );
  8.1724 -%}
  8.1725 -
  8.1726 -
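The twelve scalar-double rules removed above (addXD/subXD/mulXD/divXD, each in reg, imm, and mem flavors) reappear in the new shared x86.ad listed in this changeset, and that is where its stated purpose lands: on machines with AVX, each operation also gets a non-destructive 3-operand encoding. A sketch of the AVX companion to the removed addXD_reg, built on the vaddsd assembler method this changeset adds; the exact instruct name and predicate used in x86.ad are assumptions:

instruct vaddD_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));
  format %{ "VADDSD $dst,$src1,$src2" %}
  ins_encode %{
    // the VEX encoding carries both sources, so dst need not equal src1
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

The 2-operand SSE patterns above all match (AddD dst src), forcing the left input and the result into one register and often costing a MOVSD; matching (AddD src1 src2) removes that constraint.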
  8.1727 -instruct mulD_reg(regD dst, regD src) %{
  8.1728 +instruct mulDPR_reg(regDPR dst, regDPR src) %{
  8.1729    predicate(UseSSE<=1);
  8.1730    match(Set dst (MulD dst src));
  8.1731    format %{ "FLD    $src\n\t"
  8.1732              "DMULp  $dst,ST" %}
  8.1733    opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  8.1734    ins_cost(150);
  8.1735 -  ins_encode( Push_Reg_D(src),
  8.1736 +  ins_encode( Push_Reg_DPR(src),
  8.1737                OpcP, RegOpc(dst) );
  8.1738    ins_pipe( fpu_reg_reg );
  8.1739  %}
  8.1740 @@ -10062,7 +9887,7 @@
  8.1741  // multiply scaled arg1 by arg2
  8.1742  // rescale product by 2^(15360)
  8.1743  //
  8.1744 -instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
  8.1745 +instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  8.1746    predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  8.1747    match(Set dst (MulD dst src));
  8.1748    ins_cost(1);   // Select this instruction for all strict FP double multiplies
  8.1749 @@ -10075,13 +9900,13 @@
  8.1750              "DMULp  $dst,ST\n\t" %}
  8.1751    opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  8.1752    ins_encode( strictfp_bias1(dst),
  8.1753 -              Push_Reg_D(src),
  8.1754 +              Push_Reg_DPR(src),
  8.1755                OpcP, RegOpc(dst),
  8.1756                strictfp_bias2(dst) );
  8.1757    ins_pipe( fpu_reg_reg );
  8.1758  %}
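strictfp_bias1/strictfp_bias2 are encodings defined earlier in this file; the comment block above the instruct gives the scheme. A reconstruction of the expansion, not quoted source:

  // strictfp_bias1(dst):  FLD [2^-15360] ; FMULP   scale dst to the bottom
  //                                                of the exponent range
  // (DMULp by $src as usual)
  // strictfp_bias2(dst):  FLD [2^+15360] ; FMULP   rescale the product
  //
  // Products that IEEE double would round as denormals then also go
  // denormal in the 80-bit stack register, so the value is rounded once,
  // which is what Java strictfp semantics require of the multiply.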
  8.1759  
  8.1760 -instruct mulD_reg_imm(regD dst, immD con) %{
  8.1761 +instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  8.1762    predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  8.1763    match(Set dst (MulD dst con));
  8.1764    ins_cost(200);
  8.1765 @@ -10095,7 +9920,7 @@
  8.1766  %}
  8.1767  
  8.1768  
  8.1769 -instruct mulD_reg_mem(regD dst, memory src) %{
  8.1770 +instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  8.1771    predicate( UseSSE<=1 );
  8.1772    match(Set dst (MulD dst (LoadD src)));
  8.1773    ins_cost(200);
  8.1774 @@ -10109,7 +9934,7 @@
  8.1775  
  8.1776  //
  8.1777  // Cisc-alternate to reg-reg multiply
  8.1778 -instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
  8.1779 +instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  8.1780    predicate( UseSSE<=1 );
  8.1781    match(Set dst (MulD src (LoadD mem)));
  8.1782    ins_cost(250);
  8.1783 @@ -10118,17 +9943,17 @@
  8.1784              "FSTP_D $dst" %}
  8.1785    opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
  8.1786    ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
  8.1787 -              OpcReg_F(src),
  8.1788 -              Pop_Reg_D(dst) );
  8.1789 +              OpcReg_FPR(src),
  8.1790 +              Pop_Reg_DPR(dst) );
  8.1791    ins_pipe( fpu_reg_reg_mem );
  8.1792  %}
  8.1793  
  8.1794  
  8.1795 -// MACRO3 -- addD a mulD
  8.1796 +// MACRO3 -- addDPR a mulDPR
  8.1797  // This instruction is a '2-address' instruction in that the result goes
  8.1798  // back to src2.  This eliminates a move from the macro; possibly the
  8.1799  // register allocator will have to add it back (and maybe not).
  8.1800 -instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
  8.1801 +instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  8.1802    predicate( UseSSE<=1 );
  8.1803    match(Set src2 (AddD (MulD src0 src1) src2));
  8.1804    format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
  8.1805 @@ -10136,29 +9961,29 @@
  8.1806              "DADDp  $src2,ST" %}
  8.1807    ins_cost(250);
  8.1808    opcode(0xDD); /* LoadD DD /0 */
  8.1809 -  ins_encode( Push_Reg_F(src0),
  8.1810 +  ins_encode( Push_Reg_FPR(src0),
  8.1811                FMul_ST_reg(src1),
  8.1812                FAddP_reg_ST(src2) );
  8.1813    ins_pipe( fpu_reg_reg_reg );
  8.1814  %}
  8.1815  
  8.1816  
  8.1817 -// MACRO3 -- subD a mulD
  8.1818 -instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
  8.1819 +// MACRO3 -- subDPR a mulDPR
  8.1820 +instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  8.1821    predicate( UseSSE<=1 );
  8.1822    match(Set src2 (SubD (MulD src0 src1) src2));
  8.1823    format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
  8.1824              "DMUL   ST,$src1\n\t"
  8.1825              "DSUBRp $src2,ST" %}
  8.1826    ins_cost(250);
  8.1827 -  ins_encode( Push_Reg_F(src0),
  8.1828 +  ins_encode( Push_Reg_FPR(src0),
  8.1829                FMul_ST_reg(src1),
  8.1830                Opcode(0xDE), Opc_plus(0xE0,src2));
  8.1831    ins_pipe( fpu_reg_reg_reg );
  8.1832  %}
  8.1833  
  8.1834  
  8.1835 -instruct divD_reg(regD dst, regD src) %{
  8.1836 +instruct divDPR_reg(regDPR dst, regDPR src) %{
  8.1837    predicate( UseSSE<=1 );
  8.1838    match(Set dst (DivD dst src));
  8.1839  
  8.1840 @@ -10166,7 +9991,7 @@
  8.1841              "FDIVp  $dst,ST" %}
  8.1842    opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  8.1843    ins_cost(150);
  8.1844 -  ins_encode( Push_Reg_D(src),
  8.1845 +  ins_encode( Push_Reg_DPR(src),
  8.1846                OpcP, RegOpc(dst) );
  8.1847    ins_pipe( fpu_reg_reg );
  8.1848  %}
  8.1849 @@ -10179,7 +10004,7 @@
  8.1850  // divide scaled dividend by divisor
  8.1851  // rescale quotient by 2^(15360)
  8.1852  //
  8.1853 -instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
  8.1854 +instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  8.1855    predicate (UseSSE<=1);
  8.1856    match(Set dst (DivD dst src));
  8.1857    predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  8.1858 @@ -10193,13 +10018,13 @@
  8.1859              "DMULp  $dst,ST\n\t" %}
  8.1860    opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  8.1861    ins_encode( strictfp_bias1(dst),
  8.1862 -              Push_Reg_D(src),
  8.1863 +              Push_Reg_DPR(src),
  8.1864                OpcP, RegOpc(dst),
  8.1865                strictfp_bias2(dst) );
  8.1866    ins_pipe( fpu_reg_reg );
  8.1867  %}
  8.1868  
  8.1869 -instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  8.1870 +instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  8.1871    predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  8.1872    match(Set dst (RoundDouble (DivD src1 src2)));
  8.1873  
  8.1874 @@ -10207,27 +10032,27 @@
  8.1875              "FDIV   ST,$src2\n\t"
  8.1876              "FSTP_D $dst\t# D-round" %}
  8.1877    opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  8.1878 -  ins_encode( Push_Reg_D(src1),
  8.1879 -              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
  8.1880 +  ins_encode( Push_Reg_DPR(src1),
  8.1881 +              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  8.1882    ins_pipe( fpu_mem_reg_reg );
  8.1883  %}
  8.1884  
  8.1885  
  8.1886 -instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
  8.1887 +instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  8.1888    predicate(UseSSE<=1);
  8.1889    match(Set dst (ModD dst src));
  8.1890 -  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
  8.1891 +  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
  8.1892  
  8.1893    format %{ "DMOD   $dst,$src" %}
  8.1894    ins_cost(250);
  8.1895 -  ins_encode(Push_Reg_Mod_D(dst, src),
  8.1896 -              emitModD(),
  8.1897 -              Push_Result_Mod_D(src),
  8.1898 -              Pop_Reg_D(dst));
  8.1899 -  ins_pipe( pipe_slow );
  8.1900 -%}
  8.1901 -
  8.1902 -instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
  8.1903 +  ins_encode(Push_Reg_Mod_DPR(dst, src),
  8.1904 +              emitModDPR(),
  8.1905 +              Push_Result_Mod_DPR(src),
  8.1906 +              Pop_Reg_DPR(dst));
  8.1907 +  ins_pipe( pipe_slow );
  8.1908 +%}
  8.1909 +
  8.1910 +instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  8.1911    predicate(UseSSE>=2);
  8.1912    match(Set dst (ModD src0 src1));
  8.1913    effect(KILL rax, KILL cr);
  8.1914 @@ -10248,11 +10073,11 @@
  8.1915            "\tFSTP   ST0\t # Restore FPU Stack"
  8.1916      %}
  8.1917    ins_cost(250);
  8.1918 -  ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
  8.1919 -  ins_pipe( pipe_slow );
  8.1920 -%}
  8.1921 -
  8.1922 -instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
  8.1923 +  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  8.1924 +  ins_pipe( pipe_slow );
  8.1925 +%}
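Note that even the UseSSE>=2 form of ModD runs on the x87 stack: SSE2 has no remainder instruction, so Push_ModD_encoding spills both XMM inputs through memory onto the FPU stack, emitModDPR runs the remainder loop, and Push_ResultD brings the answer back to an XMM register. The loop inside emitModDPR, reconstructed from the x87 FPREM protocol rather than quoted from this diff, is roughly:

  // loop: FPREM             ; partial remainder of ST0 by ST1
  //       FNSTSW AX         ; x87 status word -> AX
  //       SAHF              ; AH -> EFLAGS (FPREM's C2 bit lands on PF)
  //       JP     loop       ; C2 set: exponent only partially reduced,
  //                         ; go around again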
  8.1926 +
  8.1927 +instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
  8.1928    predicate (UseSSE<=1);
  8.1929    match(Set dst (SinD src));
  8.1930    ins_cost(1800);
  8.1931 @@ -10262,18 +10087,18 @@
  8.1932    ins_pipe( pipe_slow );
  8.1933  %}
  8.1934  
  8.1935 -instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
  8.1936 +instruct sinD_reg(regD dst, eFlagsReg cr) %{
  8.1937    predicate (UseSSE>=2);
  8.1938    match(Set dst (SinD dst));
  8.1939 -  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  8.1940 +  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  8.1941    ins_cost(1800);
  8.1942    format %{ "DSIN   $dst" %}
  8.1943    opcode(0xD9, 0xFE);
  8.1944 -  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  8.1945 -  ins_pipe( pipe_slow );
  8.1946 -%}
  8.1947 -
  8.1948 -instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
  8.1949 +  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
  8.1950 +  ins_pipe( pipe_slow );
  8.1951 +%}
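sinD_reg and the cosD_reg/tanD_reg rules that follow stay on FSIN/FCOS/FPTAN even under SSE2, since the transcendentals have no XMM forms. Push_SrcD/Push_ResultD shuttle the operand between the XMM register and the FPU stack through [ESP], which is why each of these rules KILLs the flags (the effect comment notes the SUB/ADD ESP,8). Sketched from those comments and from the MOVSD $dst,[ESP] / ADD ESP,8 lines visible in expD_reg below:

  // Push_SrcD(dst):     SUB ESP,8 ; MOVSD [ESP],$dst ; FLD_D [ESP]
  //   ... x87 op, e.g. DSIN, runs on ST0 ...
  // Push_ResultD(dst):  FSTP_D [ESP] ; MOVSD $dst,[ESP] ; ADD ESP,8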
  8.1952 +
  8.1953 +instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
  8.1954    predicate (UseSSE<=1);
  8.1955    match(Set dst (CosD src));
  8.1956    ins_cost(1800);
  8.1957 @@ -10283,18 +10108,18 @@
  8.1958    ins_pipe( pipe_slow );
  8.1959  %}
  8.1960  
  8.1961 -instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
  8.1962 +instruct cosD_reg(regD dst, eFlagsReg cr) %{
  8.1963    predicate (UseSSE>=2);
  8.1964    match(Set dst (CosD dst));
  8.1965 -  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  8.1966 +  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  8.1967    ins_cost(1800);
  8.1968    format %{ "DCOS   $dst" %}
  8.1969    opcode(0xD9, 0xFF);
  8.1970 -  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  8.1971 -  ins_pipe( pipe_slow );
  8.1972 -%}
  8.1973 -
  8.1974 -instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
  8.1975 +  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
  8.1976 +  ins_pipe( pipe_slow );
  8.1977 +%}
  8.1978 +
  8.1979 +instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
  8.1980    predicate (UseSSE<=1);
  8.1981    match(Set dst(TanD src));
  8.1982    format %{ "DTAN   $dst" %}
  8.1983 @@ -10303,50 +10128,50 @@
  8.1984    ins_pipe( pipe_slow );
  8.1985  %}
  8.1986  
  8.1987 -instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
  8.1988 +instruct tanD_reg(regD dst, eFlagsReg cr) %{
  8.1989    predicate (UseSSE>=2);
  8.1990    match(Set dst(TanD dst));
  8.1991 -  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  8.1992 +  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  8.1993    format %{ "DTAN   $dst" %}
  8.1994 -  ins_encode( Push_SrcXD(dst),
  8.1995 +  ins_encode( Push_SrcD(dst),
  8.1996                Opcode(0xD9), Opcode(0xF2),    // fptan
  8.1997                Opcode(0xDD), Opcode(0xD8),   // fstp st
  8.1998 -              Push_ResultXD(dst) );
  8.1999 -  ins_pipe( pipe_slow );
  8.2000 -%}
  8.2001 -
  8.2002 -instruct atanD_reg(regD dst, regD src) %{
  8.2003 +              Push_ResultD(dst) );
  8.2004 +  ins_pipe( pipe_slow );
  8.2005 +%}
  8.2006 +
  8.2007 +instruct atanDPR_reg(regDPR dst, regDPR src) %{
  8.2008    predicate (UseSSE<=1);
  8.2009    match(Set dst(AtanD dst src));
  8.2010    format %{ "DATA   $dst,$src" %}
  8.2011    opcode(0xD9, 0xF3);
  8.2012 -  ins_encode( Push_Reg_D(src),
  8.2013 +  ins_encode( Push_Reg_DPR(src),
  8.2014                OpcP, OpcS, RegOpc(dst) );
  8.2015    ins_pipe( pipe_slow );
  8.2016  %}
  8.2017  
  8.2018 -instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  8.2019 +instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  8.2020    predicate (UseSSE>=2);
  8.2021    match(Set dst(AtanD dst src));
  8.2022 -  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  8.2023 +  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  8.2024    format %{ "DATA   $dst,$src" %}
  8.2025    opcode(0xD9, 0xF3);
  8.2026 -  ins_encode( Push_SrcXD(src),
  8.2027 -              OpcP, OpcS, Push_ResultXD(dst) );
  8.2028 -  ins_pipe( pipe_slow );
  8.2029 -%}
  8.2030 -
  8.2031 -instruct sqrtD_reg(regD dst, regD src) %{
  8.2032 +  ins_encode( Push_SrcD(src),
  8.2033 +              OpcP, OpcS, Push_ResultD(dst) );
  8.2034 +  ins_pipe( pipe_slow );
  8.2035 +%}
  8.2036 +
  8.2037 +instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  8.2038    predicate (UseSSE<=1);
  8.2039    match(Set dst (SqrtD src));
  8.2040    format %{ "DSQRT  $dst,$src" %}
  8.2041    opcode(0xFA, 0xD9);
  8.2042 -  ins_encode( Push_Reg_D(src),
  8.2043 -              OpcS, OpcP, Pop_Reg_D(dst) );
  8.2044 -  ins_pipe( pipe_slow );
  8.2045 -%}
  8.2046 -
  8.2047 -instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  8.2048 +  ins_encode( Push_Reg_DPR(src),
  8.2049 +              OpcS, OpcP, Pop_Reg_DPR(dst) );
  8.2050 +  ins_pipe( pipe_slow );
  8.2051 +%}
  8.2052 +
  8.2053 +instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  8.2054    predicate (UseSSE<=1);
  8.2055    match(Set Y (PowD X Y));  // Raise X to the Yth power
  8.2056    effect(KILL rax, KILL rbx, KILL rcx);
  8.2057 @@ -10375,14 +10200,14 @@
  8.2058              "ADD    ESP,8"
  8.2059               %}
  8.2060    ins_encode( push_stack_temp_qword,
  8.2061 -              Push_Reg_D(X),
  8.2062 +              Push_Reg_DPR(X),
  8.2063                Opcode(0xD9), Opcode(0xF1),   // fyl2x
  8.2064                pow_exp_core_encoding,
  8.2065                pop_stack_temp_qword);
  8.2066    ins_pipe( pipe_slow );
  8.2067  %}
  8.2068  
  8.2069 -instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
  8.2070 +instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
  8.2071    predicate (UseSSE>=2);
  8.2072    match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
  8.2073    effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
  8.2074 @@ -10420,12 +10245,12 @@
  8.2075                push_xmm_to_fpr1(src0),
  8.2076                Opcode(0xD9), Opcode(0xF1),   // fyl2x
  8.2077                pow_exp_core_encoding,
  8.2078 -              Push_ResultXD(dst) );
  8.2079 -  ins_pipe( pipe_slow );
  8.2080 -%}
  8.2081 -
  8.2082 -
  8.2083 -instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  8.2084 +              Push_ResultD(dst) );
  8.2085 +  ins_pipe( pipe_slow );
  8.2086 +%}
  8.2087 +
  8.2088 +
  8.2089 +instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  8.2090    predicate (UseSSE<=1);
  8.2091    match(Set dpr1 (ExpD dpr1));
  8.2092    effect(KILL rax, KILL rbx, KILL rcx);
  8.2093 @@ -10461,7 +10286,7 @@
  8.2094    ins_pipe( pipe_slow );
  8.2095  %}
  8.2096  
  8.2097 -instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  8.2098 +instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  8.2099    predicate (UseSSE>=2);
  8.2100    match(Set dst (ExpD src));
  8.2101    effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
  8.2102 @@ -10492,17 +10317,17 @@
  8.2103              "MOVSD  $dst,[ESP]\n\t"
  8.2104              "ADD    ESP,8"
  8.2105               %}
  8.2106 -  ins_encode( Push_SrcXD(src),
  8.2107 +  ins_encode( Push_SrcD(src),
  8.2108                Opcode(0xD9), Opcode(0xEA),   // fldl2e
  8.2109                Opcode(0xDE), Opcode(0xC9),   // fmulp
  8.2110                pow_exp_core_encoding,
  8.2111 -              Push_ResultXD(dst) );
  8.2112 -  ins_pipe( pipe_slow );
  8.2113 -%}
  8.2114 -
  8.2115 -
  8.2116 -
  8.2117 -instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
  8.2118 +              Push_ResultD(dst) );
  8.2119 +  ins_pipe( pipe_slow );
  8.2120 +%}
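PowD and ExpD both reduce to "raise 2 to a power" on the x87 stack. powD_reg forms src1*log2(src0) with FYL2X; expD_reg, just above, instead loads log2(e) with FLDL2E and multiplies, as its fldl2e/fmulp opcode bytes show. Both then hand ST0 to the shared pow_exp_core_encoding, whose job, again a reconstruction rather than quoted source, is:

  //   x^y = 2^(y * log2 x)          // powD_reg via FYL2X
  //   e^x = 2^(x * log2 e)          // expD_reg via FLDL2E ; FMULP
  //
  // pow_exp_core_encoding, given t in ST0, splits t = i + f with |f| < 1,
  // computes 2^f - 1 with F2XM1, adds 1.0, then scales by 2^i with FSCALE.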
  8.2121 +
  8.2122 +
  8.2123 +
  8.2124 +instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
  8.2125    predicate (UseSSE<=1);
  8.2126    // The source Double operand on FPU stack
  8.2127    match(Set dst (Log10D src));
  8.2128 @@ -10520,7 +10345,7 @@
  8.2129    ins_pipe( pipe_slow );
  8.2130  %}
  8.2131  
  8.2132 -instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  8.2133 +instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
  8.2134    predicate (UseSSE>=2);
  8.2135    effect(KILL cr);
  8.2136    match(Set dst (Log10D src));
  8.2137 @@ -10530,14 +10355,14 @@
  8.2138              "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
  8.2139           %}
  8.2140    ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
  8.2141 -              Push_SrcXD(src),
  8.2142 +              Push_SrcD(src),
  8.2143                Opcode(0xD9), Opcode(0xF1),   // fyl2x
  8.2144 -              Push_ResultXD(dst));
  8.2145 -
  8.2146 -  ins_pipe( pipe_slow );
  8.2147 -%}
  8.2148 -
  8.2149 -instruct logD_reg(regDPR1 dst, regDPR1 src) %{
  8.2150 +              Push_ResultD(dst));
  8.2151 +
  8.2152 +  ins_pipe( pipe_slow );
  8.2153 +%}
  8.2154 +
  8.2155 +instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
  8.2156    predicate (UseSSE<=1);
  8.2157    // The source Double operand on FPU stack
  8.2158    match(Set dst (LogD src));
  8.2159 @@ -10555,7 +10380,7 @@
  8.2160    ins_pipe( pipe_slow );
  8.2161  %}
  8.2162  
  8.2163 -instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  8.2164 +instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
  8.2165    predicate (UseSSE>=2);
  8.2166    effect(KILL cr);
  8.2167    // The source and result Double operands in XMM registers
  8.2168 @@ -10566,9 +10391,9 @@
  8.2169              "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
  8.2170           %}
  8.2171    ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
  8.2172 -              Push_SrcXD(src),
  8.2173 +              Push_SrcD(src),
  8.2174                Opcode(0xD9), Opcode(0xF1),   // fyl2x
  8.2175 -              Push_ResultXD(dst));
  8.2176 +              Push_ResultD(dst));
  8.2177    ins_pipe( pipe_slow );
  8.2178  %}
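logD_reg and log10D_reg above are the same FYL2X trick with a different base constant: FYL2X computes ST1 * log2(ST0), so whichever constant is pushed first selects the base:

  //   FLDLN2 ; FYL2X   ->  ln(2)    * log2(x) = ln(x)      // logD_reg
  //   FLDLG2 ; FYL2X   ->  log10(2) * log2(x) = log10(x)   // log10D_reg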
  8.2179  
  8.2180 @@ -10589,7 +10414,7 @@
  8.2181  //   exit:
  8.2182  
  8.2183  // P6 version of float compare, sets condition codes in EFLAGS
  8.2184 -instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  8.2185 +instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  8.2186    predicate(VM_Version::supports_cmov() && UseSSE == 0);
  8.2187    match(Set cr (CmpF src1 src2));
  8.2188    effect(KILL rax);
  8.2189 @@ -10601,27 +10426,27 @@
  8.2190              "SAHF\n"
  8.2191       "exit:\tNOP               // avoid branch to branch" %}
  8.2192    opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  8.2193 -  ins_encode( Push_Reg_D(src1),
  8.2194 +  ins_encode( Push_Reg_DPR(src1),
  8.2195                OpcP, RegOpc(src2),
  8.2196                cmpF_P6_fixup );
  8.2197    ins_pipe( pipe_slow );
  8.2198  %}
  8.2199  
  8.2200 -instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
  8.2201 +instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  8.2202    predicate(VM_Version::supports_cmov() && UseSSE == 0);
  8.2203    match(Set cr (CmpF src1 src2));
  8.2204    ins_cost(100);
  8.2205    format %{ "FLD    $src1\n\t"
  8.2206              "FUCOMIP ST,$src2  // P6 instruction" %}
  8.2207    opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  8.2208 -  ins_encode( Push_Reg_D(src1),
  8.2209 +  ins_encode( Push_Reg_DPR(src1),
  8.2210                OpcP, RegOpc(src2));
  8.2211    ins_pipe( pipe_slow );
  8.2212  %}
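Two generations of x87 float compare sit side by side here. The _P6 rules (predicate VM_Version::supports_cmov()) use FUCOMIP, which deposits the result directly in EFLAGS; the plain cmpFPR_cc below has to take the pre-P6 path through AX. Condensed from the format strings above and below:

  //  P6 and later:            pre-P6 fallback:
  //    FLD     $src1            FLD     $src1
  //    FUCOMIP ST,$src2         FCOMp   $src2
  //    (flags already set)      FNSTSW  AX     ; status word -> AX
  //                             SAHF           ; AH -> EFLAGS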
  8.2213  
  8.2214  
  8.2215  // Compare & branch
  8.2216 -instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  8.2217 +instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  8.2218    predicate(UseSSE == 0);
  8.2219    match(Set cr (CmpF src1 src2));
  8.2220    effect(KILL rax);
  8.2221 @@ -10634,42 +10459,42 @@
  8.2222              "MOV    AH,1\t# unordered treat as LT\n"
  8.2223      "flags:\tSAHF" %}
  8.2224    opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  8.2225 -  ins_encode( Push_Reg_D(src1),
  8.2226 +  ins_encode( Push_Reg_DPR(src1),
  8.2227                OpcP, RegOpc(src2),
  8.2228                fpu_flags);
  8.2229    ins_pipe( pipe_slow );
  8.2230  %}
  8.2231  
  8.2232  // Compare vs zero into -1,0,1
  8.2233 -instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
  8.2234 +instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  8.2235    predicate(UseSSE == 0);
  8.2236    match(Set dst (CmpF3 src1 zero));
  8.2237    effect(KILL cr, KILL rax);
  8.2238    ins_cost(280);
  8.2239    format %{ "FTSTF  $dst,$src1" %}
  8.2240    opcode(0xE4, 0xD9);
  8.2241 -  ins_encode( Push_Reg_D(src1),
  8.2242 +  ins_encode( Push_Reg_DPR(src1),
  8.2243                OpcS, OpcP, PopFPU,
  8.2244                CmpF_Result(dst));
  8.2245    ins_pipe( pipe_slow );
  8.2246  %}
  8.2247  
  8.2248  // Compare into -1,0,1
  8.2249 -instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  8.2250 +instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  8.2251    predicate(UseSSE == 0);
  8.2252    match(Set dst (CmpF3 src1 src2));
  8.2253    effect(KILL cr, KILL rax);
  8.2254    ins_cost(300);
  8.2255    format %{ "FCMPF  $dst,$src1,$src2" %}
  8.2256    opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  8.2257 -  ins_encode( Push_Reg_D(src1),
  8.2258 +  ins_encode( Push_Reg_DPR(src1),
  8.2259                OpcP, RegOpc(src2),
  8.2260                CmpF_Result(dst));
  8.2261    ins_pipe( pipe_slow );
  8.2262  %}
  8.2263  
  8.2264  // float compare and set condition codes in EFLAGS by XMM regs
  8.2265 -instruct cmpX_cc(eFlagsRegU cr, regX src1, regX src2) %{
  8.2266 +instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  8.2267    predicate(UseSSE>=1);
  8.2268    match(Set cr (CmpF src1 src2));
  8.2269    ins_cost(145);
  8.2270 @@ -10686,7 +10511,7 @@
  8.2271    ins_pipe( pipe_slow );
  8.2272  %}
  8.2273  
  8.2274 -instruct cmpX_ccCF(eFlagsRegUCF cr, regX src1, regX src2) %{
  8.2275 +instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  8.2276    predicate(UseSSE>=1);
  8.2277    match(Set cr (CmpF src1 src2));
  8.2278    ins_cost(100);
  8.2279 @@ -10698,7 +10523,7 @@
  8.2280  %}
  8.2281  
  8.2282  // float compare and set condition codes in EFLAGS by XMM regs
  8.2283 -instruct cmpX_ccmem(eFlagsRegU cr, regX src1, memory src2) %{
  8.2284 +instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  8.2285    predicate(UseSSE>=1);
  8.2286    match(Set cr (CmpF src1 (LoadF src2)));
  8.2287    ins_cost(165);
  8.2288 @@ -10715,7 +10540,7 @@
  8.2289    ins_pipe( pipe_slow );
  8.2290  %}
  8.2291  
  8.2292 -instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX src1, memory src2) %{
  8.2293 +instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  8.2294    predicate(UseSSE>=1);
  8.2295    match(Set cr (CmpF src1 (LoadF src2)));
  8.2296    ins_cost(100);
  8.2297 @@ -10727,7 +10552,7 @@
  8.2298  %}
  8.2299  
  8.2300  // Compare into -1,0,1 in XMM
  8.2301 -instruct cmpX_reg(xRegI dst, regX src1, regX src2, eFlagsReg cr) %{
  8.2302 +instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  8.2303    predicate(UseSSE>=1);
  8.2304    match(Set dst (CmpF3 src1 src2));
  8.2305    effect(KILL cr);
  8.2306 @@ -10747,7 +10572,7 @@
  8.2307  %}
  8.2308  
  8.2309  // Compare into -1,0,1 in XMM and memory
  8.2310 -instruct cmpX_regmem(xRegI dst, regX src1, memory src2, eFlagsReg cr) %{
  8.2311 +instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  8.2312    predicate(UseSSE>=1);
  8.2313    match(Set dst (CmpF3 src1 (LoadF src2)));
  8.2314    effect(KILL cr);
  8.2315 @@ -10767,230 +10592,57 @@
  8.2316  %}
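Throughout the XMM compare rules, the split between eFlagsRegU and eFlagsRegUCF versions comes down to NaN handling. UCOMISS/UCOMISD set the flags as follows (architecturally defined):

  //   src1 >  src2 : ZF=0 PF=0 CF=0
  //   src1 <  src2 : ZF=0 PF=0 CF=1
  //   src1 == src2 : ZF=1 PF=0 CF=0
  //   unordered    : ZF=1 PF=1 CF=1    (a NaN operand)

The cheaper _CF rules (ins_cost 100) publish these raw flags for consumers that branch on CF/ZF-style conditions and tolerate the unordered pattern; the eFlagsRegU rules pay extra (ins_cost 145 and up) to fix the flags up so that a NaN compares as "less than", the same convention the x87 rule above spells out with "unordered treat as LT".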
  8.2317  
  8.2318  // Spill to obtain 24-bit precision
  8.2319 -instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
  8.2320 +instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  8.2321    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2322    match(Set dst (SubF src1 src2));
  8.2323  
  8.2324    format %{ "FSUB   $dst,$src1 - $src2" %}
  8.2325    opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  8.2326 -  ins_encode( Push_Reg_F(src1),
  8.2327 -              OpcReg_F(src2),
  8.2328 -              Pop_Mem_F(dst) );
  8.2329 +  ins_encode( Push_Reg_FPR(src1),
  8.2330 +              OpcReg_FPR(src2),
  8.2331 +              Pop_Mem_FPR(dst) );
  8.2332    ins_pipe( fpu_mem_reg_reg );
  8.2333  %}
  8.2334  //
  8.2335  // This instruction does not round to 24-bits
  8.2336 -instruct subF_reg(regF dst, regF src) %{
  8.2337 +instruct subFPR_reg(regFPR dst, regFPR src) %{
  8.2338    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2339    match(Set dst (SubF dst src));
  8.2340  
  8.2341    format %{ "FSUB   $dst,$src" %}
  8.2342    opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  8.2343 -  ins_encode( Push_Reg_F(src),
  8.2344 +  ins_encode( Push_Reg_FPR(src),
  8.2345                OpcP, RegOpc(dst) );
  8.2346    ins_pipe( fpu_reg_reg );
  8.2347  %}
  8.2348  
  8.2349  // Spill to obtain 24-bit precision
  8.2350 -instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
  8.2351 +instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  8.2352    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2353    match(Set dst (AddF src1 src2));
  8.2354  
  8.2355    format %{ "FADD   $dst,$src1,$src2" %}
  8.2356    opcode(0xD8, 0x0); /* D8 C0+i */
  8.2357 -  ins_encode( Push_Reg_F(src2),
  8.2358 -              OpcReg_F(src1),
  8.2359 -              Pop_Mem_F(dst) );
  8.2360 +  ins_encode( Push_Reg_FPR(src2),
  8.2361 +              OpcReg_FPR(src1),
  8.2362 +              Pop_Mem_FPR(dst) );
  8.2363    ins_pipe( fpu_mem_reg_reg );
  8.2364  %}
  8.2365  //
  8.2366  // This instruction does not round to 24-bits
  8.2367 -instruct addF_reg(regF dst, regF src) %{
  8.2368 +instruct addFPR_reg(regFPR dst, regFPR src) %{
  8.2369    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2370    match(Set dst (AddF dst src));
  8.2371  
  8.2372    format %{ "FLD    $src\n\t"
  8.2373              "FADDp  $dst,ST" %}
  8.2374    opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  8.2375 -  ins_encode( Push_Reg_F(src),
  8.2376 +  ins_encode( Push_Reg_FPR(src),
  8.2377                OpcP, RegOpc(dst) );
  8.2378    ins_pipe( fpu_reg_reg );
  8.2379  %}
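The *24 rules in this stretch exist for strict float semantics without SSE: when Compile::current()->select_24_bit_instr() is true, every intermediate must be rounded to float precision, and the only way off the 80-bit x87 stack that does that is a 32-bit store. Hence these rules target a stackSlotF and let the store do the rounding:

  //   FLD    $src1
  //   FADD   $src2          ; full-precision intermediate in ST0
  //   FSTP_S $dst           ; 32-bit store to the stack slot rounds
  //                         ; the mantissa to 24 bits

The non-24 twins keep the result in a register and are selected only when that rounding is not required.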
  8.2380  
  8.2381 -// Add two single precision floating point values in xmm
  8.2382 -instruct addX_reg(regX dst, regX src) %{
  8.2383 -  predicate(UseSSE>=1);
  8.2384 -  match(Set dst (AddF dst src));
  8.2385 -  format %{ "ADDSS  $dst,$src" %}
  8.2386 -  ins_encode %{
  8.2387 -    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  8.2388 -  %}
  8.2389 -  ins_pipe( pipe_slow );
  8.2390 -%}
  8.2391 -
  8.2392 -instruct addX_imm(regX dst, immXF con) %{
  8.2393 -  predicate(UseSSE>=1);
  8.2394 -  match(Set dst (AddF dst con));
  8.2395 -  format %{ "ADDSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  8.2396 -  ins_encode %{
  8.2397 -    __ addss($dst$$XMMRegister, $constantaddress($con));
  8.2398 -  %}
  8.2399 -  ins_pipe(pipe_slow);
  8.2400 -%}
  8.2401 -
  8.2402 -instruct addX_mem(regX dst, memory mem) %{
  8.2403 -  predicate(UseSSE>=1);
  8.2404 -  match(Set dst (AddF dst (LoadF mem)));
  8.2405 -  format %{ "ADDSS  $dst,$mem" %}
  8.2406 -  ins_encode %{
  8.2407 -    __ addss($dst$$XMMRegister, $mem$$Address);
  8.2408 -  %}
  8.2409 -  ins_pipe( pipe_slow );
  8.2410 -%}
  8.2411 -
  8.2412 -// Subtract two single precision floating point values in xmm
  8.2413 -instruct subX_reg(regX dst, regX src) %{
  8.2414 -  predicate(UseSSE>=1);
  8.2415 -  match(Set dst (SubF dst src));
  8.2416 -  ins_cost(150);
  8.2417 -  format %{ "SUBSS  $dst,$src" %}
  8.2418 -  ins_encode %{
  8.2419 -    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  8.2420 -  %}
  8.2421 -  ins_pipe( pipe_slow );
  8.2422 -%}
  8.2423 -
  8.2424 -instruct subX_imm(regX dst, immXF con) %{
  8.2425 -  predicate(UseSSE>=1);
  8.2426 -  match(Set dst (SubF dst con));
  8.2427 -  ins_cost(150);
  8.2428 -  format %{ "SUBSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  8.2429 -  ins_encode %{
  8.2430 -    __ subss($dst$$XMMRegister, $constantaddress($con));
  8.2431 -  %}
  8.2432 -  ins_pipe(pipe_slow);
  8.2433 -%}
  8.2434 -
  8.2435 -instruct subX_mem(regX dst, memory mem) %{
  8.2436 -  predicate(UseSSE>=1);
  8.2437 -  match(Set dst (SubF dst (LoadF mem)));
  8.2438 -  ins_cost(150);
  8.2439 -  format %{ "SUBSS  $dst,$mem" %}
  8.2440 -  ins_encode %{
  8.2441 -    __ subss($dst$$XMMRegister, $mem$$Address);
  8.2442 -  %}
  8.2443 -  ins_pipe( pipe_slow );
  8.2444 -%}
  8.2445 -
  8.2446 -// Multiply two single precision floating point values in xmm
  8.2447 -instruct mulX_reg(regX dst, regX src) %{
  8.2448 -  predicate(UseSSE>=1);
  8.2449 -  match(Set dst (MulF dst src));
  8.2450 -  format %{ "MULSS  $dst,$src" %}
  8.2451 -  ins_encode %{
  8.2452 -    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  8.2453 -  %}
  8.2454 -  ins_pipe( pipe_slow );
  8.2455 -%}
  8.2456 -
  8.2457 -instruct mulX_imm(regX dst, immXF con) %{
  8.2458 -  predicate(UseSSE>=1);
  8.2459 -  match(Set dst (MulF dst con));
  8.2460 -  format %{ "MULSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  8.2461 -  ins_encode %{
  8.2462 -    __ mulss($dst$$XMMRegister, $constantaddress($con));
  8.2463 -  %}
  8.2464 -  ins_pipe(pipe_slow);
  8.2465 -%}
  8.2466 -
  8.2467 -instruct mulX_mem(regX dst, memory mem) %{
  8.2468 -  predicate(UseSSE>=1);
  8.2469 -  match(Set dst (MulF dst (LoadF mem)));
  8.2470 -  format %{ "MULSS  $dst,$mem" %}
  8.2471 -  ins_encode %{
  8.2472 -    __ mulss($dst$$XMMRegister, $mem$$Address);
  8.2473 -  %}
  8.2474 -  ins_pipe( pipe_slow );
  8.2475 -%}
  8.2476 -
  8.2477 -// Divide two single precision floating point values in xmm
  8.2478 -instruct divX_reg(regX dst, regX src) %{
  8.2479 -  predicate(UseSSE>=1);
  8.2480 -  match(Set dst (DivF dst src));
  8.2481 -  format %{ "DIVSS  $dst,$src" %}
  8.2482 -  ins_encode %{
  8.2483 -    __ divss($dst$$XMMRegister, $src$$XMMRegister);
  8.2484 -  %}
  8.2485 -  ins_pipe( pipe_slow );
  8.2486 -%}
  8.2487 -
  8.2488 -instruct divX_imm(regX dst, immXF con) %{
  8.2489 -  predicate(UseSSE>=1);
  8.2490 -  match(Set dst (DivF dst con));
  8.2491 -  format %{ "DIVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  8.2492 -  ins_encode %{
  8.2493 -    __ divss($dst$$XMMRegister, $constantaddress($con));
  8.2494 -  %}
  8.2495 -  ins_pipe(pipe_slow);
  8.2496 -%}
  8.2497 -
  8.2498 -instruct divX_mem(regX dst, memory mem) %{
  8.2499 -  predicate(UseSSE>=1);
  8.2500 -  match(Set dst (DivF dst (LoadF mem)));
  8.2501 -  format %{ "DIVSS  $dst,$mem" %}
  8.2502 -  ins_encode %{
  8.2503 -    __ divss($dst$$XMMRegister, $mem$$Address);
  8.2504 -  %}
  8.2505 -  ins_pipe( pipe_slow );
  8.2506 -%}
  8.2507 -
  8.2508 -// Get the square root of a single precision floating point values in xmm
  8.2509 -instruct sqrtX_reg(regX dst, regX src) %{
  8.2510 -  predicate(UseSSE>=1);
  8.2511 -  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  8.2512 -  ins_cost(150);
  8.2513 -  format %{ "SQRTSS $dst,$src" %}
  8.2514 -  ins_encode %{
  8.2515 -    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  8.2516 -  %}
  8.2517 -  ins_pipe( pipe_slow );
  8.2518 -%}
  8.2519 -
  8.2520 -instruct sqrtX_mem(regX dst, memory mem) %{
  8.2521 -  predicate(UseSSE>=1);
  8.2522 -  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
  8.2523 -  ins_cost(150);
  8.2524 -  format %{ "SQRTSS $dst,$mem" %}
  8.2525 -  ins_encode %{
  8.2526 -    __ sqrtss($dst$$XMMRegister, $mem$$Address);
  8.2527 -  %}
  8.2528 -  ins_pipe( pipe_slow );
  8.2529 -%}
  8.2530 -
  8.2531 -// Get the square root of a double precision floating point values in xmm
  8.2532 -instruct sqrtXD_reg(regXD dst, regXD src) %{
  8.2533 -  predicate(UseSSE>=2);
  8.2534 -  match(Set dst (SqrtD src));
  8.2535 -  ins_cost(150);
  8.2536 -  format %{ "SQRTSD $dst,$src" %}
  8.2537 -  ins_encode %{
  8.2538 -    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  8.2539 -  %}
  8.2540 -  ins_pipe( pipe_slow );
  8.2541 -%}
  8.2542 -
  8.2543 -instruct sqrtXD_mem(regXD dst, memory mem) %{
  8.2544 -  predicate(UseSSE>=2);
  8.2545 -  match(Set dst (SqrtD (LoadD mem)));
  8.2546 -  ins_cost(150);
  8.2547 -  format %{ "SQRTSD $dst,$mem" %}
  8.2548 -  ins_encode %{
  8.2549 -    __ sqrtsd($dst$$XMMRegister, $mem$$Address);
  8.2550 -  %}
  8.2551 -  ins_pipe( pipe_slow );
  8.2552 -%}
  8.2553 -
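As with the double rules earlier, every single-precision SSE rule deleted above (addX/subX/mulX/divX plus the sqrt patterns) moves to the shared x86.ad and can pick up a 3-operand AVX twin there. A sketch of the float flavor, with the instruct name again an assumption and vdivss among the 3-operand assembler methods this changeset introduces:

instruct vdivF_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));
  format %{ "VDIVSS $dst,$src1,$src2" %}
  ins_encode %{
    // non-destructive divide: src1 / src2 into a possibly distinct dst
    __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

One detail worth noticing in the removed sqrtX_reg: Java defines Math.sqrt only on double, so a float sqrt reaches the matcher as ConvD2F(SqrtD(ConvF2D x)); the rule collapses the whole expression into a single SQRTSS, which is exact because the double-precision square root of a float rounds correctly back to float.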
  8.2554 -instruct absF_reg(regFPR1 dst, regFPR1 src) %{
  8.2555 +instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  8.2556    predicate(UseSSE==0);
  8.2557    match(Set dst (AbsF src));
  8.2558    ins_cost(100);
  8.2559 @@ -11000,19 +10652,7 @@
  8.2560    ins_pipe( fpu_reg_reg );
  8.2561  %}
  8.2562  
  8.2563 -instruct absX_reg(regX dst ) %{
  8.2564 -  predicate(UseSSE>=1);
  8.2565 -  match(Set dst (AbsF dst));
  8.2566 -  ins_cost(150);
  8.2567 -  format %{ "ANDPS  $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
  8.2568 -  ins_encode %{
  8.2569 -    __ andps($dst$$XMMRegister,
  8.2570 -             ExternalAddress((address)float_signmask_pool));
  8.2571 -  %}
  8.2572 -  ins_pipe( pipe_slow );
  8.2573 -%}
  8.2574 -
  8.2575 -instruct negF_reg(regFPR1 dst, regFPR1 src) %{
  8.2576 +instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  8.2577    predicate(UseSSE==0);
  8.2578    match(Set dst (NegF src));
  8.2579    ins_cost(100);
  8.2580 @@ -11022,21 +10662,9 @@
  8.2581    ins_pipe( fpu_reg_reg );
  8.2582  %}
  8.2583  
  8.2584 -instruct negX_reg( regX dst ) %{
  8.2585 -  predicate(UseSSE>=1);
  8.2586 -  match(Set dst (NegF dst));
  8.2587 -  ins_cost(150);
  8.2588 -  format %{ "XORPS  $dst,[0x80000000]\t# CHS F by sign flipping" %}
  8.2589 -  ins_encode %{
  8.2590 -    __ xorps($dst$$XMMRegister,
  8.2591 -             ExternalAddress((address)float_signflip_pool));
  8.2592 -  %}
  8.2593 -  ins_pipe( pipe_slow );
  8.2594 -%}
  8.2595 -
  8.2596 -// Cisc-alternate to addF_reg
  8.2597 +// Cisc-alternate to addFPR_reg
  8.2598  // Spill to obtain 24-bit precision
  8.2599 -instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  8.2600 +instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  8.2601    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2602    match(Set dst (AddF src1 (LoadF src2)));
  8.2603  
  8.2604 @@ -11045,14 +10673,14 @@
  8.2605              "FSTP_S $dst" %}
  8.2606    opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  8.2607    ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
  8.2608 -              OpcReg_F(src1),
  8.2609 -              Pop_Mem_F(dst) );
  8.2610 +              OpcReg_FPR(src1),
  8.2611 +              Pop_Mem_FPR(dst) );
  8.2612    ins_pipe( fpu_mem_reg_mem );
  8.2613  %}
  8.2614  //
  8.2615 -// Cisc-alternate to addF_reg
  8.2616 +// Cisc-alternate to addFPR_reg
  8.2617  // This instruction does not round to 24-bits
  8.2618 -instruct addF_reg_mem(regF dst, memory src) %{
  8.2619 +instruct addFPR_reg_mem(regFPR dst, memory src) %{
  8.2620    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2621    match(Set dst (AddF dst (LoadF src)));
  8.2622  
  8.2623 @@ -11065,21 +10693,21 @@
  8.2624  
  8.2625  // // Following two instructions for _222_mpegaudio
  8.2626  // Spill to obtain 24-bit precision
  8.2627 -instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
  8.2628 +instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  8.2629    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2630    match(Set dst (AddF src1 src2));
  8.2631  
  8.2632    format %{ "FADD   $dst,$src1,$src2" %}
  8.2633    opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  8.2634    ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
  8.2635 -              OpcReg_F(src2),
  8.2636 -              Pop_Mem_F(dst) );
  8.2637 +              OpcReg_FPR(src2),
  8.2638 +              Pop_Mem_FPR(dst) );
  8.2639    ins_pipe( fpu_mem_reg_mem );
  8.2640  %}
  8.2641  
  8.2642  // Cisc-spill variant
  8.2643  // Spill to obtain 24-bit precision
  8.2644 -instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  8.2645 +instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  8.2646    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2647    match(Set dst (AddF src1 (LoadF src2)));
  8.2648  
  8.2649 @@ -11088,12 +10716,12 @@
  8.2650    ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
  8.2651                set_instruction_start,
  8.2652                OpcP, RMopc_Mem(secondary,src1),
  8.2653 -              Pop_Mem_F(dst) );
  8.2654 +              Pop_Mem_FPR(dst) );
  8.2655    ins_pipe( fpu_mem_mem_mem );
  8.2656  %}
  8.2657  
  8.2658  // Spill to obtain 24-bit precision
  8.2659 -instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  8.2660 +instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  8.2661    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2662    match(Set dst (AddF src1 src2));
  8.2663  
  8.2664 @@ -11102,13 +10730,13 @@
  8.2665    ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
  8.2666                set_instruction_start,
  8.2667                OpcP, RMopc_Mem(secondary,src1),
  8.2668 -              Pop_Mem_F(dst) );
  8.2669 +              Pop_Mem_FPR(dst) );
  8.2670    ins_pipe( fpu_mem_mem_mem );
  8.2671  %}
  8.2672  
  8.2673  
  8.2674  // Spill to obtain 24-bit precision
  8.2675 -instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
  8.2676 +instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  8.2677    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2678    match(Set dst (AddF src con));
  8.2679    format %{ "FLD    $src\n\t"
  8.2680 @@ -11123,7 +10751,7 @@
  8.2681  %}
  8.2682  //
  8.2683  // This instruction does not round to 24-bits
  8.2684 -instruct addF_reg_imm(regF dst, regF src, immF con) %{
  8.2685 +instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  8.2686    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2687    match(Set dst (AddF src con));
  8.2688    format %{ "FLD    $src\n\t"
  8.2689 @@ -11138,7 +10766,7 @@
  8.2690  %}
  8.2691  
  8.2692  // Spill to obtain 24-bit precision
  8.2693 -instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
  8.2694 +instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  8.2695    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2696    match(Set dst (MulF src1 src2));
  8.2697  
  8.2698 @@ -11146,14 +10774,14 @@
  8.2699              "FMUL   $src2\n\t"
  8.2700              "FSTP_S $dst"  %}
  8.2701    opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  8.2702 -  ins_encode( Push_Reg_F(src1),
  8.2703 -              OpcReg_F(src2),
  8.2704 -              Pop_Mem_F(dst) );
  8.2705 +  ins_encode( Push_Reg_FPR(src1),
  8.2706 +              OpcReg_FPR(src2),
  8.2707 +              Pop_Mem_FPR(dst) );
  8.2708    ins_pipe( fpu_mem_reg_reg );
  8.2709  %}
  8.2710  //
  8.2711  // This instruction does not round to 24-bits
  8.2712 -instruct mulF_reg(regF dst, regF src1, regF src2) %{
  8.2713 +instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  8.2714    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2715    match(Set dst (MulF src1 src2));
  8.2716  
  8.2717 @@ -11161,16 +10789,16 @@
  8.2718              "FMUL   $src2\n\t"
  8.2719              "FSTP_S $dst"  %}
  8.2720    opcode(0xD8, 0x1); /* D8 C8+i */
  8.2721 -  ins_encode( Push_Reg_F(src2),
  8.2722 -              OpcReg_F(src1),
  8.2723 -              Pop_Reg_F(dst) );
  8.2724 +  ins_encode( Push_Reg_FPR(src2),
  8.2725 +              OpcReg_FPR(src1),
  8.2726 +              Pop_Reg_FPR(dst) );
  8.2727    ins_pipe( fpu_reg_reg_reg );
  8.2728  %}
  8.2729  
  8.2730  
  8.2731  // Spill to obtain 24-bit precision
  8.2732  // Cisc-alternate to reg-reg multiply
  8.2733 -instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  8.2734 +instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  8.2735    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2736    match(Set dst (MulF src1 (LoadF src2)));
  8.2737  
  8.2738 @@ -11179,27 +10807,27 @@
  8.2739              "FSTP_S $dst"  %}
  8.2740    opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
  8.2741    ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
  8.2742 -              OpcReg_F(src1),
  8.2743 -              Pop_Mem_F(dst) );
  8.2744 +              OpcReg_FPR(src1),
  8.2745 +              Pop_Mem_FPR(dst) );
  8.2746    ins_pipe( fpu_mem_reg_mem );
  8.2747  %}
  8.2748  //
  8.2749  // This instruction does not round to 24-bits
  8.2750  // Cisc-alternate to reg-reg multiply
  8.2751 -instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  8.2752 +instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  8.2753    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2754    match(Set dst (MulF src1 (LoadF src2)));
  8.2755  
  8.2756    format %{ "FMUL   $dst,$src1,$src2" %}
  8.2757    opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
  8.2758    ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
  8.2759 -              OpcReg_F(src1),
  8.2760 -              Pop_Reg_F(dst) );
  8.2761 +              OpcReg_FPR(src1),
  8.2762 +              Pop_Reg_FPR(dst) );
  8.2763    ins_pipe( fpu_reg_reg_mem );
  8.2764  %}
  8.2765  
  8.2766  // Spill to obtain 24-bit precision
  8.2767 -instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  8.2768 +instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  8.2769    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2770    match(Set dst (MulF src1 src2));
  8.2771  
  8.2772 @@ -11208,12 +10836,12 @@
  8.2773    ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
  8.2774                set_instruction_start,
  8.2775                OpcP, RMopc_Mem(secondary,src1),
  8.2776 -              Pop_Mem_F(dst) );
  8.2777 +              Pop_Mem_FPR(dst) );
  8.2778    ins_pipe( fpu_mem_mem_mem );
  8.2779  %}
  8.2780  
  8.2781  // Spill to obtain 24-bit precision
  8.2782 -instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
  8.2783 +instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  8.2784    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2785    match(Set dst (MulF src con));
  8.2786  
  8.2787 @@ -11229,7 +10857,7 @@
  8.2788  %}
  8.2789  //
  8.2790  // This instruction does not round to 24-bits
  8.2791 -instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  8.2792 +instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  8.2793    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2794    match(Set dst (MulF src con));
  8.2795  
  8.2796 @@ -11246,9 +10874,9 @@
  8.2797  
  8.2798  
  8.2799  //
  8.2800 -// MACRO1 -- subsume unshared load into mulF
  8.2801 +// MACRO1 -- subsume unshared load into mulFPR
  8.2802  // This instruction does not round to 24-bits
  8.2803 -instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
  8.2804 +instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  8.2805    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2806    match(Set dst (MulF (LoadF mem1) src));
  8.2807  
  8.2808 @@ -11257,36 +10885,36 @@
  8.2809              "FSTP   $dst" %}
  8.2810    opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
  8.2811    ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
  8.2812 -              OpcReg_F(src),
  8.2813 -              Pop_Reg_F(dst) );
  8.2814 +              OpcReg_FPR(src),
  8.2815 +              Pop_Reg_FPR(dst) );
  8.2816    ins_pipe( fpu_reg_reg_mem );
  8.2817  %}
  8.2818  //
  8.2819 -// MACRO2 -- addF a mulF which subsumed an unshared load
  8.2820 +// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
  8.2821  // This instruction does not round to 24-bits
  8.2822 -instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
  8.2823 +instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  8.2824    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2825    match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  8.2826    ins_cost(95);
  8.2827  
  8.2828    format %{ "FLD    $mem1     ===MACRO2===\n\t"
  8.2829 -            "FMUL   ST,$src1  subsume mulF left load\n\t"
  8.2830 +            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
  8.2831              "FADD   ST,$src2\n\t"
  8.2832              "FSTP   $dst" %}
  8.2833    opcode(0xD9); /* LoadF D9 /0 */
  8.2834    ins_encode( OpcP, RMopc_Mem(0x00,mem1),
  8.2835                FMul_ST_reg(src1),
  8.2836                FAdd_ST_reg(src2),
  8.2837 -              Pop_Reg_F(dst) );
  8.2838 +              Pop_Reg_FPR(dst) );
  8.2839    ins_pipe( fpu_reg_mem_reg_reg );
  8.2840  %}
  8.2841  
  8.2842 -// MACRO3 -- addF a mulF
  8.2843 +// MACRO3 -- addFPR a mulFPR
  8.2844  // This instruction does not round to 24-bits.  It is a '2-address'
  8.2845  // instruction in that the result goes back to src2.  This eliminates
  8.2846  // a move from the macro; possibly the register allocator will have
  8.2847  // to add it back (and maybe not).
  8.2848 -instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
  8.2849 +instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  8.2850    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2851    match(Set src2 (AddF (MulF src0 src1) src2));
  8.2852  
  8.2853 @@ -11294,15 +10922,15 @@
  8.2854              "FMUL   ST,$src1\n\t"
  8.2855              "FADDP  $src2,ST" %}
  8.2856    opcode(0xD9); /* LoadF D9 /0 */
  8.2857 -  ins_encode( Push_Reg_F(src0),
  8.2858 +  ins_encode( Push_Reg_FPR(src0),
  8.2859                FMul_ST_reg(src1),
  8.2860                FAddP_reg_ST(src2) );
  8.2861    ins_pipe( fpu_reg_reg_reg );
  8.2862  %}
  8.2863  
  8.2864 -// MACRO4 -- divF subF
  8.2865 +// MACRO4 -- divFPR subFPR
  8.2866  // This instruction does not round to 24-bits
  8.2867 -instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
  8.2868 +instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  8.2869    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2870    match(Set dst (DivF (SubF src2 src1) src3));
  8.2871  
  8.2872 @@ -11311,67 +10939,67 @@
  8.2873              "FDIV   ST,$src3\n\t"
  8.2874              "FSTP  $dst" %}
  8.2875    opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  8.2876 -  ins_encode( Push_Reg_F(src2),
  8.2877 -              subF_divF_encode(src1,src3),
  8.2878 -              Pop_Reg_F(dst) );
  8.2879 +  ins_encode( Push_Reg_FPR(src2),
  8.2880 +              subFPR_divFPR_encode(src1,src3),
  8.2881 +              Pop_Reg_FPR(dst) );
  8.2882    ins_pipe( fpu_reg_reg_reg_reg );
  8.2883  %}
  8.2884  
  8.2885  // Spill to obtain 24-bit precision
  8.2886 -instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
  8.2887 +instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  8.2888    predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2889    match(Set dst (DivF src1 src2));
  8.2890  
  8.2891    format %{ "FDIV   $dst,$src1,$src2" %}
  8.2892    opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  8.2893 -  ins_encode( Push_Reg_F(src1),
  8.2894 -              OpcReg_F(src2),
  8.2895 -              Pop_Mem_F(dst) );
  8.2896 +  ins_encode( Push_Reg_FPR(src1),
  8.2897 +              OpcReg_FPR(src2),
  8.2898 +              Pop_Mem_FPR(dst) );
  8.2899    ins_pipe( fpu_mem_reg_reg );
  8.2900  %}
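
On the "Spill to obtain 24-bit precision" rules: storing the x87 result to a
32-bit stack slot (the stackSlotF destination) rounds both the significand and
the exponent to IEEE single, which the FPU's precision-control bits alone
cannot do. A minimal demonstration of the effect (illustrative C++, not part
of this patch):

    #include <cstdio>
    int main() {
      double wide = 1e-300;         // exponent far below float's range
      float spilled = (float)wide;  // the "store back out" effect: flushes to 0
      std::printf("%g -> %g\n", wide, (double)spilled);
      return 0;
    }
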
  8.2901  //
  8.2902  // This instruction does not round to 24-bits
  8.2903 -instruct divF_reg(regF dst, regF src) %{
  8.2904 +instruct divFPR_reg(regFPR dst, regFPR src) %{
  8.2905    predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2906    match(Set dst (DivF dst src));
  8.2907  
  8.2908    format %{ "FDIV   $dst,$src" %}
  8.2909    opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  8.2910 -  ins_encode( Push_Reg_F(src),
  8.2911 +  ins_encode( Push_Reg_FPR(src),
  8.2912                OpcP, RegOpc(dst) );
  8.2913    ins_pipe( fpu_reg_reg );
  8.2914  %}
  8.2915  
  8.2916  
  8.2917  // Spill to obtain 24-bit precision
  8.2918 -instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  8.2919 +instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  8.2920    predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.2921    match(Set dst (ModF src1 src2));
  8.2922 -  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
  8.2923 +  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
  8.2924  
  8.2925    format %{ "FMOD   $dst,$src1,$src2" %}
  8.2926 -  ins_encode( Push_Reg_Mod_D(src1, src2),
  8.2927 -              emitModD(),
  8.2928 -              Push_Result_Mod_D(src2),
  8.2929 -              Pop_Mem_F(dst));
  8.2930 +  ins_encode( Push_Reg_Mod_DPR(src1, src2),
  8.2931 +              emitModDPR(),
  8.2932 +              Push_Result_Mod_DPR(src2),
  8.2933 +              Pop_Mem_FPR(dst));
  8.2934    ins_pipe( pipe_slow );
  8.2935  %}
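
Why the ModF rules kill EAX and EFLAGS: x87 FPREM computes only a partial
remainder, so the emitted stub loops, reading the FPU status word through AX
and retesting until the C2 ("reduction incomplete") flag clears. A hedged
sketch of that loop's shape (assembly as comments; the label is an assumption,
not taken from emitModDPR()):

    // loop:
    //   FPREM              ; partial remainder of ST0 by ST1
    //   FNSTSW AX          ; FPU status word -> AX   (this is the EAX kill)
    //   SAHF               ; AH -> EFLAGS            (this is the cr kill)
    //   JP   loop          ; PF mirrors C2 here: remainder not yet final
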
  8.2936  //
  8.2937  // This instruction does not round to 24-bits
  8.2938 -instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
  8.2939 +instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  8.2940    predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.2941    match(Set dst (ModF dst src));
  8.2942 -  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
  8.2943 +  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
  8.2944  
  8.2945    format %{ "FMOD   $dst,$src" %}
  8.2946 -  ins_encode(Push_Reg_Mod_D(dst, src),
  8.2947 -              emitModD(),
  8.2948 -              Push_Result_Mod_D(src),
  8.2949 -              Pop_Reg_F(dst));
  8.2950 -  ins_pipe( pipe_slow );
  8.2951 -%}
  8.2952 -
  8.2953 -instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
  8.2954 +  ins_encode(Push_Reg_Mod_DPR(dst, src),
  8.2955 +              emitModDPR(),
  8.2956 +              Push_Result_Mod_DPR(src),
  8.2957 +              Pop_Reg_FPR(dst));
  8.2958 +  ins_pipe( pipe_slow );
  8.2959 +%}
  8.2960 +
  8.2961 +instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  8.2962    predicate(UseSSE>=1);
  8.2963    match(Set dst (ModF src0 src1));
  8.2964    effect(KILL rax, KILL cr);
  8.2965 @@ -11391,7 +11019,7 @@
  8.2966            "\tFSTP   ST0\t # Restore FPU Stack"
  8.2967      %}
  8.2968    ins_cost(250);
  8.2969 -  ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
  8.2970 +  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  8.2971    ins_pipe( pipe_slow );
  8.2972  %}
  8.2973  
  8.2974 @@ -11399,26 +11027,26 @@
  8.2975  //----------Arithmetic Conversion Instructions---------------------------------
  8.2976  // The conversion operations are all alpha sorted.  Please keep it that way!
  8.2977  
  8.2978 -instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
  8.2979 +instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  8.2980    predicate(UseSSE==0);
  8.2981    match(Set dst (RoundFloat src));
  8.2982    ins_cost(125);
  8.2983    format %{ "FST_S  $dst,$src\t# F-round" %}
  8.2984 -  ins_encode( Pop_Mem_Reg_F(dst, src) );
  8.2985 +  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  8.2986    ins_pipe( fpu_mem_reg );
  8.2987  %}
  8.2988  
  8.2989 -instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
  8.2990 +instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  8.2991    predicate(UseSSE<=1);
  8.2992    match(Set dst (RoundDouble src));
  8.2993    ins_cost(125);
  8.2994    format %{ "FST_D  $dst,$src\t# D-round" %}
  8.2995 -  ins_encode( Pop_Mem_Reg_D(dst, src) );
  8.2996 +  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  8.2997    ins_pipe( fpu_mem_reg );
  8.2998  %}
  8.2999  
  8.3000  // Force rounding to 24-bit precision and 6-bit exponent
  8.3001 -instruct convD2F_reg(stackSlotF dst, regD src) %{
  8.3002 +instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  8.3003    predicate(UseSSE==0);
  8.3004    match(Set dst (ConvD2F src));
  8.3005    format %{ "FST_S  $dst,$src\t# F-round" %}
  8.3006 @@ -11428,7 +11056,7 @@
  8.3007  %}
  8.3008  
  8.3009  // Force rounding to 24-bit precision and 6-bit exponent
  8.3010 -instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
  8.3011 +instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  8.3012    predicate(UseSSE==1);
  8.3013    match(Set dst (ConvD2F src));
  8.3014    effect( KILL cr );
  8.3015 @@ -11451,7 +11079,7 @@
  8.3016  %}
  8.3017  
  8.3018  // Force rounding double precision to single precision
  8.3019 -instruct convXD2X_reg(regX dst, regXD src) %{
  8.3020 +instruct convD2F_reg(regF dst, regD src) %{
  8.3021    predicate(UseSSE>=2);
  8.3022    match(Set dst (ConvD2F src));
  8.3023    format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  8.3024 @@ -11461,15 +11089,15 @@
  8.3025    ins_pipe( pipe_slow );
  8.3026  %}
  8.3027  
  8.3028 -instruct convF2D_reg_reg(regD dst, regF src) %{
  8.3029 +instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  8.3030    predicate(UseSSE==0);
  8.3031    match(Set dst (ConvF2D src));
  8.3032    format %{ "FST_S  $dst,$src\t# D-round" %}
  8.3033 -  ins_encode( Pop_Reg_Reg_D(dst, src));
  8.3034 +  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  8.3035    ins_pipe( fpu_reg_reg );
  8.3036  %}
  8.3037  
  8.3038 -instruct convF2D_reg(stackSlotD dst, regF src) %{
  8.3039 +instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  8.3040    predicate(UseSSE==1);
  8.3041    match(Set dst (ConvF2D src));
  8.3042    format %{ "FST_D  $dst,$src\t# D-round" %}
  8.3043 @@ -11478,7 +11106,7 @@
  8.3044    %}
  8.3045  %}
  8.3046  
  8.3047 -instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
  8.3048 +instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  8.3049    predicate(UseSSE==1);
  8.3050    match(Set dst (ConvF2D src));
  8.3051    effect( KILL cr );
  8.3052 @@ -11497,7 +11125,7 @@
  8.3053    ins_pipe( pipe_slow );
  8.3054  %}
  8.3055  
  8.3056 -instruct convX2XD_reg(regXD dst, regX src) %{
  8.3057 +instruct convF2D_reg(regD dst, regF src) %{
  8.3058    predicate(UseSSE>=2);
  8.3059    match(Set dst (ConvF2D src));
  8.3060    format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  8.3061 @@ -11508,7 +11136,7 @@
  8.3062  %}
  8.3063  
  8.3064  // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
  8.3065 -instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  8.3066 +instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  8.3067    predicate(UseSSE<=1);
  8.3068    match(Set dst (ConvD2I src));
  8.3069    effect( KILL tmp, KILL cr );
  8.3070 @@ -11523,12 +11151,12 @@
  8.3071              "FLD_D  $src\n\t"
  8.3072              "CALL   d2i_wrapper\n"
  8.3073        "fast:" %}
  8.3074 -  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
  8.3075 +  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  8.3076    ins_pipe( pipe_slow );
  8.3077  %}
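
The fast/slow split above exists because the hardware conversion yields the
sentinel 0x80000000 for NaN and out-of-range inputs, while Java defines the
results sketched below. A hedged reference for the semantics d2i_wrapper must
produce (illustrative C++, not the wrapper's actual code):

    #include <cstdint>
    static int32_t java_d2i(double d) {
      if (d != d)            return 0;          // NaN converts to zero
      if (d >= 2147483648.0) return INT32_MAX;  // saturate on overflow
      if (d < -2147483648.0) return INT32_MIN;
      return (int32_t)d;                        // in range: truncate toward zero
    }
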
  8.3078  
  8.3079  // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
  8.3080 -instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
  8.3081 +instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  8.3082    predicate(UseSSE>=2);
  8.3083    match(Set dst (ConvD2I src));
  8.3084    effect( KILL tmp, KILL cr );
  8.3085 @@ -11556,7 +11184,7 @@
  8.3086    ins_pipe( pipe_slow );
  8.3087  %}
  8.3088  
  8.3089 -instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  8.3090 +instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  8.3091    predicate(UseSSE<=1);
  8.3092    match(Set dst (ConvD2L src));
  8.3093    effect( KILL cr );
  8.3094 @@ -11574,12 +11202,12 @@
  8.3095              "FLD    $src\n\t"
  8.3096              "CALL   d2l_wrapper\n"
  8.3097        "fast:" %}
  8.3098 -  ins_encode( Push_Reg_D(src),  D2L_encoding(src) );
  8.3099 +  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
  8.3100    ins_pipe( pipe_slow );
  8.3101  %}
  8.3102  
  8.3103  // XMM lacks a float/double->long conversion, so use the old FPU stack.
  8.3104 -instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
  8.3105 +instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  8.3106    predicate (UseSSE>=2);
  8.3107    match(Set dst (ConvD2L src));
  8.3108    effect( KILL cr );
  8.3109 @@ -11637,7 +11265,7 @@
  8.3110  // rounding mode to 'nearest'.  The hardware stores a flag value down
  8.3111  // if we would overflow or convert a NaN; we check for this
  8.3112  // and go to the slow path if needed.
  8.3113 -instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  8.3114 +instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  8.3115    predicate(UseSSE==0);
  8.3116    match(Set dst (ConvF2I src));
  8.3117    effect( KILL tmp, KILL cr );
  8.3118 @@ -11652,13 +11280,13 @@
  8.3119              "FLD    $src\n\t"
  8.3120              "CALL   d2i_wrapper\n"
  8.3121        "fast:" %}
  8.3122 -  // D2I_encoding works for F2I
  8.3123 -  ins_encode( Push_Reg_F(src), D2I_encoding(src) );
  8.3124 +  // DPR2I_encoding works for FPR2I
  8.3125 +  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  8.3126    ins_pipe( pipe_slow );
  8.3127  %}
  8.3128  
  8.3129  // Convert a float in xmm to an int reg.
  8.3130 -instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
  8.3131 +instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  8.3132    predicate(UseSSE>=1);
  8.3133    match(Set dst (ConvF2I src));
  8.3134    effect( KILL tmp, KILL cr );
  8.3135 @@ -11686,7 +11314,7 @@
  8.3136    ins_pipe( pipe_slow );
  8.3137  %}
  8.3138  
  8.3139 -instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  8.3140 +instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  8.3141    predicate(UseSSE==0);
  8.3142    match(Set dst (ConvF2L src));
  8.3143    effect( KILL cr );
  8.3144 @@ -11704,13 +11332,13 @@
  8.3145              "FLD    $src\n\t"
  8.3146              "CALL   d2l_wrapper\n"
  8.3147        "fast:" %}
  8.3148 -  // D2L_encoding works for F2L
  8.3149 -  ins_encode( Push_Reg_F(src), D2L_encoding(src) );
  8.3150 +  // DPR2L_encoding works for FPR2L
  8.3151 +  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  8.3152    ins_pipe( pipe_slow );
  8.3153  %}
  8.3154  
  8.3155  // XMM lacks a float/double->long conversion, so use the old FPU stack.
  8.3156 -instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
  8.3157 +instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  8.3158    predicate (UseSSE>=1);
  8.3159    match(Set dst (ConvF2L src));
  8.3160    effect( KILL cr );
  8.3161 @@ -11762,17 +11390,17 @@
  8.3162    ins_pipe( pipe_slow );
  8.3163  %}
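
On "XMM lacks a float/double->long conversion": in 32-bit mode there is no
CVTTSS2SI/CVTTSD2SI form targeting a 64-bit register, so even the UseSSE rules
spill the XMM value and finish on the x87 stack. A rough sketch of the emitted
sequence, assembled from the nearby format strings (comments only; details are
assumptions, not copied from the encodings):

    //   SUB    ESP, 8         ; scratch slot
    //   MOVSS  [ESP], XMM0    ; xmm -> memory
    //   FLD_S  [ESP]          ; memory -> x87 stack
    //   FLDCW  trunc          ; round-toward-zero, as Java requires
    //   FISTP_D [ESP]         ; 64-bit integer store
    //   MOV    EAX, [ESP]     ; low word   } reload EDX:EAX,
    //   MOV    EDX, [ESP+4]   ; high word  } then restore the control word
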
  8.3164  
  8.3165 -instruct convI2D_reg(regD dst, stackSlotI src) %{
  8.3166 +instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  8.3167    predicate( UseSSE<=1 );
  8.3168    match(Set dst (ConvI2D src));
  8.3169    format %{ "FILD   $src\n\t"
  8.3170              "FSTP   $dst" %}
  8.3171    opcode(0xDB, 0x0);  /* DB /0 */
  8.3172 -  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
  8.3173 +  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  8.3174    ins_pipe( fpu_reg_mem );
  8.3175  %}
  8.3176  
  8.3177 -instruct convI2XD_reg(regXD dst, eRegI src) %{
  8.3178 +instruct convI2D_reg(regD dst, eRegI src) %{
  8.3179    predicate( UseSSE>=2 && !UseXmmI2D );
  8.3180    match(Set dst (ConvI2D src));
  8.3181    format %{ "CVTSI2SD $dst,$src" %}
  8.3182 @@ -11782,7 +11410,7 @@
  8.3183    ins_pipe( pipe_slow );
  8.3184  %}
  8.3185  
  8.3186 -instruct convI2XD_mem(regXD dst, memory mem) %{
  8.3187 +instruct convI2D_mem(regD dst, memory mem) %{
  8.3188    predicate( UseSSE>=2 );
  8.3189    match(Set dst (ConvI2D (LoadI mem)));
  8.3190    format %{ "CVTSI2SD $dst,$mem" %}
  8.3191 @@ -11792,7 +11420,7 @@
  8.3192    ins_pipe( pipe_slow );
  8.3193  %}
  8.3194  
  8.3195 -instruct convXI2XD_reg(regXD dst, eRegI src)
  8.3196 +instruct convXI2D_reg(regD dst, eRegI src)
  8.3197  %{
  8.3198    predicate( UseSSE>=2 && UseXmmI2D );
  8.3199    match(Set dst (ConvI2D src));
  8.3200 @@ -11806,31 +11434,31 @@
  8.3201    ins_pipe(pipe_slow); // XXX
  8.3202  %}
  8.3203  
  8.3204 -instruct convI2D_mem(regD dst, memory mem) %{
  8.3205 +instruct convI2DPR_mem(regDPR dst, memory mem) %{
  8.3206    predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  8.3207    match(Set dst (ConvI2D (LoadI mem)));
  8.3208    format %{ "FILD   $mem\n\t"
  8.3209              "FSTP   $dst" %}
  8.3210    opcode(0xDB);      /* DB /0 */
  8.3211    ins_encode( OpcP, RMopc_Mem(0x00,mem),
  8.3212 -              Pop_Reg_D(dst));
  8.3213 +              Pop_Reg_DPR(dst));
  8.3214    ins_pipe( fpu_reg_mem );
  8.3215  %}
  8.3216  
  8.3217  // Convert a byte to a float; no rounding step needed.
  8.3218 -instruct conv24I2F_reg(regF dst, stackSlotI src) %{
  8.3219 +instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  8.3220    predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  8.3221    match(Set dst (ConvI2F src));
  8.3222    format %{ "FILD   $src\n\t"
  8.3223              "FSTP   $dst" %}
  8.3224  
  8.3225    opcode(0xDB, 0x0);  /* DB /0 */
  8.3226 -  ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
  8.3227 +  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  8.3228    ins_pipe( fpu_reg_mem );
  8.3229  %}
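
The predicate above recognizes conversions whose input was masked with 255:
such values fit in 8 bits and are therefore exactly representable in a 24-bit
significand, so no rounding pass is needed even in 24-bit mode. The matched
source shape, as an illustrative sketch (not from this patch):

    // ConvI2F of (AndI x 255): always exact in single precision.
    static float byte_to_float(int x) {
      return (float)(x & 255);
    }
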
  8.3230  
  8.3231  // In 24-bit mode, force exponent rounding by storing back out
  8.3232 -instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
  8.3233 +instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  8.3234    predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.3235    match(Set dst (ConvI2F src));
  8.3236    ins_cost(200);
  8.3237 @@ -11838,12 +11466,12 @@
  8.3238              "FSTP_S $dst" %}
  8.3239    opcode(0xDB, 0x0);  /* DB /0 */
  8.3240    ins_encode( Push_Mem_I(src),
  8.3241 -              Pop_Mem_F(dst));
  8.3242 +              Pop_Mem_FPR(dst));
  8.3243    ins_pipe( fpu_mem_mem );
  8.3244  %}
  8.3245  
  8.3246  // In 24-bit mode, force exponent rounding by storing back out
  8.3247 -instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
  8.3248 +instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  8.3249    predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  8.3250    match(Set dst (ConvI2F (LoadI mem)));
  8.3251    ins_cost(200);
  8.3252 @@ -11851,36 +11479,36 @@
  8.3253              "FSTP_S $dst" %}
  8.3254    opcode(0xDB);  /* DB /0 */
  8.3255    ins_encode( OpcP, RMopc_Mem(0x00,mem),
  8.3256 -              Pop_Mem_F(dst));
  8.3257 +              Pop_Mem_FPR(dst));
  8.3258    ins_pipe( fpu_mem_mem );
  8.3259  %}
  8.3260  
  8.3261  // This instruction does not round to 24-bits
  8.3262 -instruct convI2F_reg(regF dst, stackSlotI src) %{
  8.3263 +instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  8.3264    predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.3265    match(Set dst (ConvI2F src));
  8.3266    format %{ "FILD   $src\n\t"
  8.3267              "FSTP   $dst" %}
  8.3268    opcode(0xDB, 0x0);  /* DB /0 */
  8.3269    ins_encode( Push_Mem_I(src),
  8.3270 -              Pop_Reg_F(dst));
  8.3271 +              Pop_Reg_FPR(dst));
  8.3272    ins_pipe( fpu_reg_mem );
  8.3273  %}
  8.3274  
  8.3275  // This instruction does not round to 24-bits
  8.3276 -instruct convI2F_mem(regF dst, memory mem) %{
  8.3277 +instruct convI2FPR_mem(regFPR dst, memory mem) %{
  8.3278    predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  8.3279    match(Set dst (ConvI2F (LoadI mem)));
  8.3280    format %{ "FILD   $mem\n\t"
  8.3281              "FSTP   $dst" %}
  8.3282    opcode(0xDB);      /* DB /0 */
  8.3283    ins_encode( OpcP, RMopc_Mem(0x00,mem),
  8.3284 -              Pop_Reg_F(dst));
  8.3285 +              Pop_Reg_FPR(dst));
  8.3286    ins_pipe( fpu_reg_mem );
  8.3287  %}
  8.3288  
  8.3289  // Convert an int to a float in xmm; no rounding step needed.
  8.3290 -instruct convI2X_reg(regX dst, eRegI src) %{
  8.3291 +instruct convI2F_reg(regF dst, eRegI src) %{
  8.3292    predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  8.3293    match(Set dst (ConvI2F src));
  8.3294    format %{ "CVTSI2SS $dst, $src" %}
  8.3295 @@ -11890,7 +11518,7 @@
  8.3296    ins_pipe( pipe_slow );
  8.3297  %}
  8.3298  
  8.3299 - instruct convXI2X_reg(regX dst, eRegI src)
  8.3300 + instruct convXI2F_reg(regF dst, eRegI src)
  8.3301  %{
  8.3302    predicate( UseSSE>=2 && UseXmmI2F );
  8.3303    match(Set dst (ConvI2F src));
  8.3304 @@ -11939,7 +11567,7 @@
  8.3305    ins_pipe( ialu_reg_reg_long );
  8.3306  %}
  8.3307  
  8.3308 -instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  8.3309 +instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  8.3310    predicate (UseSSE<=1);
  8.3311    match(Set dst (ConvL2D src));
  8.3312    effect( KILL cr );
  8.3313 @@ -11949,11 +11577,11 @@
  8.3314              "ADD    ESP,8\n\t"
  8.3315              "FSTP_D $dst\t# D-round" %}
  8.3316    opcode(0xDF, 0x5);  /* DF /5 */
  8.3317 -  ins_encode(convert_long_double(src), Pop_Mem_D(dst));
  8.3318 -  ins_pipe( pipe_slow );
  8.3319 -%}
  8.3320 -
  8.3321 -instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
  8.3322 +  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  8.3323 +  ins_pipe( pipe_slow );
  8.3324 +%}
  8.3325 +
  8.3326 +instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  8.3327    predicate (UseSSE>=2);
  8.3328    match(Set dst (ConvL2D src));
  8.3329    effect( KILL cr );
  8.3330 @@ -11964,11 +11592,11 @@
  8.3331              "MOVSD  $dst,[ESP]\n\t"
  8.3332              "ADD    ESP,8" %}
  8.3333    opcode(0xDF, 0x5);  /* DF /5 */
  8.3334 -  ins_encode(convert_long_double2(src), Push_ResultXD(dst));
  8.3335 -  ins_pipe( pipe_slow );
  8.3336 -%}
  8.3337 -
  8.3338 -instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
  8.3339 +  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  8.3340 +  ins_pipe( pipe_slow );
  8.3341 +%}
  8.3342 +
  8.3343 +instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  8.3344    predicate (UseSSE>=1);
  8.3345    match(Set dst (ConvL2F src));
  8.3346    effect( KILL cr );
  8.3347 @@ -11979,11 +11607,11 @@
  8.3348              "MOVSS  $dst,[ESP]\n\t"
  8.3349              "ADD    ESP,8" %}
  8.3350    opcode(0xDF, 0x5);  /* DF /5 */
  8.3351 -  ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
  8.3352 -  ins_pipe( pipe_slow );
  8.3353 -%}
  8.3354 -
  8.3355 -instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  8.3356 +  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  8.3357 +  ins_pipe( pipe_slow );
  8.3358 +%}
  8.3359 +
  8.3360 +instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  8.3361    match(Set dst (ConvL2F src));
  8.3362    effect( KILL cr );
  8.3363    format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
  8.3364 @@ -11992,7 +11620,7 @@
  8.3365              "ADD    ESP,8\n\t"
  8.3366              "FSTP_S $dst\t# F-round" %}
  8.3367    opcode(0xDF, 0x5);  /* DF /5 */
  8.3368 -  ins_encode(convert_long_double(src), Pop_Mem_F(dst));
  8.3369 +  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  8.3370    ins_pipe( pipe_slow );
  8.3371  %}
  8.3372  
  8.3373 @@ -12016,18 +11644,18 @@
  8.3374    ins_pipe( ialu_reg_mem );
  8.3375  %}
  8.3376  
  8.3377 -instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  8.3378 +instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  8.3379    predicate(UseSSE==0);
  8.3380    match(Set dst (MoveF2I src));
  8.3381    effect( DEF dst, USE src );
  8.3382  
  8.3383    ins_cost(125);
  8.3384    format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
  8.3385 -  ins_encode( Pop_Mem_Reg_F(dst, src) );
  8.3386 +  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  8.3387    ins_pipe( fpu_mem_reg );
  8.3388  %}
  8.3389  
  8.3390 -instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
  8.3391 +instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  8.3392    predicate(UseSSE>=1);
  8.3393    match(Set dst (MoveF2I src));
  8.3394    effect( DEF dst, USE src );
  8.3395 @@ -12040,7 +11668,7 @@
  8.3396    ins_pipe( pipe_slow );
  8.3397  %}
  8.3398  
  8.3399 -instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
  8.3400 +instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
  8.3401    predicate(UseSSE>=2);
  8.3402    match(Set dst (MoveF2I src));
  8.3403    effect( DEF dst, USE src );
  8.3404 @@ -12065,7 +11693,7 @@
  8.3405  %}
  8.3406  
  8.3407  
  8.3408 -instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  8.3409 +instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  8.3410    predicate(UseSSE==0);
  8.3411    match(Set dst (MoveI2F src));
  8.3412    effect(DEF dst, USE src);
  8.3413 @@ -12075,11 +11703,11 @@
  8.3414              "FSTP   $dst\t# MoveI2F_stack_reg" %}
  8.3415    opcode(0xD9);               /* D9 /0, FLD m32real */
  8.3416    ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
  8.3417 -              Pop_Reg_F(dst) );
  8.3418 +              Pop_Reg_FPR(dst) );
  8.3419    ins_pipe( fpu_reg_mem );
  8.3420  %}
  8.3421  
  8.3422 -instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
  8.3423 +instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  8.3424    predicate(UseSSE>=1);
  8.3425    match(Set dst (MoveI2F src));
  8.3426    effect( DEF dst, USE src );
  8.3427 @@ -12092,7 +11720,7 @@
  8.3428    ins_pipe( pipe_slow );
  8.3429  %}
  8.3430  
  8.3431 -instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
  8.3432 +instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
  8.3433    predicate(UseSSE>=2);
  8.3434    match(Set dst (MoveI2F src));
  8.3435    effect( DEF dst, USE src );
  8.3436 @@ -12117,18 +11745,18 @@
  8.3437    ins_pipe( ialu_mem_long_reg );
  8.3438  %}
  8.3439  
  8.3440 -instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  8.3441 +instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  8.3442    predicate(UseSSE<=1);
  8.3443    match(Set dst (MoveD2L src));
  8.3444    effect(DEF dst, USE src);
  8.3445  
  8.3446    ins_cost(125);
  8.3447    format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
  8.3448 -  ins_encode( Pop_Mem_Reg_D(dst, src) );
  8.3449 +  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  8.3450    ins_pipe( fpu_mem_reg );
  8.3451  %}
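
The MoveD2L/MoveL2D family here performs bit-pattern moves -- the
Double.doubleToRawLongBits / longBitsToDouble shape -- not value conversion,
which is why the encodings are plain stores and loads with no rounding. An
illustrative equivalent (a sketch, not part of this patch):

    #include <cstdint>
    #include <cstring>
    static int64_t move_d2l(double d) {
      int64_t bits;
      std::memcpy(&bits, &d, sizeof bits);  // reinterpret the 64 bits as-is
      return bits;
    }
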
  8.3452  
  8.3453 -instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
  8.3454 +instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  8.3455    predicate(UseSSE>=2);
  8.3456    match(Set dst (MoveD2L src));
  8.3457    effect(DEF dst, USE src);
  8.3458 @@ -12140,7 +11768,7 @@
  8.3459    ins_pipe( pipe_slow );
  8.3460  %}
  8.3461  
  8.3462 -instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
  8.3463 +instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  8.3464    predicate(UseSSE>=2);
  8.3465    match(Set dst (MoveD2L src));
  8.3466    effect(DEF dst, USE src, TEMP tmp);
  8.3467 @@ -12169,7 +11797,7 @@
  8.3468  %}
  8.3469  
  8.3470  
  8.3471 -instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  8.3472 +instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  8.3473    predicate(UseSSE<=1);
  8.3474    match(Set dst (MoveL2D src));
  8.3475    effect(DEF dst, USE src);
  8.3476 @@ -12179,12 +11807,12 @@
  8.3477              "FSTP   $dst\t# MoveL2D_stack_reg" %}
  8.3478    opcode(0xDD);               /* DD /0, FLD m64real */
  8.3479    ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
  8.3480 -              Pop_Reg_D(dst) );
  8.3481 +              Pop_Reg_DPR(dst) );
  8.3482    ins_pipe( fpu_reg_mem );
  8.3483  %}
  8.3484  
  8.3485  
  8.3486 -instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
  8.3487 +instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  8.3488    predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  8.3489    match(Set dst (MoveL2D src));
  8.3490    effect(DEF dst, USE src);
  8.3491 @@ -12197,7 +11825,7 @@
  8.3492    ins_pipe( pipe_slow );
  8.3493  %}
  8.3494  
  8.3495 -instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
  8.3496 +instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  8.3497    predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  8.3498    match(Set dst (MoveL2D src));
  8.3499    effect(DEF dst, USE src);
  8.3500 @@ -12210,7 +11838,7 @@
  8.3501    ins_pipe( pipe_slow );
  8.3502  %}
  8.3503  
  8.3504 -instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
  8.3505 +instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  8.3506    predicate(UseSSE>=2);
  8.3507    match(Set dst (MoveL2D src));
  8.3508    effect(TEMP dst, USE src, TEMP tmp);
  8.3509 @@ -12227,7 +11855,7 @@
  8.3510  %}
  8.3511  
  8.3512  // Replicate scalar to packed byte (1 byte) values in xmm
  8.3513 -instruct Repl8B_reg(regXD dst, regXD src) %{
  8.3514 +instruct Repl8B_reg(regD dst, regD src) %{
  8.3515    predicate(UseSSE>=2);
  8.3516    match(Set dst (Replicate8B src));
  8.3517    format %{ "MOVDQA  $dst,$src\n\t"
  8.3518 @@ -12244,7 +11872,7 @@
  8.3519  %}
  8.3520  
  8.3521  // Replicate scalar to packed byte (1 byte) values in xmm
  8.3522 -instruct Repl8B_eRegI(regXD dst, eRegI src) %{
  8.3523 +instruct Repl8B_eRegI(regD dst, eRegI src) %{
  8.3524    predicate(UseSSE>=2);
  8.3525    match(Set dst (Replicate8B src));
  8.3526    format %{ "MOVD    $dst,$src\n\t"
  8.3527 @@ -12259,7 +11887,7 @@
  8.3528  %}
  8.3529  
  8.3530  // Replicate scalar zero to packed byte (1 byte) values in xmm
  8.3531 -instruct Repl8B_immI0(regXD dst, immI0 zero) %{
  8.3532 +instruct Repl8B_immI0(regD dst, immI0 zero) %{
  8.3533    predicate(UseSSE>=2);
  8.3534    match(Set dst (Replicate8B zero));
  8.3535    format %{ "PXOR  $dst,$dst\t! replicate8B" %}
  8.3536 @@ -12270,7 +11898,7 @@
  8.3537  %}
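
A note on the immI0 replicate rules: PXOR of a register with itself is the
standard dependency-breaking idiom for zeroing an XMM register, so replicating
a zero scalar needs no constant load at all. The non-zero rules reduce to the
shape below (illustrative C++, names assumed):

    #include <cstdint>
    static void repl8b(uint8_t out[8], uint8_t s) {
      for (int i = 0; i < 8; ++i) out[i] = s;  // Replicate8B: broadcast one byte
    }
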
  8.3538  
  8.3539  // Replicate scalar to packed short (2 byte) values in xmm
  8.3540 -instruct Repl4S_reg(regXD dst, regXD src) %{
  8.3541 +instruct Repl4S_reg(regD dst, regD src) %{
  8.3542    predicate(UseSSE>=2);
  8.3543    match(Set dst (Replicate4S src));
  8.3544    format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
  8.3545 @@ -12281,7 +11909,7 @@
  8.3546  %}
  8.3547  
  8.3548  // Replicate scalar to packed short (2 byte) values in xmm
  8.3549 -instruct Repl4S_eRegI(regXD dst, eRegI src) %{
  8.3550 +instruct Repl4S_eRegI(regD dst, eRegI src) %{
  8.3551    predicate(UseSSE>=2);
  8.3552    match(Set dst (Replicate4S src));
  8.3553    format %{ "MOVD    $dst,$src\n\t"
  8.3554 @@ -12294,7 +11922,7 @@
  8.3555  %}
  8.3556  
  8.3557  // Replicate scalar zero to packed short (2 byte) values in xmm
  8.3558 -instruct Repl4S_immI0(regXD dst, immI0 zero) %{
  8.3559 +instruct Repl4S_immI0(regD dst, immI0 zero) %{
  8.3560    predicate(UseSSE>=2);
  8.3561    match(Set dst (Replicate4S zero));
  8.3562    format %{ "PXOR  $dst,$dst\t! replicate4S" %}
  8.3563 @@ -12305,7 +11933,7 @@
  8.3564  %}
  8.3565  
  8.3566  // Replicate scalar to packed char (2 byte) values in xmm
  8.3567 -instruct Repl4C_reg(regXD dst, regXD src) %{
  8.3568 +instruct Repl4C_reg(regD dst, regD src) %{
  8.3569    predicate(UseSSE>=2);
  8.3570    match(Set dst (Replicate4C src));
  8.3571    format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
  8.3572 @@ -12316,7 +11944,7 @@
  8.3573  %}
  8.3574  
  8.3575  // Replicate scalar to packed char (2 byte) values in xmm
  8.3576 -instruct Repl4C_eRegI(regXD dst, eRegI src) %{
  8.3577 +instruct Repl4C_eRegI(regD dst, eRegI src) %{
  8.3578    predicate(UseSSE>=2);
  8.3579    match(Set dst (Replicate4C src));
  8.3580    format %{ "MOVD    $dst,$src\n\t"
  8.3581 @@ -12329,7 +11957,7 @@
  8.3582  %}
  8.3583  
  8.3584  // Replicate scalar zero to packed char (2 byte) values in xmm
  8.3585 -instruct Repl4C_immI0(regXD dst, immI0 zero) %{
  8.3586 +instruct Repl4C_immI0(regD dst, immI0 zero) %{
  8.3587    predicate(UseSSE>=2);
  8.3588    match(Set dst (Replicate4C zero));
  8.3589    format %{ "PXOR  $dst,$dst\t! replicate4C" %}
  8.3590 @@ -12340,7 +11968,7 @@
  8.3591  %}
  8.3592  
  8.3593  // Replicate scalar to packed integer (4 byte) values in xmm
  8.3594 -instruct Repl2I_reg(regXD dst, regXD src) %{
  8.3595 +instruct Repl2I_reg(regD dst, regD src) %{
  8.3596    predicate(UseSSE>=2);
  8.3597    match(Set dst (Replicate2I src));
  8.3598    format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
  8.3599 @@ -12351,7 +11979,7 @@
  8.3600  %}
  8.3601  
  8.3602  // Replicate scalar to packed integer (4 byte) values in xmm
  8.3603 -instruct Repl2I_eRegI(regXD dst, eRegI src) %{
  8.3604 +instruct Repl2I_eRegI(regD dst, eRegI src) %{
  8.3605    predicate(UseSSE>=2);
  8.3606    match(Set dst (Replicate2I src));
  8.3607    format %{ "MOVD   $dst,$src\n\t"
  8.3608 @@ -12364,7 +11992,7 @@
  8.3609  %}
  8.3610  
  8.3611  // Replicate scalar zero to packed integer (4 byte) values in xmm
  8.3612 -instruct Repl2I_immI0(regXD dst, immI0 zero) %{
  8.3613 +instruct Repl2I_immI0(regD dst, immI0 zero) %{
  8.3614    predicate(UseSSE>=2);
  8.3615    match(Set dst (Replicate2I zero));
  8.3616    format %{ "PXOR  $dst,$dst\t! replicate2I" %}
  8.3617 @@ -12375,7 +12003,7 @@
  8.3618  %}
  8.3619  
  8.3620  // Replicate scalar to packed single precision floating point values in xmm
  8.3621 -instruct Repl2F_reg(regXD dst, regXD src) %{
  8.3622 +instruct Repl2F_reg(regD dst, regD src) %{
  8.3623    predicate(UseSSE>=2);
  8.3624    match(Set dst (Replicate2F src));
  8.3625    format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  8.3626 @@ -12386,7 +12014,7 @@
  8.3627  %}
  8.3628  
  8.3629  // Replicate scalar to packed single precision floating point values in xmm
  8.3630 -instruct Repl2F_regX(regXD dst, regX src) %{
  8.3631 +instruct Repl2F_regF(regD dst, regF src) %{
  8.3632    predicate(UseSSE>=2);
  8.3633    match(Set dst (Replicate2F src));
  8.3634    format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  8.3635 @@ -12397,7 +12025,7 @@
  8.3636  %}
  8.3637  
  8.3638  // Replicate scalar to packed single precision floating point values in xmm
  8.3639 -instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
  8.3640 +instruct Repl2F_immF0(regD dst, immF0 zero) %{
  8.3641    predicate(UseSSE>=2);
  8.3642    match(Set dst (Replicate2F zero));
  8.3643    format %{ "PXOR  $dst,$dst\t! replicate2F" %}
  8.3644 @@ -12423,7 +12051,7 @@
  8.3645  %}
  8.3646  
  8.3647  instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
  8.3648 -                        eAXRegI result, regXD tmp1, eFlagsReg cr) %{
  8.3649 +                        eAXRegI result, regD tmp1, eFlagsReg cr) %{
  8.3650    match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  8.3651    effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
  8.3652  
  8.3653 @@ -12438,7 +12066,7 @@
  8.3654  
  8.3655  // fast string equals
  8.3656  instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
  8.3657 -                       regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  8.3658 +                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  8.3659    match(Set result (StrEquals (Binary str1 str2) cnt));
  8.3660    effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
  8.3661  
  8.3662 @@ -12453,7 +12081,7 @@
  8.3663  
  8.3664  // fast search of substring with known size.
  8.3665  instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
  8.3666 -                            eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  8.3667 +                            eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  8.3668    predicate(UseSSE42Intrinsics);
  8.3669    match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  8.3670    effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
  8.3671 @@ -12480,7 +12108,7 @@
  8.3672  %}
  8.3673  
  8.3674  instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
  8.3675 -                        eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
  8.3676 +                        eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  8.3677    predicate(UseSSE42Intrinsics);
  8.3678    match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  8.3679    effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
  8.3680 @@ -12497,7 +12125,7 @@
  8.3681  
  8.3682  // fast array equals
  8.3683  instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
  8.3684 -                      regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
  8.3685 +                      regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
  8.3686  %{
  8.3687    match(Set result (AryEq ary1 ary2));
  8.3688    effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  8.3689 @@ -13323,40 +12951,40 @@
  8.3690  %}
  8.3691  
  8.3692  // Compare 2 longs and CMOVE doubles
  8.3693 -instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  8.3694 +instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  8.3695    predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  8.3696    match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  8.3697    ins_cost(200);
  8.3698    expand %{
  8.3699 -    fcmovD_regS(cmp,flags,dst,src);
  8.3700 +    fcmovDPR_regS(cmp,flags,dst,src);
  8.3701    %}
  8.3702  %}
  8.3703  
  8.3704  // Compare 2 longs and CMOVE doubles
  8.3705 -instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
  8.3706 +instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  8.3707    predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  8.3708    match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  8.3709    ins_cost(200);
  8.3710    expand %{
  8.3711 -    fcmovXD_regS(cmp,flags,dst,src);
  8.3712 -  %}
  8.3713 -%}
  8.3714 -
  8.3715 -instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  8.3716 +    fcmovD_regS(cmp,flags,dst,src);
  8.3717 +  %}
  8.3718 +%}
  8.3719 +
  8.3720 +instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  8.3721    predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  8.3722    match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  8.3723    ins_cost(200);
  8.3724    expand %{
  8.3725 -    fcmovF_regS(cmp,flags,dst,src);
  8.3726 -  %}
  8.3727 -%}
  8.3728 -
  8.3729 -instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
  8.3730 +    fcmovFPR_regS(cmp,flags,dst,src);
  8.3731 +  %}
  8.3732 +%}
  8.3733 +
  8.3734 +instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  8.3735    predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  8.3736    match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  8.3737    ins_cost(200);
  8.3738    expand %{
  8.3739 -    fcmovX_regS(cmp,flags,dst,src);
  8.3740 +    fcmovF_regS(cmp,flags,dst,src);
  8.3741    %}
  8.3742  %}
  8.3743  
  8.3744 @@ -13451,40 +13079,40 @@
  8.3745  %}
  8.3746  
  8.3747  // Compare 2 longs and CMOVE doubles
  8.3748 -instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  8.3749 +instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  8.3750    predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  8.3751    match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  8.3752    ins_cost(200);
  8.3753    expand %{
  8.3754 -    fcmovD_regS(cmp,flags,dst,src);
  8.3755 +    fcmovDPR_regS(cmp,flags,dst,src);
  8.3756    %}
  8.3757  %}
  8.3758  
  8.3759  // Compare 2 longs and CMOVE doubles
  8.3760 -instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
  8.3761 +instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  8.3762    predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  8.3763    match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  8.3764    ins_cost(200);
  8.3765    expand %{
  8.3766 -    fcmovXD_regS(cmp,flags,dst,src);
  8.3767 -  %}
  8.3768 -%}
  8.3769 -
  8.3770 -instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  8.3771 +    fcmovD_regS(cmp,flags,dst,src);
  8.3772 +  %}
  8.3773 +%}
  8.3774 +
  8.3775 +instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  8.3776    predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  8.3777    match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  8.3778    ins_cost(200);
  8.3779    expand %{
  8.3780 -    fcmovF_regS(cmp,flags,dst,src);
  8.3781 -  %}
  8.3782 -%}
  8.3783 -
  8.3784 -instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
  8.3785 +    fcmovFPR_regS(cmp,flags,dst,src);
  8.3786 +  %}
  8.3787 +%}
  8.3788 +
  8.3789 +instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  8.3790    predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  8.3791    match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  8.3792    ins_cost(200);
  8.3793    expand %{
  8.3794 -    fcmovX_regS(cmp,flags,dst,src);
  8.3795 +    fcmovF_regS(cmp,flags,dst,src);
  8.3796    %}
  8.3797  %}
  8.3798  
  8.3799 @@ -13584,41 +13212,41 @@
  8.3800  %}
  8.3801  
  8.3802  // Compare 2 longs and CMOVE doubles
  8.3803 -instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  8.3804 +instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  8.3805    predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  8.3806    match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  8.3807    ins_cost(200);
  8.3808    expand %{
  8.3809 -    fcmovD_regS(cmp,flags,dst,src);
  8.3810 +    fcmovDPR_regS(cmp,flags,dst,src);
  8.3811    %}
  8.3812  %}
  8.3813  
  8.3814  // Compare 2 longs and CMOVE doubles
  8.3815 -instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
  8.3816 +instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  8.3817    predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  8.3818    match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  8.3819    ins_cost(200);
  8.3820    expand %{
  8.3821 -    fcmovXD_regS(cmp,flags,dst,src);
  8.3822 -  %}
  8.3823 -%}
  8.3824 -
  8.3825 -instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  8.3826 +    fcmovD_regS(cmp,flags,dst,src);
  8.3827 +  %}
  8.3828 +%}
  8.3829 +
  8.3830 +instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  8.3831    predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  8.3832    match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  8.3833    ins_cost(200);
  8.3834    expand %{
  8.3835 -    fcmovF_regS(cmp,flags,dst,src);
  8.3836 -  %}
  8.3837 -%}
  8.3838 -
  8.3839 -
  8.3840 -instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
  8.3841 +    fcmovFPR_regS(cmp,flags,dst,src);
  8.3842 +  %}
  8.3843 +%}
  8.3844 +
  8.3845 +
  8.3846 +instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  8.3847    predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  8.3848    match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  8.3849    ins_cost(200);
  8.3850    expand %{
  8.3851 -    fcmovX_regS(cmp,flags,dst,src);
  8.3852 +    fcmovF_regS(cmp,flags,dst,src);
  8.3853    %}
  8.3854  %}
  8.3855  
     9.1 --- a/src/cpu/x86/vm/x86_64.ad	Mon Dec 19 14:16:23 2011 -0800
     9.2 +++ b/src/cpu/x86/vm/x86_64.ad	Tue Dec 20 00:55:02 2011 -0800
     9.3 @@ -9873,396 +9873,6 @@
     9.4    ins_pipe(pipe_slow);
     9.5  %}
     9.6  
     9.7 -instruct addF_reg(regF dst, regF src)
     9.8 -%{
     9.9 -  match(Set dst (AddF dst src));
    9.10 -
    9.11 -  format %{ "addss   $dst, $src" %}
    9.12 -  ins_cost(150); // XXX
    9.13 -  ins_encode %{
    9.14 -    __ addss($dst$$XMMRegister, $src$$XMMRegister);
    9.15 -  %}
    9.16 -  ins_pipe(pipe_slow);
    9.17 -%}
    9.18 -
    9.19 -instruct addF_mem(regF dst, memory src)
    9.20 -%{
    9.21 -  match(Set dst (AddF dst (LoadF src)));
    9.22 -
    9.23 -  format %{ "addss   $dst, $src" %}
    9.24 -  ins_cost(150); // XXX
    9.25 -  ins_encode %{
    9.26 -    __ addss($dst$$XMMRegister, $src$$Address);
    9.27 -  %}
    9.28 -  ins_pipe(pipe_slow);
    9.29 -%}
    9.30 -
    9.31 -instruct addF_imm(regF dst, immF con) %{
    9.32 -  match(Set dst (AddF dst con));
    9.33 -  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
    9.34 -  ins_cost(150); // XXX
    9.35 -  ins_encode %{
    9.36 -    __ addss($dst$$XMMRegister, $constantaddress($con));
    9.37 -  %}
    9.38 -  ins_pipe(pipe_slow);
    9.39 -%}
    9.40 -
    9.41 -instruct addD_reg(regD dst, regD src)
    9.42 -%{
    9.43 -  match(Set dst (AddD dst src));
    9.44 -
    9.45 -  format %{ "addsd   $dst, $src" %}
    9.46 -  ins_cost(150); // XXX
    9.47 -  ins_encode %{
    9.48 -    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
    9.49 -  %}
    9.50 -  ins_pipe(pipe_slow);
    9.51 -%}
    9.52 -
    9.53 -instruct addD_mem(regD dst, memory src)
    9.54 -%{
    9.55 -  match(Set dst (AddD dst (LoadD src)));
    9.56 -
    9.57 -  format %{ "addsd   $dst, $src" %}
    9.58 -  ins_cost(150); // XXX
    9.59 -  ins_encode %{
    9.60 -    __ addsd($dst$$XMMRegister, $src$$Address);
    9.61 -  %}
    9.62 -  ins_pipe(pipe_slow);
    9.63 -%}
    9.64 -
    9.65 -instruct addD_imm(regD dst, immD con) %{
    9.66 -  match(Set dst (AddD dst con));
    9.67 -  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
    9.68 -  ins_cost(150); // XXX
    9.69 -  ins_encode %{
    9.70 -    __ addsd($dst$$XMMRegister, $constantaddress($con));
    9.71 -  %}
    9.72 -  ins_pipe(pipe_slow);
    9.73 -%}
    9.74 -
    9.75 -instruct subF_reg(regF dst, regF src)
    9.76 -%{
    9.77 -  match(Set dst (SubF dst src));
    9.78 -
    9.79 -  format %{ "subss   $dst, $src" %}
    9.80 -  ins_cost(150); // XXX
    9.81 -  ins_encode %{
    9.82 -    __ subss($dst$$XMMRegister, $src$$XMMRegister);
    9.83 -  %}
    9.84 -  ins_pipe(pipe_slow);
    9.85 -%}
    9.86 -
    9.87 -instruct subF_mem(regF dst, memory src)
    9.88 -%{
    9.89 -  match(Set dst (SubF dst (LoadF src)));
    9.90 -
    9.91 -  format %{ "subss   $dst, $src" %}
    9.92 -  ins_cost(150); // XXX
    9.93 -  ins_encode %{
    9.94 -    __ subss($dst$$XMMRegister, $src$$Address);
    9.95 -  %}
    9.96 -  ins_pipe(pipe_slow);
    9.97 -%}
    9.98 -
    9.99 -instruct subF_imm(regF dst, immF con) %{
   9.100 -  match(Set dst (SubF dst con));
   9.101 -  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   9.102 -  ins_cost(150); // XXX
   9.103 -  ins_encode %{
   9.104 -    __ subss($dst$$XMMRegister, $constantaddress($con));
   9.105 -  %}
   9.106 -  ins_pipe(pipe_slow);
   9.107 -%}
   9.108 -
   9.109 -instruct subD_reg(regD dst, regD src)
   9.110 -%{
   9.111 -  match(Set dst (SubD dst src));
   9.112 -
   9.113 -  format %{ "subsd   $dst, $src" %}
   9.114 -  ins_cost(150); // XXX
   9.115 -  ins_encode %{
   9.116 -    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
   9.117 -  %}
   9.118 -  ins_pipe(pipe_slow);
   9.119 -%}
   9.120 -
   9.121 -instruct subD_mem(regD dst, memory src)
   9.122 -%{
   9.123 -  match(Set dst (SubD dst (LoadD src)));
   9.124 -
   9.125 -  format %{ "subsd   $dst, $src" %}
   9.126 -  ins_cost(150); // XXX
   9.127 -  ins_encode %{
   9.128 -    __ subsd($dst$$XMMRegister, $src$$Address);
   9.129 -  %}
   9.130 -  ins_pipe(pipe_slow);
   9.131 -%}
   9.132 -
   9.133 -instruct subD_imm(regD dst, immD con) %{
   9.134 -  match(Set dst (SubD dst con));
   9.135 -  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   9.136 -  ins_cost(150); // XXX
   9.137 -  ins_encode %{
   9.138 -    __ subsd($dst$$XMMRegister, $constantaddress($con));
   9.139 -  %}
   9.140 -  ins_pipe(pipe_slow);
   9.141 -%}
   9.142 -
   9.143 -instruct mulF_reg(regF dst, regF src)
   9.144 -%{
   9.145 -  match(Set dst (MulF dst src));
   9.146 -
   9.147 -  format %{ "mulss   $dst, $src" %}
   9.148 -  ins_cost(150); // XXX
   9.149 -  ins_encode %{
   9.150 -    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
   9.151 -  %}
   9.152 -  ins_pipe(pipe_slow);
   9.153 -%}
   9.154 -
   9.155 -instruct mulF_mem(regF dst, memory src)
   9.156 -%{
   9.157 -  match(Set dst (MulF dst (LoadF src)));
   9.158 -
   9.159 -  format %{ "mulss   $dst, $src" %}
   9.160 -  ins_cost(150); // XXX
   9.161 -  ins_encode %{
   9.162 -    __ mulss($dst$$XMMRegister, $src$$Address);
   9.163 -  %}
   9.164 -  ins_pipe(pipe_slow);
   9.165 -%}
   9.166 -
   9.167 -instruct mulF_imm(regF dst, immF con) %{
   9.168 -  match(Set dst (MulF dst con));
   9.169 -  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   9.170 -  ins_cost(150); // XXX
   9.171 -  ins_encode %{
   9.172 -    __ mulss($dst$$XMMRegister, $constantaddress($con));
   9.173 -  %}
   9.174 -  ins_pipe(pipe_slow);
   9.175 -%}
   9.176 -
   9.177 -instruct mulD_reg(regD dst, regD src)
   9.178 -%{
   9.179 -  match(Set dst (MulD dst src));
   9.180 -
   9.181 -  format %{ "mulsd   $dst, $src" %}
   9.182 -  ins_cost(150); // XXX
   9.183 -  ins_encode %{
   9.184 -    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
   9.185 -  %}
   9.186 -  ins_pipe(pipe_slow);
   9.187 -%}
   9.188 -
   9.189 -instruct mulD_mem(regD dst, memory src)
   9.190 -%{
   9.191 -  match(Set dst (MulD dst (LoadD src)));
   9.192 -
   9.193 -  format %{ "mulsd   $dst, $src" %}
   9.194 -  ins_cost(150); // XXX
   9.195 -  ins_encode %{
   9.196 -    __ mulsd($dst$$XMMRegister, $src$$Address);
   9.197 -  %}
   9.198 -  ins_pipe(pipe_slow);
   9.199 -%}
   9.200 -
   9.201 -instruct mulD_imm(regD dst, immD con) %{
   9.202 -  match(Set dst (MulD dst con));
   9.203 -  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   9.204 -  ins_cost(150); // XXX
   9.205 -  ins_encode %{
   9.206 -    __ mulsd($dst$$XMMRegister, $constantaddress($con));
   9.207 -  %}
   9.208 -  ins_pipe(pipe_slow);
   9.209 -%}
   9.210 -
   9.211 -instruct divF_reg(regF dst, regF src)
   9.212 -%{
   9.213 -  match(Set dst (DivF dst src));
   9.214 -
   9.215 -  format %{ "divss   $dst, $src" %}
   9.216 -  ins_cost(150); // XXX
   9.217 -  ins_encode %{
   9.218 -    __ divss($dst$$XMMRegister, $src$$XMMRegister);
   9.219 -  %}
   9.220 -  ins_pipe(pipe_slow);
   9.221 -%}
   9.222 -
   9.223 -instruct divF_mem(regF dst, memory src)
   9.224 -%{
   9.225 -  match(Set dst (DivF dst (LoadF src)));
   9.226 -
   9.227 -  format %{ "divss   $dst, $src" %}
   9.228 -  ins_cost(150); // XXX
   9.229 -  ins_encode %{
   9.230 -    __ divss($dst$$XMMRegister, $src$$Address);
   9.231 -  %}
   9.232 -  ins_pipe(pipe_slow);
   9.233 -%}
   9.234 -
   9.235 -instruct divF_imm(regF dst, immF con) %{
   9.236 -  match(Set dst (DivF dst con));
   9.237 -  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   9.238 -  ins_cost(150); // XXX
   9.239 -  ins_encode %{
   9.240 -    __ divss($dst$$XMMRegister, $constantaddress($con));
   9.241 -  %}
   9.242 -  ins_pipe(pipe_slow);
   9.243 -%}
   9.244 -
   9.245 -instruct divD_reg(regD dst, regD src)
   9.246 -%{
   9.247 -  match(Set dst (DivD dst src));
   9.248 -
   9.249 -  format %{ "divsd   $dst, $src" %}
   9.250 -  ins_cost(150); // XXX
   9.251 -  ins_encode %{
   9.252 -    __ divsd($dst$$XMMRegister, $src$$XMMRegister);
   9.253 -  %}
   9.254 -  ins_pipe(pipe_slow);
   9.255 -%}
   9.256 -
   9.257 -instruct divD_mem(regD dst, memory src)
   9.258 -%{
   9.259 -  match(Set dst (DivD dst (LoadD src)));
   9.260 -
   9.261 -  format %{ "divsd   $dst, $src" %}
   9.262 -  ins_cost(150); // XXX
   9.263 -  ins_encode %{
   9.264 -    __ divsd($dst$$XMMRegister, $src$$Address);
   9.265 -  %}
   9.266 -  ins_pipe(pipe_slow);
   9.267 -%}
   9.268 -
   9.269 -instruct divD_imm(regD dst, immD con) %{
   9.270 -  match(Set dst (DivD dst con));
   9.271 -  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   9.272 -  ins_cost(150); // XXX
   9.273 -  ins_encode %{
   9.274 -    __ divsd($dst$$XMMRegister, $constantaddress($con));
   9.275 -  %}
   9.276 -  ins_pipe(pipe_slow);
   9.277 -%}
   9.278 -
   9.279 -instruct sqrtF_reg(regF dst, regF src)
   9.280 -%{
   9.281 -  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
   9.282 -
   9.283 -  format %{ "sqrtss  $dst, $src" %}
   9.284 -  ins_cost(150); // XXX
   9.285 -  ins_encode %{
   9.286 -    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
   9.287 -  %}
   9.288 -  ins_pipe(pipe_slow);
   9.289 -%}
   9.290 -
   9.291 -instruct sqrtF_mem(regF dst, memory src)
   9.292 -%{
   9.293 -  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
   9.294 -
   9.295 -  format %{ "sqrtss  $dst, $src" %}
   9.296 -  ins_cost(150); // XXX
   9.297 -  ins_encode %{
   9.298 -    __ sqrtss($dst$$XMMRegister, $src$$Address);
   9.299 -  %}
   9.300 -  ins_pipe(pipe_slow);
   9.301 -%}
   9.302 -
   9.303 -instruct sqrtF_imm(regF dst, immF con) %{
   9.304 -  match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
   9.305 -  format %{ "sqrtss  $dst, [$constantaddress]\t# load from constant table: float=$con" %}
   9.306 -  ins_cost(150); // XXX
   9.307 -  ins_encode %{
   9.308 -    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
   9.309 -  %}
   9.310 -  ins_pipe(pipe_slow);
   9.311 -%}
   9.312 -
   9.313 -instruct sqrtD_reg(regD dst, regD src)
   9.314 -%{
   9.315 -  match(Set dst (SqrtD src));
   9.316 -
   9.317 -  format %{ "sqrtsd  $dst, $src" %}
   9.318 -  ins_cost(150); // XXX
   9.319 -  ins_encode %{
   9.320 -    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
   9.321 -  %}
   9.322 -  ins_pipe(pipe_slow);
   9.323 -%}
   9.324 -
   9.325 -instruct sqrtD_mem(regD dst, memory src)
   9.326 -%{
   9.327 -  match(Set dst (SqrtD (LoadD src)));
   9.328 -
   9.329 -  format %{ "sqrtsd  $dst, $src" %}
   9.330 -  ins_cost(150); // XXX
   9.331 -  ins_encode %{
   9.332 -    __ sqrtsd($dst$$XMMRegister, $src$$Address);
   9.333 -  %}
   9.334 -  ins_pipe(pipe_slow);
   9.335 -%}
   9.336 -
   9.337 -instruct sqrtD_imm(regD dst, immD con) %{
   9.338 -  match(Set dst (SqrtD con));
   9.339 -  format %{ "sqrtsd  $dst, [$constantaddress]\t# load from constant table: double=$con" %}
   9.340 -  ins_cost(150); // XXX
   9.341 -  ins_encode %{
   9.342 -    __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
   9.343 -  %}
   9.344 -  ins_pipe(pipe_slow);
   9.345 -%}
   9.346 -
   9.347 -instruct absF_reg(regF dst)
   9.348 -%{
   9.349 -  match(Set dst (AbsF dst));
   9.350 -  ins_cost(150); // XXX
   9.351 -  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
   9.352 -  ins_encode %{
   9.353 -    __ andps($dst$$XMMRegister,
   9.354 -             ExternalAddress((address) StubRoutines::x86::float_sign_mask()));
   9.355 -  %}
   9.356 -  ins_pipe(pipe_slow);
   9.357 -%}
   9.358 -
   9.359 -instruct absD_reg(regD dst)
   9.360 -%{
   9.361 -  match(Set dst (AbsD dst));
   9.362 -  ins_cost(150); // XXX
   9.363 -  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
   9.364 -            "# abs double by sign masking" %}
   9.365 -  ins_encode %{
   9.366 -    __ andpd($dst$$XMMRegister,
   9.367 -             ExternalAddress((address) StubRoutines::x86::double_sign_mask()));
   9.368 -  %}
   9.369 -  ins_pipe(pipe_slow);
   9.370 -%}
   9.371 -
   9.372 -instruct negF_reg(regF dst)
   9.373 -%{
   9.374 -  match(Set dst (NegF dst));
   9.375 -  ins_cost(150); // XXX
   9.376 -  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
   9.377 -  ins_encode %{
   9.378 -    __ xorps($dst$$XMMRegister,
   9.379 -             ExternalAddress((address) StubRoutines::x86::float_sign_flip()));
   9.380 -  %}
   9.381 -  ins_pipe(pipe_slow);
   9.382 -%}
   9.383 -
   9.384 -instruct negD_reg(regD dst)
   9.385 -%{
   9.386 -  match(Set dst (NegD dst));
   9.387 -  ins_cost(150); // XXX
   9.388 -  format %{ "xorpd   $dst, [0x8000000000000000]\t"
   9.389 -            "# neg double by sign flipping" %}
   9.390 -  ins_encode %{
   9.391 -    __ xorpd($dst$$XMMRegister,
   9.392 -             ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
   9.393 -  %}
   9.394 -  ins_pipe(pipe_slow);
   9.395 -%}
   9.396 -
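
The scalar FP arithmetic rules deleted above were all written in the
destructive two-operand SSE form, where the destination doubles as the first
source. The VEX-encoded AVX forms take a separate destination, which is what
makes a shared three-operand rewrite of these patterns possible. The
difference, as an illustrative sketch (assembly in comments; not taken from
the replacement file):

    // SSE, 2-operand (destructive):      addss  xmm0, xmm1       ; xmm0 += xmm1
    // AVX, 3-operand (non-destructive):  vaddss xmm0, xmm1, xmm2 ; xmm0 = xmm1 + xmm2
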
    9.397  // -----------Trig and Transcendental Instructions------------------------------
   9.398  instruct cosD_reg(regD dst) %{
   9.399    match(Set dst (CosD dst));
    10.1 --- a/src/share/vm/opto/matcher.cpp	Mon Dec 19 14:16:23 2011 -0800
    10.2 +++ b/src/share/vm/opto/matcher.cpp	Tue Dec 20 00:55:02 2011 -0800
    10.3 @@ -1365,31 +1365,36 @@
    10.4  
    10.5    const Type *t = m->bottom_type();
    10.6  
    10.7 -  if( t->singleton() ) {
    10.8 +  if (t->singleton()) {
    10.9      // Never force constants into registers.  Allow them to match as
   10.10      // constants or registers.  Copies of the same value will share
   10.11      // the same register.  See find_shared_node.
   10.12      return false;
   10.13    } else {                      // Not a constant
   10.14      // Stop recursion if they have different Controls.
   10.15 -    // Slot 0 of constants is not really a Control.
   10.16 -    if( control && m->in(0) && control != m->in(0) ) {
   10.17 +    Node* m_control = m->in(0);
   10.18 +    // The control of a load's memory input can post-dominate the load's
   10.19 +    // own control, so use it too, since the load can't float above its memory.
   10.20 +    Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : NULL;
   10.21 +    if (control && m_control && control != m_control && control != mem_control) {
   10.22  
   10.23        // Actually, we can live with the most conservative control we
   10.24        // find, if it post-dominates the others.  This allows us to
   10.25        // pick up load/op/store trees where the load can float a little
   10.26        // above the store.
   10.27        Node *x = control;
   10.28 -      const uint max_scan = 6;   // Arbitrary scan cutoff
   10.29 +      const uint max_scan = 6;  // Arbitrary scan cutoff
   10.30        uint j;
   10.31 -      for( j=0; j<max_scan; j++ ) {
   10.32 -        if( x->is_Region() )    // Bail out at merge points
   10.33 +      for (j=0; j<max_scan; j++) {
   10.34 +        if (x->is_Region())     // Bail out at merge points
   10.35            return true;
   10.36          x = x->in(0);
   10.37 -        if( x == m->in(0) )     // Does 'control' post-dominate
   10.38 +        if (x == m_control)     // Does 'control' post-dominate
   10.39           break;                // m_control?  If so, we can use it
   10.40 +        if (x == mem_control)   // Does 'control' post-dominate
   10.41 +          break;                // mem_control?  If so, we can use it
   10.42        }
   10.43 -      if( j == max_scan )       // No post-domination before scan end?
   10.44 +      if (j == max_scan)        // No post-domination before scan end?
   10.45          return true;            // Then break the match tree up
   10.46      }
   10.47      if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) {
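
The matcher change above widens the post-domination scan: for a Load, the
control of its memory input is accepted as an alternative stopping point,
since the load cannot be scheduled above the memory state it consumes. A
hedged standalone sketch of the scan's logic (simplified types and names, not
the VM's code):

    struct Node {                 // minimal stand-in for the VM's Node
      Node* ctrl;                 // analogous to in(0)
      bool  region;               // analogous to is_Region()
      Node* in0() const { return ctrl; }
    };

    // Walk up at most max_scan control inputs from 'control'; matching is
    // allowed if we reach either the candidate's own control or, for loads,
    // the control of its memory input.
    bool post_dominates(Node* control, Node* m_control, Node* mem_control) {
      const unsigned max_scan = 6;        // same arbitrary cutoff as above
      Node* x = control;
      for (unsigned j = 0; j < max_scan; j++) {
        if (x->region) return false;      // bail out at merge points
        x = x->in0();
        if (x == nullptr) return false;
        if (x == m_control || x == mem_control)
          return true;                    // 'control' post-dominates one of them
      }
      return false;                       // scan ended: break the match tree up
    }
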
