Thu, 29 Dec 2011 11:37:50 -0800
Merge
src/cpu/x86/vm/assembler_x86.cpp | file | annotate | diff | comparison | revisions | |
src/share/vm/runtime/globals.hpp | file | annotate | diff | comparison | revisions |
1.1 --- a/make/bsd/makefiles/adlc.make Tue Dec 27 12:38:49 2011 -0800 1.2 +++ b/make/bsd/makefiles/adlc.make Thu Dec 29 11:37:50 2011 -0800 1.3 @@ -39,9 +39,16 @@ 1.4 1.5 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 1.6 1.7 -SOURCES.AD = \ 1.8 +ifeq ("${Platform_arch_model}", "${Platform_arch}") 1.9 + SOURCES.AD = \ 1.10 $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 1.11 $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 1.12 +else 1.13 + SOURCES.AD = \ 1.14 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 1.15 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ 1.16 + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 1.17 +endif 1.18 1.19 EXEC = $(OUTDIR)/adlc 1.20
2.1 --- a/make/linux/makefiles/adlc.make Tue Dec 27 12:38:49 2011 -0800 2.2 +++ b/make/linux/makefiles/adlc.make Thu Dec 29 11:37:50 2011 -0800 2.3 @@ -39,9 +39,16 @@ 2.4 2.5 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 2.6 2.7 -SOURCES.AD = \ 2.8 +ifeq ("${Platform_arch_model}", "${Platform_arch}") 2.9 + SOURCES.AD = \ 2.10 $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 2.11 $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 2.12 +else 2.13 + SOURCES.AD = \ 2.14 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 2.15 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ 2.16 + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 2.17 +endif 2.18 2.19 EXEC = $(OUTDIR)/adlc 2.20
3.1 --- a/make/solaris/makefiles/adlc.make Tue Dec 27 12:38:49 2011 -0800 3.2 +++ b/make/solaris/makefiles/adlc.make Thu Dec 29 11:37:50 2011 -0800 3.3 @@ -40,9 +40,16 @@ 3.4 3.5 SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad 3.6 3.7 -SOURCES.AD = \ 3.8 +ifeq ("${Platform_arch_model}", "${Platform_arch}") 3.9 + SOURCES.AD = \ 3.10 $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 3.11 $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 3.12 +else 3.13 + SOURCES.AD = \ 3.14 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ 3.15 + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ 3.16 + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) 3.17 +endif 3.18 3.19 EXEC = $(OUTDIR)/adlc 3.20
4.1 --- a/make/windows/makefiles/adlc.make Tue Dec 27 12:38:49 2011 -0800 4.2 +++ b/make/windows/makefiles/adlc.make Thu Dec 29 11:37:50 2011 -0800 4.3 @@ -53,6 +53,17 @@ 4.4 /I "$(WorkSpace)\src\os\windows\vm" \ 4.5 /I "$(WorkSpace)\src\cpu\$(Platform_arch)\vm" 4.6 4.7 +!if "$(Platform_arch_model)" == "$(Platform_arch)" 4.8 +SOURCES_AD=\ 4.9 + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ 4.10 + $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad 4.11 +!else 4.12 +SOURCES_AD=\ 4.13 + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ 4.14 + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad \ 4.15 + $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad 4.16 +!endif 4.17 + 4.18 # NOTE! If you add any files here, you must also update GENERATED_NAMES_IN_DIR 4.19 # and ProjectCreatorIDEOptions in projectcreator.make. 4.20 GENERATED_NAMES=\ 4.21 @@ -105,7 +116,6 @@ 4.22 $(ADLC) $(ADLCFLAGS) $(Platform_arch_model).ad 4.23 mv $(GENERATED_NAMES) $(AdlcOutDir)/ 4.24 4.25 -$(Platform_arch_model).ad: $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad 4.26 +$(Platform_arch_model).ad: $(SOURCES_AD) 4.27 rm -f $(Platform_arch_model).ad 4.28 - cat $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ 4.29 - $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad >$(Platform_arch_model).ad 4.30 + cat $(SOURCES_AD) >$(Platform_arch_model).ad
5.1 --- a/src/cpu/sparc/vm/assembler_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 5.2 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 5.3 @@ -3036,10 +3036,8 @@ 5.4 Label* L_failure, 5.5 Label* L_slow_path, 5.6 RegisterOrConstant super_check_offset) { 5.7 - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 5.8 - Klass::secondary_super_cache_offset_in_bytes()); 5.9 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 5.10 - Klass::super_check_offset_offset_in_bytes()); 5.11 + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 5.12 + int sco_offset = in_bytes(Klass::super_check_offset_offset()); 5.13 5.14 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 5.15 bool need_slow_path = (must_load_sco || 5.16 @@ -3159,10 +3157,8 @@ 5.17 assert(label_nulls <= 1, "at most one NULL in the batch"); 5.18 5.19 // a couple of useful fields in sub_klass: 5.20 - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 5.21 - Klass::secondary_supers_offset_in_bytes()); 5.22 - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 5.23 - Klass::secondary_super_cache_offset_in_bytes()); 5.24 + int ss_offset = in_bytes(Klass::secondary_supers_offset()); 5.25 + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 5.26 5.27 // Do a linear scan of the secondary super-klass chain. 5.28 // This code is rarely used, so simplicity is a virtue here. 
5.29 @@ -3336,7 +3332,7 @@ 5.30 cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label); 5.31 5.32 load_klass(obj_reg, temp_reg); 5.33 - ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); 5.34 + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); 5.35 or3(G2_thread, temp_reg, temp_reg); 5.36 xor3(mark_reg, temp_reg, temp_reg); 5.37 andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg); 5.38 @@ -3413,7 +3409,7 @@ 5.39 // FIXME: due to a lack of registers we currently blow away the age 5.40 // bits in this situation. Should attempt to preserve them. 5.41 load_klass(obj_reg, temp_reg); 5.42 - ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); 5.43 + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); 5.44 or3(G2_thread, temp_reg, temp_reg); 5.45 casn(mark_addr.base(), mark_reg, temp_reg); 5.46 // If the biasing toward our thread failed, this means that 5.47 @@ -3443,7 +3439,7 @@ 5.48 // FIXME: due to a lack of registers we currently blow away the age 5.49 // bits in this situation. Should attempt to preserve them. 5.50 load_klass(obj_reg, temp_reg); 5.51 - ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); 5.52 + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); 5.53 casn(mark_addr.base(), mark_reg, temp_reg); 5.54 // Fall through to the normal CAS-based lock, because no matter what 5.55 // the result of the above CAS, some thread must have succeeded in
6.1 --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 6.2 +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 6.3 @@ -302,7 +302,7 @@ 6.4 assert(_obj != noreg, "must be a valid register"); 6.5 assert(_oop_index >= 0, "must have oop index"); 6.6 __ load_heap_oop(_obj, java_lang_Class::klass_offset_in_bytes(), G3); 6.7 - __ ld_ptr(G3, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc), G3); 6.8 + __ ld_ptr(G3, in_bytes(instanceKlass::init_thread_offset()), G3); 6.9 __ cmp_and_brx_short(G2_thread, G3, Assembler::notEqual, Assembler::pn, call_patch); 6.10 6.11 // load_klass patches may execute the patched code before it's 6.12 @@ -471,7 +471,7 @@ 6.13 6.14 __ load_klass(src_reg, tmp_reg); 6.15 6.16 - Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc)); 6.17 + Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset()); 6.18 __ ld(ref_type_adr, tmp_reg); 6.19 6.20 // _reference_type field is of type ReferenceType (enum)
7.1 --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 7.2 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 7.3 @@ -2202,8 +2202,7 @@ 7.4 } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { 7.5 __ load_klass(dst, tmp); 7.6 } 7.7 - int lh_offset = klassOopDesc::header_size() * HeapWordSize + 7.8 - Klass::layout_helper_offset_in_bytes(); 7.9 + int lh_offset = in_bytes(Klass::layout_helper_offset()); 7.10 7.11 __ lduw(tmp, lh_offset, tmp2); 7.12 7.13 @@ -2238,12 +2237,10 @@ 7.14 __ mov(length, len); 7.15 __ load_klass(dst, tmp); 7.16 7.17 - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + 7.18 - objArrayKlass::element_klass_offset_in_bytes()); 7.19 + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); 7.20 __ ld_ptr(tmp, ek_offset, super_k); 7.21 7.22 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 7.23 - Klass::super_check_offset_offset_in_bytes()); 7.24 + int sco_offset = in_bytes(Klass::super_check_offset_offset()); 7.25 __ lduw(super_k, sco_offset, chk_off); 7.26 7.27 __ call_VM_leaf(tmp, copyfunc_addr); 7.28 @@ -2456,7 +2453,7 @@ 7.29 op->klass()->as_register() == G5, "must be"); 7.30 if (op->init_check()) { 7.31 __ ld(op->klass()->as_register(), 7.32 - instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), 7.33 + in_bytes(instanceKlass::init_state_offset()), 7.34 op->tmp1()->as_register()); 7.35 add_debug_info_for_null_check_here(op->stub()->info()); 7.36 __ cmp(op->tmp1()->as_register(), instanceKlass::fully_initialized); 7.37 @@ -2627,7 +2624,7 @@ 7.38 } else { 7.39 bool need_slow_path = true; 7.40 if (k->is_loaded()) { 7.41 - if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()) 7.42 + if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset())) 7.43 need_slow_path = false; 7.44 // perform the fast part of the checking logic 7.45 __ check_klass_subtype_fast_path(klass_RInfo, 
k_RInfo, Rtmp1, noreg, 7.46 @@ -2731,7 +2728,7 @@ 7.47 __ load_klass(value, klass_RInfo); 7.48 7.49 // get instance klass 7.50 - __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)), k_RInfo); 7.51 + __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset()), k_RInfo); 7.52 // perform the fast part of the checking logic 7.53 __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, failure_target, NULL); 7.54
8.1 --- a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 8.2 +++ b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 8.3 @@ -181,7 +181,7 @@ 8.4 void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { 8.5 assert_different_registers(obj, klass, len, t1, t2); 8.6 if (UseBiasedLocking && !len->is_valid()) { 8.7 - ld_ptr(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes(), t1); 8.8 + ld_ptr(klass, in_bytes(Klass::prototype_header_offset()), t1); 8.9 } else { 8.10 set((intx)markOopDesc::prototype(), t1); 8.11 } 8.12 @@ -252,7 +252,7 @@ 8.13 #ifdef ASSERT 8.14 { 8.15 Label ok; 8.16 - ld(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), t1); 8.17 + ld(klass, in_bytes(Klass::layout_helper_offset()), t1); 8.18 if (var_size_in_bytes != noreg) { 8.19 cmp_and_brx_short(t1, var_size_in_bytes, Assembler::equal, Assembler::pt, ok); 8.20 } else {
9.1 --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 9.2 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 9.3 @@ -398,14 +398,14 @@ 9.4 9.5 if (id == fast_new_instance_init_check_id) { 9.6 // make sure the klass is initialized 9.7 - __ ld(G5_klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_t1); 9.8 + __ ld(G5_klass, in_bytes(instanceKlass::init_state_offset()), G3_t1); 9.9 __ cmp_and_br_short(G3_t1, instanceKlass::fully_initialized, Assembler::notEqual, Assembler::pn, slow_path); 9.10 } 9.11 #ifdef ASSERT 9.12 // assert object can be fast path allocated 9.13 { 9.14 Label ok, not_ok; 9.15 - __ ld(G5_klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), G1_obj_size); 9.16 + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); 9.17 // make sure it's an instance (LH > 0) 9.18 __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok); 9.19 __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size); 9.20 @@ -425,7 +425,7 @@ 9.21 __ bind(retry_tlab); 9.22 9.23 // get the instance size 9.24 - __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size); 9.25 + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); 9.26 9.27 __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path); 9.28 9.29 @@ -437,7 +437,7 @@ 9.30 9.31 __ bind(try_eden); 9.32 // get the instance size 9.33 - __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size); 9.34 + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); 9.35 __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path); 9.36 __ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2); 9.37 9.38 @@ -471,8 +471,7 @@ 9.39 Register G4_length = G4; // Incoming 9.40 Register O0_obj = O0; // Outgoing 9.41 9.42 - Address klass_lh(G5_klass, ((klassOopDesc::header_size() * 
HeapWordSize) 9.43 - + Klass::layout_helper_offset_in_bytes())); 9.44 + Address klass_lh(G5_klass, Klass::layout_helper_offset()); 9.45 assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); 9.46 assert(Klass::_lh_header_size_mask == 0xFF, "bytewise"); 9.47 // Use this offset to pick out an individual byte of the layout_helper: 9.48 @@ -592,7 +591,7 @@ 9.49 Label register_finalizer; 9.50 Register t = O1; 9.51 __ load_klass(O0, t); 9.52 - __ ld(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), t); 9.53 + __ ld(t, in_bytes(Klass::access_flags_offset()), t); 9.54 __ set(JVM_ACC_HAS_FINALIZER, G3); 9.55 __ andcc(G3, t, G0); 9.56 __ br(Assembler::notZero, false, Assembler::pt, register_finalizer);
10.1 --- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 10.2 +++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 10.3 @@ -766,7 +766,7 @@ 10.4 // get native function entry point(O0 is a good temp until the very end) 10.5 ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc::native_function_offset())), O0); 10.6 // for static methods insert the mirror argument 10.7 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 10.8 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 10.9 10.10 __ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc:: constants_offset())), O1); 10.11 __ ld_ptr(Address(O1, 0, constantPoolOopDesc::pool_holder_offset_in_bytes()), O1); 10.12 @@ -1173,7 +1173,7 @@ 10.13 __ btst(JVM_ACC_SYNCHRONIZED, O1); 10.14 __ br( Assembler::zero, false, Assembler::pt, done); 10.15 10.16 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 10.17 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 10.18 __ delayed()->btst(JVM_ACC_STATIC, O1); 10.19 __ ld_ptr(XXX_STATE(_locals), O1); 10.20 __ br( Assembler::zero, true, Assembler::pt, got_obj);
11.1 --- a/src/cpu/sparc/vm/methodHandles_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 11.2 +++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 11.3 @@ -1098,7 +1098,7 @@ 11.4 Address G3_amh_argument ( G3_method_handle, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes()); 11.5 Address G3_amh_conversion(G3_method_handle, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes()); 11.6 11.7 - const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 11.8 + const int java_mirror_offset = in_bytes(Klass::java_mirror_offset()); 11.9 11.10 if (have_entry(ek)) { 11.11 __ nop(); // empty stubs make SG sick
12.1 --- a/src/cpu/sparc/vm/sparc.ad Tue Dec 27 12:38:49 2011 -0800 12.2 +++ b/src/cpu/sparc/vm/sparc.ad Thu Dec 29 11:37:50 2011 -0800 12.3 @@ -6773,6 +6773,16 @@ 12.4 ins_pipe(empty); 12.5 %} 12.6 12.7 +instruct membar_storestore() %{ 12.8 + match(MemBarStoreStore); 12.9 + ins_cost(0); 12.10 + 12.11 + size(0); 12.12 + format %{ "!MEMBAR-storestore (empty encoding)" %} 12.13 + ins_encode( ); 12.14 + ins_pipe(empty); 12.15 +%} 12.16 + 12.17 //----------Register Move Instructions----------------------------------------- 12.18 instruct roundDouble_nop(regD dst) %{ 12.19 match(Set dst (RoundDouble dst));
13.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 13.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 13.3 @@ -3046,8 +3046,7 @@ 13.4 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 13.5 // 13.6 13.7 - int lh_offset = klassOopDesc::header_size() * HeapWordSize + 13.8 - Klass::layout_helper_offset_in_bytes(); 13.9 + int lh_offset = in_bytes(Klass::layout_helper_offset()); 13.10 13.11 // Load 32-bits signed value. Use br() instruction with it to check icc. 13.12 __ lduw(G3_src_klass, lh_offset, G5_lh); 13.13 @@ -3194,15 +3193,13 @@ 13.14 G4_dst_klass, G3_src_klass); 13.15 13.16 // Generate the type check. 13.17 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 13.18 - Klass::super_check_offset_offset_in_bytes()); 13.19 + int sco_offset = in_bytes(Klass::super_check_offset_offset()); 13.20 __ lduw(G4_dst_klass, sco_offset, sco_temp); 13.21 generate_type_check(G3_src_klass, sco_temp, G4_dst_klass, 13.22 O5_temp, L_plain_copy); 13.23 13.24 // Fetch destination element klass from the objArrayKlass header. 13.25 - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + 13.26 - objArrayKlass::element_klass_offset_in_bytes()); 13.27 + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); 13.28 13.29 // the checkcast_copy loop needs two extra arguments: 13.30 __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass
14.1 --- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 14.2 +++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 14.3 @@ -366,7 +366,7 @@ 14.4 14.5 // get synchronization object to O0 14.6 { Label done; 14.7 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 14.8 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 14.9 __ btst(JVM_ACC_STATIC, O0); 14.10 __ br( Assembler::zero, true, Assembler::pt, done); 14.11 __ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case 14.12 @@ -984,7 +984,7 @@ 14.13 // get native function entry point(O0 is a good temp until the very end) 14.14 __ delayed()->ld_ptr(Lmethod, in_bytes(methodOopDesc::native_function_offset()), O0); 14.15 // for static methods insert the mirror argument 14.16 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 14.17 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 14.18 14.19 __ ld_ptr(Lmethod, methodOopDesc:: constants_offset(), O1); 14.20 __ ld_ptr(O1, constantPoolOopDesc::pool_holder_offset_in_bytes(), O1);
15.1 --- a/src/cpu/sparc/vm/templateTable_sparc.cpp Tue Dec 27 12:38:49 2011 -0800 15.2 +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp Thu Dec 29 11:37:50 2011 -0800 15.3 @@ -888,7 +888,7 @@ 15.4 15.5 // do fast instanceof cache test 15.6 15.7 - __ ld_ptr(O4, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes(), O4); 15.8 + __ ld_ptr(O4, in_bytes(objArrayKlass::element_klass_offset()), O4); 15.9 15.10 assert(Otos_i == O0, "just checking"); 15.11 15.12 @@ -2031,7 +2031,7 @@ 15.13 __ access_local_ptr(G3_scratch, Otos_i); 15.14 __ load_klass(Otos_i, O2); 15.15 __ set(JVM_ACC_HAS_FINALIZER, G3); 15.16 - __ ld(O2, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), O2); 15.17 + __ ld(O2, in_bytes(Klass::access_flags_offset()), O2); 15.18 __ andcc(G3, O2, G0); 15.19 Label skip_register_finalizer; 15.20 __ br(Assembler::zero, false, Assembler::pn, skip_register_finalizer); 15.21 @@ -3350,13 +3350,13 @@ 15.22 __ ld_ptr(Rscratch, Roffset, RinstanceKlass); 15.23 15.24 // make sure klass is fully initialized: 15.25 - __ ld(RinstanceKlass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_scratch); 15.26 + __ ld(RinstanceKlass, in_bytes(instanceKlass::init_state_offset()), G3_scratch); 15.27 __ cmp(G3_scratch, instanceKlass::fully_initialized); 15.28 __ br(Assembler::notEqual, false, Assembler::pn, slow_case); 15.29 - __ delayed()->ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset); 15.30 + __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset); 15.31 15.32 // get instance_size in instanceKlass (already aligned) 15.33 - //__ ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset); 15.34 + //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset); 15.35 15.36 // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class 15.37 __ btst(Klass::_lh_instance_slow_path_bit, Roffset); 15.38 @@ -3483,7 +3483,7 @@ 
15.39 __ bind(initialize_header); 15.40 15.41 if (UseBiasedLocking) { 15.42 - __ ld_ptr(RinstanceKlass, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), G4_scratch); 15.43 + __ ld_ptr(RinstanceKlass, in_bytes(Klass::prototype_header_offset()), G4_scratch); 15.44 } else { 15.45 __ set((intptr_t)markOopDesc::prototype(), G4_scratch); 15.46 }
16.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Tue Dec 27 12:38:49 2011 -0800 16.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Thu Dec 29 11:37:50 2011 -0800 16.3 @@ -533,6 +533,19 @@ 16.4 16.5 case 0x0F: // movx..., etc. 16.6 switch (0xFF & *ip++) { 16.7 + case 0x3A: // pcmpestri 16.8 + tail_size = 1; 16.9 + case 0x38: // ptest, pmovzxbw 16.10 + ip++; // skip opcode 16.11 + debug_only(has_disp32 = true); // has both kinds of operands! 16.12 + break; 16.13 + 16.14 + case 0x70: // pshufd r, r/a, #8 16.15 + debug_only(has_disp32 = true); // has both kinds of operands! 16.16 + case 0x73: // psrldq r, #8 16.17 + tail_size = 1; 16.18 + break; 16.19 + 16.20 case 0x12: // movlps 16.21 case 0x28: // movaps 16.22 case 0x2E: // ucomiss 16.23 @@ -543,9 +556,7 @@ 16.24 case 0x57: // xorps 16.25 case 0x6E: // movd 16.26 case 0x7E: // movd 16.27 - case 0xAE: // ldmxcsr a 16.28 - // 64bit side says it these have both operands but that doesn't 16.29 - // appear to be true 16.30 + case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush 16.31 debug_only(has_disp32 = true); 16.32 break; 16.33 16.34 @@ -565,6 +576,12 @@ 16.35 // fall out of the switch to decode the address 16.36 break; 16.37 16.38 + case 0xC4: // pinsrw r, a, #8 16.39 + debug_only(has_disp32 = true); 16.40 + case 0xC5: // pextrw r, r, #8 16.41 + tail_size = 1; // the imm8 16.42 + break; 16.43 + 16.44 case 0xAC: // shrd r, a, #8 16.45 debug_only(has_disp32 = true); 16.46 tail_size = 1; // the imm8 16.47 @@ -625,11 +642,44 @@ 16.48 tail_size = 1; // the imm8 16.49 break; 16.50 16.51 - case 0xE8: // call rdisp32 16.52 - case 0xE9: // jmp rdisp32 16.53 - if (which == end_pc_operand) return ip + 4; 16.54 - assert(which == call32_operand, "call has no disp32 or imm"); 16.55 - return ip; 16.56 + case 0xC4: // VEX_3bytes 16.57 + case 0xC5: // VEX_2bytes 16.58 + assert((UseAVX > 0), "shouldn't have VEX prefix"); 16.59 + assert(ip == inst+1, "no prefixes allowed"); 16.60 + // C4 and C5 are also used as opcodes for PINSRW and PEXTRW 
instructions 16.61 + // but they have prefix 0x0F and processed when 0x0F processed above. 16.62 + // 16.63 + // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES 16.64 + // instructions (these instructions are not supported in 64-bit mode). 16.65 + // To distinguish them bits [7:6] are set in the VEX second byte since 16.66 + // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set 16.67 + // those VEX bits REX and vvvv bits are inverted. 16.68 + // 16.69 + // Fortunately C2 doesn't generate these instructions so we don't need 16.70 + // to check for them in product version. 16.71 + 16.72 + // Check second byte 16.73 + NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions")); 16.74 + 16.75 + // First byte 16.76 + if ((0xFF & *inst) == VEX_3bytes) { 16.77 + ip++; // third byte 16.78 + is_64bit = ((VEX_W & *ip) == VEX_W); 16.79 + } 16.80 + ip++; // opcode 16.81 + // To find the end of instruction (which == end_pc_operand). 16.82 + switch (0xFF & *ip) { 16.83 + case 0x61: // pcmpestri r, r/a, #8 16.84 + case 0x70: // pshufd r, r/a, #8 16.85 + case 0x73: // psrldq r, #8 16.86 + tail_size = 1; // the imm8 16.87 + break; 16.88 + default: 16.89 + break; 16.90 + } 16.91 + ip++; // skip opcode 16.92 + debug_only(has_disp32 = true); // has both kinds of operands! 
16.93 + break; 16.94 16.95 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 16.96 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl 16.97 @@ -643,6 +693,12 @@ 16.98 debug_only(has_disp32 = true); 16.99 break; 16.100 16.101 + case 0xE8: // call rdisp32 16.102 + case 0xE9: // jmp rdisp32 16.103 + if (which == end_pc_operand) return ip + 4; 16.104 + assert(which == call32_operand, "call has no disp32 or imm"); 16.105 + return ip; 16.106 + 16.107 case 0xF0: // Lock 16.108 assert(os::is_MP(), "only on MP"); 16.109 goto again_after_prefix; 16.110 @@ -918,9 +974,7 @@ 16.111 16.112 void Assembler::addsd(XMMRegister dst, XMMRegister src) { 16.113 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.114 - emit_byte(0xF2); 16.115 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.116 - emit_byte(0x0F); 16.117 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 16.118 emit_byte(0x58); 16.119 emit_byte(0xC0 | encode); 16.120 } 16.121 @@ -928,18 +982,14 @@ 16.122 void Assembler::addsd(XMMRegister dst, Address src) { 16.123 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.124 InstructionMark im(this); 16.125 - emit_byte(0xF2); 16.126 - prefix(src, dst); 16.127 - emit_byte(0x0F); 16.128 + simd_prefix(dst, dst, src, VEX_SIMD_F2); 16.129 emit_byte(0x58); 16.130 emit_operand(dst, src); 16.131 } 16.132 16.133 void Assembler::addss(XMMRegister dst, XMMRegister src) { 16.134 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.135 - emit_byte(0xF3); 16.136 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.137 - emit_byte(0x0F); 16.138 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 16.139 emit_byte(0x58); 16.140 emit_byte(0xC0 | encode); 16.141 } 16.142 @@ -947,13 +997,19 @@ 16.143 void Assembler::addss(XMMRegister dst, Address src) { 16.144 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.145 InstructionMark im(this); 16.146 - emit_byte(0xF3); 16.147 - prefix(src, dst); 16.148 - 
emit_byte(0x0F); 16.149 + simd_prefix(dst, dst, src, VEX_SIMD_F3); 16.150 emit_byte(0x58); 16.151 emit_operand(dst, src); 16.152 } 16.153 16.154 +void Assembler::andl(Address dst, int32_t imm32) { 16.155 + InstructionMark im(this); 16.156 + prefix(dst); 16.157 + emit_byte(0x81); 16.158 + emit_operand(rsp, dst, 4); 16.159 + emit_long(imm32); 16.160 +} 16.161 + 16.162 void Assembler::andl(Register dst, int32_t imm32) { 16.163 prefix(dst); 16.164 emit_arith(0x81, 0xE0, dst, imm32); 16.165 @@ -974,13 +1030,33 @@ 16.166 void Assembler::andpd(XMMRegister dst, Address src) { 16.167 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.168 InstructionMark im(this); 16.169 - emit_byte(0x66); 16.170 - prefix(src, dst); 16.171 - emit_byte(0x0F); 16.172 + simd_prefix(dst, dst, src, VEX_SIMD_66); 16.173 emit_byte(0x54); 16.174 emit_operand(dst, src); 16.175 } 16.176 16.177 +void Assembler::andpd(XMMRegister dst, XMMRegister src) { 16.178 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.179 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 16.180 + emit_byte(0x54); 16.181 + emit_byte(0xC0 | encode); 16.182 +} 16.183 + 16.184 +void Assembler::andps(XMMRegister dst, Address src) { 16.185 + NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.186 + InstructionMark im(this); 16.187 + simd_prefix(dst, dst, src, VEX_SIMD_NONE); 16.188 + emit_byte(0x54); 16.189 + emit_operand(dst, src); 16.190 +} 16.191 + 16.192 +void Assembler::andps(XMMRegister dst, XMMRegister src) { 16.193 + NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.194 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); 16.195 + emit_byte(0x54); 16.196 + emit_byte(0xC0 | encode); 16.197 +} 16.198 + 16.199 void Assembler::bsfl(Register dst, Register src) { 16.200 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.201 emit_byte(0x0F); 16.202 @@ -1025,19 +1101,7 @@ 16.203 } 16.204 16.205 void Assembler::call(Register dst) { 16.206 - // This was originally using a 
32bit register encoding 16.207 - // and surely we want 64bit! 16.208 - // this is a 32bit encoding but in 64bit mode the default 16.209 - // operand size is 64bit so there is no need for the 16.210 - // wide prefix. So prefix only happens if we use the 16.211 - // new registers. Much like push/pop. 16.212 - int x = offset(); 16.213 - // this may be true but dbx disassembles it as if it 16.214 - // were 32bits... 16.215 - // int encode = prefix_and_encode(dst->encoding()); 16.216 - // if (offset() != x) assert(dst->encoding() >= 8, "what?"); 16.217 - int encode = prefixq_and_encode(dst->encoding()); 16.218 - 16.219 + int encode = prefix_and_encode(dst->encoding()); 16.220 emit_byte(0xFF); 16.221 emit_byte(0xD0 | encode); 16.222 } 16.223 @@ -1157,87 +1221,119 @@ 16.224 // NOTE: dbx seems to decode this as comiss even though the 16.225 // 0x66 is there. Strangly ucomisd comes out correct 16.226 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.227 - emit_byte(0x66); 16.228 - comiss(dst, src); 16.229 + InstructionMark im(this); 16.230 + simd_prefix(dst, src, VEX_SIMD_66); 16.231 + emit_byte(0x2F); 16.232 + emit_operand(dst, src); 16.233 +} 16.234 + 16.235 +void Assembler::comisd(XMMRegister dst, XMMRegister src) { 16.236 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.237 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 16.238 + emit_byte(0x2F); 16.239 + emit_byte(0xC0 | encode); 16.240 } 16.241 16.242 void Assembler::comiss(XMMRegister dst, Address src) { 16.243 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.244 - 16.245 - InstructionMark im(this); 16.246 - prefix(src, dst); 16.247 - emit_byte(0x0F); 16.248 + InstructionMark im(this); 16.249 + simd_prefix(dst, src, VEX_SIMD_NONE); 16.250 emit_byte(0x2F); 16.251 emit_operand(dst, src); 16.252 } 16.253 16.254 +void Assembler::comiss(XMMRegister dst, XMMRegister src) { 16.255 + NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.256 + int encode = simd_prefix_and_encode(dst, src, 
VEX_SIMD_NONE); 16.257 + emit_byte(0x2F); 16.258 + emit_byte(0xC0 | encode); 16.259 +} 16.260 + 16.261 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { 16.262 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.263 - emit_byte(0xF3); 16.264 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.265 - emit_byte(0x0F); 16.266 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 16.267 emit_byte(0xE6); 16.268 emit_byte(0xC0 | encode); 16.269 } 16.270 16.271 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { 16.272 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.273 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.274 - emit_byte(0x0F); 16.275 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 16.276 emit_byte(0x5B); 16.277 emit_byte(0xC0 | encode); 16.278 } 16.279 16.280 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { 16.281 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.282 - emit_byte(0xF2); 16.283 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.284 - emit_byte(0x0F); 16.285 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 16.286 emit_byte(0x5A); 16.287 emit_byte(0xC0 | encode); 16.288 } 16.289 16.290 +void Assembler::cvtsd2ss(XMMRegister dst, Address src) { 16.291 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.292 + InstructionMark im(this); 16.293 + simd_prefix(dst, dst, src, VEX_SIMD_F2); 16.294 + emit_byte(0x5A); 16.295 + emit_operand(dst, src); 16.296 +} 16.297 + 16.298 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { 16.299 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.300 - emit_byte(0xF2); 16.301 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.302 - emit_byte(0x0F); 16.303 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 16.304 emit_byte(0x2A); 16.305 emit_byte(0xC0 | encode); 16.306 } 16.307 16.308 +void Assembler::cvtsi2sdl(XMMRegister dst, 
Address src) { 16.309 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.310 + InstructionMark im(this); 16.311 + simd_prefix(dst, dst, src, VEX_SIMD_F2); 16.312 + emit_byte(0x2A); 16.313 + emit_operand(dst, src); 16.314 +} 16.315 + 16.316 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { 16.317 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.318 - emit_byte(0xF3); 16.319 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.320 - emit_byte(0x0F); 16.321 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 16.322 emit_byte(0x2A); 16.323 emit_byte(0xC0 | encode); 16.324 } 16.325 16.326 +void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { 16.327 + NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.328 + InstructionMark im(this); 16.329 + simd_prefix(dst, dst, src, VEX_SIMD_F3); 16.330 + emit_byte(0x2A); 16.331 + emit_operand(dst, src); 16.332 +} 16.333 + 16.334 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { 16.335 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.336 - emit_byte(0xF3); 16.337 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.338 - emit_byte(0x0F); 16.339 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 16.340 emit_byte(0x5A); 16.341 emit_byte(0xC0 | encode); 16.342 } 16.343 16.344 +void Assembler::cvtss2sd(XMMRegister dst, Address src) { 16.345 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.346 + InstructionMark im(this); 16.347 + simd_prefix(dst, dst, src, VEX_SIMD_F3); 16.348 + emit_byte(0x5A); 16.349 + emit_operand(dst, src); 16.350 +} 16.351 + 16.352 + 16.353 void Assembler::cvttsd2sil(Register dst, XMMRegister src) { 16.354 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.355 - emit_byte(0xF2); 16.356 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.357 - emit_byte(0x0F); 16.358 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 16.359 emit_byte(0x2C); 16.360 emit_byte(0xC0 | encode); 
16.361 } 16.362 16.363 void Assembler::cvttss2sil(Register dst, XMMRegister src) { 16.364 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.365 - emit_byte(0xF3); 16.366 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.367 - emit_byte(0x0F); 16.368 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 16.369 emit_byte(0x2C); 16.370 emit_byte(0xC0 | encode); 16.371 } 16.372 @@ -1253,18 +1349,14 @@ 16.373 void Assembler::divsd(XMMRegister dst, Address src) { 16.374 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.375 InstructionMark im(this); 16.376 - emit_byte(0xF2); 16.377 - prefix(src, dst); 16.378 - emit_byte(0x0F); 16.379 + simd_prefix(dst, dst, src, VEX_SIMD_F2); 16.380 emit_byte(0x5E); 16.381 emit_operand(dst, src); 16.382 } 16.383 16.384 void Assembler::divsd(XMMRegister dst, XMMRegister src) { 16.385 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.386 - emit_byte(0xF2); 16.387 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.388 - emit_byte(0x0F); 16.389 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 16.390 emit_byte(0x5E); 16.391 emit_byte(0xC0 | encode); 16.392 } 16.393 @@ -1272,18 +1364,14 @@ 16.394 void Assembler::divss(XMMRegister dst, Address src) { 16.395 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.396 InstructionMark im(this); 16.397 - emit_byte(0xF3); 16.398 - prefix(src, dst); 16.399 - emit_byte(0x0F); 16.400 + simd_prefix(dst, dst, src, VEX_SIMD_F3); 16.401 emit_byte(0x5E); 16.402 emit_operand(dst, src); 16.403 } 16.404 16.405 void Assembler::divss(XMMRegister dst, XMMRegister src) { 16.406 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.407 - emit_byte(0xF3); 16.408 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.409 - emit_byte(0x0F); 16.410 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 16.411 emit_byte(0x5E); 16.412 emit_byte(0xC0 | encode); 16.413 } 16.414 @@ -1377,8 +1465,14 @@ 16.415 if 
(L.is_bound()) { 16.416 const int short_size = 2; 16.417 address entry = target(L); 16.418 - assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)), 16.419 - "Dispacement too large for a short jmp"); 16.420 +#ifdef ASSERT 16.421 + intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); 16.422 + intptr_t delta = short_branch_delta(); 16.423 + if (delta != 0) { 16.424 + dist += (dist < 0 ? (-delta) :delta); 16.425 + } 16.426 + assert(is8bit(dist), "Dispacement too large for a short jmp"); 16.427 +#endif 16.428 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 16.429 // 0111 tttn #8-bit disp 16.430 emit_byte(0x70 | cc); 16.431 @@ -1444,9 +1538,15 @@ 16.432 if (L.is_bound()) { 16.433 const int short_size = 2; 16.434 address entry = target(L); 16.435 - assert(is8bit((entry - _code_pos) + short_size), 16.436 - "Dispacement too large for a short jmp"); 16.437 assert(entry != NULL, "jmp most probably wrong"); 16.438 +#ifdef ASSERT 16.439 + intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); 16.440 + intptr_t delta = short_branch_delta(); 16.441 + if (delta != 0) { 16.442 + dist += (dist < 0 ? 
(-delta) :delta); 16.443 + } 16.444 + assert(is8bit(dist), "Dispacement too large for a short jmp"); 16.445 +#endif 16.446 intptr_t offs = entry - _code_pos; 16.447 emit_byte(0xEB); 16.448 emit_byte((offs - short_size) & 0xFF); 16.449 @@ -1509,49 +1609,16 @@ 16.450 16.451 void Assembler::movapd(XMMRegister dst, XMMRegister src) { 16.452 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.453 - int dstenc = dst->encoding(); 16.454 - int srcenc = src->encoding(); 16.455 - emit_byte(0x66); 16.456 - if (dstenc < 8) { 16.457 - if (srcenc >= 8) { 16.458 - prefix(REX_B); 16.459 - srcenc -= 8; 16.460 - } 16.461 - } else { 16.462 - if (srcenc < 8) { 16.463 - prefix(REX_R); 16.464 - } else { 16.465 - prefix(REX_RB); 16.466 - srcenc -= 8; 16.467 - } 16.468 - dstenc -= 8; 16.469 - } 16.470 - emit_byte(0x0F); 16.471 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 16.472 emit_byte(0x28); 16.473 - emit_byte(0xC0 | dstenc << 3 | srcenc); 16.474 + emit_byte(0xC0 | encode); 16.475 } 16.476 16.477 void Assembler::movaps(XMMRegister dst, XMMRegister src) { 16.478 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.479 - int dstenc = dst->encoding(); 16.480 - int srcenc = src->encoding(); 16.481 - if (dstenc < 8) { 16.482 - if (srcenc >= 8) { 16.483 - prefix(REX_B); 16.484 - srcenc -= 8; 16.485 - } 16.486 - } else { 16.487 - if (srcenc < 8) { 16.488 - prefix(REX_R); 16.489 - } else { 16.490 - prefix(REX_RB); 16.491 - srcenc -= 8; 16.492 - } 16.493 - dstenc -= 8; 16.494 - } 16.495 - emit_byte(0x0F); 16.496 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 16.497 emit_byte(0x28); 16.498 - emit_byte(0xC0 | dstenc << 3 | srcenc); 16.499 + emit_byte(0xC0 | encode); 16.500 } 16.501 16.502 void Assembler::movb(Register dst, Address src) { 16.503 @@ -1582,19 +1649,15 @@ 16.504 16.505 void Assembler::movdl(XMMRegister dst, Register src) { 16.506 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.507 - emit_byte(0x66); 16.508 - int encode = 
prefix_and_encode(dst->encoding(), src->encoding()); 16.509 - emit_byte(0x0F); 16.510 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 16.511 emit_byte(0x6E); 16.512 emit_byte(0xC0 | encode); 16.513 } 16.514 16.515 void Assembler::movdl(Register dst, XMMRegister src) { 16.516 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.517 - emit_byte(0x66); 16.518 // swap src/dst to get correct prefix 16.519 - int encode = prefix_and_encode(src->encoding(), dst->encoding()); 16.520 - emit_byte(0x0F); 16.521 + int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); 16.522 emit_byte(0x7E); 16.523 emit_byte(0xC0 | encode); 16.524 } 16.525 @@ -1602,68 +1665,37 @@ 16.526 void Assembler::movdl(XMMRegister dst, Address src) { 16.527 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.528 InstructionMark im(this); 16.529 - emit_byte(0x66); 16.530 - prefix(src, dst); 16.531 - emit_byte(0x0F); 16.532 + simd_prefix(dst, src, VEX_SIMD_66); 16.533 emit_byte(0x6E); 16.534 emit_operand(dst, src); 16.535 } 16.536 16.537 - 16.538 -void Assembler::movdqa(XMMRegister dst, Address src) { 16.539 +void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 16.540 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.541 - InstructionMark im(this); 16.542 - emit_byte(0x66); 16.543 - prefix(src, dst); 16.544 - emit_byte(0x0F); 16.545 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 16.546 + emit_byte(0x6F); 16.547 + emit_byte(0xC0 | encode); 16.548 +} 16.549 + 16.550 +void Assembler::movdqu(XMMRegister dst, Address src) { 16.551 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.552 + InstructionMark im(this); 16.553 + simd_prefix(dst, src, VEX_SIMD_F3); 16.554 emit_byte(0x6F); 16.555 emit_operand(dst, src); 16.556 } 16.557 16.558 -void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 16.559 +void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 16.560 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.561 - emit_byte(0x66); 16.562 - int encode = 
prefixq_and_encode(dst->encoding(), src->encoding()); 16.563 - emit_byte(0x0F); 16.564 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 16.565 emit_byte(0x6F); 16.566 emit_byte(0xC0 | encode); 16.567 } 16.568 16.569 -void Assembler::movdqa(Address dst, XMMRegister src) { 16.570 - NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.571 - InstructionMark im(this); 16.572 - emit_byte(0x66); 16.573 - prefix(dst, src); 16.574 - emit_byte(0x0F); 16.575 - emit_byte(0x7F); 16.576 - emit_operand(src, dst); 16.577 -} 16.578 - 16.579 -void Assembler::movdqu(XMMRegister dst, Address src) { 16.580 - NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.581 - InstructionMark im(this); 16.582 - emit_byte(0xF3); 16.583 - prefix(src, dst); 16.584 - emit_byte(0x0F); 16.585 - emit_byte(0x6F); 16.586 - emit_operand(dst, src); 16.587 -} 16.588 - 16.589 -void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 16.590 - NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.591 - emit_byte(0xF3); 16.592 - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 16.593 - emit_byte(0x0F); 16.594 - emit_byte(0x6F); 16.595 - emit_byte(0xC0 | encode); 16.596 -} 16.597 - 16.598 void Assembler::movdqu(Address dst, XMMRegister src) { 16.599 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.600 InstructionMark im(this); 16.601 - emit_byte(0xF3); 16.602 - prefix(dst, src); 16.603 - emit_byte(0x0F); 16.604 + simd_prefix(dst, src, VEX_SIMD_F3); 16.605 emit_byte(0x7F); 16.606 emit_operand(src, dst); 16.607 } 16.608 @@ -1710,9 +1742,7 @@ 16.609 void Assembler::movlpd(XMMRegister dst, Address src) { 16.610 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.611 InstructionMark im(this); 16.612 - emit_byte(0x66); 16.613 - prefix(src, dst); 16.614 - emit_byte(0x0F); 16.615 + simd_prefix(dst, dst, src, VEX_SIMD_66); 16.616 emit_byte(0x12); 16.617 emit_operand(dst, src); 16.618 } 16.619 @@ -1740,9 +1770,7 @@ 16.620 void Assembler::movq(XMMRegister dst, Address src) { 16.621 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.622 InstructionMark im(this); 16.623 - emit_byte(0xF3); 16.624 - prefix(src, dst); 16.625 - emit_byte(0x0F); 16.626 + simd_prefix(dst, src, VEX_SIMD_F3); 16.627 emit_byte(0x7E); 16.628 emit_operand(dst, src); 16.629 } 16.630 @@ -1750,9 +1778,7 @@ 16.631 void Assembler::movq(Address dst, XMMRegister src) { 16.632 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.633 InstructionMark im(this); 16.634 - emit_byte(0x66); 16.635 - prefix(dst, src); 16.636 - emit_byte(0x0F); 16.637 + simd_prefix(dst, src, VEX_SIMD_66); 16.638 emit_byte(0xD6); 16.639 emit_operand(src, dst); 16.640 } 16.641 @@ -1775,9 +1801,7 @@ 16.642 16.643 void Assembler::movsd(XMMRegister dst, XMMRegister src) { 16.644 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.645 - emit_byte(0xF2); 16.646 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.647 - emit_byte(0x0F); 16.648 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 16.649 emit_byte(0x10); 16.650 emit_byte(0xC0 | encode); 16.651 } 16.652 @@ -1785,9 +1809,7 @@ 16.653 void Assembler::movsd(XMMRegister dst, Address src) { 16.654 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.655 InstructionMark im(this); 16.656 - emit_byte(0xF2); 16.657 - prefix(src, dst); 16.658 - emit_byte(0x0F); 16.659 + simd_prefix(dst, src, VEX_SIMD_F2); 16.660 emit_byte(0x10); 16.661 emit_operand(dst, src); 16.662 } 16.663 @@ -1795,18 +1817,14 @@ 16.664 void Assembler::movsd(Address dst, XMMRegister src) { 16.665 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.666 InstructionMark im(this); 16.667 - emit_byte(0xF2); 16.668 - prefix(dst, src); 16.669 - emit_byte(0x0F); 16.670 + simd_prefix(dst, src, VEX_SIMD_F2); 16.671 emit_byte(0x11); 16.672 emit_operand(src, dst); 16.673 } 16.674 16.675 void Assembler::movss(XMMRegister dst, XMMRegister src) { 16.676 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.677 - emit_byte(0xF3); 16.678 - int encode = 
prefix_and_encode(dst->encoding(), src->encoding()); 16.679 - emit_byte(0x0F); 16.680 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 16.681 emit_byte(0x10); 16.682 emit_byte(0xC0 | encode); 16.683 } 16.684 @@ -1814,9 +1832,7 @@ 16.685 void Assembler::movss(XMMRegister dst, Address src) { 16.686 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.687 InstructionMark im(this); 16.688 - emit_byte(0xF3); 16.689 - prefix(src, dst); 16.690 - emit_byte(0x0F); 16.691 + simd_prefix(dst, src, VEX_SIMD_F3); 16.692 emit_byte(0x10); 16.693 emit_operand(dst, src); 16.694 } 16.695 @@ -1824,9 +1840,7 @@ 16.696 void Assembler::movss(Address dst, XMMRegister src) { 16.697 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.698 InstructionMark im(this); 16.699 - emit_byte(0xF3); 16.700 - prefix(dst, src); 16.701 - emit_byte(0x0F); 16.702 + simd_prefix(dst, src, VEX_SIMD_F3); 16.703 emit_byte(0x11); 16.704 emit_operand(src, dst); 16.705 } 16.706 @@ -1919,18 +1933,14 @@ 16.707 void Assembler::mulsd(XMMRegister dst, Address src) { 16.708 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.709 InstructionMark im(this); 16.710 - emit_byte(0xF2); 16.711 - prefix(src, dst); 16.712 - emit_byte(0x0F); 16.713 + simd_prefix(dst, dst, src, VEX_SIMD_F2); 16.714 emit_byte(0x59); 16.715 emit_operand(dst, src); 16.716 } 16.717 16.718 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 16.719 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.720 - emit_byte(0xF2); 16.721 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.722 - emit_byte(0x0F); 16.723 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 16.724 emit_byte(0x59); 16.725 emit_byte(0xC0 | encode); 16.726 } 16.727 @@ -1938,18 +1948,14 @@ 16.728 void Assembler::mulss(XMMRegister dst, Address src) { 16.729 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.730 InstructionMark im(this); 16.731 - emit_byte(0xF3); 16.732 - prefix(src, dst); 16.733 - emit_byte(0x0F); 16.734 + 
simd_prefix(dst, dst, src, VEX_SIMD_F3); 16.735 emit_byte(0x59); 16.736 emit_operand(dst, src); 16.737 } 16.738 16.739 void Assembler::mulss(XMMRegister dst, XMMRegister src) { 16.740 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.741 - emit_byte(0xF3); 16.742 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.743 - emit_byte(0x0F); 16.744 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 16.745 emit_byte(0x59); 16.746 emit_byte(0xC0 | encode); 16.747 } 16.748 @@ -2237,14 +2243,26 @@ 16.749 emit_arith(0x0B, 0xC0, dst, src); 16.750 } 16.751 16.752 +void Assembler::packuswb(XMMRegister dst, Address src) { 16.753 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.754 + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 16.755 + InstructionMark im(this); 16.756 + simd_prefix(dst, dst, src, VEX_SIMD_66); 16.757 + emit_byte(0x67); 16.758 + emit_operand(dst, src); 16.759 +} 16.760 + 16.761 +void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 16.762 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.763 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 16.764 + emit_byte(0x67); 16.765 + emit_byte(0xC0 | encode); 16.766 +} 16.767 + 16.768 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 16.769 assert(VM_Version::supports_sse4_2(), ""); 16.770 - 16.771 - InstructionMark im(this); 16.772 - emit_byte(0x66); 16.773 - prefix(src, dst); 16.774 - emit_byte(0x0F); 16.775 - emit_byte(0x3A); 16.776 + InstructionMark im(this); 16.777 + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 16.778 emit_byte(0x61); 16.779 emit_operand(dst, src); 16.780 emit_byte(imm8); 16.781 @@ -2252,16 +2270,27 @@ 16.782 16.783 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 16.784 assert(VM_Version::supports_sse4_2(), ""); 16.785 - 16.786 - emit_byte(0x66); 16.787 - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 16.788 - emit_byte(0x0F); 
16.789 - emit_byte(0x3A); 16.790 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 16.791 emit_byte(0x61); 16.792 emit_byte(0xC0 | encode); 16.793 emit_byte(imm8); 16.794 } 16.795 16.796 +void Assembler::pmovzxbw(XMMRegister dst, Address src) { 16.797 + assert(VM_Version::supports_sse4_1(), ""); 16.798 + InstructionMark im(this); 16.799 + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 16.800 + emit_byte(0x30); 16.801 + emit_operand(dst, src); 16.802 +} 16.803 + 16.804 +void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 16.805 + assert(VM_Version::supports_sse4_1(), ""); 16.806 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 16.807 + emit_byte(0x30); 16.808 + emit_byte(0xC0 | encode); 16.809 +} 16.810 + 16.811 // generic 16.812 void Assembler::pop(Register dst) { 16.813 int encode = prefix_and_encode(dst->encoding()); 16.814 @@ -2360,22 +2389,24 @@ 16.815 16.816 void Assembler::por(XMMRegister dst, XMMRegister src) { 16.817 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.818 - 16.819 - emit_byte(0x66); 16.820 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.821 - emit_byte(0x0F); 16.822 - 16.823 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 16.824 emit_byte(0xEB); 16.825 emit_byte(0xC0 | encode); 16.826 } 16.827 16.828 +void Assembler::por(XMMRegister dst, Address src) { 16.829 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.830 + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 16.831 + InstructionMark im(this); 16.832 + simd_prefix(dst, dst, src, VEX_SIMD_66); 16.833 + emit_byte(0xEB); 16.834 + emit_operand(dst, src); 16.835 +} 16.836 + 16.837 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 16.838 assert(isByte(mode), "invalid value"); 16.839 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.840 - 16.841 - emit_byte(0x66); 16.842 - int encode = prefix_and_encode(dst->encoding(), 
src->encoding()); 16.843 - emit_byte(0x0F); 16.844 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 16.845 emit_byte(0x70); 16.846 emit_byte(0xC0 | encode); 16.847 emit_byte(mode & 0xFF); 16.848 @@ -2385,11 +2416,9 @@ 16.849 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 16.850 assert(isByte(mode), "invalid value"); 16.851 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.852 - 16.853 - InstructionMark im(this); 16.854 - emit_byte(0x66); 16.855 - prefix(src, dst); 16.856 - emit_byte(0x0F); 16.857 + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 16.858 + InstructionMark im(this); 16.859 + simd_prefix(dst, src, VEX_SIMD_66); 16.860 emit_byte(0x70); 16.861 emit_operand(dst, src); 16.862 emit_byte(mode & 0xFF); 16.863 @@ -2398,10 +2427,7 @@ 16.864 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 16.865 assert(isByte(mode), "invalid value"); 16.866 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.867 - 16.868 - emit_byte(0xF2); 16.869 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.870 - emit_byte(0x0F); 16.871 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 16.872 emit_byte(0x70); 16.873 emit_byte(0xC0 | encode); 16.874 emit_byte(mode & 0xFF); 16.875 @@ -2410,11 +2436,9 @@ 16.876 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 16.877 assert(isByte(mode), "invalid value"); 16.878 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.879 - 16.880 - InstructionMark im(this); 16.881 - emit_byte(0xF2); 16.882 - prefix(src, dst); // QQ new 16.883 - emit_byte(0x0F); 16.884 + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 16.885 + InstructionMark im(this); 16.886 + simd_prefix(dst, src, VEX_SIMD_F2); 16.887 emit_byte(0x70); 16.888 emit_operand(dst, src); 16.889 emit_byte(mode & 0xFF); 16.890 @@ -2425,11 +2449,8 @@ 16.891 // HMM Table D-1 says sse2 or mmx. 
16.892 // Do not confuse it with psrldq SSE2 instruction which 16.893 // shifts 128 bit value in xmm register by number of bytes. 16.894 - NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.895 - 16.896 - int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding()); 16.897 - emit_byte(0x66); 16.898 - emit_byte(0x0F); 16.899 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.900 + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 16.901 emit_byte(0x73); 16.902 emit_byte(0xC0 | encode); 16.903 emit_byte(shift); 16.904 @@ -2438,10 +2459,7 @@ 16.905 void Assembler::psrldq(XMMRegister dst, int shift) { 16.906 // Shift 128 bit value in xmm register by number of bytes. 16.907 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.908 - 16.909 - int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding()); 16.910 - emit_byte(0x66); 16.911 - emit_byte(0x0F); 16.912 + int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); 16.913 emit_byte(0x73); 16.914 emit_byte(0xC0 | encode); 16.915 emit_byte(shift); 16.916 @@ -2449,36 +2467,52 @@ 16.917 16.918 void Assembler::ptest(XMMRegister dst, Address src) { 16.919 assert(VM_Version::supports_sse4_1(), ""); 16.920 - 16.921 - InstructionMark im(this); 16.922 - emit_byte(0x66); 16.923 - prefix(src, dst); 16.924 - emit_byte(0x0F); 16.925 - emit_byte(0x38); 16.926 + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 16.927 + InstructionMark im(this); 16.928 + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 16.929 emit_byte(0x17); 16.930 emit_operand(dst, src); 16.931 } 16.932 16.933 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 16.934 assert(VM_Version::supports_sse4_1(), ""); 16.935 - 16.936 - emit_byte(0x66); 16.937 - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 16.938 - emit_byte(0x0F); 16.939 - emit_byte(0x38); 16.940 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 16.941 emit_byte(0x17); 16.942 
emit_byte(0xC0 | encode); 16.943 } 16.944 16.945 +void Assembler::punpcklbw(XMMRegister dst, Address src) { 16.946 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.947 + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 16.948 + InstructionMark im(this); 16.949 + simd_prefix(dst, dst, src, VEX_SIMD_66); 16.950 + emit_byte(0x60); 16.951 + emit_operand(dst, src); 16.952 +} 16.953 + 16.954 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 16.955 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.956 - emit_byte(0x66); 16.957 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.958 - emit_byte(0x0F); 16.959 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 16.960 emit_byte(0x60); 16.961 emit_byte(0xC0 | encode); 16.962 } 16.963 16.964 +void Assembler::punpckldq(XMMRegister dst, Address src) { 16.965 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.966 + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 16.967 + InstructionMark im(this); 16.968 + simd_prefix(dst, dst, src, VEX_SIMD_66); 16.969 + emit_byte(0x62); 16.970 + emit_operand(dst, src); 16.971 +} 16.972 + 16.973 +void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 16.974 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.975 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 16.976 + emit_byte(0x62); 16.977 + emit_byte(0xC0 | encode); 16.978 +} 16.979 + 16.980 void Assembler::push(int32_t imm32) { 16.981 // in 64bits we push 64bits onto the stack but only 16.982 // take a 32bit immediate 16.983 @@ -2508,20 +2542,16 @@ 16.984 16.985 void Assembler::pxor(XMMRegister dst, Address src) { 16.986 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.987 - InstructionMark im(this); 16.988 - emit_byte(0x66); 16.989 - prefix(src, dst); 16.990 - emit_byte(0x0F); 16.991 + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 16.992 + InstructionMark im(this); 16.993 + 
simd_prefix(dst, dst, src, VEX_SIMD_66); 16.994 emit_byte(0xEF); 16.995 emit_operand(dst, src); 16.996 } 16.997 16.998 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 16.999 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1000 - InstructionMark im(this); 16.1001 - emit_byte(0x66); 16.1002 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.1003 - emit_byte(0x0F); 16.1004 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 16.1005 emit_byte(0xEF); 16.1006 emit_byte(0xC0 | encode); 16.1007 } 16.1008 @@ -2683,12 +2713,8 @@ 16.1009 } 16.1010 16.1011 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 16.1012 - // HMM Table D-1 says sse2 16.1013 - // NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1014 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1015 - emit_byte(0xF2); 16.1016 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.1017 - emit_byte(0x0F); 16.1018 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 16.1019 emit_byte(0x51); 16.1020 emit_byte(0xC0 | encode); 16.1021 } 16.1022 @@ -2696,30 +2722,22 @@ 16.1023 void Assembler::sqrtsd(XMMRegister dst, Address src) { 16.1024 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1025 InstructionMark im(this); 16.1026 - emit_byte(0xF2); 16.1027 - prefix(src, dst); 16.1028 - emit_byte(0x0F); 16.1029 + simd_prefix(dst, dst, src, VEX_SIMD_F2); 16.1030 emit_byte(0x51); 16.1031 emit_operand(dst, src); 16.1032 } 16.1033 16.1034 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 16.1035 - // HMM Table D-1 says sse2 16.1036 - // NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1037 - NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1038 - emit_byte(0xF3); 16.1039 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.1040 - emit_byte(0x0F); 16.1041 + NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1042 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 16.1043 
emit_byte(0x51); 16.1044 emit_byte(0xC0 | encode); 16.1045 } 16.1046 16.1047 void Assembler::sqrtss(XMMRegister dst, Address src) { 16.1048 - NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1049 - InstructionMark im(this); 16.1050 - emit_byte(0xF3); 16.1051 - prefix(src, dst); 16.1052 - emit_byte(0x0F); 16.1053 + NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1054 + InstructionMark im(this); 16.1055 + simd_prefix(dst, dst, src, VEX_SIMD_F3); 16.1056 emit_byte(0x51); 16.1057 emit_operand(dst, src); 16.1058 } 16.1059 @@ -2765,9 +2783,7 @@ 16.1060 16.1061 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 16.1062 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1063 - emit_byte(0xF2); 16.1064 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.1065 - emit_byte(0x0F); 16.1066 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 16.1067 emit_byte(0x5C); 16.1068 emit_byte(0xC0 | encode); 16.1069 } 16.1070 @@ -2775,18 +2791,14 @@ 16.1071 void Assembler::subsd(XMMRegister dst, Address src) { 16.1072 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1073 InstructionMark im(this); 16.1074 - emit_byte(0xF2); 16.1075 - prefix(src, dst); 16.1076 - emit_byte(0x0F); 16.1077 + simd_prefix(dst, dst, src, VEX_SIMD_F2); 16.1078 emit_byte(0x5C); 16.1079 emit_operand(dst, src); 16.1080 } 16.1081 16.1082 void Assembler::subss(XMMRegister dst, XMMRegister src) { 16.1083 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1084 - emit_byte(0xF3); 16.1085 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.1086 - emit_byte(0x0F); 16.1087 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 16.1088 emit_byte(0x5C); 16.1089 emit_byte(0xC0 | encode); 16.1090 } 16.1091 @@ -2794,9 +2806,7 @@ 16.1092 void Assembler::subss(XMMRegister dst, Address src) { 16.1093 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1094 InstructionMark im(this); 16.1095 - emit_byte(0xF3); 16.1096 - prefix(src, dst); 
16.1097 - emit_byte(0x0F); 16.1098 + simd_prefix(dst, dst, src, VEX_SIMD_F3); 16.1099 emit_byte(0x5C); 16.1100 emit_operand(dst, src); 16.1101 } 16.1102 @@ -2836,30 +2846,30 @@ 16.1103 16.1104 void Assembler::ucomisd(XMMRegister dst, Address src) { 16.1105 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1106 - emit_byte(0x66); 16.1107 - ucomiss(dst, src); 16.1108 + InstructionMark im(this); 16.1109 + simd_prefix(dst, src, VEX_SIMD_66); 16.1110 + emit_byte(0x2E); 16.1111 + emit_operand(dst, src); 16.1112 } 16.1113 16.1114 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 16.1115 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1116 - emit_byte(0x66); 16.1117 - ucomiss(dst, src); 16.1118 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 16.1119 + emit_byte(0x2E); 16.1120 + emit_byte(0xC0 | encode); 16.1121 } 16.1122 16.1123 void Assembler::ucomiss(XMMRegister dst, Address src) { 16.1124 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1125 - 16.1126 - InstructionMark im(this); 16.1127 - prefix(src, dst); 16.1128 - emit_byte(0x0F); 16.1129 + InstructionMark im(this); 16.1130 + simd_prefix(dst, src, VEX_SIMD_NONE); 16.1131 emit_byte(0x2E); 16.1132 emit_operand(dst, src); 16.1133 } 16.1134 16.1135 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 16.1136 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1137 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.1138 - emit_byte(0x0F); 16.1139 + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 16.1140 emit_byte(0x2E); 16.1141 emit_byte(0xC0 | encode); 16.1142 } 16.1143 @@ -2905,16 +2915,15 @@ 16.1144 16.1145 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 16.1146 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1147 - emit_byte(0x66); 16.1148 - xorps(dst, src); 16.1149 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 16.1150 + emit_byte(0x57); 16.1151 + emit_byte(0xC0 | encode); 16.1152 } 16.1153 16.1154 void 
Assembler::xorpd(XMMRegister dst, Address src) { 16.1155 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1156 InstructionMark im(this); 16.1157 - emit_byte(0x66); 16.1158 - prefix(src, dst); 16.1159 - emit_byte(0x0F); 16.1160 + simd_prefix(dst, dst, src, VEX_SIMD_66); 16.1161 emit_byte(0x57); 16.1162 emit_operand(dst, src); 16.1163 } 16.1164 @@ -2922,8 +2931,7 @@ 16.1165 16.1166 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 16.1167 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1168 - int encode = prefix_and_encode(dst->encoding(), src->encoding()); 16.1169 - emit_byte(0x0F); 16.1170 + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); 16.1171 emit_byte(0x57); 16.1172 emit_byte(0xC0 | encode); 16.1173 } 16.1174 @@ -2931,12 +2939,166 @@ 16.1175 void Assembler::xorps(XMMRegister dst, Address src) { 16.1176 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1177 InstructionMark im(this); 16.1178 - prefix(src, dst); 16.1179 - emit_byte(0x0F); 16.1180 + simd_prefix(dst, dst, src, VEX_SIMD_NONE); 16.1181 emit_byte(0x57); 16.1182 emit_operand(dst, src); 16.1183 } 16.1184 16.1185 +// AVX 3-operands non destructive source instructions (encoded with VEX prefix) 16.1186 + 16.1187 +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 16.1188 + assert(VM_Version::supports_avx(), ""); 16.1189 + InstructionMark im(this); 16.1190 + vex_prefix(dst, nds, src, VEX_SIMD_F2); 16.1191 + emit_byte(0x58); 16.1192 + emit_operand(dst, src); 16.1193 +} 16.1194 + 16.1195 +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 16.1196 + assert(VM_Version::supports_avx(), ""); 16.1197 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 16.1198 + emit_byte(0x58); 16.1199 + emit_byte(0xC0 | encode); 16.1200 +} 16.1201 + 16.1202 +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 16.1203 + assert(VM_Version::supports_avx(), ""); 16.1204 + InstructionMark im(this); 16.1205 + 
vex_prefix(dst, nds, src, VEX_SIMD_F3); 16.1206 + emit_byte(0x58); 16.1207 + emit_operand(dst, src); 16.1208 +} 16.1209 + 16.1210 +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 16.1211 + assert(VM_Version::supports_avx(), ""); 16.1212 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 16.1213 + emit_byte(0x58); 16.1214 + emit_byte(0xC0 | encode); 16.1215 +} 16.1216 + 16.1217 +void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) { 16.1218 + assert(VM_Version::supports_avx(), ""); 16.1219 + InstructionMark im(this); 16.1220 + vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector 16.1221 + emit_byte(0x54); 16.1222 + emit_operand(dst, src); 16.1223 +} 16.1224 + 16.1225 +void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) { 16.1226 + assert(VM_Version::supports_avx(), ""); 16.1227 + InstructionMark im(this); 16.1228 + vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector 16.1229 + emit_byte(0x54); 16.1230 + emit_operand(dst, src); 16.1231 +} 16.1232 + 16.1233 +void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 16.1234 + assert(VM_Version::supports_avx(), ""); 16.1235 + InstructionMark im(this); 16.1236 + vex_prefix(dst, nds, src, VEX_SIMD_F2); 16.1237 + emit_byte(0x5E); 16.1238 + emit_operand(dst, src); 16.1239 +} 16.1240 + 16.1241 +void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 16.1242 + assert(VM_Version::supports_avx(), ""); 16.1243 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 16.1244 + emit_byte(0x5E); 16.1245 + emit_byte(0xC0 | encode); 16.1246 +} 16.1247 + 16.1248 +void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 16.1249 + assert(VM_Version::supports_avx(), ""); 16.1250 + InstructionMark im(this); 16.1251 + vex_prefix(dst, nds, src, VEX_SIMD_F3); 16.1252 + emit_byte(0x5E); 16.1253 + emit_operand(dst, src); 16.1254 +} 16.1255 + 16.1256 +void Assembler::vdivss(XMMRegister dst, 
XMMRegister nds, XMMRegister src) { 16.1257 + assert(VM_Version::supports_avx(), ""); 16.1258 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 16.1259 + emit_byte(0x5E); 16.1260 + emit_byte(0xC0 | encode); 16.1261 +} 16.1262 + 16.1263 +void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 16.1264 + assert(VM_Version::supports_avx(), ""); 16.1265 + InstructionMark im(this); 16.1266 + vex_prefix(dst, nds, src, VEX_SIMD_F2); 16.1267 + emit_byte(0x59); 16.1268 + emit_operand(dst, src); 16.1269 +} 16.1270 + 16.1271 +void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 16.1272 + assert(VM_Version::supports_avx(), ""); 16.1273 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 16.1274 + emit_byte(0x59); 16.1275 + emit_byte(0xC0 | encode); 16.1276 +} 16.1277 + 16.1278 +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 16.1279 + InstructionMark im(this); 16.1280 + vex_prefix(dst, nds, src, VEX_SIMD_F3); 16.1281 + emit_byte(0x59); 16.1282 + emit_operand(dst, src); 16.1283 +} 16.1284 + 16.1285 +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 16.1286 + assert(VM_Version::supports_avx(), ""); 16.1287 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 16.1288 + emit_byte(0x59); 16.1289 + emit_byte(0xC0 | encode); 16.1290 +} 16.1291 + 16.1292 + 16.1293 +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { 16.1294 + assert(VM_Version::supports_avx(), ""); 16.1295 + InstructionMark im(this); 16.1296 + vex_prefix(dst, nds, src, VEX_SIMD_F2); 16.1297 + emit_byte(0x5C); 16.1298 + emit_operand(dst, src); 16.1299 +} 16.1300 + 16.1301 +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 16.1302 + assert(VM_Version::supports_avx(), ""); 16.1303 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 16.1304 + emit_byte(0x5C); 16.1305 + emit_byte(0xC0 | encode); 16.1306 +} 16.1307 + 16.1308 +void 
Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 16.1309 + assert(VM_Version::supports_avx(), ""); 16.1310 + InstructionMark im(this); 16.1311 + vex_prefix(dst, nds, src, VEX_SIMD_F3); 16.1312 + emit_byte(0x5C); 16.1313 + emit_operand(dst, src); 16.1314 +} 16.1315 + 16.1316 +void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 16.1317 + assert(VM_Version::supports_avx(), ""); 16.1318 + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 16.1319 + emit_byte(0x5C); 16.1320 + emit_byte(0xC0 | encode); 16.1321 +} 16.1322 + 16.1323 +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) { 16.1324 + assert(VM_Version::supports_avx(), ""); 16.1325 + InstructionMark im(this); 16.1326 + vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector 16.1327 + emit_byte(0x57); 16.1328 + emit_operand(dst, src); 16.1329 +} 16.1330 + 16.1331 +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { 16.1332 + assert(VM_Version::supports_avx(), ""); 16.1333 + InstructionMark im(this); 16.1334 + vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector 16.1335 + emit_byte(0x57); 16.1336 + emit_operand(dst, src); 16.1337 +} 16.1338 + 16.1339 + 16.1340 #ifndef _LP64 16.1341 // 32bit only pieces of the assembler 16.1342 16.1343 @@ -3394,12 +3556,114 @@ 16.1344 emit_byte(0xF1); 16.1345 } 16.1346 16.1347 +// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. 16.1348 +static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; 16.1349 +// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. 16.1350 +static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; 16.1351 + 16.1352 +// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. 
16.1353 +void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 16.1354 + if (pre > 0) { 16.1355 + emit_byte(simd_pre[pre]); 16.1356 + } 16.1357 + if (rex_w) { 16.1358 + prefixq(adr, xreg); 16.1359 + } else { 16.1360 + prefix(adr, xreg); 16.1361 + } 16.1362 + if (opc > 0) { 16.1363 + emit_byte(0x0F); 16.1364 + int opc2 = simd_opc[opc]; 16.1365 + if (opc2 > 0) { 16.1366 + emit_byte(opc2); 16.1367 + } 16.1368 + } 16.1369 +} 16.1370 + 16.1371 +int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 16.1372 + if (pre > 0) { 16.1373 + emit_byte(simd_pre[pre]); 16.1374 + } 16.1375 + int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : 16.1376 + prefix_and_encode(dst_enc, src_enc); 16.1377 + if (opc > 0) { 16.1378 + emit_byte(0x0F); 16.1379 + int opc2 = simd_opc[opc]; 16.1380 + if (opc2 > 0) { 16.1381 + emit_byte(opc2); 16.1382 + } 16.1383 + } 16.1384 + return encode; 16.1385 +} 16.1386 + 16.1387 + 16.1388 +void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { 16.1389 + if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { 16.1390 + prefix(VEX_3bytes); 16.1391 + 16.1392 + int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); 16.1393 + byte1 = (~byte1) & 0xE0; 16.1394 + byte1 |= opc; 16.1395 + a_byte(byte1); 16.1396 + 16.1397 + int byte2 = ((~nds_enc) & 0xf) << 3; 16.1398 + byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; 16.1399 + emit_byte(byte2); 16.1400 + } else { 16.1401 + prefix(VEX_2bytes); 16.1402 + 16.1403 + int byte1 = vex_r ? VEX_R : 0; 16.1404 + byte1 = (~byte1) & 0x80; 16.1405 + byte1 |= ((~nds_enc) & 0xf) << 3; 16.1406 + byte1 |= (vector256 ? 
4 : 0) | pre; 16.1407 + emit_byte(byte1); 16.1408 + } 16.1409 +} 16.1410 + 16.1411 +void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ 16.1412 + bool vex_r = (xreg_enc >= 8); 16.1413 + bool vex_b = adr.base_needs_rex(); 16.1414 + bool vex_x = adr.index_needs_rex(); 16.1415 + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 16.1416 +} 16.1417 + 16.1418 +int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { 16.1419 + bool vex_r = (dst_enc >= 8); 16.1420 + bool vex_b = (src_enc >= 8); 16.1421 + bool vex_x = false; 16.1422 + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 16.1423 + return (((dst_enc & 7) << 3) | (src_enc & 7)); 16.1424 +} 16.1425 + 16.1426 + 16.1427 +void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 16.1428 + if (UseAVX > 0) { 16.1429 + int xreg_enc = xreg->encoding(); 16.1430 + int nds_enc = nds->is_valid() ? nds->encoding() : 0; 16.1431 + vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); 16.1432 + } else { 16.1433 + assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); 16.1434 + rex_prefix(adr, xreg, pre, opc, rex_w); 16.1435 + } 16.1436 +} 16.1437 + 16.1438 +int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 16.1439 + int dst_enc = dst->encoding(); 16.1440 + int src_enc = src->encoding(); 16.1441 + if (UseAVX > 0) { 16.1442 + int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 16.1443 + return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256); 16.1444 + } else { 16.1445 + assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); 16.1446 + return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); 16.1447 + } 16.1448 +} 16.1449 16.1450 #ifndef _LP64 16.1451 16.1452 void Assembler::incl(Register dst) { 16.1453 // Don't use it directly. Use MacroAssembler::incrementl() instead. 16.1454 - emit_byte(0x40 | dst->encoding()); 16.1455 + emit_byte(0x40 | dst->encoding()); 16.1456 } 16.1457 16.1458 void Assembler::lea(Register dst, Address src) { 16.1459 @@ -3756,6 +4020,38 @@ 16.1460 } 16.1461 } 16.1462 16.1463 +void Assembler::prefixq(Address adr, XMMRegister src) { 16.1464 + if (src->encoding() < 8) { 16.1465 + if (adr.base_needs_rex()) { 16.1466 + if (adr.index_needs_rex()) { 16.1467 + prefix(REX_WXB); 16.1468 + } else { 16.1469 + prefix(REX_WB); 16.1470 + } 16.1471 + } else { 16.1472 + if (adr.index_needs_rex()) { 16.1473 + prefix(REX_WX); 16.1474 + } else { 16.1475 + prefix(REX_W); 16.1476 + } 16.1477 + } 16.1478 + } else { 16.1479 + if (adr.base_needs_rex()) { 16.1480 + if (adr.index_needs_rex()) { 16.1481 + prefix(REX_WRXB); 16.1482 + } else { 16.1483 + prefix(REX_WRB); 16.1484 + } 16.1485 + } else { 16.1486 + if (adr.index_needs_rex()) { 16.1487 + prefix(REX_WRX); 16.1488 + } else { 16.1489 + prefix(REX_WR); 16.1490 + } 16.1491 + } 16.1492 + } 16.1493 +} 16.1494 + 16.1495 void Assembler::adcq(Register dst, int32_t imm32) { 16.1496 (void) prefixq_and_encode(dst->encoding()); 16.1497 emit_arith(0x81, 0xD0, dst, imm32); 16.1498 @@ -3918,36 +4214,44 @@ 16.1499 16.1500 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 16.1501 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1502 - emit_byte(0xF2); 16.1503 - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 16.1504 - emit_byte(0x0F); 16.1505 + int encode = simd_prefix_and_encode_q(dst, dst, 
src, VEX_SIMD_F2); 16.1506 emit_byte(0x2A); 16.1507 emit_byte(0xC0 | encode); 16.1508 } 16.1509 16.1510 +void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { 16.1511 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1512 + InstructionMark im(this); 16.1513 + simd_prefix_q(dst, dst, src, VEX_SIMD_F2); 16.1514 + emit_byte(0x2A); 16.1515 + emit_operand(dst, src); 16.1516 +} 16.1517 + 16.1518 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 16.1519 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1520 - emit_byte(0xF3); 16.1521 - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 16.1522 - emit_byte(0x0F); 16.1523 + int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); 16.1524 emit_byte(0x2A); 16.1525 emit_byte(0xC0 | encode); 16.1526 } 16.1527 16.1528 +void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { 16.1529 + NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1530 + InstructionMark im(this); 16.1531 + simd_prefix_q(dst, dst, src, VEX_SIMD_F3); 16.1532 + emit_byte(0x2A); 16.1533 + emit_operand(dst, src); 16.1534 +} 16.1535 + 16.1536 void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 16.1537 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1538 - emit_byte(0xF2); 16.1539 - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 16.1540 - emit_byte(0x0F); 16.1541 + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); 16.1542 emit_byte(0x2C); 16.1543 emit_byte(0xC0 | encode); 16.1544 } 16.1545 16.1546 void Assembler::cvttss2siq(Register dst, XMMRegister src) { 16.1547 NOT_LP64(assert(VM_Version::supports_sse(), "")); 16.1548 - emit_byte(0xF3); 16.1549 - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 16.1550 - emit_byte(0x0F); 16.1551 + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); 16.1552 emit_byte(0x2C); 16.1553 emit_byte(0xC0 | encode); 16.1554 } 16.1555 @@ -4107,21 +4411,17 @@ 16.1556 16.1557 void Assembler::movdq(XMMRegister dst, 
Register src) { 16.1558 // table D-1 says MMX/SSE2 16.1559 - NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 16.1560 - emit_byte(0x66); 16.1561 - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 16.1562 - emit_byte(0x0F); 16.1563 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1564 + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); 16.1565 emit_byte(0x6E); 16.1566 emit_byte(0xC0 | encode); 16.1567 } 16.1568 16.1569 void Assembler::movdq(Register dst, XMMRegister src) { 16.1570 // table D-1 says MMX/SSE2 16.1571 - NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 16.1572 - emit_byte(0x66); 16.1573 + NOT_LP64(assert(VM_Version::supports_sse2(), "")); 16.1574 // swap src/dst to get correct prefix 16.1575 - int encode = prefixq_and_encode(src->encoding(), dst->encoding()); 16.1576 - emit_byte(0x0F); 16.1577 + int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); 16.1578 emit_byte(0x7E); 16.1579 emit_byte(0xC0 | encode); 16.1580 } 16.1581 @@ -4632,7 +4932,7 @@ 16.1582 null_check_offset = offset(); 16.1583 } 16.1584 movl(tmp_reg, klass_addr); 16.1585 - xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 16.1586 + xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); 16.1587 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); 16.1588 if (need_tmp_reg) { 16.1589 pop(tmp_reg); 16.1590 @@ -4719,7 +5019,7 @@ 16.1591 } 16.1592 get_thread(tmp_reg); 16.1593 movl(swap_reg, klass_addr); 16.1594 - orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 16.1595 + orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); 16.1596 movl(swap_reg, saved_mark_addr); 16.1597 if (os::is_MP()) { 16.1598 lock(); 16.1599 @@ -4757,7 +5057,7 @@ 16.1600 push(tmp_reg); 16.1601 } 16.1602 movl(tmp_reg, klass_addr); 16.1603 - 
movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 16.1604 + movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 16.1605 if (os::is_MP()) { 16.1606 lock(); 16.1607 } 16.1608 @@ -5680,6 +5980,24 @@ 16.1609 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 16.1610 } 16.1611 16.1612 +void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { 16.1613 + if (reachable(src)) { 16.1614 + Assembler::addsd(dst, as_Address(src)); 16.1615 + } else { 16.1616 + lea(rscratch1, src); 16.1617 + Assembler::addsd(dst, Address(rscratch1, 0)); 16.1618 + } 16.1619 +} 16.1620 + 16.1621 +void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { 16.1622 + if (reachable(src)) { 16.1623 + addss(dst, as_Address(src)); 16.1624 + } else { 16.1625 + lea(rscratch1, src); 16.1626 + addss(dst, Address(rscratch1, 0)); 16.1627 + } 16.1628 +} 16.1629 + 16.1630 void MacroAssembler::align(int modulus) { 16.1631 if (offset() % modulus != 0) { 16.1632 nop(modulus - (offset() % modulus)); 16.1633 @@ -5687,11 +6005,24 @@ 16.1634 } 16.1635 16.1636 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 16.1637 + // Used in sign-masking with aligned address. 16.1638 + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 16.1639 if (reachable(src)) { 16.1640 - andpd(dst, as_Address(src)); 16.1641 + Assembler::andpd(dst, as_Address(src)); 16.1642 } else { 16.1643 lea(rscratch1, src); 16.1644 - andpd(dst, Address(rscratch1, 0)); 16.1645 + Assembler::andpd(dst, Address(rscratch1, 0)); 16.1646 + } 16.1647 +} 16.1648 + 16.1649 +void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { 16.1650 + // Used in sign-masking with aligned address. 
16.1651 + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 16.1652 + if (reachable(src)) { 16.1653 + Assembler::andps(dst, as_Address(src)); 16.1654 + } else { 16.1655 + lea(rscratch1, src); 16.1656 + Assembler::andps(dst, Address(rscratch1, 0)); 16.1657 } 16.1658 } 16.1659 16.1660 @@ -6270,19 +6601,19 @@ 16.1661 16.1662 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 16.1663 if (reachable(src)) { 16.1664 - comisd(dst, as_Address(src)); 16.1665 + Assembler::comisd(dst, as_Address(src)); 16.1666 } else { 16.1667 lea(rscratch1, src); 16.1668 - comisd(dst, Address(rscratch1, 0)); 16.1669 + Assembler::comisd(dst, Address(rscratch1, 0)); 16.1670 } 16.1671 } 16.1672 16.1673 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 16.1674 if (reachable(src)) { 16.1675 - comiss(dst, as_Address(src)); 16.1676 + Assembler::comiss(dst, as_Address(src)); 16.1677 } else { 16.1678 lea(rscratch1, src); 16.1679 - comiss(dst, Address(rscratch1, 0)); 16.1680 + Assembler::comiss(dst, Address(rscratch1, 0)); 16.1681 } 16.1682 } 16.1683 16.1684 @@ -6366,6 +6697,24 @@ 16.1685 sarl(reg, shift_value); 16.1686 } 16.1687 16.1688 +void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { 16.1689 + if (reachable(src)) { 16.1690 + Assembler::divsd(dst, as_Address(src)); 16.1691 + } else { 16.1692 + lea(rscratch1, src); 16.1693 + Assembler::divsd(dst, Address(rscratch1, 0)); 16.1694 + } 16.1695 +} 16.1696 + 16.1697 +void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { 16.1698 + if (reachable(src)) { 16.1699 + Assembler::divss(dst, as_Address(src)); 16.1700 + } else { 16.1701 + lea(rscratch1, src); 16.1702 + Assembler::divss(dst, Address(rscratch1, 0)); 16.1703 + } 16.1704 +} 16.1705 + 16.1706 // !defined(COMPILER2) is because of stupid core builds 16.1707 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 16.1708 void MacroAssembler::empty_FPU_stack() { 16.1709 @@ 
-6805,12 +7154,39 @@ 16.1710 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 16.1711 } 16.1712 16.1713 +void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { 16.1714 + if (reachable(src)) { 16.1715 + Assembler::movsd(dst, as_Address(src)); 16.1716 + } else { 16.1717 + lea(rscratch1, src); 16.1718 + Assembler::movsd(dst, Address(rscratch1, 0)); 16.1719 + } 16.1720 +} 16.1721 + 16.1722 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 16.1723 if (reachable(src)) { 16.1724 - movss(dst, as_Address(src)); 16.1725 + Assembler::movss(dst, as_Address(src)); 16.1726 } else { 16.1727 lea(rscratch1, src); 16.1728 - movss(dst, Address(rscratch1, 0)); 16.1729 + Assembler::movss(dst, Address(rscratch1, 0)); 16.1730 + } 16.1731 +} 16.1732 + 16.1733 +void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { 16.1734 + if (reachable(src)) { 16.1735 + Assembler::mulsd(dst, as_Address(src)); 16.1736 + } else { 16.1737 + lea(rscratch1, src); 16.1738 + Assembler::mulsd(dst, Address(rscratch1, 0)); 16.1739 + } 16.1740 +} 16.1741 + 16.1742 +void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { 16.1743 + if (reachable(src)) { 16.1744 + Assembler::mulss(dst, as_Address(src)); 16.1745 + } else { 16.1746 + lea(rscratch1, src); 16.1747 + Assembler::mulss(dst, Address(rscratch1, 0)); 16.1748 } 16.1749 } 16.1750 16.1751 @@ -6992,6 +7368,193 @@ 16.1752 testl(dst, as_Address(src)); 16.1753 } 16.1754 16.1755 +void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { 16.1756 + if (reachable(src)) { 16.1757 + Assembler::sqrtsd(dst, as_Address(src)); 16.1758 + } else { 16.1759 + lea(rscratch1, src); 16.1760 + Assembler::sqrtsd(dst, Address(rscratch1, 0)); 16.1761 + } 16.1762 +} 16.1763 + 16.1764 +void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { 16.1765 + if (reachable(src)) { 16.1766 + Assembler::sqrtss(dst, as_Address(src)); 16.1767 + } else { 16.1768 + lea(rscratch1, src); 16.1769 + Assembler::sqrtss(dst, 
Address(rscratch1, 0)); 16.1770 + } 16.1771 +} 16.1772 + 16.1773 +void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { 16.1774 + if (reachable(src)) { 16.1775 + Assembler::subsd(dst, as_Address(src)); 16.1776 + } else { 16.1777 + lea(rscratch1, src); 16.1778 + Assembler::subsd(dst, Address(rscratch1, 0)); 16.1779 + } 16.1780 +} 16.1781 + 16.1782 +void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { 16.1783 + if (reachable(src)) { 16.1784 + Assembler::subss(dst, as_Address(src)); 16.1785 + } else { 16.1786 + lea(rscratch1, src); 16.1787 + Assembler::subss(dst, Address(rscratch1, 0)); 16.1788 + } 16.1789 +} 16.1790 + 16.1791 +void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 16.1792 + if (reachable(src)) { 16.1793 + Assembler::ucomisd(dst, as_Address(src)); 16.1794 + } else { 16.1795 + lea(rscratch1, src); 16.1796 + Assembler::ucomisd(dst, Address(rscratch1, 0)); 16.1797 + } 16.1798 +} 16.1799 + 16.1800 +void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 16.1801 + if (reachable(src)) { 16.1802 + Assembler::ucomiss(dst, as_Address(src)); 16.1803 + } else { 16.1804 + lea(rscratch1, src); 16.1805 + Assembler::ucomiss(dst, Address(rscratch1, 0)); 16.1806 + } 16.1807 +} 16.1808 + 16.1809 +void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 16.1810 + // Used in sign-bit flipping with aligned address. 16.1811 + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 16.1812 + if (reachable(src)) { 16.1813 + Assembler::xorpd(dst, as_Address(src)); 16.1814 + } else { 16.1815 + lea(rscratch1, src); 16.1816 + Assembler::xorpd(dst, Address(rscratch1, 0)); 16.1817 + } 16.1818 +} 16.1819 + 16.1820 +void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 16.1821 + // Used in sign-bit flipping with aligned address. 
16.1822 + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 16.1823 + if (reachable(src)) { 16.1824 + Assembler::xorps(dst, as_Address(src)); 16.1825 + } else { 16.1826 + lea(rscratch1, src); 16.1827 + Assembler::xorps(dst, Address(rscratch1, 0)); 16.1828 + } 16.1829 +} 16.1830 + 16.1831 +// AVX 3-operands instructions 16.1832 + 16.1833 +void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1834 + if (reachable(src)) { 16.1835 + vaddsd(dst, nds, as_Address(src)); 16.1836 + } else { 16.1837 + lea(rscratch1, src); 16.1838 + vaddsd(dst, nds, Address(rscratch1, 0)); 16.1839 + } 16.1840 +} 16.1841 + 16.1842 +void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1843 + if (reachable(src)) { 16.1844 + vaddss(dst, nds, as_Address(src)); 16.1845 + } else { 16.1846 + lea(rscratch1, src); 16.1847 + vaddss(dst, nds, Address(rscratch1, 0)); 16.1848 + } 16.1849 +} 16.1850 + 16.1851 +void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1852 + if (reachable(src)) { 16.1853 + vandpd(dst, nds, as_Address(src)); 16.1854 + } else { 16.1855 + lea(rscratch1, src); 16.1856 + vandpd(dst, nds, Address(rscratch1, 0)); 16.1857 + } 16.1858 +} 16.1859 + 16.1860 +void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1861 + if (reachable(src)) { 16.1862 + vandps(dst, nds, as_Address(src)); 16.1863 + } else { 16.1864 + lea(rscratch1, src); 16.1865 + vandps(dst, nds, Address(rscratch1, 0)); 16.1866 + } 16.1867 +} 16.1868 + 16.1869 +void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1870 + if (reachable(src)) { 16.1871 + vdivsd(dst, nds, as_Address(src)); 16.1872 + } else { 16.1873 + lea(rscratch1, src); 16.1874 + vdivsd(dst, nds, Address(rscratch1, 0)); 16.1875 + } 16.1876 +} 16.1877 + 16.1878 +void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral 
src) { 16.1879 + if (reachable(src)) { 16.1880 + vdivss(dst, nds, as_Address(src)); 16.1881 + } else { 16.1882 + lea(rscratch1, src); 16.1883 + vdivss(dst, nds, Address(rscratch1, 0)); 16.1884 + } 16.1885 +} 16.1886 + 16.1887 +void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1888 + if (reachable(src)) { 16.1889 + vmulsd(dst, nds, as_Address(src)); 16.1890 + } else { 16.1891 + lea(rscratch1, src); 16.1892 + vmulsd(dst, nds, Address(rscratch1, 0)); 16.1893 + } 16.1894 +} 16.1895 + 16.1896 +void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1897 + if (reachable(src)) { 16.1898 + vmulss(dst, nds, as_Address(src)); 16.1899 + } else { 16.1900 + lea(rscratch1, src); 16.1901 + vmulss(dst, nds, Address(rscratch1, 0)); 16.1902 + } 16.1903 +} 16.1904 + 16.1905 +void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1906 + if (reachable(src)) { 16.1907 + vsubsd(dst, nds, as_Address(src)); 16.1908 + } else { 16.1909 + lea(rscratch1, src); 16.1910 + vsubsd(dst, nds, Address(rscratch1, 0)); 16.1911 + } 16.1912 +} 16.1913 + 16.1914 +void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1915 + if (reachable(src)) { 16.1916 + vsubss(dst, nds, as_Address(src)); 16.1917 + } else { 16.1918 + lea(rscratch1, src); 16.1919 + vsubss(dst, nds, Address(rscratch1, 0)); 16.1920 + } 16.1921 +} 16.1922 + 16.1923 +void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1924 + if (reachable(src)) { 16.1925 + vxorpd(dst, nds, as_Address(src)); 16.1926 + } else { 16.1927 + lea(rscratch1, src); 16.1928 + vxorpd(dst, nds, Address(rscratch1, 0)); 16.1929 + } 16.1930 +} 16.1931 + 16.1932 +void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 16.1933 + if (reachable(src)) { 16.1934 + vxorps(dst, nds, as_Address(src)); 16.1935 + } else { 16.1936 + lea(rscratch1, src); 16.1937 + vxorps(dst, nds, 
Address(rscratch1, 0)); 16.1938 + } 16.1939 +} 16.1940 + 16.1941 + 16.1942 ////////////////////////////////////////////////////////////////////////////////// 16.1943 #ifndef SERIALGC 16.1944 16.1945 @@ -7685,10 +8248,8 @@ 16.1946 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 16.1947 assert(label_nulls <= 1, "at most one NULL in the batch"); 16.1948 16.1949 - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 16.1950 - Klass::secondary_super_cache_offset_in_bytes()); 16.1951 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 16.1952 - Klass::super_check_offset_offset_in_bytes()); 16.1953 + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 16.1954 + int sco_offset = in_bytes(Klass::super_check_offset_offset()); 16.1955 Address super_check_offset_addr(super_klass, sco_offset); 16.1956 16.1957 // Hacked jcc, which "knows" that L_fallthrough, at least, is in 16.1958 @@ -7786,10 +8347,8 @@ 16.1959 assert(label_nulls <= 1, "at most one NULL in the batch"); 16.1960 16.1961 // a couple of useful fields in sub_klass: 16.1962 - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 16.1963 - Klass::secondary_supers_offset_in_bytes()); 16.1964 - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 16.1965 - Klass::secondary_super_cache_offset_in_bytes()); 16.1966 + int ss_offset = in_bytes(Klass::secondary_supers_offset()); 16.1967 + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 16.1968 Address secondary_supers_addr(sub_klass, ss_offset); 16.1969 Address super_cache_addr( sub_klass, sc_offset); 16.1970 16.1971 @@ -7876,32 +8435,6 @@ 16.1972 } 16.1973 16.1974 16.1975 -void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 16.1976 - ucomisd(dst, as_Address(src)); 16.1977 -} 16.1978 - 16.1979 -void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 16.1980 - ucomiss(dst, as_Address(src)); 16.1981 -} 16.1982 - 16.1983 -void 
MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 16.1984 - if (reachable(src)) { 16.1985 - xorpd(dst, as_Address(src)); 16.1986 - } else { 16.1987 - lea(rscratch1, src); 16.1988 - xorpd(dst, Address(rscratch1, 0)); 16.1989 - } 16.1990 -} 16.1991 - 16.1992 -void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 16.1993 - if (reachable(src)) { 16.1994 - xorps(dst, as_Address(src)); 16.1995 - } else { 16.1996 - lea(rscratch1, src); 16.1997 - xorps(dst, Address(rscratch1, 0)); 16.1998 - } 16.1999 -} 16.2000 - 16.2001 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 16.2002 if (VM_Version::supports_cmov()) { 16.2003 cmovl(cc, dst, src); 16.2004 @@ -8487,20 +9020,20 @@ 16.2005 if (Universe::narrow_oop_shift() != 0) { 16.2006 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 16.2007 if (LogMinObjAlignmentInBytes == Address::times_8) { 16.2008 - movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 16.2009 + movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); 16.2010 } else { 16.2011 // OK to use shift since we don't need to preserve flags. 
16.2012 shlq(dst, LogMinObjAlignmentInBytes); 16.2013 - movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 16.2014 + movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset())); 16.2015 } 16.2016 } else { 16.2017 - movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 16.2018 + movq(dst, Address(dst, Klass::prototype_header_offset())); 16.2019 } 16.2020 } else 16.2021 #endif 16.2022 { 16.2023 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 16.2024 - movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 16.2025 + movptr(dst, Address(dst, Klass::prototype_header_offset())); 16.2026 } 16.2027 } 16.2028 16.2029 @@ -8761,6 +9294,7 @@ 16.2030 Register cnt1, Register cnt2, 16.2031 int int_cnt2, Register result, 16.2032 XMMRegister vec, Register tmp) { 16.2033 + ShortBranchVerifier sbv(this); 16.2034 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 16.2035 16.2036 // This method uses pcmpestri inxtruction with bound registers 16.2037 @@ -8890,9 +9424,9 @@ 16.2038 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); 16.2039 } 16.2040 // Need to reload strings pointers if not matched whole vector 16.2041 - jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 16.2042 + jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 16.2043 addptr(cnt2, 8); 16.2044 - jccb(Assembler::negative, SCAN_SUBSTR); 16.2045 + jcc(Assembler::negative, SCAN_SUBSTR); 16.2046 // Fall through if found full substring 16.2047 16.2048 } // (int_cnt2 > 8) 16.2049 @@ -8911,6 +9445,7 @@ 16.2050 Register cnt1, Register cnt2, 16.2051 int int_cnt2, Register result, 16.2052 XMMRegister vec, Register tmp) { 16.2053 + ShortBranchVerifier sbv(this); 16.2054 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 16.2055 // 16.2056 // int_cnt2 is length 
of small (< 8 chars) constant substring 16.2057 @@ -9172,6 +9707,7 @@ 16.2058 void MacroAssembler::string_compare(Register str1, Register str2, 16.2059 Register cnt1, Register cnt2, Register result, 16.2060 XMMRegister vec1) { 16.2061 + ShortBranchVerifier sbv(this); 16.2062 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 16.2063 16.2064 // Compute the minimum of the string lengths and the 16.2065 @@ -9308,6 +9844,7 @@ 16.2066 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, 16.2067 Register limit, Register result, Register chr, 16.2068 XMMRegister vec1, XMMRegister vec2) { 16.2069 + ShortBranchVerifier sbv(this); 16.2070 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; 16.2071 16.2072 int length_offset = arrayOopDesc::length_offset_in_bytes(); 16.2073 @@ -9427,6 +9964,7 @@ 16.2074 void MacroAssembler::generate_fill(BasicType t, bool aligned, 16.2075 Register to, Register value, Register count, 16.2076 Register rtmp, XMMRegister xtmp) { 16.2077 + ShortBranchVerifier sbv(this); 16.2078 assert_different_registers(to, value, count, rtmp); 16.2079 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 16.2080 Label L_fill_2_bytes, L_fill_4_bytes;
17.1 --- a/src/cpu/x86/vm/assembler_x86.hpp Tue Dec 27 12:38:49 2011 -0800 17.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Thu Dec 29 11:37:50 2011 -0800 17.3 @@ -503,7 +503,31 @@ 17.4 REX_WR = 0x4C, 17.5 REX_WRB = 0x4D, 17.6 REX_WRX = 0x4E, 17.7 - REX_WRXB = 0x4F 17.8 + REX_WRXB = 0x4F, 17.9 + 17.10 + VEX_3bytes = 0xC4, 17.11 + VEX_2bytes = 0xC5 17.12 + }; 17.13 + 17.14 + enum VexPrefix { 17.15 + VEX_B = 0x20, 17.16 + VEX_X = 0x40, 17.17 + VEX_R = 0x80, 17.18 + VEX_W = 0x80 17.19 + }; 17.20 + 17.21 + enum VexSimdPrefix { 17.22 + VEX_SIMD_NONE = 0x0, 17.23 + VEX_SIMD_66 = 0x1, 17.24 + VEX_SIMD_F3 = 0x2, 17.25 + VEX_SIMD_F2 = 0x3 17.26 + }; 17.27 + 17.28 + enum VexOpcode { 17.29 + VEX_OPCODE_NONE = 0x0, 17.30 + VEX_OPCODE_0F = 0x1, 17.31 + VEX_OPCODE_0F_38 = 0x2, 17.32 + VEX_OPCODE_0F_3A = 0x3 17.33 }; 17.34 17.35 enum WhichOperand { 17.36 @@ -546,12 +570,99 @@ 17.37 void prefixq(Address adr); 17.38 17.39 void prefix(Address adr, Register reg, bool byteinst = false); 17.40 + void prefix(Address adr, XMMRegister reg); 17.41 void prefixq(Address adr, Register reg); 17.42 - 17.43 - void prefix(Address adr, XMMRegister reg); 17.44 + void prefixq(Address adr, XMMRegister reg); 17.45 17.46 void prefetch_prefix(Address src); 17.47 17.48 + void rex_prefix(Address adr, XMMRegister xreg, 17.49 + VexSimdPrefix pre, VexOpcode opc, bool rex_w); 17.50 + int rex_prefix_and_encode(int dst_enc, int src_enc, 17.51 + VexSimdPrefix pre, VexOpcode opc, bool rex_w); 17.52 + 17.53 + void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, 17.54 + int nds_enc, VexSimdPrefix pre, VexOpcode opc, 17.55 + bool vector256); 17.56 + 17.57 + void vex_prefix(Address adr, int nds_enc, int xreg_enc, 17.58 + VexSimdPrefix pre, VexOpcode opc, 17.59 + bool vex_w, bool vector256); 17.60 + 17.61 + void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, 17.62 + VexSimdPrefix pre, bool vector256 = false) { 17.63 + vex_prefix(src, nds->encoding(), dst->encoding(), 17.64 + pre, VEX_OPCODE_0F, false, 
vector256); 17.65 + } 17.66 + 17.67 + int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, 17.68 + VexSimdPrefix pre, VexOpcode opc, 17.69 + bool vex_w, bool vector256); 17.70 + 17.71 + int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, 17.72 + VexSimdPrefix pre, bool vector256 = false) { 17.73 + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), 17.74 + pre, VEX_OPCODE_0F, false, vector256); 17.75 + } 17.76 + 17.77 + void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, 17.78 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, 17.79 + bool rex_w = false, bool vector256 = false); 17.80 + 17.81 + void simd_prefix(XMMRegister dst, Address src, 17.82 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { 17.83 + simd_prefix(dst, xnoreg, src, pre, opc); 17.84 + } 17.85 + void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) { 17.86 + simd_prefix(src, dst, pre); 17.87 + } 17.88 + void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, 17.89 + VexSimdPrefix pre) { 17.90 + bool rex_w = true; 17.91 + simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w); 17.92 + } 17.93 + 17.94 + 17.95 + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, 17.96 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, 17.97 + bool rex_w = false, bool vector256 = false); 17.98 + 17.99 + int simd_prefix_and_encode(XMMRegister dst, XMMRegister src, 17.100 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { 17.101 + return simd_prefix_and_encode(dst, xnoreg, src, pre, opc); 17.102 + } 17.103 + 17.104 + // Move/convert 32-bit integer value. 17.105 + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, 17.106 + VexSimdPrefix pre) { 17.107 + // It is OK to cast from Register to XMMRegister to pass argument here 17.108 + // since only encoding is used in simd_prefix_and_encode() and number of 17.109 + // Gen and Xmm registers are the same. 
17.110 + return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre); 17.111 + } 17.112 + int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) { 17.113 + return simd_prefix_and_encode(dst, xnoreg, src, pre); 17.114 + } 17.115 + int simd_prefix_and_encode(Register dst, XMMRegister src, 17.116 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { 17.117 + return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc); 17.118 + } 17.119 + 17.120 + // Move/convert 64-bit integer value. 17.121 + int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, 17.122 + VexSimdPrefix pre) { 17.123 + bool rex_w = true; 17.124 + return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w); 17.125 + } 17.126 + int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) { 17.127 + return simd_prefix_and_encode_q(dst, xnoreg, src, pre); 17.128 + } 17.129 + int simd_prefix_and_encode_q(Register dst, XMMRegister src, 17.130 + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { 17.131 + bool rex_w = true; 17.132 + return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w); 17.133 + } 17.134 + 17.135 // Helper functions for groups of instructions 17.136 void emit_arith_b(int op1, int op2, Register dst, int imm8); 17.137 17.138 @@ -764,6 +875,7 @@ 17.139 void addss(XMMRegister dst, Address src); 17.140 void addss(XMMRegister dst, XMMRegister src); 17.141 17.142 + void andl(Address dst, int32_t imm32); 17.143 void andl(Register dst, int32_t imm32); 17.144 void andl(Register dst, Address src); 17.145 void andl(Register dst, Register src); 17.146 @@ -774,9 +886,11 @@ 17.147 void andq(Register dst, Register src); 17.148 17.149 // Bitwise Logical AND of Packed Double-Precision Floating-Point Values 17.150 - void andpd(XMMRegister dst, Address src); 17.151 void andpd(XMMRegister dst, XMMRegister src); 17.152 17.153 + // 
Bitwise Logical AND of Packed Single-Precision Floating-Point Values 17.154 + void andps(XMMRegister dst, XMMRegister src); 17.155 + 17.156 void bsfl(Register dst, Register src); 17.157 void bsrl(Register dst, Register src); 17.158 17.159 @@ -837,9 +951,11 @@ 17.160 17.161 // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS 17.162 void comisd(XMMRegister dst, Address src); 17.163 + void comisd(XMMRegister dst, XMMRegister src); 17.164 17.165 // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS 17.166 void comiss(XMMRegister dst, Address src); 17.167 + void comiss(XMMRegister dst, XMMRegister src); 17.168 17.169 // Identify processor type and features 17.170 void cpuid() { 17.171 @@ -849,14 +965,19 @@ 17.172 17.173 // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value 17.174 void cvtsd2ss(XMMRegister dst, XMMRegister src); 17.175 + void cvtsd2ss(XMMRegister dst, Address src); 17.176 17.177 // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value 17.178 void cvtsi2sdl(XMMRegister dst, Register src); 17.179 + void cvtsi2sdl(XMMRegister dst, Address src); 17.180 void cvtsi2sdq(XMMRegister dst, Register src); 17.181 + void cvtsi2sdq(XMMRegister dst, Address src); 17.182 17.183 // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value 17.184 void cvtsi2ssl(XMMRegister dst, Register src); 17.185 + void cvtsi2ssl(XMMRegister dst, Address src); 17.186 void cvtsi2ssq(XMMRegister dst, Register src); 17.187 + void cvtsi2ssq(XMMRegister dst, Address src); 17.188 17.189 // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value 17.190 void cvtdq2pd(XMMRegister dst, XMMRegister src); 17.191 @@ -866,6 +987,7 @@ 17.192 17.193 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value 17.194 void cvtss2sd(XMMRegister dst, XMMRegister src); 17.195 + void 
cvtss2sd(XMMRegister dst, Address src); 17.196 17.197 // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer 17.198 void cvttsd2sil(Register dst, Address src); 17.199 @@ -1140,8 +1262,6 @@ 17.200 void movdq(Register dst, XMMRegister src); 17.201 17.202 // Move Aligned Double Quadword 17.203 - void movdqa(Address dst, XMMRegister src); 17.204 - void movdqa(XMMRegister dst, Address src); 17.205 void movdqa(XMMRegister dst, XMMRegister src); 17.206 17.207 // Move Unaligned Double Quadword 17.208 @@ -1261,10 +1381,18 @@ 17.209 void orq(Register dst, Address src); 17.210 void orq(Register dst, Register src); 17.211 17.212 + // Pack with unsigned saturation 17.213 + void packuswb(XMMRegister dst, XMMRegister src); 17.214 + void packuswb(XMMRegister dst, Address src); 17.215 + 17.216 // SSE4.2 string instructions 17.217 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); 17.218 void pcmpestri(XMMRegister xmm1, Address src, int imm8); 17.219 17.220 + // SSE4.1 packed move 17.221 + void pmovzxbw(XMMRegister dst, XMMRegister src); 17.222 + void pmovzxbw(XMMRegister dst, Address src); 17.223 + 17.224 #ifndef _LP64 // no 32bit push/pop on amd64 17.225 void popl(Address dst); 17.226 #endif 17.227 @@ -1292,6 +1420,7 @@ 17.228 17.229 // POR - Bitwise logical OR 17.230 void por(XMMRegister dst, XMMRegister src); 17.231 + void por(XMMRegister dst, Address src); 17.232 17.233 // Shuffle Packed Doublewords 17.234 void pshufd(XMMRegister dst, XMMRegister src, int mode); 17.235 @@ -1313,6 +1442,11 @@ 17.236 17.237 // Interleave Low Bytes 17.238 void punpcklbw(XMMRegister dst, XMMRegister src); 17.239 + void punpcklbw(XMMRegister dst, Address src); 17.240 + 17.241 + // Interleave Low Doublewords 17.242 + void punpckldq(XMMRegister dst, XMMRegister src); 17.243 + void punpckldq(XMMRegister dst, Address src); 17.244 17.245 #ifndef _LP64 // no 32bit push/pop on amd64 17.246 void pushl(Address src); 17.247 @@ -1429,6 +1563,13 @@ 17.248 void 
xchgq(Register reg, Address adr); 17.249 void xchgq(Register dst, Register src); 17.250 17.251 + // Get Value of Extended Control Register 17.252 + void xgetbv() { 17.253 + emit_byte(0x0F); 17.254 + emit_byte(0x01); 17.255 + emit_byte(0xD0); 17.256 + } 17.257 + 17.258 void xorl(Register dst, int32_t imm32); 17.259 void xorl(Register dst, Address src); 17.260 void xorl(Register dst, Register src); 17.261 @@ -1437,14 +1578,44 @@ 17.262 void xorq(Register dst, Register src); 17.263 17.264 // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values 17.265 + void xorpd(XMMRegister dst, XMMRegister src); 17.266 + 17.267 + // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values 17.268 + void xorps(XMMRegister dst, XMMRegister src); 17.269 + 17.270 + void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 17.271 + 17.272 + // AVX 3-operands instructions (encoded with VEX prefix) 17.273 + void vaddsd(XMMRegister dst, XMMRegister nds, Address src); 17.274 + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 17.275 + void vaddss(XMMRegister dst, XMMRegister nds, Address src); 17.276 + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src); 17.277 + void vandpd(XMMRegister dst, XMMRegister nds, Address src); 17.278 + void vandps(XMMRegister dst, XMMRegister nds, Address src); 17.279 + void vdivsd(XMMRegister dst, XMMRegister nds, Address src); 17.280 + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 17.281 + void vdivss(XMMRegister dst, XMMRegister nds, Address src); 17.282 + void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src); 17.283 + void vmulsd(XMMRegister dst, XMMRegister nds, Address src); 17.284 + void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 17.285 + void vmulss(XMMRegister dst, XMMRegister nds, Address src); 17.286 + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src); 17.287 + void vsubsd(XMMRegister dst, XMMRegister nds, Address src); 
17.288 + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src); 17.289 + void vsubss(XMMRegister dst, XMMRegister nds, Address src); 17.290 + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src); 17.291 + void vxorpd(XMMRegister dst, XMMRegister nds, Address src); 17.292 + void vxorps(XMMRegister dst, XMMRegister nds, Address src); 17.293 + 17.294 + 17.295 + protected: 17.296 + // Next instructions require address alignment 16 bytes SSE mode. 17.297 + // They should be called only from corresponding MacroAssembler instructions. 17.298 + void andpd(XMMRegister dst, Address src); 17.299 + void andps(XMMRegister dst, Address src); 17.300 void xorpd(XMMRegister dst, Address src); 17.301 - void xorpd(XMMRegister dst, XMMRegister src); 17.302 - 17.303 - // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values 17.304 void xorps(XMMRegister dst, Address src); 17.305 - void xorps(XMMRegister dst, XMMRegister src); 17.306 - 17.307 - void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 17.308 + 17.309 }; 17.310 17.311 17.312 @@ -2175,9 +2346,15 @@ 17.313 void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } 17.314 void andpd(XMMRegister dst, AddressLiteral src); 17.315 17.316 + void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } 17.317 + void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } 17.318 + void andps(XMMRegister dst, AddressLiteral src); 17.319 + 17.320 + void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } 17.321 void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } 17.322 void comiss(XMMRegister dst, AddressLiteral src); 17.323 17.324 + void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); } 17.325 void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } 17.326 void comisd(XMMRegister dst, AddressLiteral src); 17.327 17.328 @@ -2211,62 +2388,62 @@ 
17.329 void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } 17.330 void movss(XMMRegister dst, AddressLiteral src); 17.331 17.332 - void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } 17.333 + void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } 17.334 void movlpd(XMMRegister dst, AddressLiteral src); 17.335 17.336 public: 17.337 17.338 void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } 17.339 void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } 17.340 - void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); } 17.341 + void addsd(XMMRegister dst, AddressLiteral src); 17.342 17.343 void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } 17.344 void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } 17.345 - void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); } 17.346 + void addss(XMMRegister dst, AddressLiteral src); 17.347 17.348 void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } 17.349 void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } 17.350 - void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); } 17.351 + void divsd(XMMRegister dst, AddressLiteral src); 17.352 17.353 void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } 17.354 void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } 17.355 - void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); } 17.356 + void divss(XMMRegister dst, AddressLiteral src); 17.357 17.358 void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } 17.359 void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } 17.360 void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } 17.361 - void movsd(XMMRegister dst, 
AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); } 17.362 + void movsd(XMMRegister dst, AddressLiteral src); 17.363 17.364 void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } 17.365 void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } 17.366 - void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); } 17.367 + void mulsd(XMMRegister dst, AddressLiteral src); 17.368 17.369 void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } 17.370 void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } 17.371 - void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); } 17.372 + void mulss(XMMRegister dst, AddressLiteral src); 17.373 17.374 void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } 17.375 void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } 17.376 - void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); } 17.377 + void sqrtsd(XMMRegister dst, AddressLiteral src); 17.378 17.379 void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } 17.380 void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } 17.381 - void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); } 17.382 + void sqrtss(XMMRegister dst, AddressLiteral src); 17.383 17.384 void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } 17.385 void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } 17.386 - void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); } 17.387 + void subsd(XMMRegister dst, AddressLiteral src); 17.388 17.389 void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } 17.390 void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } 17.391 - void subss(XMMRegister dst, 
AddressLiteral src) { Assembler::subss(dst, as_Address(src)); } 17.392 + void subss(XMMRegister dst, AddressLiteral src); 17.393 17.394 void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } 17.395 - void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } 17.396 + void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } 17.397 void ucomiss(XMMRegister dst, AddressLiteral src); 17.398 17.399 void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } 17.400 - void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } 17.401 + void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } 17.402 void ucomisd(XMMRegister dst, AddressLiteral src); 17.403 17.404 // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values 17.405 @@ -2279,6 +2456,53 @@ 17.406 void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } 17.407 void xorps(XMMRegister dst, AddressLiteral src); 17.408 17.409 + // AVX 3-operands instructions 17.410 + 17.411 + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } 17.412 + void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); } 17.413 + void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.414 + 17.415 + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); } 17.416 + void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } 17.417 + void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.418 + 17.419 + void vandpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandpd(dst, nds, src); } 17.420 + void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.421 + 17.422 + void vandps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandps(dst, nds, src); } 17.423 + void 
vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.424 + 17.425 + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } 17.426 + void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } 17.427 + void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.428 + 17.429 + void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } 17.430 + void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } 17.431 + void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.432 + 17.433 + void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); } 17.434 + void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } 17.435 + void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.436 + 17.437 + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } 17.438 + void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } 17.439 + void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.440 + 17.441 + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } 17.442 + void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } 17.443 + void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.444 + 17.445 + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } 17.446 + void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } 17.447 + void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.448 + 17.449 + void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); } 17.450 + void 
vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.451 + 17.452 + void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); } 17.453 + void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src); 17.454 + 17.455 + 17.456 // Data 17.457 17.458 void cmov32( Condition cc, Register dst, Address src);
18.1 --- a/src/cpu/x86/vm/assembler_x86.inline.hpp Tue Dec 27 12:38:49 2011 -0800 18.2 +++ b/src/cpu/x86/vm/assembler_x86.inline.hpp Thu Dec 29 11:37:50 2011 -0800 18.3 @@ -86,6 +86,7 @@ 18.4 inline void Assembler::prefixq(Address adr, Register reg) {} 18.5 18.6 inline void Assembler::prefix(Address adr, XMMRegister reg) {} 18.7 +inline void Assembler::prefixq(Address adr, XMMRegister reg) {} 18.8 #else 18.9 inline void Assembler::emit_long64(jlong x) { 18.10 *(jlong*) _code_pos = x;
19.1 --- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Tue Dec 27 12:38:49 2011 -0800 19.2 +++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Thu Dec 29 11:37:50 2011 -0800 19.3 @@ -320,7 +320,7 @@ 19.4 // begin_initialized_entry_offset has to fit in a byte. Also, we know it's not null. 19.5 __ load_heap_oop_not_null(tmp2, Address(_obj, java_lang_Class::klass_offset_in_bytes())); 19.6 __ get_thread(tmp); 19.7 - __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc))); 19.8 + __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset())); 19.9 __ pop(tmp2); 19.10 __ pop(tmp); 19.11 __ jcc(Assembler::notEqual, call_patch); 19.12 @@ -519,7 +519,7 @@ 19.13 19.14 __ load_klass(tmp_reg, src_reg); 19.15 19.16 - Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc)); 19.17 + Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset()); 19.18 __ cmpl(ref_type_adr, REF_NONE); 19.19 __ jcc(Assembler::equal, _continuation); 19.20
20.1 --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Tue Dec 27 12:38:49 2011 -0800 20.2 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Thu Dec 29 11:37:50 2011 -0800 20.3 @@ -1558,7 +1558,7 @@ 20.4 void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { 20.5 if (op->init_check()) { 20.6 __ cmpl(Address(op->klass()->as_register(), 20.7 - instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), 20.8 + instanceKlass::init_state_offset()), 20.9 instanceKlass::fully_initialized); 20.10 add_debug_info_for_null_check_here(op->stub()->info()); 20.11 __ jcc(Assembler::notEqual, *op->stub()->entry()); 20.12 @@ -1730,7 +1730,7 @@ 20.13 #else 20.14 __ cmpoop(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding()); 20.15 #endif // _LP64 20.16 - if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) { 20.17 + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { 20.18 __ jcc(Assembler::notEqual, *failure_target); 20.19 // successful cast, fall through to profile or jump 20.20 } else { 20.21 @@ -1842,7 +1842,7 @@ 20.22 __ load_klass(klass_RInfo, value); 20.23 20.24 // get instance klass (it's already uncompressed) 20.25 - __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); 20.26 + __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset())); 20.27 // perform the fast part of the checking logic 20.28 __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); 20.29 // call out-of-line instance of __ check_klass_subtype_slow_path(...): 20.30 @@ -3289,8 +3289,7 @@ 20.31 } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { 20.32 __ load_klass(tmp, dst); 20.33 } 20.34 - int lh_offset = klassOopDesc::header_size() * HeapWordSize + 20.35 - Klass::layout_helper_offset_in_bytes(); 20.36 + int lh_offset = in_bytes(Klass::layout_helper_offset()); 20.37 Address klass_lh_addr(tmp, 
lh_offset); 20.38 jint objArray_lh = Klass::array_layout_helper(T_OBJECT); 20.39 __ cmpl(klass_lh_addr, objArray_lh); 20.40 @@ -3307,9 +3306,9 @@ 20.41 20.42 #ifndef _LP64 20.43 __ movptr(tmp, dst_klass_addr); 20.44 - __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); 20.45 + __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset())); 20.46 __ push(tmp); 20.47 - __ movl(tmp, Address(tmp, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); 20.48 + __ movl(tmp, Address(tmp, Klass::super_check_offset_offset())); 20.49 __ push(tmp); 20.50 __ push(length); 20.51 __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); 20.52 @@ -3333,15 +3332,15 @@ 20.53 // Allocate abi space for args but be sure to keep stack aligned 20.54 __ subptr(rsp, 6*wordSize); 20.55 __ load_klass(c_rarg3, dst); 20.56 - __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); 20.57 + __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset())); 20.58 store_parameter(c_rarg3, 4); 20.59 - __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); 20.60 + __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset())); 20.61 __ call(RuntimeAddress(copyfunc_addr)); 20.62 __ addptr(rsp, 6*wordSize); 20.63 #else 20.64 __ load_klass(c_rarg4, dst); 20.65 - __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); 20.66 - __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); 20.67 + __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset())); 20.68 + __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset())); 20.69 __ call(RuntimeAddress(copyfunc_addr)); 20.70 #endif 20.71
21.1 --- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Tue Dec 27 12:38:49 2011 -0800 21.2 +++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp Thu Dec 29 11:37:50 2011 -0800 21.3 @@ -150,7 +150,7 @@ 21.4 assert_different_registers(obj, klass, len); 21.5 if (UseBiasedLocking && !len->is_valid()) { 21.6 assert_different_registers(obj, klass, len, t1, t2); 21.7 - movptr(t1, Address(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 21.8 + movptr(t1, Address(klass, Klass::prototype_header_offset())); 21.9 movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1); 21.10 } else { 21.11 // This assumes that all prototype bits fit in an int32_t
22.1 --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Tue Dec 27 12:38:49 2011 -0800 22.2 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Thu Dec 29 11:37:50 2011 -0800 22.3 @@ -1011,7 +1011,7 @@ 22.4 22.5 if (id == fast_new_instance_init_check_id) { 22.6 // make sure the klass is initialized 22.7 - __ cmpl(Address(klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized); 22.8 + __ cmpl(Address(klass, instanceKlass::init_state_offset()), instanceKlass::fully_initialized); 22.9 __ jcc(Assembler::notEqual, slow_path); 22.10 } 22.11 22.12 @@ -1019,7 +1019,7 @@ 22.13 // assert object can be fast path allocated 22.14 { 22.15 Label ok, not_ok; 22.16 - __ movl(obj_size, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); 22.17 + __ movl(obj_size, Address(klass, Klass::layout_helper_offset())); 22.18 __ cmpl(obj_size, 0); // make sure it's an instance (LH > 0) 22.19 __ jcc(Assembler::lessEqual, not_ok); 22.20 __ testl(obj_size, Klass::_lh_instance_slow_path_bit); 22.21 @@ -1040,7 +1040,7 @@ 22.22 __ bind(retry_tlab); 22.23 22.24 // get the instance size (size is postive so movl is fine for 64bit) 22.25 - __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); 22.26 + __ movl(obj_size, Address(klass, Klass::layout_helper_offset())); 22.27 22.28 __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path); 22.29 22.30 @@ -1052,7 +1052,7 @@ 22.31 22.32 __ bind(try_eden); 22.33 // get the instance size (size is postive so movl is fine for 64bit) 22.34 - __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); 22.35 + __ movl(obj_size, Address(klass, Klass::layout_helper_offset())); 22.36 22.37 __ eden_allocate(obj, obj_size, 0, t1, slow_path); 22.38 __ incr_allocated_bytes(thread, obj_size, 0); 22.39 @@ -1119,7 +1119,7 @@ 22.40 { 22.41 Label ok; 22.42 Register t0 = obj; 22.43 - __ movl(t0, Address(klass, 
Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); 22.44 + __ movl(t0, Address(klass, Klass::layout_helper_offset())); 22.45 __ sarl(t0, Klass::_lh_array_tag_shift); 22.46 int tag = ((id == new_type_array_id) 22.47 ? Klass::_lh_array_tag_type_value 22.48 @@ -1153,7 +1153,7 @@ 22.49 22.50 // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) 22.51 // since size is positive movl does right thing on 64bit 22.52 - __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); 22.53 + __ movl(t1, Address(klass, Klass::layout_helper_offset())); 22.54 // since size is postive movl does right thing on 64bit 22.55 __ movl(arr_size, length); 22.56 assert(t1 == rcx, "fixed register usage"); 22.57 @@ -1167,7 +1167,7 @@ 22.58 __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size 22.59 22.60 __ initialize_header(obj, klass, length, t1, t2); 22.61 - __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte))); 22.62 + __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); 22.63 assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); 22.64 assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); 22.65 __ andptr(t1, Klass::_lh_header_size_mask); 22.66 @@ -1180,7 +1180,7 @@ 22.67 __ bind(try_eden); 22.68 // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) 22.69 // since size is positive movl does right thing on 64bit 22.70 - __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); 22.71 + __ movl(t1, Address(klass, Klass::layout_helper_offset())); 22.72 // since size is postive movl does right thing on 64bit 22.73 __ movl(arr_size, length); 22.74 assert(t1 == rcx, "fixed register usage"); 22.75 @@ -1195,7 +1195,7 @@ 22.76 __ 
incr_allocated_bytes(thread, arr_size, 0); 22.77 22.78 __ initialize_header(obj, klass, length, t1, t2); 22.79 - __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte))); 22.80 + __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); 22.81 assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); 22.82 assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); 22.83 __ andptr(t1, Klass::_lh_header_size_mask); 22.84 @@ -1267,7 +1267,7 @@ 22.85 Label register_finalizer; 22.86 Register t = rsi; 22.87 __ load_klass(t, rax); 22.88 - __ movl(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); 22.89 + __ movl(t, Address(t, Klass::access_flags_offset())); 22.90 __ testl(t, JVM_ACC_HAS_FINALIZER); 22.91 __ jcc(Assembler::notZero, register_finalizer); 22.92 __ ret(0);
23.1 --- a/src/cpu/x86/vm/cppInterpreter_x86.cpp Tue Dec 27 12:38:49 2011 -0800 23.2 +++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp Thu Dec 29 11:37:50 2011 -0800 23.3 @@ -511,7 +511,7 @@ 23.4 // get synchronization object 23.5 23.6 Label done; 23.7 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 23.8 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 23.9 __ movl(rax, access_flags); 23.10 __ testl(rax, JVM_ACC_STATIC); 23.11 __ movptr(rax, Address(locals, 0)); // get receiver (assume this is frequent case) 23.12 @@ -763,7 +763,7 @@ 23.13 #endif // ASSERT 23.14 // get synchronization object 23.15 { Label done; 23.16 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 23.17 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 23.18 __ movl(rax, access_flags); 23.19 __ movptr(rdi, STATE(_locals)); // prepare to get receiver (assume common case) 23.20 __ testl(rax, JVM_ACC_STATIC); 23.21 @@ -1180,7 +1180,7 @@ 23.22 23.23 // pass mirror handle if static call 23.24 { Label L; 23.25 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 23.26 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 23.27 __ movl(t, Address(method, methodOopDesc::access_flags_offset())); 23.28 __ testl(t, JVM_ACC_STATIC); 23.29 __ jcc(Assembler::zero, L);
24.1 --- a/src/cpu/x86/vm/methodHandles_x86.cpp Tue Dec 27 12:38:49 2011 -0800 24.2 +++ b/src/cpu/x86/vm/methodHandles_x86.cpp Thu Dec 29 11:37:50 2011 -0800 24.3 @@ -1160,7 +1160,7 @@ 24.4 Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() ); 24.5 Address vmarg; // __ argument_address(vmargslot) 24.6 24.7 - const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 24.8 + const int java_mirror_offset = in_bytes(Klass::java_mirror_offset()); 24.9 24.10 if (have_entry(ek)) { 24.11 __ nop(); // empty stubs make SG sick
25.1 --- a/src/cpu/x86/vm/nativeInst_x86.cpp Tue Dec 27 12:38:49 2011 -0800 25.2 +++ b/src/cpu/x86/vm/nativeInst_x86.cpp Thu Dec 29 11:37:50 2011 -0800 25.3 @@ -237,9 +237,21 @@ 25.4 int off = 0; 25.5 u_char instr_0 = ubyte_at(off); 25.6 25.7 + // See comment in Assembler::locate_operand() about VEX prefixes. 25.8 + if (instr_0 == instruction_VEX_prefix_2bytes) { 25.9 + assert((UseAVX > 0), "shouldn't have VEX prefix"); 25.10 + NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); 25.11 + return 2; 25.12 + } 25.13 + if (instr_0 == instruction_VEX_prefix_3bytes) { 25.14 + assert((UseAVX > 0), "shouldn't have VEX prefix"); 25.15 + NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); 25.16 + return 3; 25.17 + } 25.18 + 25.19 // First check to see if we have a (prefixed or not) xor 25.20 - if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 25.21 - instr_0 <= instruction_prefix_wide_hi) { // 0x4f 25.22 + if (instr_0 >= instruction_prefix_wide_lo && // 0x40 25.23 + instr_0 <= instruction_prefix_wide_hi) { // 0x4f 25.24 off++; 25.25 instr_0 = ubyte_at(off); 25.26 } 25.27 @@ -256,13 +268,13 @@ 25.28 instr_0 = ubyte_at(off); 25.29 } 25.30 25.31 - if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3 25.32 + if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3 25.33 instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2 25.34 off++; 25.35 instr_0 = ubyte_at(off); 25.36 } 25.37 25.38 - if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 25.39 + if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 25.40 instr_0 <= instruction_prefix_wide_hi) { // 0x4f 25.41 off++; 25.42 instr_0 = ubyte_at(off);
26.1 --- a/src/cpu/x86/vm/nativeInst_x86.hpp Tue Dec 27 12:38:49 2011 -0800 26.2 +++ b/src/cpu/x86/vm/nativeInst_x86.hpp Thu Dec 29 11:37:50 2011 -0800 26.3 @@ -287,6 +287,9 @@ 26.4 instruction_code_xmm_store = 0x11, 26.5 instruction_code_xmm_lpd = 0x12, 26.6 26.7 + instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes, 26.8 + instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes, 26.9 + 26.10 instruction_size = 4, 26.11 instruction_offset = 0, 26.12 data_offset = 2,
27.1 --- a/src/cpu/x86/vm/register_definitions_x86.cpp Tue Dec 27 12:38:49 2011 -0800 27.2 +++ b/src/cpu/x86/vm/register_definitions_x86.cpp Thu Dec 29 11:37:50 2011 -0800 27.3 @@ -53,6 +53,7 @@ 27.4 REGISTER_DEFINITION(Register, r15); 27.5 #endif // AMD64 27.6 27.7 +REGISTER_DEFINITION(XMMRegister, xnoreg); 27.8 REGISTER_DEFINITION(XMMRegister, xmm0 ); 27.9 REGISTER_DEFINITION(XMMRegister, xmm1 ); 27.10 REGISTER_DEFINITION(XMMRegister, xmm2 ); 27.11 @@ -115,6 +116,7 @@ 27.12 REGISTER_DEFINITION(Register, r15_thread); 27.13 #endif // AMD64 27.14 27.15 +REGISTER_DEFINITION(MMXRegister, mnoreg ); 27.16 REGISTER_DEFINITION(MMXRegister, mmx0 ); 27.17 REGISTER_DEFINITION(MMXRegister, mmx1 ); 27.18 REGISTER_DEFINITION(MMXRegister, mmx2 );
28.1 --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Tue Dec 27 12:38:49 2011 -0800 28.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Dec 29 11:37:50 2011 -0800 28.3 @@ -1374,8 +1374,7 @@ 28.4 // L_success, L_failure, NULL); 28.5 assert_different_registers(sub_klass, temp); 28.6 28.7 - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 28.8 - Klass::secondary_super_cache_offset_in_bytes()); 28.9 + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 28.10 28.11 // if the pointers are equal, we are done (e.g., String[] elements) 28.12 __ cmpptr(sub_klass, super_klass_addr); 28.13 @@ -1787,8 +1786,7 @@ 28.14 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 28.15 // 28.16 28.17 - int lh_offset = klassOopDesc::header_size() * HeapWordSize + 28.18 - Klass::layout_helper_offset_in_bytes(); 28.19 + int lh_offset = in_bytes(Klass::layout_helper_offset()); 28.20 Address src_klass_lh_addr(rcx_src_klass, lh_offset); 28.21 28.22 // Handle objArrays completely differently... 28.23 @@ -1914,10 +1912,8 @@ 28.24 // live at this point: rcx_src_klass, dst[_pos], src[_pos] 28.25 { 28.26 // Handy offsets: 28.27 - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + 28.28 - objArrayKlass::element_klass_offset_in_bytes()); 28.29 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 28.30 - Klass::super_check_offset_offset_in_bytes()); 28.31 + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); 28.32 + int sco_offset = in_bytes(Klass::super_check_offset_offset()); 28.33 28.34 Register rsi_dst_klass = rsi; 28.35 Register rdi_temp = rdi;
29.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Dec 27 12:38:49 2011 -0800 29.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Dec 29 11:37:50 2011 -0800 29.3 @@ -2261,8 +2261,7 @@ 29.4 // The ckoff and ckval must be mutually consistent, 29.5 // even though caller generates both. 29.6 { Label L; 29.7 - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 29.8 - Klass::super_check_offset_offset_in_bytes()); 29.9 + int sco_offset = in_bytes(Klass::super_check_offset_offset()); 29.10 __ cmpl(ckoff, Address(ckval, sco_offset)); 29.11 __ jcc(Assembler::equal, L); 29.12 __ stop("super_check_offset inconsistent"); 29.13 @@ -2572,8 +2571,7 @@ 29.14 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 29.15 // 29.16 29.17 - const int lh_offset = klassOopDesc::header_size() * HeapWordSize + 29.18 - Klass::layout_helper_offset_in_bytes(); 29.19 + const int lh_offset = in_bytes(Klass::layout_helper_offset()); 29.20 29.21 // Handle objArrays completely differently... 29.22 const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); 29.23 @@ -2722,15 +2720,13 @@ 29.24 assert_clean_int(count, sco_temp); 29.25 29.26 // Generate the type check. 29.27 - const int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 29.28 - Klass::super_check_offset_offset_in_bytes()); 29.29 + const int sco_offset = in_bytes(Klass::super_check_offset_offset()); 29.30 __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); 29.31 assert_clean_int(sco_temp, rax); 29.32 generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy); 29.33 29.34 // Fetch destination element klass from the objArrayKlass header. 
29.35 - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + 29.36 - objArrayKlass::element_klass_offset_in_bytes()); 29.37 + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); 29.38 __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); 29.39 __ movl( sco_temp, Address(r11_dst_klass, sco_offset)); 29.40 assert_clean_int(sco_temp, rax);
30.1 --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Tue Dec 27 12:38:49 2011 -0800 30.2 +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Thu Dec 29 11:37:50 2011 -0800 30.3 @@ -552,7 +552,7 @@ 30.4 #endif // ASSERT 30.5 // get synchronization object 30.6 { Label done; 30.7 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 30.8 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 30.9 __ movl(rax, access_flags); 30.10 __ testl(rax, JVM_ACC_STATIC); 30.11 __ movptr(rax, Address(rdi, Interpreter::local_offset_in_bytes(0))); // get receiver (assume this is frequent case) 30.12 @@ -1012,7 +1012,7 @@ 30.13 30.14 // pass mirror handle if static call 30.15 { Label L; 30.16 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); 30.17 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 30.18 __ movl(t, Address(method, methodOopDesc::access_flags_offset())); 30.19 __ testl(t, JVM_ACC_STATIC); 30.20 __ jcc(Assembler::zero, L);
31.1 --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Tue Dec 27 12:38:49 2011 -0800 31.2 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Thu Dec 29 11:37:50 2011 -0800 31.3 @@ -505,8 +505,7 @@ 31.4 31.5 // get synchronization object 31.6 { 31.7 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + 31.8 - Klass::java_mirror_offset_in_bytes(); 31.9 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 31.10 Label done; 31.11 __ movl(rax, access_flags); 31.12 __ testl(rax, JVM_ACC_STATIC); 31.13 @@ -1006,8 +1005,7 @@ 31.14 // pass mirror handle if static call 31.15 { 31.16 Label L; 31.17 - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + 31.18 - Klass::java_mirror_offset_in_bytes(); 31.19 + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); 31.20 __ movl(t, Address(method, methodOopDesc::access_flags_offset())); 31.21 __ testl(t, JVM_ACC_STATIC); 31.22 __ jcc(Assembler::zero, L);
32.1 --- a/src/cpu/x86/vm/templateTable_x86_32.cpp Tue Dec 27 12:38:49 2011 -0800 32.2 +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp Thu Dec 29 11:37:50 2011 -0800 32.3 @@ -980,7 +980,7 @@ 32.4 __ load_klass(rbx, rax); 32.5 // Move superklass into EAX 32.6 __ load_klass(rax, rdx); 32.7 - __ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); 32.8 + __ movptr(rax, Address(rax, objArrayKlass::element_klass_offset())); 32.9 // Compress array+index*wordSize+12 into a single register. Frees ECX. 32.10 __ lea(rdx, element_address); 32.11 32.12 @@ -2033,7 +2033,7 @@ 32.13 assert(state == vtos, "only valid state"); 32.14 __ movptr(rax, aaddress(0)); 32.15 __ load_klass(rdi, rax); 32.16 - __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); 32.17 + __ movl(rdi, Address(rdi, Klass::access_flags_offset())); 32.18 __ testl(rdi, JVM_ACC_HAS_FINALIZER); 32.19 Label skip_register_finalizer; 32.20 __ jcc(Assembler::zero, skip_register_finalizer); 32.21 @@ -3188,11 +3188,11 @@ 32.22 32.23 // make sure klass is initialized & doesn't have finalizer 32.24 // make sure klass is fully initialized 32.25 - __ cmpl(Address(rcx, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized); 32.26 + __ cmpl(Address(rcx, instanceKlass::init_state_offset()), instanceKlass::fully_initialized); 32.27 __ jcc(Assembler::notEqual, slow_case); 32.28 32.29 // get instance_size in instanceKlass (scaled to a count of bytes) 32.30 - __ movl(rdx, Address(rcx, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); 32.31 + __ movl(rdx, Address(rcx, Klass::layout_helper_offset())); 32.32 // test to see if it has a finalizer or is malformed in some way 32.33 __ testl(rdx, Klass::_lh_instance_slow_path_bit); 32.34 __ jcc(Assembler::notZero, slow_case); 32.35 @@ -3293,7 +3293,7 @@ 32.36 __ bind(initialize_header); 32.37 if (UseBiasedLocking) { 32.38 __ pop(rcx); // get saved klass back in the 
register. 32.39 - __ movptr(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 32.40 + __ movptr(rbx, Address(rcx, Klass::prototype_header_offset())); 32.41 __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx); 32.42 } else { 32.43 __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
33.1 --- a/src/cpu/x86/vm/templateTable_x86_64.cpp Tue Dec 27 12:38:49 2011 -0800 33.2 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp Thu Dec 29 11:37:50 2011 -0800 33.3 @@ -1004,8 +1004,7 @@ 33.4 // Move superklass into rax 33.5 __ load_klass(rax, rdx); 33.6 __ movptr(rax, Address(rax, 33.7 - sizeof(oopDesc) + 33.8 - objArrayKlass::element_klass_offset_in_bytes())); 33.9 + objArrayKlass::element_klass_offset())); 33.10 // Compress array + index*oopSize + 12 into a single register. Frees rcx. 33.11 __ lea(rdx, element_address); 33.12 33.13 @@ -2067,7 +2066,7 @@ 33.14 assert(state == vtos, "only valid state"); 33.15 __ movptr(c_rarg1, aaddress(0)); 33.16 __ load_klass(rdi, c_rarg1); 33.17 - __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); 33.18 + __ movl(rdi, Address(rdi, Klass::access_flags_offset())); 33.19 __ testl(rdi, JVM_ACC_HAS_FINALIZER); 33.20 Label skip_register_finalizer; 33.21 __ jcc(Assembler::zero, skip_register_finalizer); 33.22 @@ -3236,15 +3235,14 @@ 33.23 // make sure klass is initialized & doesn't have finalizer 33.24 // make sure klass is fully initialized 33.25 __ cmpl(Address(rsi, 33.26 - instanceKlass::init_state_offset_in_bytes() + 33.27 - sizeof(oopDesc)), 33.28 + instanceKlass::init_state_offset()), 33.29 instanceKlass::fully_initialized); 33.30 __ jcc(Assembler::notEqual, slow_case); 33.31 33.32 // get instance_size in instanceKlass (scaled to a count of bytes) 33.33 __ movl(rdx, 33.34 Address(rsi, 33.35 - Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); 33.36 + Klass::layout_helper_offset())); 33.37 // test to see if it has a finalizer or is malformed in some way 33.38 __ testl(rdx, Klass::_lh_instance_slow_path_bit); 33.39 __ jcc(Assembler::notZero, slow_case); 33.40 @@ -3337,7 +3335,7 @@ 33.41 // initialize object header only. 
33.42 __ bind(initialize_header); 33.43 if (UseBiasedLocking) { 33.44 - __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 33.45 + __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset())); 33.46 __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1); 33.47 } else { 33.48 __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()),
34.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Tue Dec 27 12:38:49 2011 -0800 34.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Thu Dec 29 11:37:50 2011 -0800 34.3 @@ -50,7 +50,7 @@ 34.4 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 34.5 34.6 static BufferBlob* stub_blob; 34.7 -static const int stub_size = 400; 34.8 +static const int stub_size = 500; 34.9 34.10 extern "C" { 34.11 typedef void (*getPsrInfo_stub_t)(void*); 34.12 @@ -73,7 +73,7 @@ 34.13 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 34.14 34.15 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; 34.16 - Label ext_cpuid1, ext_cpuid5, done; 34.17 + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, done; 34.18 34.19 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 34.20 # define __ _masm-> 34.21 @@ -229,6 +229,41 @@ 34.22 __ movl(Address(rsi, 8), rcx); 34.23 __ movl(Address(rsi,12), rdx); 34.24 34.25 + // 34.26 + // Check if OS has enabled XGETBV instruction to access XCR0 34.27 + // (OSXSAVE feature flag) and CPU supports AVX 34.28 + // 34.29 + __ andl(rcx, 0x18000000); 34.30 + __ cmpl(rcx, 0x18000000); 34.31 + __ jccb(Assembler::notEqual, sef_cpuid); 34.32 + 34.33 + // 34.34 + // XCR0, XFEATURE_ENABLED_MASK register 34.35 + // 34.36 + __ xorl(rcx, rcx); // zero for XCR0 register 34.37 + __ xgetbv(); 34.38 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); 34.39 + __ movl(Address(rsi, 0), rax); 34.40 + __ movl(Address(rsi, 4), rdx); 34.41 + 34.42 + // 34.43 + // cpuid(0x7) Structured Extended Features 34.44 + // 34.45 + __ bind(sef_cpuid); 34.46 + __ movl(rax, 7); 34.47 + __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? 
34.48 + __ jccb(Assembler::greater, ext_cpuid); 34.49 + 34.50 + __ xorl(rcx, rcx); 34.51 + __ cpuid(); 34.52 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 34.53 + __ movl(Address(rsi, 0), rax); 34.54 + __ movl(Address(rsi, 4), rbx); 34.55 + 34.56 + // 34.57 + // Extended cpuid(0x80000000) 34.58 + // 34.59 + __ bind(ext_cpuid); 34.60 __ movl(rax, 0x80000000); 34.61 __ cpuid(); 34.62 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 34.63 @@ -359,13 +394,19 @@ 34.64 if (UseSSE < 1) 34.65 _cpuFeatures &= ~CPU_SSE; 34.66 34.67 + if (UseAVX < 2) 34.68 + _cpuFeatures &= ~CPU_AVX2; 34.69 + 34.70 + if (UseAVX < 1) 34.71 + _cpuFeatures &= ~CPU_AVX; 34.72 + 34.73 if (logical_processors_per_package() == 1) { 34.74 // HT processor could be installed on a system which doesn't support HT. 34.75 _cpuFeatures &= ~CPU_HT; 34.76 } 34.77 34.78 char buf[256]; 34.79 - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 34.80 + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 34.81 cores_per_cpu(), threads_per_core(), 34.82 cpu_family(), _model, _stepping, 34.83 (supports_cmov() ? ", cmov" : ""), 34.84 @@ -379,6 +420,8 @@ 34.85 (supports_sse4_1() ? ", sse4.1" : ""), 34.86 (supports_sse4_2() ? ", sse4.2" : ""), 34.87 (supports_popcnt() ? ", popcnt" : ""), 34.88 + (supports_avx() ? ", avx" : ""), 34.89 + (supports_avx2() ? ", avx2" : ""), 34.90 (supports_mmx_ext() ? ", mmxext" : ""), 34.91 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), 34.92 (supports_lzcnt() ? ", lzcnt": ""), 34.93 @@ -389,17 +432,24 @@ 34.94 // UseSSE is set to the smaller of what hardware supports and what 34.95 // the command line requires. I.e., you cannot set UseSSE to 2 on 34.96 // older Pentiums which do not support it. 
34.97 - if( UseSSE > 4 ) UseSSE=4; 34.98 - if( UseSSE < 0 ) UseSSE=0; 34.99 - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support 34.100 + if (UseSSE > 4) UseSSE=4; 34.101 + if (UseSSE < 0) UseSSE=0; 34.102 + if (!supports_sse4_1()) // Drop to 3 if no SSE4 support 34.103 UseSSE = MIN2((intx)3,UseSSE); 34.104 - if( !supports_sse3() ) // Drop to 2 if no SSE3 support 34.105 + if (!supports_sse3()) // Drop to 2 if no SSE3 support 34.106 UseSSE = MIN2((intx)2,UseSSE); 34.107 - if( !supports_sse2() ) // Drop to 1 if no SSE2 support 34.108 + if (!supports_sse2()) // Drop to 1 if no SSE2 support 34.109 UseSSE = MIN2((intx)1,UseSSE); 34.110 - if( !supports_sse () ) // Drop to 0 if no SSE support 34.111 + if (!supports_sse ()) // Drop to 0 if no SSE support 34.112 UseSSE = 0; 34.113 34.114 + if (UseAVX > 2) UseAVX=2; 34.115 + if (UseAVX < 0) UseAVX=0; 34.116 + if (!supports_avx2()) // Drop to 1 if no AVX2 support 34.117 + UseAVX = MIN2((intx)1,UseAVX); 34.118 + if (!supports_avx ()) // Drop to 0 if no AVX support 34.119 + UseAVX = 0; 34.120 + 34.121 // On new cpus instructions which update whole XMM register should be used 34.122 // to prevent partial register stall due to dependencies on high half. 
34.123 // 34.124 @@ -534,6 +584,9 @@ 34.125 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 34.126 UsePopCountInstruction = true; 34.127 } 34.128 + } else if (UsePopCountInstruction) { 34.129 + warning("POPCNT instruction is not available on this CPU"); 34.130 + FLAG_SET_DEFAULT(UsePopCountInstruction, false); 34.131 } 34.132 34.133 #ifdef COMPILER2 34.134 @@ -605,7 +658,11 @@ 34.135 if (PrintMiscellaneous && Verbose) { 34.136 tty->print_cr("Logical CPUs per core: %u", 34.137 logical_processors_per_package()); 34.138 - tty->print_cr("UseSSE=%d",UseSSE); 34.139 + tty->print("UseSSE=%d",UseSSE); 34.140 + if (UseAVX > 0) { 34.141 + tty->print(" UseAVX=%d",UseAVX); 34.142 + } 34.143 + tty->cr(); 34.144 tty->print("Allocation"); 34.145 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { 34.146 tty->print_cr(": no prefetching");
35.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp Tue Dec 27 12:38:49 2011 -0800 35.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Thu Dec 29 11:37:50 2011 -0800 35.3 @@ -78,7 +78,10 @@ 35.4 sse4_2 : 1, 35.5 : 2, 35.6 popcnt : 1, 35.7 - : 8; 35.8 + : 3, 35.9 + osxsave : 1, 35.10 + avx : 1, 35.11 + : 3; 35.12 } bits; 35.13 }; 35.14 35.15 @@ -176,6 +179,34 @@ 35.16 } bits; 35.17 }; 35.18 35.19 + union SefCpuid7Eax { 35.20 + uint32_t value; 35.21 + }; 35.22 + 35.23 + union SefCpuid7Ebx { 35.24 + uint32_t value; 35.25 + struct { 35.26 + uint32_t fsgsbase : 1, 35.27 + : 2, 35.28 + bmi1 : 1, 35.29 + : 1, 35.30 + avx2 : 1, 35.31 + : 2, 35.32 + bmi2 : 1, 35.33 + : 23; 35.34 + } bits; 35.35 + }; 35.36 + 35.37 + union XemXcr0Eax { 35.38 + uint32_t value; 35.39 + struct { 35.40 + uint32_t x87 : 1, 35.41 + sse : 1, 35.42 + ymm : 1, 35.43 + : 29; 35.44 + } bits; 35.45 + }; 35.46 + 35.47 protected: 35.48 static int _cpu; 35.49 static int _model; 35.50 @@ -200,7 +231,9 @@ 35.51 CPU_SSE4_1 = (1 << 11), 35.52 CPU_SSE4_2 = (1 << 12), 35.53 CPU_POPCNT = (1 << 13), 35.54 - CPU_LZCNT = (1 << 14) 35.55 + CPU_LZCNT = (1 << 14), 35.56 + CPU_AVX = (1 << 15), 35.57 + CPU_AVX2 = (1 << 16) 35.58 } cpuFeatureFlags; 35.59 35.60 // cpuid information block. 
All info derived from executing cpuid with 35.61 @@ -228,6 +261,12 @@ 35.62 uint32_t dcp_cpuid4_ecx; // unused currently 35.63 uint32_t dcp_cpuid4_edx; // unused currently 35.64 35.65 + // cpuid function 7 (structured extended features) 35.66 + SefCpuid7Eax sef_cpuid7_eax; 35.67 + SefCpuid7Ebx sef_cpuid7_ebx; 35.68 + uint32_t sef_cpuid7_ecx; // unused currently 35.69 + uint32_t sef_cpuid7_edx; // unused currently 35.70 + 35.71 // cpuid function 0xB (processor topology) 35.72 // ecx = 0 35.73 uint32_t tpl_cpuidB0_eax; 35.74 @@ -275,6 +314,10 @@ 35.75 uint32_t ext_cpuid8_ebx; // reserved 35.76 ExtCpuid8Ecx ext_cpuid8_ecx; 35.77 uint32_t ext_cpuid8_edx; // reserved 35.78 + 35.79 + // extended control register XCR0 (the XFEATURE_ENABLED_MASK register) 35.80 + XemXcr0Eax xem_xcr0_eax; 35.81 + uint32_t xem_xcr0_edx; // reserved 35.82 }; 35.83 35.84 // The actual cpuid info block 35.85 @@ -328,6 +371,14 @@ 35.86 result |= CPU_SSE4_2; 35.87 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) 35.88 result |= CPU_POPCNT; 35.89 + if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && 35.90 + _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && 35.91 + _cpuid_info.xem_xcr0_eax.bits.sse != 0 && 35.92 + _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { 35.93 + result |= CPU_AVX; 35.94 + if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) 35.95 + result |= CPU_AVX2; 35.96 + } 35.97 35.98 // AMD features. 
35.99 if (is_amd()) { 35.100 @@ -350,12 +401,14 @@ 35.101 static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } 35.102 static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } 35.103 static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } 35.104 + static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); } 35.105 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } 35.106 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } 35.107 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } 35.108 static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } 35.109 static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } 35.110 static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } 35.111 + static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); } 35.112 35.113 // Initialization 35.114 static void initialize(); 35.115 @@ -447,6 +500,8 @@ 35.116 static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } 35.117 static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } 35.118 static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } 35.119 + static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; } 35.120 + static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } 35.121 // 35.122 // AMD features 35.123 //
36.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 36.2 +++ b/src/cpu/x86/vm/x86.ad Thu Dec 29 11:37:50 2011 -0800 36.3 @@ -0,0 +1,777 @@ 36.4 +// 36.5 +// Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 36.6 +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 36.7 +// 36.8 +// This code is free software; you can redistribute it and/or modify it 36.9 +// under the terms of the GNU General Public License version 2 only, as 36.10 +// published by the Free Software Foundation. 36.11 +// 36.12 +// This code is distributed in the hope that it will be useful, but WITHOUT 36.13 +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 36.14 +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 36.15 +// version 2 for more details (a copy is included in the LICENSE file that 36.16 +// accompanied this code). 36.17 +// 36.18 +// You should have received a copy of the GNU General Public License version 36.19 +// 2 along with this work; if not, write to the Free Software Foundation, 36.20 +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 36.21 +// 36.22 +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 36.23 +// or visit www.oracle.com if you need additional information or have any 36.24 +// questions. 36.25 +// 36.26 +// 36.27 + 36.28 +// X86 Common Architecture Description File 36.29 + 36.30 +source %{ 36.31 + // Float masks come from different places depending on platform. 
36.32 +#ifdef _LP64 36.33 + static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 36.34 + static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 36.35 + static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 36.36 + static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 36.37 +#else 36.38 + static address float_signmask() { return (address)float_signmask_pool; } 36.39 + static address float_signflip() { return (address)float_signflip_pool; } 36.40 + static address double_signmask() { return (address)double_signmask_pool; } 36.41 + static address double_signflip() { return (address)double_signflip_pool; } 36.42 +#endif 36.43 +%} 36.44 + 36.45 +// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 36.46 + 36.47 +instruct addF_reg(regF dst, regF src) %{ 36.48 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.49 + match(Set dst (AddF dst src)); 36.50 + 36.51 + format %{ "addss $dst, $src" %} 36.52 + ins_cost(150); 36.53 + ins_encode %{ 36.54 + __ addss($dst$$XMMRegister, $src$$XMMRegister); 36.55 + %} 36.56 + ins_pipe(pipe_slow); 36.57 +%} 36.58 + 36.59 +instruct addF_mem(regF dst, memory src) %{ 36.60 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.61 + match(Set dst (AddF dst (LoadF src))); 36.62 + 36.63 + format %{ "addss $dst, $src" %} 36.64 + ins_cost(150); 36.65 + ins_encode %{ 36.66 + __ addss($dst$$XMMRegister, $src$$Address); 36.67 + %} 36.68 + ins_pipe(pipe_slow); 36.69 +%} 36.70 + 36.71 +instruct addF_imm(regF dst, immF con) %{ 36.72 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.73 + match(Set dst (AddF dst con)); 36.74 + format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 36.75 + ins_cost(150); 36.76 + ins_encode %{ 36.77 + __ addss($dst$$XMMRegister, $constantaddress($con)); 36.78 + %} 36.79 + ins_pipe(pipe_slow); 36.80 +%} 36.81 + 36.82 +instruct vaddF_reg(regF dst, regF src1, regF src2) %{ 36.83 
+ predicate(UseAVX > 0); 36.84 + match(Set dst (AddF src1 src2)); 36.85 + 36.86 + format %{ "vaddss $dst, $src1, $src2" %} 36.87 + ins_cost(150); 36.88 + ins_encode %{ 36.89 + __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 36.90 + %} 36.91 + ins_pipe(pipe_slow); 36.92 +%} 36.93 + 36.94 +instruct vaddF_mem(regF dst, regF src1, memory src2) %{ 36.95 + predicate(UseAVX > 0); 36.96 + match(Set dst (AddF src1 (LoadF src2))); 36.97 + 36.98 + format %{ "vaddss $dst, $src1, $src2" %} 36.99 + ins_cost(150); 36.100 + ins_encode %{ 36.101 + __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 36.102 + %} 36.103 + ins_pipe(pipe_slow); 36.104 +%} 36.105 + 36.106 +instruct vaddF_imm(regF dst, regF src, immF con) %{ 36.107 + predicate(UseAVX > 0); 36.108 + match(Set dst (AddF src con)); 36.109 + 36.110 + format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 36.111 + ins_cost(150); 36.112 + ins_encode %{ 36.113 + __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 36.114 + %} 36.115 + ins_pipe(pipe_slow); 36.116 +%} 36.117 + 36.118 +instruct addD_reg(regD dst, regD src) %{ 36.119 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.120 + match(Set dst (AddD dst src)); 36.121 + 36.122 + format %{ "addsd $dst, $src" %} 36.123 + ins_cost(150); 36.124 + ins_encode %{ 36.125 + __ addsd($dst$$XMMRegister, $src$$XMMRegister); 36.126 + %} 36.127 + ins_pipe(pipe_slow); 36.128 +%} 36.129 + 36.130 +instruct addD_mem(regD dst, memory src) %{ 36.131 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.132 + match(Set dst (AddD dst (LoadD src))); 36.133 + 36.134 + format %{ "addsd $dst, $src" %} 36.135 + ins_cost(150); 36.136 + ins_encode %{ 36.137 + __ addsd($dst$$XMMRegister, $src$$Address); 36.138 + %} 36.139 + ins_pipe(pipe_slow); 36.140 +%} 36.141 + 36.142 +instruct addD_imm(regD dst, immD con) %{ 36.143 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.144 + match(Set dst (AddD dst con)); 36.145 + format %{ 
"addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 36.146 + ins_cost(150); 36.147 + ins_encode %{ 36.148 + __ addsd($dst$$XMMRegister, $constantaddress($con)); 36.149 + %} 36.150 + ins_pipe(pipe_slow); 36.151 +%} 36.152 + 36.153 +instruct vaddD_reg(regD dst, regD src1, regD src2) %{ 36.154 + predicate(UseAVX > 0); 36.155 + match(Set dst (AddD src1 src2)); 36.156 + 36.157 + format %{ "vaddsd $dst, $src1, $src2" %} 36.158 + ins_cost(150); 36.159 + ins_encode %{ 36.160 + __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 36.161 + %} 36.162 + ins_pipe(pipe_slow); 36.163 +%} 36.164 + 36.165 +instruct vaddD_mem(regD dst, regD src1, memory src2) %{ 36.166 + predicate(UseAVX > 0); 36.167 + match(Set dst (AddD src1 (LoadD src2))); 36.168 + 36.169 + format %{ "vaddsd $dst, $src1, $src2" %} 36.170 + ins_cost(150); 36.171 + ins_encode %{ 36.172 + __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 36.173 + %} 36.174 + ins_pipe(pipe_slow); 36.175 +%} 36.176 + 36.177 +instruct vaddD_imm(regD dst, regD src, immD con) %{ 36.178 + predicate(UseAVX > 0); 36.179 + match(Set dst (AddD src con)); 36.180 + 36.181 + format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 36.182 + ins_cost(150); 36.183 + ins_encode %{ 36.184 + __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 36.185 + %} 36.186 + ins_pipe(pipe_slow); 36.187 +%} 36.188 + 36.189 +instruct subF_reg(regF dst, regF src) %{ 36.190 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.191 + match(Set dst (SubF dst src)); 36.192 + 36.193 + format %{ "subss $dst, $src" %} 36.194 + ins_cost(150); 36.195 + ins_encode %{ 36.196 + __ subss($dst$$XMMRegister, $src$$XMMRegister); 36.197 + %} 36.198 + ins_pipe(pipe_slow); 36.199 +%} 36.200 + 36.201 +instruct subF_mem(regF dst, memory src) %{ 36.202 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.203 + match(Set dst (SubF dst (LoadF src))); 36.204 + 36.205 + format %{ "subss $dst, 
$src" %} 36.206 + ins_cost(150); 36.207 + ins_encode %{ 36.208 + __ subss($dst$$XMMRegister, $src$$Address); 36.209 + %} 36.210 + ins_pipe(pipe_slow); 36.211 +%} 36.212 + 36.213 +instruct subF_imm(regF dst, immF con) %{ 36.214 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.215 + match(Set dst (SubF dst con)); 36.216 + format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 36.217 + ins_cost(150); 36.218 + ins_encode %{ 36.219 + __ subss($dst$$XMMRegister, $constantaddress($con)); 36.220 + %} 36.221 + ins_pipe(pipe_slow); 36.222 +%} 36.223 + 36.224 +instruct vsubF_reg(regF dst, regF src1, regF src2) %{ 36.225 + predicate(UseAVX > 0); 36.226 + match(Set dst (SubF src1 src2)); 36.227 + 36.228 + format %{ "vsubss $dst, $src1, $src2" %} 36.229 + ins_cost(150); 36.230 + ins_encode %{ 36.231 + __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 36.232 + %} 36.233 + ins_pipe(pipe_slow); 36.234 +%} 36.235 + 36.236 +instruct vsubF_mem(regF dst, regF src1, memory src2) %{ 36.237 + predicate(UseAVX > 0); 36.238 + match(Set dst (SubF src1 (LoadF src2))); 36.239 + 36.240 + format %{ "vsubss $dst, $src1, $src2" %} 36.241 + ins_cost(150); 36.242 + ins_encode %{ 36.243 + __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 36.244 + %} 36.245 + ins_pipe(pipe_slow); 36.246 +%} 36.247 + 36.248 +instruct vsubF_imm(regF dst, regF src, immF con) %{ 36.249 + predicate(UseAVX > 0); 36.250 + match(Set dst (SubF src con)); 36.251 + 36.252 + format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 36.253 + ins_cost(150); 36.254 + ins_encode %{ 36.255 + __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 36.256 + %} 36.257 + ins_pipe(pipe_slow); 36.258 +%} 36.259 + 36.260 +instruct subD_reg(regD dst, regD src) %{ 36.261 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.262 + match(Set dst (SubD dst src)); 36.263 + 36.264 + format %{ "subsd $dst, $src" %} 36.265 + ins_cost(150); 
36.266 + ins_encode %{ 36.267 + __ subsd($dst$$XMMRegister, $src$$XMMRegister); 36.268 + %} 36.269 + ins_pipe(pipe_slow); 36.270 +%} 36.271 + 36.272 +instruct subD_mem(regD dst, memory src) %{ 36.273 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.274 + match(Set dst (SubD dst (LoadD src))); 36.275 + 36.276 + format %{ "subsd $dst, $src" %} 36.277 + ins_cost(150); 36.278 + ins_encode %{ 36.279 + __ subsd($dst$$XMMRegister, $src$$Address); 36.280 + %} 36.281 + ins_pipe(pipe_slow); 36.282 +%} 36.283 + 36.284 +instruct subD_imm(regD dst, immD con) %{ 36.285 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.286 + match(Set dst (SubD dst con)); 36.287 + format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 36.288 + ins_cost(150); 36.289 + ins_encode %{ 36.290 + __ subsd($dst$$XMMRegister, $constantaddress($con)); 36.291 + %} 36.292 + ins_pipe(pipe_slow); 36.293 +%} 36.294 + 36.295 +instruct vsubD_reg(regD dst, regD src1, regD src2) %{ 36.296 + predicate(UseAVX > 0); 36.297 + match(Set dst (SubD src1 src2)); 36.298 + 36.299 + format %{ "vsubsd $dst, $src1, $src2" %} 36.300 + ins_cost(150); 36.301 + ins_encode %{ 36.302 + __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 36.303 + %} 36.304 + ins_pipe(pipe_slow); 36.305 +%} 36.306 + 36.307 +instruct vsubD_mem(regD dst, regD src1, memory src2) %{ 36.308 + predicate(UseAVX > 0); 36.309 + match(Set dst (SubD src1 (LoadD src2))); 36.310 + 36.311 + format %{ "vsubsd $dst, $src1, $src2" %} 36.312 + ins_cost(150); 36.313 + ins_encode %{ 36.314 + __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 36.315 + %} 36.316 + ins_pipe(pipe_slow); 36.317 +%} 36.318 + 36.319 +instruct vsubD_imm(regD dst, regD src, immD con) %{ 36.320 + predicate(UseAVX > 0); 36.321 + match(Set dst (SubD src con)); 36.322 + 36.323 + format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 36.324 + ins_cost(150); 36.325 + ins_encode %{ 36.326 + __ 
vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 36.327 + %} 36.328 + ins_pipe(pipe_slow); 36.329 +%} 36.330 + 36.331 +instruct mulF_reg(regF dst, regF src) %{ 36.332 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.333 + match(Set dst (MulF dst src)); 36.334 + 36.335 + format %{ "mulss $dst, $src" %} 36.336 + ins_cost(150); 36.337 + ins_encode %{ 36.338 + __ mulss($dst$$XMMRegister, $src$$XMMRegister); 36.339 + %} 36.340 + ins_pipe(pipe_slow); 36.341 +%} 36.342 + 36.343 +instruct mulF_mem(regF dst, memory src) %{ 36.344 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.345 + match(Set dst (MulF dst (LoadF src))); 36.346 + 36.347 + format %{ "mulss $dst, $src" %} 36.348 + ins_cost(150); 36.349 + ins_encode %{ 36.350 + __ mulss($dst$$XMMRegister, $src$$Address); 36.351 + %} 36.352 + ins_pipe(pipe_slow); 36.353 +%} 36.354 + 36.355 +instruct mulF_imm(regF dst, immF con) %{ 36.356 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.357 + match(Set dst (MulF dst con)); 36.358 + format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 36.359 + ins_cost(150); 36.360 + ins_encode %{ 36.361 + __ mulss($dst$$XMMRegister, $constantaddress($con)); 36.362 + %} 36.363 + ins_pipe(pipe_slow); 36.364 +%} 36.365 + 36.366 +instruct vmulF_reg(regF dst, regF src1, regF src2) %{ 36.367 + predicate(UseAVX > 0); 36.368 + match(Set dst (MulF src1 src2)); 36.369 + 36.370 + format %{ "vmulss $dst, $src1, $src2" %} 36.371 + ins_cost(150); 36.372 + ins_encode %{ 36.373 + __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 36.374 + %} 36.375 + ins_pipe(pipe_slow); 36.376 +%} 36.377 + 36.378 +instruct vmulF_mem(regF dst, regF src1, memory src2) %{ 36.379 + predicate(UseAVX > 0); 36.380 + match(Set dst (MulF src1 (LoadF src2))); 36.381 + 36.382 + format %{ "vmulss $dst, $src1, $src2" %} 36.383 + ins_cost(150); 36.384 + ins_encode %{ 36.385 + __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 36.386 + %} 36.387 + 
ins_pipe(pipe_slow); 36.388 +%} 36.389 + 36.390 +instruct vmulF_imm(regF dst, regF src, immF con) %{ 36.391 + predicate(UseAVX > 0); 36.392 + match(Set dst (MulF src con)); 36.393 + 36.394 + format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 36.395 + ins_cost(150); 36.396 + ins_encode %{ 36.397 + __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 36.398 + %} 36.399 + ins_pipe(pipe_slow); 36.400 +%} 36.401 + 36.402 +instruct mulD_reg(regD dst, regD src) %{ 36.403 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.404 + match(Set dst (MulD dst src)); 36.405 + 36.406 + format %{ "mulsd $dst, $src" %} 36.407 + ins_cost(150); 36.408 + ins_encode %{ 36.409 + __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 36.410 + %} 36.411 + ins_pipe(pipe_slow); 36.412 +%} 36.413 + 36.414 +instruct mulD_mem(regD dst, memory src) %{ 36.415 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.416 + match(Set dst (MulD dst (LoadD src))); 36.417 + 36.418 + format %{ "mulsd $dst, $src" %} 36.419 + ins_cost(150); 36.420 + ins_encode %{ 36.421 + __ mulsd($dst$$XMMRegister, $src$$Address); 36.422 + %} 36.423 + ins_pipe(pipe_slow); 36.424 +%} 36.425 + 36.426 +instruct mulD_imm(regD dst, immD con) %{ 36.427 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.428 + match(Set dst (MulD dst con)); 36.429 + format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 36.430 + ins_cost(150); 36.431 + ins_encode %{ 36.432 + __ mulsd($dst$$XMMRegister, $constantaddress($con)); 36.433 + %} 36.434 + ins_pipe(pipe_slow); 36.435 +%} 36.436 + 36.437 +instruct vmulD_reg(regD dst, regD src1, regD src2) %{ 36.438 + predicate(UseAVX > 0); 36.439 + match(Set dst (MulD src1 src2)); 36.440 + 36.441 + format %{ "vmulsd $dst, $src1, $src2" %} 36.442 + ins_cost(150); 36.443 + ins_encode %{ 36.444 + __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 36.445 + %} 36.446 + ins_pipe(pipe_slow); 36.447 +%} 36.448 + 36.449 +instruct 
vmulD_mem(regD dst, regD src1, memory src2) %{ 36.450 + predicate(UseAVX > 0); 36.451 + match(Set dst (MulD src1 (LoadD src2))); 36.452 + 36.453 + format %{ "vmulsd $dst, $src1, $src2" %} 36.454 + ins_cost(150); 36.455 + ins_encode %{ 36.456 + __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 36.457 + %} 36.458 + ins_pipe(pipe_slow); 36.459 +%} 36.460 + 36.461 +instruct vmulD_imm(regD dst, regD src, immD con) %{ 36.462 + predicate(UseAVX > 0); 36.463 + match(Set dst (MulD src con)); 36.464 + 36.465 + format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 36.466 + ins_cost(150); 36.467 + ins_encode %{ 36.468 + __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 36.469 + %} 36.470 + ins_pipe(pipe_slow); 36.471 +%} 36.472 + 36.473 +instruct divF_reg(regF dst, regF src) %{ 36.474 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.475 + match(Set dst (DivF dst src)); 36.476 + 36.477 + format %{ "divss $dst, $src" %} 36.478 + ins_cost(150); 36.479 + ins_encode %{ 36.480 + __ divss($dst$$XMMRegister, $src$$XMMRegister); 36.481 + %} 36.482 + ins_pipe(pipe_slow); 36.483 +%} 36.484 + 36.485 +instruct divF_mem(regF dst, memory src) %{ 36.486 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.487 + match(Set dst (DivF dst (LoadF src))); 36.488 + 36.489 + format %{ "divss $dst, $src" %} 36.490 + ins_cost(150); 36.491 + ins_encode %{ 36.492 + __ divss($dst$$XMMRegister, $src$$Address); 36.493 + %} 36.494 + ins_pipe(pipe_slow); 36.495 +%} 36.496 + 36.497 +instruct divF_imm(regF dst, immF con) %{ 36.498 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.499 + match(Set dst (DivF dst con)); 36.500 + format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 36.501 + ins_cost(150); 36.502 + ins_encode %{ 36.503 + __ divss($dst$$XMMRegister, $constantaddress($con)); 36.504 + %} 36.505 + ins_pipe(pipe_slow); 36.506 +%} 36.507 + 36.508 +instruct vdivF_reg(regF dst, regF src1, regF src2) %{ 36.509 + 
predicate(UseAVX > 0); 36.510 + match(Set dst (DivF src1 src2)); 36.511 + 36.512 + format %{ "vdivss $dst, $src1, $src2" %} 36.513 + ins_cost(150); 36.514 + ins_encode %{ 36.515 + __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 36.516 + %} 36.517 + ins_pipe(pipe_slow); 36.518 +%} 36.519 + 36.520 +instruct vdivF_mem(regF dst, regF src1, memory src2) %{ 36.521 + predicate(UseAVX > 0); 36.522 + match(Set dst (DivF src1 (LoadF src2))); 36.523 + 36.524 + format %{ "vdivss $dst, $src1, $src2" %} 36.525 + ins_cost(150); 36.526 + ins_encode %{ 36.527 + __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 36.528 + %} 36.529 + ins_pipe(pipe_slow); 36.530 +%} 36.531 + 36.532 +instruct vdivF_imm(regF dst, regF src, immF con) %{ 36.533 + predicate(UseAVX > 0); 36.534 + match(Set dst (DivF src con)); 36.535 + 36.536 + format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 36.537 + ins_cost(150); 36.538 + ins_encode %{ 36.539 + __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 36.540 + %} 36.541 + ins_pipe(pipe_slow); 36.542 +%} 36.543 + 36.544 +instruct divD_reg(regD dst, regD src) %{ 36.545 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.546 + match(Set dst (DivD dst src)); 36.547 + 36.548 + format %{ "divsd $dst, $src" %} 36.549 + ins_cost(150); 36.550 + ins_encode %{ 36.551 + __ divsd($dst$$XMMRegister, $src$$XMMRegister); 36.552 + %} 36.553 + ins_pipe(pipe_slow); 36.554 +%} 36.555 + 36.556 +instruct divD_mem(regD dst, memory src) %{ 36.557 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.558 + match(Set dst (DivD dst (LoadD src))); 36.559 + 36.560 + format %{ "divsd $dst, $src" %} 36.561 + ins_cost(150); 36.562 + ins_encode %{ 36.563 + __ divsd($dst$$XMMRegister, $src$$Address); 36.564 + %} 36.565 + ins_pipe(pipe_slow); 36.566 +%} 36.567 + 36.568 +instruct divD_imm(regD dst, immD con) %{ 36.569 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.570 + match(Set dst (DivD dst con)); 36.571 + 
format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 36.572 + ins_cost(150); 36.573 + ins_encode %{ 36.574 + __ divsd($dst$$XMMRegister, $constantaddress($con)); 36.575 + %} 36.576 + ins_pipe(pipe_slow); 36.577 +%} 36.578 + 36.579 +instruct vdivD_reg(regD dst, regD src1, regD src2) %{ 36.580 + predicate(UseAVX > 0); 36.581 + match(Set dst (DivD src1 src2)); 36.582 + 36.583 + format %{ "vdivsd $dst, $src1, $src2" %} 36.584 + ins_cost(150); 36.585 + ins_encode %{ 36.586 + __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 36.587 + %} 36.588 + ins_pipe(pipe_slow); 36.589 +%} 36.590 + 36.591 +instruct vdivD_mem(regD dst, regD src1, memory src2) %{ 36.592 + predicate(UseAVX > 0); 36.593 + match(Set dst (DivD src1 (LoadD src2))); 36.594 + 36.595 + format %{ "vdivsd $dst, $src1, $src2" %} 36.596 + ins_cost(150); 36.597 + ins_encode %{ 36.598 + __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 36.599 + %} 36.600 + ins_pipe(pipe_slow); 36.601 +%} 36.602 + 36.603 +instruct vdivD_imm(regD dst, regD src, immD con) %{ 36.604 + predicate(UseAVX > 0); 36.605 + match(Set dst (DivD src con)); 36.606 + 36.607 + format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 36.608 + ins_cost(150); 36.609 + ins_encode %{ 36.610 + __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 36.611 + %} 36.612 + ins_pipe(pipe_slow); 36.613 +%} 36.614 + 36.615 +instruct absF_reg(regF dst) %{ 36.616 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.617 + match(Set dst (AbsF dst)); 36.618 + ins_cost(150); 36.619 + format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 36.620 + ins_encode %{ 36.621 + __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 36.622 + %} 36.623 + ins_pipe(pipe_slow); 36.624 +%} 36.625 + 36.626 +instruct vabsF_reg(regF dst, regF src) %{ 36.627 + predicate(UseAVX > 0); 36.628 + match(Set dst (AbsF src)); 36.629 + ins_cost(150); 36.630 
+ format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 36.631 + ins_encode %{ 36.632 + __ vandps($dst$$XMMRegister, $src$$XMMRegister, 36.633 + ExternalAddress(float_signmask())); 36.634 + %} 36.635 + ins_pipe(pipe_slow); 36.636 +%} 36.637 + 36.638 +instruct absD_reg(regD dst) %{ 36.639 + predicate((UseSSE>=2) && (UseAVX == 0)); 36.640 + match(Set dst (AbsD dst)); 36.641 + ins_cost(150); 36.642 + format %{ "andpd $dst, [0x7fffffffffffffff]\t" 36.643 + "# abs double by sign masking" %} 36.644 + ins_encode %{ 36.645 + __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 36.646 + %} 36.647 + ins_pipe(pipe_slow); 36.648 +%} 36.649 + 36.650 +instruct vabsD_reg(regD dst, regD src) %{ 36.651 + predicate(UseAVX > 0); 36.652 + match(Set dst (AbsD src)); 36.653 + ins_cost(150); 36.654 + format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 36.655 + "# abs double by sign masking" %} 36.656 + ins_encode %{ 36.657 + __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 36.658 + ExternalAddress(double_signmask())); 36.659 + %} 36.660 + ins_pipe(pipe_slow); 36.661 +%} 36.662 + 36.663 +instruct negF_reg(regF dst) %{ 36.664 + predicate((UseSSE>=1) && (UseAVX == 0)); 36.665 + match(Set dst (NegF dst)); 36.666 + ins_cost(150); 36.667 + format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 36.668 + ins_encode %{ 36.669 + __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 36.670 + %} 36.671 + ins_pipe(pipe_slow); 36.672 +%} 36.673 + 36.674 +instruct vnegF_reg(regF dst, regF src) %{ 36.675 + predicate(UseAVX > 0); 36.676 + match(Set dst (NegF src)); 36.677 + ins_cost(150); 36.678 + format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 36.679 + ins_encode %{ 36.680 + __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 36.681 + ExternalAddress(float_signflip())); 36.682 + %} 36.683 + ins_pipe(pipe_slow); 36.684 +%} 36.685 + 36.686 +instruct negD_reg(regD dst) %{ 36.687 + predicate((UseSSE>=2) && (UseAVX == 0)); 
36.688 + match(Set dst (NegD dst)); 36.689 + ins_cost(150); 36.690 + format %{ "xorpd $dst, [0x8000000000000000]\t" 36.691 + "# neg double by sign flipping" %} 36.692 + ins_encode %{ 36.693 + __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 36.694 + %} 36.695 + ins_pipe(pipe_slow); 36.696 +%} 36.697 + 36.698 +instruct vnegD_reg(regD dst, regD src) %{ 36.699 + predicate(UseAVX > 0); 36.700 + match(Set dst (NegD src)); 36.701 + ins_cost(150); 36.702 + format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 36.703 + "# neg double by sign flipping" %} 36.704 + ins_encode %{ 36.705 + __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 36.706 + ExternalAddress(double_signflip())); 36.707 + %} 36.708 + ins_pipe(pipe_slow); 36.709 +%} 36.710 + 36.711 +instruct sqrtF_reg(regF dst, regF src) %{ 36.712 + predicate(UseSSE>=1); 36.713 + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 36.714 + 36.715 + format %{ "sqrtss $dst, $src" %} 36.716 + ins_cost(150); 36.717 + ins_encode %{ 36.718 + __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 36.719 + %} 36.720 + ins_pipe(pipe_slow); 36.721 +%} 36.722 + 36.723 +instruct sqrtF_mem(regF dst, memory src) %{ 36.724 + predicate(UseSSE>=1); 36.725 + match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 36.726 + 36.727 + format %{ "sqrtss $dst, $src" %} 36.728 + ins_cost(150); 36.729 + ins_encode %{ 36.730 + __ sqrtss($dst$$XMMRegister, $src$$Address); 36.731 + %} 36.732 + ins_pipe(pipe_slow); 36.733 +%} 36.734 + 36.735 +instruct sqrtF_imm(regF dst, immF con) %{ 36.736 + predicate(UseSSE>=1); 36.737 + match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 36.738 + format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 36.739 + ins_cost(150); 36.740 + ins_encode %{ 36.741 + __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 36.742 + %} 36.743 + ins_pipe(pipe_slow); 36.744 +%} 36.745 + 36.746 +instruct sqrtD_reg(regD dst, regD src) %{ 36.747 + predicate(UseSSE>=2); 36.748 + match(Set dst (SqrtD src)); 
36.749 + 36.750 + format %{ "sqrtsd $dst, $src" %} 36.751 + ins_cost(150); 36.752 + ins_encode %{ 36.753 + __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 36.754 + %} 36.755 + ins_pipe(pipe_slow); 36.756 +%} 36.757 + 36.758 +instruct sqrtD_mem(regD dst, memory src) %{ 36.759 + predicate(UseSSE>=2); 36.760 + match(Set dst (SqrtD (LoadD src))); 36.761 + 36.762 + format %{ "sqrtsd $dst, $src" %} 36.763 + ins_cost(150); 36.764 + ins_encode %{ 36.765 + __ sqrtsd($dst$$XMMRegister, $src$$Address); 36.766 + %} 36.767 + ins_pipe(pipe_slow); 36.768 +%} 36.769 + 36.770 +instruct sqrtD_imm(regD dst, immD con) %{ 36.771 + predicate(UseSSE>=2); 36.772 + match(Set dst (SqrtD con)); 36.773 + format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 36.774 + ins_cost(150); 36.775 + ins_encode %{ 36.776 + __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 36.777 + %} 36.778 + ins_pipe(pipe_slow); 36.779 +%} 36.780 +
37.1 --- a/src/cpu/x86/vm/x86_32.ad Tue Dec 27 12:38:49 2011 -0800 37.2 +++ b/src/cpu/x86/vm/x86_32.ad Thu Dec 29 11:37:50 2011 -0800 37.3 @@ -281,7 +281,7 @@ 37.4 } 37.5 37.6 static int preserve_SP_size() { 37.7 - return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg) 37.8 + return 2; // op, rm(reg/reg) 37.9 } 37.10 37.11 // !!!!! Special hack to get all type of calls to specify the byte offset 37.12 @@ -495,14 +495,34 @@ 37.13 } 37.14 } 37.15 37.16 -void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 37.17 - if( dst_encoding == src_encoding ) { 37.18 - // reg-reg copy, use an empty encoding 37.19 - } else { 37.20 - MacroAssembler _masm(&cbuf); 37.21 - 37.22 - __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding)); 37.23 - } 37.24 +void emit_cmpfp_fixup(MacroAssembler& _masm) { 37.25 + Label exit; 37.26 + __ jccb(Assembler::noParity, exit); 37.27 + __ pushf(); 37.28 + // 37.29 + // comiss/ucomiss instructions set ZF,PF,CF flags and 37.30 + // zero OF,AF,SF for NaN values. 37.31 + // Fixup flags by zeroing ZF,PF so that compare of NaN 37.32 + // values returns 'less than' result (CF is set). 37.33 + // Leave the rest of flags unchanged. 37.34 + // 37.35 + // 7 6 5 4 3 2 1 0 37.36 + // |S|Z|r|A|r|P|r|C| (r - reserved bit) 37.37 + // 0 0 1 0 1 0 1 1 (0x2B) 37.38 + // 37.39 + __ andl(Address(rsp, 0), 0xffffff2b); 37.40 + __ popf(); 37.41 + __ bind(exit); 37.42 +} 37.43 + 37.44 +void emit_cmpfp3(MacroAssembler& _masm, Register dst) { 37.45 + Label done; 37.46 + __ movl(dst, -1); 37.47 + __ jcc(Assembler::parity, done); 37.48 + __ jcc(Assembler::below, done); 37.49 + __ setb(Assembler::notEqual, dst); 37.50 + __ movzbl(dst, dst); 37.51 + __ bind(done); 37.52 } 37.53 37.54 37.55 @@ -792,92 +812,88 @@ 37.56 // Helper for XMM registers. Extra opcode bits, limited syntax. 
37.57 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, 37.58 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { 37.59 - if( cbuf ) { 37.60 - if( reg_lo+1 == reg_hi ) { // double move? 37.61 - if( is_load && !UseXmmLoadAndClearUpper ) 37.62 - emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load 37.63 - else 37.64 - emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise 37.65 + if (cbuf) { 37.66 + MacroAssembler _masm(cbuf); 37.67 + if (reg_lo+1 == reg_hi) { // double move? 37.68 + if (is_load) { 37.69 + __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 37.70 + } else { 37.71 + __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 37.72 + } 37.73 } else { 37.74 - emit_opcode(*cbuf, 0xF3 ); 37.75 + if (is_load) { 37.76 + __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); 37.77 + } else { 37.78 + __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); 37.79 + } 37.80 } 37.81 - emit_opcode(*cbuf, 0x0F ); 37.82 - if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper ) 37.83 - emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load 37.84 - else 37.85 - emit_opcode(*cbuf, is_load ? 0x10 : 0x11 ); 37.86 - encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false); 37.87 #ifndef PRODUCT 37.88 - } else if( !do_size ) { 37.89 - if( size != 0 ) st->print("\n\t"); 37.90 - if( reg_lo+1 == reg_hi ) { // double move? 37.91 - if( is_load ) st->print("%s %s,[ESP + #%d]", 37.92 - UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", 37.93 - Matcher::regName[reg_lo], offset); 37.94 - else st->print("MOVSD [ESP + #%d],%s", 37.95 - offset, Matcher::regName[reg_lo]); 37.96 + } else if (!do_size) { 37.97 + if (size != 0) st->print("\n\t"); 37.98 + if (reg_lo+1 == reg_hi) { // double move? 37.99 + if (is_load) st->print("%s %s,[ESP + #%d]", 37.100 + UseXmmLoadAndClearUpper ? 
"MOVSD " : "MOVLPD", 37.101 + Matcher::regName[reg_lo], offset); 37.102 + else st->print("MOVSD [ESP + #%d],%s", 37.103 + offset, Matcher::regName[reg_lo]); 37.104 } else { 37.105 - if( is_load ) st->print("MOVSS %s,[ESP + #%d]", 37.106 - Matcher::regName[reg_lo], offset); 37.107 - else st->print("MOVSS [ESP + #%d],%s", 37.108 - offset, Matcher::regName[reg_lo]); 37.109 + if (is_load) st->print("MOVSS %s,[ESP + #%d]", 37.110 + Matcher::regName[reg_lo], offset); 37.111 + else st->print("MOVSS [ESP + #%d],%s", 37.112 + offset, Matcher::regName[reg_lo]); 37.113 } 37.114 #endif 37.115 } 37.116 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 37.117 + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes. 37.118 return size+5+offset_size; 37.119 } 37.120 37.121 37.122 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 37.123 int src_hi, int dst_hi, int size, outputStream* st ) { 37.124 - if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers 37.125 - if( cbuf ) { 37.126 - if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) { 37.127 - emit_opcode(*cbuf, 0x66 ); 37.128 - } 37.129 - emit_opcode(*cbuf, 0x0F ); 37.130 - emit_opcode(*cbuf, 0x28 ); 37.131 - emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); 37.132 + if (cbuf) { 37.133 + MacroAssembler _masm(cbuf); 37.134 + if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? 37.135 + __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), 37.136 + as_XMMRegister(Matcher::_regEncode[src_lo])); 37.137 + } else { 37.138 + __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), 37.139 + as_XMMRegister(Matcher::_regEncode[src_lo])); 37.140 + } 37.141 #ifndef PRODUCT 37.142 - } else if( !do_size ) { 37.143 - if( size != 0 ) st->print("\n\t"); 37.144 - if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 
37.145 + } else if (!do_size) { 37.146 + if (size != 0) st->print("\n\t"); 37.147 + if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers 37.148 + if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? 37.149 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 37.150 } else { 37.151 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 37.152 } 37.153 -#endif 37.154 - } 37.155 - return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3); 37.156 - } else { 37.157 - if( cbuf ) { 37.158 - emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 ); 37.159 - emit_opcode(*cbuf, 0x0F ); 37.160 - emit_opcode(*cbuf, 0x10 ); 37.161 - emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); 37.162 -#ifndef PRODUCT 37.163 - } else if( !do_size ) { 37.164 - if( size != 0 ) st->print("\n\t"); 37.165 + } else { 37.166 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 37.167 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 37.168 } else { 37.169 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 37.170 } 37.171 + } 37.172 #endif 37.173 - } 37.174 - return size+4; 37.175 } 37.176 + // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes. 37.177 + // Only MOVAPS SSE prefix uses 1 byte. 
37.178 + int sz = 4; 37.179 + if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && 37.180 + UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; 37.181 + return size + sz; 37.182 } 37.183 37.184 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 37.185 int src_hi, int dst_hi, int size, outputStream* st ) { 37.186 // 32-bit 37.187 if (cbuf) { 37.188 - emit_opcode(*cbuf, 0x66); 37.189 - emit_opcode(*cbuf, 0x0F); 37.190 - emit_opcode(*cbuf, 0x6E); 37.191 - emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7); 37.192 + MacroAssembler _masm(cbuf); 37.193 + __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), 37.194 + as_Register(Matcher::_regEncode[src_lo])); 37.195 #ifndef PRODUCT 37.196 } else if (!do_size) { 37.197 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 37.198 @@ -891,10 +907,9 @@ 37.199 int src_hi, int dst_hi, int size, outputStream* st ) { 37.200 // 32-bit 37.201 if (cbuf) { 37.202 - emit_opcode(*cbuf, 0x66); 37.203 - emit_opcode(*cbuf, 0x0F); 37.204 - emit_opcode(*cbuf, 0x7E); 37.205 - emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7); 37.206 + MacroAssembler _masm(cbuf); 37.207 + __ movdl(as_Register(Matcher::_regEncode[dst_lo]), 37.208 + as_XMMRegister(Matcher::_regEncode[src_lo])); 37.209 #ifndef PRODUCT 37.210 } else if (!do_size) { 37.211 st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); 37.212 @@ -1760,7 +1775,7 @@ 37.213 emit_cc(cbuf, $secondary, $cop$$cmpcode); 37.214 %} 37.215 37.216 - enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV 37.217 + enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 37.218 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 37.219 emit_d8(cbuf, op >> 8 ); 37.220 emit_d8(cbuf, op & 255); 37.221 @@ -1931,11 +1946,6 @@ 37.222 37.223 %} 37.224 37.225 - enc_class Xor_Reg (eRegI dst) %{ 37.226 - emit_opcode(cbuf, 0x33); 37.227 - 
emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); 37.228 - %} 37.229 - 37.230 // Following encoding is no longer used, but may be restored if calling 37.231 // convention changes significantly. 37.232 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 37.233 @@ -2013,64 +2023,6 @@ 37.234 %} 37.235 37.236 37.237 - enc_class MovI2X_reg(regX dst, eRegI src) %{ 37.238 - emit_opcode(cbuf, 0x66 ); // MOVD dst,src 37.239 - emit_opcode(cbuf, 0x0F ); 37.240 - emit_opcode(cbuf, 0x6E ); 37.241 - emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 37.242 - %} 37.243 - 37.244 - enc_class MovX2I_reg(eRegI dst, regX src) %{ 37.245 - emit_opcode(cbuf, 0x66 ); // MOVD dst,src 37.246 - emit_opcode(cbuf, 0x0F ); 37.247 - emit_opcode(cbuf, 0x7E ); 37.248 - emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); 37.249 - %} 37.250 - 37.251 - enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{ 37.252 - { // MOVD $dst,$src.lo 37.253 - emit_opcode(cbuf,0x66); 37.254 - emit_opcode(cbuf,0x0F); 37.255 - emit_opcode(cbuf,0x6E); 37.256 - emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 37.257 - } 37.258 - { // MOVD $tmp,$src.hi 37.259 - emit_opcode(cbuf,0x66); 37.260 - emit_opcode(cbuf,0x0F); 37.261 - emit_opcode(cbuf,0x6E); 37.262 - emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 37.263 - } 37.264 - { // PUNPCKLDQ $dst,$tmp 37.265 - emit_opcode(cbuf,0x66); 37.266 - emit_opcode(cbuf,0x0F); 37.267 - emit_opcode(cbuf,0x62); 37.268 - emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg); 37.269 - } 37.270 - %} 37.271 - 37.272 - enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{ 37.273 - { // MOVD $dst.lo,$src 37.274 - emit_opcode(cbuf,0x66); 37.275 - emit_opcode(cbuf,0x0F); 37.276 - emit_opcode(cbuf,0x7E); 37.277 - emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); 37.278 - } 37.279 - { // PSHUFLW $tmp,$src,0x4E (01001110b) 37.280 - emit_opcode(cbuf,0xF2); 37.281 - emit_opcode(cbuf,0x0F); 37.282 - emit_opcode(cbuf,0x70); 37.283 - emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 37.284 - emit_d8(cbuf, 0x4E); 37.285 - } 37.286 - { // MOVD 
$dst.hi,$tmp 37.287 - emit_opcode(cbuf,0x66); 37.288 - emit_opcode(cbuf,0x0F); 37.289 - emit_opcode(cbuf,0x7E); 37.290 - emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); 37.291 - } 37.292 - %} 37.293 - 37.294 - 37.295 // Encode a reg-reg copy. If it is useless, then empty encoding. 37.296 enc_class enc_Copy( eRegI dst, eRegI src ) %{ 37.297 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 37.298 @@ -2080,11 +2032,6 @@ 37.299 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 37.300 %} 37.301 37.302 - // Encode xmm reg-reg copy. If it is useless, then empty encoding. 37.303 - enc_class enc_CopyXD( RegXD dst, RegXD src ) %{ 37.304 - encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); 37.305 - %} 37.306 - 37.307 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) 37.308 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 37.309 %} 37.310 @@ -2116,14 +2063,14 @@ 37.311 $$$emit32$src$$constant; 37.312 %} 37.313 37.314 - enc_class Con32F_as_bits(immF src) %{ // storeF_imm 37.315 + enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm 37.316 // Output Float immediate bits 37.317 jfloat jf = $src$$constant; 37.318 int jf_as_bits = jint_cast( jf ); 37.319 emit_d32(cbuf, jf_as_bits); 37.320 %} 37.321 37.322 - enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm 37.323 + enc_class Con32F_as_bits(immF src) %{ // storeX_imm 37.324 // Output Float immediate bits 37.325 jfloat jf = $src$$constant; 37.326 int jf_as_bits = jint_cast( jf ); 37.327 @@ -2336,7 +2283,7 @@ 37.328 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 37.329 %} 37.330 37.331 - enc_class enc_FP_store(memory mem, regD src) %{ 37.332 + enc_class enc_FPR_store(memory mem, regDPR src) %{ 37.333 // If src is FPR1, we can just FST to store it. 37.334 // Else we need to FLD it to FPR1, then FSTP to store/pop it. 
37.335 int reg_encoding = 0x2; // Just store 37.336 @@ -2485,7 +2432,7 @@ 37.337 37.338 // ----------------- Encodings for floating point unit ----------------- 37.339 // May leave result in FPU-TOS or FPU reg depending on opcodes 37.340 - enc_class OpcReg_F (regF src) %{ // FMUL, FDIV 37.341 + enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV 37.342 $$$emit8$primary; 37.343 emit_rm(cbuf, 0x3, $secondary, $src$$reg ); 37.344 %} 37.345 @@ -2497,17 +2444,17 @@ 37.346 %} 37.347 37.348 // !!!!! equivalent to Pop_Reg_F 37.349 - enc_class Pop_Reg_D( regD dst ) %{ 37.350 + enc_class Pop_Reg_DPR( regDPR dst ) %{ 37.351 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 37.352 emit_d8( cbuf, 0xD8+$dst$$reg ); 37.353 %} 37.354 37.355 - enc_class Push_Reg_D( regD dst ) %{ 37.356 + enc_class Push_Reg_DPR( regDPR dst ) %{ 37.357 emit_opcode( cbuf, 0xD9 ); 37.358 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) 37.359 %} 37.360 37.361 - enc_class strictfp_bias1( regD dst ) %{ 37.362 + enc_class strictfp_bias1( regDPR dst ) %{ 37.363 emit_opcode( cbuf, 0xDB ); // FLD m80real 37.364 emit_opcode( cbuf, 0x2D ); 37.365 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); 37.366 @@ -2515,7 +2462,7 @@ 37.367 emit_opcode( cbuf, 0xC8+$dst$$reg ); 37.368 %} 37.369 37.370 - enc_class strictfp_bias2( regD dst ) %{ 37.371 + enc_class strictfp_bias2( regDPR dst ) %{ 37.372 emit_opcode( cbuf, 0xDB ); // FLD m80real 37.373 emit_opcode( cbuf, 0x2D ); 37.374 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); 37.375 @@ -2541,39 +2488,29 @@ 37.376 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 37.377 %} 37.378 37.379 - // Push the float in stackSlot 'src' onto FP-stack 37.380 - enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src] 37.381 - store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp ); 37.382 - %} 37.383 - 37.384 - // Push the double in stackSlot 'src' onto FP-stack 37.385 - enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src] 37.386 - store_to_stackslot( 
cbuf, 0xDD, 0x00, $src$$disp ); 37.387 - %} 37.388 - 37.389 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 37.390 - enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 37.391 + enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 37.392 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 37.393 %} 37.394 37.395 // Same as Pop_Mem_F except for opcode 37.396 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 37.397 - enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 37.398 + enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 37.399 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 37.400 %} 37.401 37.402 - enc_class Pop_Reg_F( regF dst ) %{ 37.403 + enc_class Pop_Reg_FPR( regFPR dst ) %{ 37.404 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 37.405 emit_d8( cbuf, 0xD8+$dst$$reg ); 37.406 %} 37.407 37.408 - enc_class Push_Reg_F( regF dst ) %{ 37.409 + enc_class Push_Reg_FPR( regFPR dst ) %{ 37.410 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 37.411 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 37.412 %} 37.413 37.414 // Push FPU's float to a stack-slot, and pop FPU-stack 37.415 - enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{ 37.416 + enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 37.417 int pop = 0x02; 37.418 if ($src$$reg != FPR1L_enc) { 37.419 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 37.420 @@ -2584,7 +2521,7 @@ 37.421 %} 37.422 37.423 // Push FPU's double to a stack-slot, and pop FPU-stack 37.424 - enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{ 37.425 + enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 37.426 int pop = 0x02; 37.427 if ($src$$reg != FPR1L_enc) { 37.428 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 37.429 @@ -2595,7 +2532,7 @@ 37.430 %} 37.431 37.432 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack 37.433 - enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{ 37.434 + enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 37.435 int pop = 0xD0 - 1; // -1 since we 
skip FLD 37.436 if ($src$$reg != FPR1L_enc) { 37.437 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 37.438 @@ -2607,16 +2544,7 @@ 37.439 %} 37.440 37.441 37.442 - enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{ 37.443 - MacroAssembler masm(&cbuf); 37.444 - masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg 37.445 - masm.fmul( $src2$$reg+0); // value at TOS 37.446 - masm.fadd( $src$$reg+0); // value at TOS 37.447 - masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store 37.448 - %} 37.449 - 37.450 - 37.451 - enc_class Push_Reg_Mod_D( regD dst, regD src) %{ 37.452 + enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 37.453 // load dst in FPR0 37.454 emit_opcode( cbuf, 0xD9 ); 37.455 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 37.456 @@ -2634,116 +2562,59 @@ 37.457 } 37.458 %} 37.459 37.460 - enc_class Push_ModD_encoding( regXD src0, regXD src1) %{ 37.461 - // Allocate a word 37.462 - emit_opcode(cbuf,0x83); // SUB ESP,8 37.463 - emit_opcode(cbuf,0xEC); 37.464 - emit_d8(cbuf,0x08); 37.465 - 37.466 - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1 37.467 - emit_opcode (cbuf, 0x0F ); 37.468 - emit_opcode (cbuf, 0x11 ); 37.469 - encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); 37.470 - 37.471 - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 37.472 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.473 - 37.474 - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0 37.475 - emit_opcode (cbuf, 0x0F ); 37.476 - emit_opcode (cbuf, 0x11 ); 37.477 - encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false); 37.478 - 37.479 - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 37.480 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.481 - 37.482 - %} 37.483 - 37.484 - enc_class Push_ModX_encoding( regX src0, regX src1) %{ 37.485 - // Allocate a word 37.486 - emit_opcode(cbuf,0x83); // SUB ESP,4 37.487 - emit_opcode(cbuf,0xEC); 37.488 - emit_d8(cbuf,0x04); 37.489 - 37.490 - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1 37.491 - 
emit_opcode (cbuf, 0x0F ); 37.492 - emit_opcode (cbuf, 0x11 ); 37.493 - encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); 37.494 - 37.495 - emit_opcode(cbuf,0xD9 ); // FLD [ESP] 37.496 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.497 - 37.498 - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0 37.499 - emit_opcode (cbuf, 0x0F ); 37.500 - emit_opcode (cbuf, 0x11 ); 37.501 - encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false); 37.502 - 37.503 - emit_opcode(cbuf,0xD9 ); // FLD [ESP] 37.504 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.505 - 37.506 - %} 37.507 - 37.508 - enc_class Push_ResultXD(regXD dst) %{ 37.509 - store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP] 37.510 - 37.511 - // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp] 37.512 - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 37.513 - emit_opcode (cbuf, 0x0F ); 37.514 - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12); 37.515 - encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); 37.516 - 37.517 - emit_opcode(cbuf,0x83); // ADD ESP,8 37.518 - emit_opcode(cbuf,0xC4); 37.519 - emit_d8(cbuf,0x08); 37.520 - %} 37.521 - 37.522 - enc_class Push_ResultX(regX dst, immI d8) %{ 37.523 - store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP] 37.524 - 37.525 - emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] 37.526 - emit_opcode (cbuf, 0x0F ); 37.527 - emit_opcode (cbuf, 0x10 ); 37.528 - encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); 37.529 - 37.530 - emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8) 37.531 - emit_opcode(cbuf,0xC4); 37.532 - emit_d8(cbuf,$d8$$constant); 37.533 - %} 37.534 - 37.535 - enc_class Push_SrcXD(regXD src) %{ 37.536 - // Allocate a word 37.537 - emit_opcode(cbuf,0x83); // SUB ESP,8 37.538 - emit_opcode(cbuf,0xEC); 37.539 - emit_d8(cbuf,0x08); 37.540 - 37.541 - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src 37.542 - emit_opcode (cbuf, 0x0F ); 37.543 - emit_opcode (cbuf, 0x11 ); 37.544 - 
encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 37.545 - 37.546 - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 37.547 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.548 + enc_class Push_ModD_encoding(regD src0, regD src1) %{ 37.549 + MacroAssembler _masm(&cbuf); 37.550 + __ subptr(rsp, 8); 37.551 + __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 37.552 + __ fld_d(Address(rsp, 0)); 37.553 + __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 37.554 + __ fld_d(Address(rsp, 0)); 37.555 + %} 37.556 + 37.557 + enc_class Push_ModF_encoding(regF src0, regF src1) %{ 37.558 + MacroAssembler _masm(&cbuf); 37.559 + __ subptr(rsp, 4); 37.560 + __ movflt(Address(rsp, 0), $src1$$XMMRegister); 37.561 + __ fld_s(Address(rsp, 0)); 37.562 + __ movflt(Address(rsp, 0), $src0$$XMMRegister); 37.563 + __ fld_s(Address(rsp, 0)); 37.564 + %} 37.565 + 37.566 + enc_class Push_ResultD(regD dst) %{ 37.567 + MacroAssembler _masm(&cbuf); 37.568 + __ fstp_d(Address(rsp, 0)); 37.569 + __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 37.570 + __ addptr(rsp, 8); 37.571 + %} 37.572 + 37.573 + enc_class Push_ResultF(regF dst, immI d8) %{ 37.574 + MacroAssembler _masm(&cbuf); 37.575 + __ fstp_s(Address(rsp, 0)); 37.576 + __ movflt($dst$$XMMRegister, Address(rsp, 0)); 37.577 + __ addptr(rsp, $d8$$constant); 37.578 + %} 37.579 + 37.580 + enc_class Push_SrcD(regD src) %{ 37.581 + MacroAssembler _masm(&cbuf); 37.582 + __ subptr(rsp, 8); 37.583 + __ movdbl(Address(rsp, 0), $src$$XMMRegister); 37.584 + __ fld_d(Address(rsp, 0)); 37.585 %} 37.586 37.587 enc_class push_stack_temp_qword() %{ 37.588 - emit_opcode(cbuf,0x83); // SUB ESP,8 37.589 - emit_opcode(cbuf,0xEC); 37.590 - emit_d8 (cbuf,0x08); 37.591 + MacroAssembler _masm(&cbuf); 37.592 + __ subptr(rsp, 8); 37.593 %} 37.594 37.595 enc_class pop_stack_temp_qword() %{ 37.596 - emit_opcode(cbuf,0x83); // ADD ESP,8 37.597 - emit_opcode(cbuf,0xC4); 37.598 - emit_d8 (cbuf,0x08); 37.599 - %} 37.600 - 37.601 - enc_class push_xmm_to_fpr1( regXD xmm_src 
) %{ 37.602 - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src 37.603 - emit_opcode (cbuf, 0x0F ); 37.604 - emit_opcode (cbuf, 0x11 ); 37.605 - encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false); 37.606 - 37.607 - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 37.608 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.609 + MacroAssembler _masm(&cbuf); 37.610 + __ addptr(rsp, 8); 37.611 + %} 37.612 + 37.613 + enc_class push_xmm_to_fpr1(regD src) %{ 37.614 + MacroAssembler _masm(&cbuf); 37.615 + __ movdbl(Address(rsp, 0), $src$$XMMRegister); 37.616 + __ fld_d(Address(rsp, 0)); 37.617 %} 37.618 37.619 // Compute X^Y using Intel's fast hardware instructions, if possible. 37.620 @@ -2785,10 +2656,7 @@ 37.621 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false); 37.622 %} 37.623 37.624 -// enc_class Pop_Reg_Mod_D( regD dst, regD src) 37.625 -// was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X() 37.626 - 37.627 - enc_class Push_Result_Mod_D( regD src) %{ 37.628 + enc_class Push_Result_Mod_DPR( regDPR src) %{ 37.629 if ($src$$reg != FPR1L_enc) { 37.630 // fincstp 37.631 emit_opcode (cbuf, 0xD9); 37.632 @@ -2817,7 +2685,7 @@ 37.633 emit_opcode( cbuf, 0x05 ); 37.634 %} 37.635 37.636 - enc_class emitModD() %{ 37.637 + enc_class emitModDPR() %{ 37.638 // fprem must be iterative 37.639 // :: loop 37.640 // fprem 37.641 @@ -2922,24 +2790,6 @@ 37.642 %} 37.643 37.644 37.645 - // XMM version of CmpF_Result. Because the XMM compare 37.646 - // instructions set the EFLAGS directly. It becomes simpler than 37.647 - // the float version above. 
37.648 - enc_class CmpX_Result(eRegI dst) %{ 37.649 - MacroAssembler _masm(&cbuf); 37.650 - Label nan, inc, done; 37.651 - 37.652 - __ jccb(Assembler::parity, nan); 37.653 - __ jccb(Assembler::equal, done); 37.654 - __ jccb(Assembler::above, inc); 37.655 - __ bind(nan); 37.656 - __ decrement(as_Register($dst$$reg)); // NO L qqq 37.657 - __ jmpb(done); 37.658 - __ bind(inc); 37.659 - __ increment(as_Register($dst$$reg)); // NO L qqq 37.660 - __ bind(done); 37.661 - %} 37.662 - 37.663 // Compare the longs and set flags 37.664 // BROKEN! Do Not use as-is 37.665 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 37.666 @@ -3162,48 +3012,6 @@ 37.667 emit_d8 (cbuf,0 ); 37.668 %} 37.669 37.670 - enc_class movq_ld(regXD dst, memory mem) %{ 37.671 - MacroAssembler _masm(&cbuf); 37.672 - __ movq($dst$$XMMRegister, $mem$$Address); 37.673 - %} 37.674 - 37.675 - enc_class movq_st(memory mem, regXD src) %{ 37.676 - MacroAssembler _masm(&cbuf); 37.677 - __ movq($mem$$Address, $src$$XMMRegister); 37.678 - %} 37.679 - 37.680 - enc_class pshufd_8x8(regX dst, regX src) %{ 37.681 - MacroAssembler _masm(&cbuf); 37.682 - 37.683 - encode_CopyXD(cbuf, $dst$$reg, $src$$reg); 37.684 - __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg)); 37.685 - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00); 37.686 - %} 37.687 - 37.688 - enc_class pshufd_4x16(regX dst, regX src) %{ 37.689 - MacroAssembler _masm(&cbuf); 37.690 - 37.691 - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00); 37.692 - %} 37.693 - 37.694 - enc_class pshufd(regXD dst, regXD src, int mode) %{ 37.695 - MacroAssembler _masm(&cbuf); 37.696 - 37.697 - __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode); 37.698 - %} 37.699 - 37.700 - enc_class pxor(regXD dst, regXD src) %{ 37.701 - MacroAssembler _masm(&cbuf); 37.702 - 37.703 - __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg)); 37.704 - %} 37.705 - 37.706 - enc_class mov_i2x(regXD dst, eRegI src) %{ 
37.707 - MacroAssembler _masm(&cbuf); 37.708 - 37.709 - __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg)); 37.710 - %} 37.711 - 37.712 37.713 // Because the transitions from emitted code to the runtime 37.714 // monitorenter/exit helper stubs are so slow it's critical that 37.715 @@ -3757,7 +3565,7 @@ 37.716 // 'zero', store the darned double down as an int, and reset the 37.717 // rounding mode to 'nearest'. The hardware throws an exception which 37.718 // patches up the correct value directly to the stack. 37.719 - enc_class D2I_encoding( regD src ) %{ 37.720 + enc_class DPR2I_encoding( regDPR src ) %{ 37.721 // Flip to round-to-zero mode. We attempted to allow invalid-op 37.722 // exceptions here, so that a NAN or other corner-case value will 37.723 // thrown an exception (but normal values get converted at full speed). 37.724 @@ -3800,7 +3608,7 @@ 37.725 // Carry on here... 37.726 %} 37.727 37.728 - enc_class D2L_encoding( regD src ) %{ 37.729 + enc_class DPR2L_encoding( regDPR src ) %{ 37.730 emit_opcode(cbuf,0xD9); // FLDCW trunc 37.731 emit_opcode(cbuf,0x2D); 37.732 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 37.733 @@ -3842,294 +3650,27 @@ 37.734 // Carry on here... 37.735 %} 37.736 37.737 - enc_class X2L_encoding( regX src ) %{ 37.738 - // Allocate a word 37.739 - emit_opcode(cbuf,0x83); // SUB ESP,8 37.740 - emit_opcode(cbuf,0xEC); 37.741 - emit_d8(cbuf,0x08); 37.742 - 37.743 - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src 37.744 - emit_opcode (cbuf, 0x0F ); 37.745 - emit_opcode (cbuf, 0x11 ); 37.746 - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 37.747 - 37.748 - emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] 37.749 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.750 - 37.751 - emit_opcode(cbuf,0xD9); // FLDCW trunc 37.752 - emit_opcode(cbuf,0x2D); 37.753 - emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 37.754 - 37.755 - // Encoding assumes a double has been pushed into FPR0. 
37.756 - // Store down the double as a long, popping the FPU stack 37.757 - emit_opcode(cbuf,0xDF); // FISTP [ESP] 37.758 - emit_opcode(cbuf,0x3C); 37.759 - emit_d8(cbuf,0x24); 37.760 - 37.761 - // Restore the rounding mode; mask the exception 37.762 - emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 37.763 - emit_opcode(cbuf,0x2D); 37.764 - emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 37.765 - ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 37.766 - : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 37.767 - 37.768 - // Load the converted int; adjust CPU stack 37.769 - emit_opcode(cbuf,0x58); // POP EAX 37.770 - 37.771 - emit_opcode(cbuf,0x5A); // POP EDX 37.772 - 37.773 - emit_opcode(cbuf,0x81); // CMP EDX,imm 37.774 - emit_d8 (cbuf,0xFA); // rdx 37.775 - emit_d32 (cbuf,0x80000000);// 0x80000000 37.776 - 37.777 - emit_opcode(cbuf,0x75); // JNE around_slow_call 37.778 - emit_d8 (cbuf,0x13+4); // Size of slow_call 37.779 - 37.780 - emit_opcode(cbuf,0x85); // TEST EAX,EAX 37.781 - emit_opcode(cbuf,0xC0); // 2/rax,/rax, 37.782 - 37.783 - emit_opcode(cbuf,0x75); // JNE around_slow_call 37.784 - emit_d8 (cbuf,0x13); // Size of slow_call 37.785 - 37.786 - // Allocate a word 37.787 - emit_opcode(cbuf,0x83); // SUB ESP,4 37.788 - emit_opcode(cbuf,0xEC); 37.789 - emit_d8(cbuf,0x04); 37.790 - 37.791 - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src 37.792 - emit_opcode (cbuf, 0x0F ); 37.793 - emit_opcode (cbuf, 0x11 ); 37.794 - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 37.795 - 37.796 - emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] 37.797 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.798 - 37.799 - emit_opcode(cbuf,0x83); // ADD ESP,4 37.800 - emit_opcode(cbuf,0xC4); 37.801 - emit_d8(cbuf,0x04); 37.802 - 37.803 - // CALL directly to the runtime 37.804 - cbuf.set_insts_mark(); 37.805 - emit_opcode(cbuf,0xE8); // Call into runtime 37.806 - emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), 
RELOC_IMM32 ); 37.807 - // Carry on here... 37.808 - %} 37.809 - 37.810 - enc_class XD2L_encoding( regXD src ) %{ 37.811 - // Allocate a word 37.812 - emit_opcode(cbuf,0x83); // SUB ESP,8 37.813 - emit_opcode(cbuf,0xEC); 37.814 - emit_d8(cbuf,0x08); 37.815 - 37.816 - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src 37.817 - emit_opcode (cbuf, 0x0F ); 37.818 - emit_opcode (cbuf, 0x11 ); 37.819 - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 37.820 - 37.821 - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 37.822 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.823 - 37.824 - emit_opcode(cbuf,0xD9); // FLDCW trunc 37.825 - emit_opcode(cbuf,0x2D); 37.826 - emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 37.827 - 37.828 - // Encoding assumes a double has been pushed into FPR0. 37.829 - // Store down the double as a long, popping the FPU stack 37.830 - emit_opcode(cbuf,0xDF); // FISTP [ESP] 37.831 - emit_opcode(cbuf,0x3C); 37.832 - emit_d8(cbuf,0x24); 37.833 - 37.834 - // Restore the rounding mode; mask the exception 37.835 - emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 37.836 - emit_opcode(cbuf,0x2D); 37.837 - emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 37.838 - ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 37.839 - : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 37.840 - 37.841 - // Load the converted int; adjust CPU stack 37.842 - emit_opcode(cbuf,0x58); // POP EAX 37.843 - 37.844 - emit_opcode(cbuf,0x5A); // POP EDX 37.845 - 37.846 - emit_opcode(cbuf,0x81); // CMP EDX,imm 37.847 - emit_d8 (cbuf,0xFA); // rdx 37.848 - emit_d32 (cbuf,0x80000000); // 0x80000000 37.849 - 37.850 - emit_opcode(cbuf,0x75); // JNE around_slow_call 37.851 - emit_d8 (cbuf,0x13+4); // Size of slow_call 37.852 - 37.853 - emit_opcode(cbuf,0x85); // TEST EAX,EAX 37.854 - emit_opcode(cbuf,0xC0); // 2/rax,/rax, 37.855 - 37.856 - emit_opcode(cbuf,0x75); // JNE around_slow_call 37.857 - emit_d8 (cbuf,0x13); // Size of slow_call 37.858 - 37.859 - // Push src onto stack slow-path 37.860 - // Allocate a word 37.861 - emit_opcode(cbuf,0x83); // SUB ESP,8 37.862 - emit_opcode(cbuf,0xEC); 37.863 - emit_d8(cbuf,0x08); 37.864 - 37.865 - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src 37.866 - emit_opcode (cbuf, 0x0F ); 37.867 - emit_opcode (cbuf, 0x11 ); 37.868 - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 37.869 - 37.870 - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 37.871 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.872 - 37.873 - emit_opcode(cbuf,0x83); // ADD ESP,8 37.874 - emit_opcode(cbuf,0xC4); 37.875 - emit_d8(cbuf,0x08); 37.876 - 37.877 - // CALL directly to the runtime 37.878 - cbuf.set_insts_mark(); 37.879 - emit_opcode(cbuf,0xE8); // Call into runtime 37.880 - emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 37.881 - // Carry on here... 
37.882 - %} 37.883 - 37.884 - enc_class D2X_encoding( regX dst, regD src ) %{ 37.885 - // Allocate a word 37.886 - emit_opcode(cbuf,0x83); // SUB ESP,4 37.887 - emit_opcode(cbuf,0xEC); 37.888 - emit_d8(cbuf,0x04); 37.889 - int pop = 0x02; 37.890 - if ($src$$reg != FPR1L_enc) { 37.891 - emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 37.892 - emit_d8( cbuf, 0xC0-1+$src$$reg ); 37.893 - pop = 0x03; 37.894 - } 37.895 - store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S [ESP] 37.896 - 37.897 - emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] 37.898 - emit_opcode (cbuf, 0x0F ); 37.899 - emit_opcode (cbuf, 0x10 ); 37.900 - encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); 37.901 - 37.902 - emit_opcode(cbuf,0x83); // ADD ESP,4 37.903 - emit_opcode(cbuf,0xC4); 37.904 - emit_d8(cbuf,0x04); 37.905 - // Carry on here... 37.906 - %} 37.907 - 37.908 - enc_class FX2I_encoding( regX src, eRegI dst ) %{ 37.909 - emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 37.910 - 37.911 - // Compare the result to see if we need to go to the slow path 37.912 - emit_opcode(cbuf,0x81); // CMP dst,imm 37.913 - emit_rm (cbuf,0x3,0x7,$dst$$reg); 37.914 - emit_d32 (cbuf,0x80000000); // 0x80000000 37.915 - 37.916 - emit_opcode(cbuf,0x75); // JNE around_slow_call 37.917 - emit_d8 (cbuf,0x13); // Size of slow_call 37.918 - // Store xmm to a temp memory 37.919 - // location and push it onto stack. 37.920 - 37.921 - emit_opcode(cbuf,0x83); // SUB ESP,4 37.922 - emit_opcode(cbuf,0xEC); 37.923 - emit_d8(cbuf, $primary ? 0x8 : 0x4); 37.924 - 37.925 - emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm 37.926 - emit_opcode (cbuf, 0x0F ); 37.927 - emit_opcode (cbuf, 0x11 ); 37.928 - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 37.929 - 37.930 - emit_opcode(cbuf, $primary ? 
0xDD : 0xD9 ); // FLD [ESP] 37.931 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.932 - 37.933 - emit_opcode(cbuf,0x83); // ADD ESP,4 37.934 - emit_opcode(cbuf,0xC4); 37.935 - emit_d8(cbuf, $primary ? 0x8 : 0x4); 37.936 - 37.937 - // CALL directly to the runtime 37.938 - cbuf.set_insts_mark(); 37.939 - emit_opcode(cbuf,0xE8); // Call into runtime 37.940 - emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 37.941 - 37.942 - // Carry on here... 37.943 - %} 37.944 - 37.945 - enc_class X2D_encoding( regD dst, regX src ) %{ 37.946 - // Allocate a word 37.947 - emit_opcode(cbuf,0x83); // SUB ESP,4 37.948 - emit_opcode(cbuf,0xEC); 37.949 - emit_d8(cbuf,0x04); 37.950 - 37.951 - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm 37.952 - emit_opcode (cbuf, 0x0F ); 37.953 - emit_opcode (cbuf, 0x11 ); 37.954 - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); 37.955 - 37.956 - emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] 37.957 - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); 37.958 - 37.959 - emit_opcode(cbuf,0x83); // ADD ESP,4 37.960 - emit_opcode(cbuf,0xC4); 37.961 - emit_d8(cbuf,0x04); 37.962 - 37.963 - // Carry on here... 
37.964 - %} 37.965 - 37.966 - enc_class AbsXF_encoding(regX dst) %{ 37.967 - address signmask_address=(address)float_signmask_pool; 37.968 - // andpd:\tANDPS $dst,[signconst] 37.969 - emit_opcode(cbuf, 0x0F); 37.970 - emit_opcode(cbuf, 0x54); 37.971 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 37.972 - emit_d32(cbuf, (int)signmask_address); 37.973 - %} 37.974 - 37.975 - enc_class AbsXD_encoding(regXD dst) %{ 37.976 - address signmask_address=(address)double_signmask_pool; 37.977 - // andpd:\tANDPD $dst,[signconst] 37.978 - emit_opcode(cbuf, 0x66); 37.979 - emit_opcode(cbuf, 0x0F); 37.980 - emit_opcode(cbuf, 0x54); 37.981 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 37.982 - emit_d32(cbuf, (int)signmask_address); 37.983 - %} 37.984 - 37.985 - enc_class NegXF_encoding(regX dst) %{ 37.986 - address signmask_address=(address)float_signflip_pool; 37.987 - // andpd:\tXORPS $dst,[signconst] 37.988 - emit_opcode(cbuf, 0x0F); 37.989 - emit_opcode(cbuf, 0x57); 37.990 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 37.991 - emit_d32(cbuf, (int)signmask_address); 37.992 - %} 37.993 - 37.994 - enc_class NegXD_encoding(regXD dst) %{ 37.995 - address signmask_address=(address)double_signflip_pool; 37.996 - // andpd:\tXORPD $dst,[signconst] 37.997 - emit_opcode(cbuf, 0x66); 37.998 - emit_opcode(cbuf, 0x0F); 37.999 - emit_opcode(cbuf, 0x57); 37.1000 - emit_rm(cbuf, 0x0, $dst$$reg, 0x5); 37.1001 - emit_d32(cbuf, (int)signmask_address); 37.1002 - %} 37.1003 - 37.1004 - enc_class FMul_ST_reg( eRegF src1 ) %{ 37.1005 + enc_class FMul_ST_reg( eRegFPR src1 ) %{ 37.1006 // Operand was loaded from memory into fp ST (stack top) 37.1007 // FMUL ST,$src /* D8 C8+i */ 37.1008 emit_opcode(cbuf, 0xD8); 37.1009 emit_opcode(cbuf, 0xC8 + $src1$$reg); 37.1010 %} 37.1011 37.1012 - enc_class FAdd_ST_reg( eRegF src2 ) %{ 37.1013 + enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 37.1014 // FADDP ST,src2 /* D8 C0+i */ 37.1015 emit_opcode(cbuf, 0xD8); 37.1016 emit_opcode(cbuf, 0xC0 + $src2$$reg); 37.1017 //could use FADDP src2,fpST /* 
DE C0+i */ 37.1018 %} 37.1019 37.1020 - enc_class FAddP_reg_ST( eRegF src2 ) %{ 37.1021 + enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 37.1022 // FADDP src2,ST /* DE C0+i */ 37.1023 emit_opcode(cbuf, 0xDE); 37.1024 emit_opcode(cbuf, 0xC0 + $src2$$reg); 37.1025 %} 37.1026 37.1027 - enc_class subF_divF_encode( eRegF src1, eRegF src2) %{ 37.1028 + enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 37.1029 // Operand has been loaded into fp ST (stack top) 37.1030 // FSUB ST,$src1 37.1031 emit_opcode(cbuf, 0xD8); 37.1032 @@ -4140,7 +3681,7 @@ 37.1033 emit_opcode(cbuf, 0xF0 + $src2$$reg); 37.1034 %} 37.1035 37.1036 - enc_class MulFAddF (eRegF src1, eRegF src2) %{ 37.1037 + enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 37.1038 // Operand was loaded from memory into fp ST (stack top) 37.1039 // FADD ST,$src /* D8 C0+i */ 37.1040 emit_opcode(cbuf, 0xD8); 37.1041 @@ -4152,7 +3693,7 @@ 37.1042 %} 37.1043 37.1044 37.1045 - enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{ 37.1046 + enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 37.1047 // Operand was loaded from memory into fp ST (stack top) 37.1048 // FADD ST,$src /* D8 C0+i */ 37.1049 emit_opcode(cbuf, 0xD8); 37.1050 @@ -4176,66 +3717,6 @@ 37.1051 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 37.1052 %} 37.1053 37.1054 - enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{ 37.1055 - { // Atomic long load 37.1056 - // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem 37.1057 - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 37.1058 - emit_opcode(cbuf,0x0F); 37.1059 - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 
0x10 : 0x12); 37.1060 - int base = $mem$$base; 37.1061 - int index = $mem$$index; 37.1062 - int scale = $mem$$scale; 37.1063 - int displace = $mem$$disp; 37.1064 - bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 37.1065 - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 37.1066 - } 37.1067 - { // MOVSD $dst,$tmp ! atomic long store 37.1068 - emit_opcode(cbuf,0xF2); 37.1069 - emit_opcode(cbuf,0x0F); 37.1070 - emit_opcode(cbuf,0x11); 37.1071 - int base = $dst$$base; 37.1072 - int index = $dst$$index; 37.1073 - int scale = $dst$$scale; 37.1074 - int displace = $dst$$disp; 37.1075 - bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals 37.1076 - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 37.1077 - } 37.1078 - %} 37.1079 - 37.1080 - enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{ 37.1081 - { // Atomic long load 37.1082 - // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem 37.1083 - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 37.1084 - emit_opcode(cbuf,0x0F); 37.1085 - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 
0x10 : 0x12); 37.1086 - int base = $mem$$base; 37.1087 - int index = $mem$$index; 37.1088 - int scale = $mem$$scale; 37.1089 - int displace = $mem$$disp; 37.1090 - bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 37.1091 - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 37.1092 - } 37.1093 - { // MOVD $dst.lo,$tmp 37.1094 - emit_opcode(cbuf,0x66); 37.1095 - emit_opcode(cbuf,0x0F); 37.1096 - emit_opcode(cbuf,0x7E); 37.1097 - emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg); 37.1098 - } 37.1099 - { // PSRLQ $tmp,32 37.1100 - emit_opcode(cbuf,0x66); 37.1101 - emit_opcode(cbuf,0x0F); 37.1102 - emit_opcode(cbuf,0x73); 37.1103 - emit_rm(cbuf, 0x3, 0x02, $tmp$$reg); 37.1104 - emit_d8(cbuf, 0x20); 37.1105 - } 37.1106 - { // MOVD $dst.hi,$tmp 37.1107 - emit_opcode(cbuf,0x66); 37.1108 - emit_opcode(cbuf,0x0F); 37.1109 - emit_opcode(cbuf,0x7E); 37.1110 - emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); 37.1111 - } 37.1112 - %} 37.1113 - 37.1114 // Volatile Store Long. Must be atomic, so move it into 37.1115 // the FP TOS and then do a 64-bit FIST. Has to probe the 37.1116 // target address before the store (for null-ptr checks) 37.1117 @@ -4253,66 +3734,6 @@ 37.1118 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); 37.1119 %} 37.1120 37.1121 - enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{ 37.1122 - { // Atomic long load 37.1123 - // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src] 37.1124 - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 37.1125 - emit_opcode(cbuf,0x0F); 37.1126 - emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 
0x10 : 0x12); 37.1127 - int base = $src$$base; 37.1128 - int index = $src$$index; 37.1129 - int scale = $src$$scale; 37.1130 - int displace = $src$$disp; 37.1131 - bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals 37.1132 - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 37.1133 - } 37.1134 - cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop 37.1135 - { // MOVSD $mem,$tmp ! atomic long store 37.1136 - emit_opcode(cbuf,0xF2); 37.1137 - emit_opcode(cbuf,0x0F); 37.1138 - emit_opcode(cbuf,0x11); 37.1139 - int base = $mem$$base; 37.1140 - int index = $mem$$index; 37.1141 - int scale = $mem$$scale; 37.1142 - int displace = $mem$$disp; 37.1143 - bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 37.1144 - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 37.1145 - } 37.1146 - %} 37.1147 - 37.1148 - enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{ 37.1149 - { // MOVD $tmp,$src.lo 37.1150 - emit_opcode(cbuf,0x66); 37.1151 - emit_opcode(cbuf,0x0F); 37.1152 - emit_opcode(cbuf,0x6E); 37.1153 - emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 37.1154 - } 37.1155 - { // MOVD $tmp2,$src.hi 37.1156 - emit_opcode(cbuf,0x66); 37.1157 - emit_opcode(cbuf,0x0F); 37.1158 - emit_opcode(cbuf,0x6E); 37.1159 - emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg)); 37.1160 - } 37.1161 - { // PUNPCKLDQ $tmp,$tmp2 37.1162 - emit_opcode(cbuf,0x66); 37.1163 - emit_opcode(cbuf,0x0F); 37.1164 - emit_opcode(cbuf,0x62); 37.1165 - emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg); 37.1166 - } 37.1167 - cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop 37.1168 - { // MOVSD $mem,$tmp ! 
atomic long store 37.1169 - emit_opcode(cbuf,0xF2); 37.1170 - emit_opcode(cbuf,0x0F); 37.1171 - emit_opcode(cbuf,0x11); 37.1172 - int base = $mem$$base; 37.1173 - int index = $mem$$index; 37.1174 - int scale = $mem$$scale; 37.1175 - int displace = $mem$$disp; 37.1176 - bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals 37.1177 - encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop); 37.1178 - } 37.1179 - %} 37.1180 - 37.1181 // Safepoint Poll. This polls the safepoint page, and causes an 37.1182 // exception if it is not readable. Unfortunately, it kills the condition code 37.1183 // in the process 37.1184 @@ -4705,7 +4126,7 @@ 37.1185 %} 37.1186 37.1187 //Double Immediate zero 37.1188 -operand immD0() %{ 37.1189 +operand immDPR0() %{ 37.1190 // Do additional (and counter-intuitive) test against NaN to work around VC++ 37.1191 // bug that generates code such that NaNs compare equal to 0.0 37.1192 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 37.1193 @@ -4717,7 +4138,7 @@ 37.1194 %} 37.1195 37.1196 // Double Immediate one 37.1197 -operand immD1() %{ 37.1198 +operand immDPR1() %{ 37.1199 predicate( UseSSE<=1 && n->getd() == 1.0 ); 37.1200 match(ConD); 37.1201 37.1202 @@ -4727,7 +4148,7 @@ 37.1203 %} 37.1204 37.1205 // Double Immediate 37.1206 -operand immD() %{ 37.1207 +operand immDPR() %{ 37.1208 predicate(UseSSE<=1); 37.1209 match(ConD); 37.1210 37.1211 @@ -4736,7 +4157,7 @@ 37.1212 interface(CONST_INTER); 37.1213 %} 37.1214 37.1215 -operand immXD() %{ 37.1216 +operand immD() %{ 37.1217 predicate(UseSSE>=2); 37.1218 match(ConD); 37.1219 37.1220 @@ -4746,7 +4167,7 @@ 37.1221 %} 37.1222 37.1223 // Double Immediate zero 37.1224 -operand immXD0() %{ 37.1225 +operand immD0() %{ 37.1226 // Do additional (and counter-intuitive) test against NaN to work around VC++ 37.1227 // bug that generates code such that NaNs compare equal to 0.0 AND do not 37.1228 // compare equal to -0.0. 
37.1229 @@ -4758,7 +4179,7 @@ 37.1230 %} 37.1231 37.1232 // Float Immediate zero 37.1233 -operand immF0() %{ 37.1234 +operand immFPR0() %{ 37.1235 predicate(UseSSE == 0 && n->getf() == 0.0F); 37.1236 match(ConF); 37.1237 37.1238 @@ -4768,7 +4189,7 @@ 37.1239 %} 37.1240 37.1241 // Float Immediate one 37.1242 -operand immF1() %{ 37.1243 +operand immFPR1() %{ 37.1244 predicate(UseSSE == 0 && n->getf() == 1.0F); 37.1245 match(ConF); 37.1246 37.1247 @@ -4778,7 +4199,7 @@ 37.1248 %} 37.1249 37.1250 // Float Immediate 37.1251 -operand immF() %{ 37.1252 +operand immFPR() %{ 37.1253 predicate( UseSSE == 0 ); 37.1254 match(ConF); 37.1255 37.1256 @@ -4788,7 +4209,7 @@ 37.1257 %} 37.1258 37.1259 // Float Immediate 37.1260 -operand immXF() %{ 37.1261 +operand immF() %{ 37.1262 predicate(UseSSE >= 1); 37.1263 match(ConF); 37.1264 37.1265 @@ -4798,7 +4219,7 @@ 37.1266 %} 37.1267 37.1268 // Float Immediate zero. Zero and not -0.0 37.1269 -operand immXF0() %{ 37.1270 +operand immF0() %{ 37.1271 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 37.1272 match(ConF); 37.1273 37.1274 @@ -5174,7 +4595,7 @@ 37.1275 %} 37.1276 37.1277 // Float register operands 37.1278 -operand regD() %{ 37.1279 +operand regDPR() %{ 37.1280 predicate( UseSSE < 2 ); 37.1281 constraint(ALLOC_IN_RC(dbl_reg)); 37.1282 match(RegD); 37.1283 @@ -5184,7 +4605,7 @@ 37.1284 interface(REG_INTER); 37.1285 %} 37.1286 37.1287 -operand regDPR1(regD reg) %{ 37.1288 +operand regDPR1(regDPR reg) %{ 37.1289 predicate( UseSSE < 2 ); 37.1290 constraint(ALLOC_IN_RC(dbl_reg0)); 37.1291 match(reg); 37.1292 @@ -5192,7 +4613,7 @@ 37.1293 interface(REG_INTER); 37.1294 %} 37.1295 37.1296 -operand regDPR2(regD reg) %{ 37.1297 +operand regDPR2(regDPR reg) %{ 37.1298 predicate( UseSSE < 2 ); 37.1299 constraint(ALLOC_IN_RC(dbl_reg1)); 37.1300 match(reg); 37.1301 @@ -5200,7 +4621,7 @@ 37.1302 interface(REG_INTER); 37.1303 %} 37.1304 37.1305 -operand regnotDPR1(regD reg) %{ 37.1306 +operand regnotDPR1(regDPR reg) %{ 37.1307 
predicate( UseSSE < 2 ); 37.1308 constraint(ALLOC_IN_RC(dbl_notreg0)); 37.1309 match(reg); 37.1310 @@ -5209,18 +4630,18 @@ 37.1311 %} 37.1312 37.1313 // XMM Double register operands 37.1314 -operand regXD() %{ 37.1315 +operand regD() %{ 37.1316 predicate( UseSSE>=2 ); 37.1317 constraint(ALLOC_IN_RC(xdb_reg)); 37.1318 match(RegD); 37.1319 - match(regXD6); 37.1320 - match(regXD7); 37.1321 + match(regD6); 37.1322 + match(regD7); 37.1323 format %{ %} 37.1324 interface(REG_INTER); 37.1325 %} 37.1326 37.1327 // XMM6 double register operands 37.1328 -operand regXD6(regXD reg) %{ 37.1329 +operand regD6(regD reg) %{ 37.1330 predicate( UseSSE>=2 ); 37.1331 constraint(ALLOC_IN_RC(xdb_reg6)); 37.1332 match(reg); 37.1333 @@ -5229,7 +4650,7 @@ 37.1334 %} 37.1335 37.1336 // XMM7 double register operands 37.1337 -operand regXD7(regXD reg) %{ 37.1338 +operand regD7(regD reg) %{ 37.1339 predicate( UseSSE>=2 ); 37.1340 constraint(ALLOC_IN_RC(xdb_reg7)); 37.1341 match(reg); 37.1342 @@ -5238,7 +4659,7 @@ 37.1343 %} 37.1344 37.1345 // Float register operands 37.1346 -operand regF() %{ 37.1347 +operand regFPR() %{ 37.1348 predicate( UseSSE < 2 ); 37.1349 constraint(ALLOC_IN_RC(flt_reg)); 37.1350 match(RegF); 37.1351 @@ -5248,7 +4669,7 @@ 37.1352 %} 37.1353 37.1354 // Float register operands 37.1355 -operand regFPR1(regF reg) %{ 37.1356 +operand regFPR1(regFPR reg) %{ 37.1357 predicate( UseSSE < 2 ); 37.1358 constraint(ALLOC_IN_RC(flt_reg0)); 37.1359 match(reg); 37.1360 @@ -5257,7 +4678,7 @@ 37.1361 %} 37.1362 37.1363 // XMM register operands 37.1364 -operand regX() %{ 37.1365 +operand regF() %{ 37.1366 predicate( UseSSE>=1 ); 37.1367 constraint(ALLOC_IN_RC(xmm_reg)); 37.1368 match(RegF); 37.1369 @@ -6001,7 +5422,7 @@ 37.1370 %} 37.1371 37.1372 // Conditional move double reg-reg 37.1373 -pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{ 37.1374 +pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 37.1375 single_instruction; 37.1376 dst : S4(write); 
37.1377 src : S3(read); 37.1378 @@ -6010,7 +5431,7 @@ 37.1379 %} 37.1380 37.1381 // Float reg-reg operation 37.1382 -pipe_class fpu_reg(regD dst) %{ 37.1383 +pipe_class fpu_reg(regDPR dst) %{ 37.1384 instruction_count(2); 37.1385 dst : S3(read); 37.1386 DECODE : S0(2); // any 2 decoders 37.1387 @@ -6018,7 +5439,7 @@ 37.1388 %} 37.1389 37.1390 // Float reg-reg operation 37.1391 -pipe_class fpu_reg_reg(regD dst, regD src) %{ 37.1392 +pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 37.1393 instruction_count(2); 37.1394 dst : S4(write); 37.1395 src : S3(read); 37.1396 @@ -6027,7 +5448,7 @@ 37.1397 %} 37.1398 37.1399 // Float reg-reg operation 37.1400 -pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{ 37.1401 +pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 37.1402 instruction_count(3); 37.1403 dst : S4(write); 37.1404 src1 : S3(read); 37.1405 @@ -6037,7 +5458,7 @@ 37.1406 %} 37.1407 37.1408 // Float reg-reg operation 37.1409 -pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ 37.1410 +pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 37.1411 instruction_count(4); 37.1412 dst : S4(write); 37.1413 src1 : S3(read); 37.1414 @@ -6048,7 +5469,7 @@ 37.1415 %} 37.1416 37.1417 // Float reg-reg operation 37.1418 -pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{ 37.1419 +pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 37.1420 instruction_count(4); 37.1421 dst : S4(write); 37.1422 src1 : S3(read); 37.1423 @@ -6061,7 +5482,7 @@ 37.1424 %} 37.1425 37.1426 // Float reg-mem operation 37.1427 -pipe_class fpu_reg_mem(regD dst, memory mem) %{ 37.1428 +pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 37.1429 instruction_count(2); 37.1430 dst : S5(write); 37.1431 mem : S3(read); 37.1432 @@ -6072,7 +5493,7 @@ 37.1433 %} 37.1434 37.1435 // Float reg-mem operation 37.1436 -pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) 
%{ 37.1437 +pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 37.1438 instruction_count(3); 37.1439 dst : S5(write); 37.1440 src1 : S3(read); 37.1441 @@ -6084,7 +5505,7 @@ 37.1442 %} 37.1443 37.1444 // Float mem-reg operation 37.1445 -pipe_class fpu_mem_reg(memory mem, regD src) %{ 37.1446 +pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 37.1447 instruction_count(2); 37.1448 src : S5(read); 37.1449 mem : S3(read); 37.1450 @@ -6094,7 +5515,7 @@ 37.1451 MEM : S3; // any mem 37.1452 %} 37.1453 37.1454 -pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{ 37.1455 +pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 37.1456 instruction_count(3); 37.1457 src1 : S3(read); 37.1458 src2 : S3(read); 37.1459 @@ -6105,7 +5526,7 @@ 37.1460 MEM : S3; // any mem 37.1461 %} 37.1462 37.1463 -pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{ 37.1464 +pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 37.1465 instruction_count(3); 37.1466 src1 : S3(read); 37.1467 src2 : S3(read); 37.1468 @@ -6134,7 +5555,7 @@ 37.1469 MEM : S3(3); // any mem 37.1470 %} 37.1471 37.1472 -pipe_class fpu_mem_reg_con(memory mem, regD src1) %{ 37.1473 +pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 37.1474 instruction_count(3); 37.1475 src1 : S4(read); 37.1476 mem : S4(read); 37.1477 @@ -6145,7 +5566,7 @@ 37.1478 %} 37.1479 37.1480 // Float load constant 37.1481 -pipe_class fpu_reg_con(regD dst) %{ 37.1482 +pipe_class fpu_reg_con(regDPR dst) %{ 37.1483 instruction_count(2); 37.1484 dst : S5(write); 37.1485 D0 : S0; // big decoder only for the load 37.1486 @@ -6155,7 +5576,7 @@ 37.1487 %} 37.1488 37.1489 // Float load constant 37.1490 -pipe_class fpu_reg_reg_con(regD dst, regD src) %{ 37.1491 +pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 37.1492 instruction_count(3); 37.1493 dst : S5(write); 37.1494 src : S3(read); 37.1495 @@ -6870,18 +6291,21 @@ 37.1496 ins_pipe( fpu_reg_mem ); 37.1497 %} 37.1498 37.1499 
-instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{ 37.1500 +instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 37.1501 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 37.1502 match(Set dst (LoadL mem)); 37.1503 effect(TEMP tmp); 37.1504 ins_cost(180); 37.1505 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 37.1506 "MOVSD $dst,$tmp" %} 37.1507 - ins_encode(enc_loadLX_volatile(mem, dst, tmp)); 37.1508 - ins_pipe( pipe_slow ); 37.1509 -%} 37.1510 - 37.1511 -instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{ 37.1512 + ins_encode %{ 37.1513 + __ movdbl($tmp$$XMMRegister, $mem$$Address); 37.1514 + __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 37.1515 + %} 37.1516 + ins_pipe( pipe_slow ); 37.1517 +%} 37.1518 + 37.1519 +instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 37.1520 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 37.1521 match(Set dst (LoadL mem)); 37.1522 effect(TEMP tmp); 37.1523 @@ -6890,7 +6314,12 @@ 37.1524 "MOVD $dst.lo,$tmp\n\t" 37.1525 "PSRLQ $tmp,32\n\t" 37.1526 "MOVD $dst.hi,$tmp" %} 37.1527 - ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); 37.1528 + ins_encode %{ 37.1529 + __ movdbl($tmp$$XMMRegister, $mem$$Address); 37.1530 + __ movdl($dst$$Register, $tmp$$XMMRegister); 37.1531 + __ psrlq($tmp$$XMMRegister, 32); 37.1532 + __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 37.1533 + %} 37.1534 ins_pipe( pipe_slow ); 37.1535 %} 37.1536 37.1537 @@ -6929,7 +6358,7 @@ 37.1538 %} 37.1539 37.1540 // Load Double 37.1541 -instruct loadD(regD dst, memory mem) %{ 37.1542 +instruct loadDPR(regDPR dst, memory mem) %{ 37.1543 predicate(UseSSE<=1); 37.1544 match(Set dst (LoadD mem)); 37.1545 37.1546 @@ -6938,42 +6367,48 @@ 37.1547 "FSTP $dst" %} 37.1548 opcode(0xDD); /* DD /0 */ 37.1549 ins_encode( OpcP, RMopc_Mem(0x00,mem), 37.1550 - Pop_Reg_D(dst) ); 37.1551 + Pop_Reg_DPR(dst) ); 37.1552 ins_pipe( fpu_reg_mem ); 37.1553 %} 37.1554 
37.1555 // Load Double to XMM 37.1556 -instruct loadXD(regXD dst, memory mem) %{ 37.1557 +instruct loadD(regD dst, memory mem) %{ 37.1558 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 37.1559 match(Set dst (LoadD mem)); 37.1560 ins_cost(145); 37.1561 format %{ "MOVSD $dst,$mem" %} 37.1562 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); 37.1563 - ins_pipe( pipe_slow ); 37.1564 -%} 37.1565 - 37.1566 -instruct loadXD_partial(regXD dst, memory mem) %{ 37.1567 + ins_encode %{ 37.1568 + __ movdbl ($dst$$XMMRegister, $mem$$Address); 37.1569 + %} 37.1570 + ins_pipe( pipe_slow ); 37.1571 +%} 37.1572 + 37.1573 +instruct loadD_partial(regD dst, memory mem) %{ 37.1574 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 37.1575 match(Set dst (LoadD mem)); 37.1576 ins_cost(145); 37.1577 format %{ "MOVLPD $dst,$mem" %} 37.1578 - ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem)); 37.1579 + ins_encode %{ 37.1580 + __ movdbl ($dst$$XMMRegister, $mem$$Address); 37.1581 + %} 37.1582 ins_pipe( pipe_slow ); 37.1583 %} 37.1584 37.1585 // Load to XMM register (single-precision floating point) 37.1586 // MOVSS instruction 37.1587 -instruct loadX(regX dst, memory mem) %{ 37.1588 +instruct loadF(regF dst, memory mem) %{ 37.1589 predicate(UseSSE>=1); 37.1590 match(Set dst (LoadF mem)); 37.1591 ins_cost(145); 37.1592 format %{ "MOVSS $dst,$mem" %} 37.1593 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); 37.1594 + ins_encode %{ 37.1595 + __ movflt ($dst$$XMMRegister, $mem$$Address); 37.1596 + %} 37.1597 ins_pipe( pipe_slow ); 37.1598 %} 37.1599 37.1600 // Load Float 37.1601 -instruct loadF(regF dst, memory mem) %{ 37.1602 +instruct loadFPR(regFPR dst, memory mem) %{ 37.1603 predicate(UseSSE==0); 37.1604 match(Set dst (LoadF mem)); 37.1605 37.1606 @@ -6982,57 +6417,67 @@ 37.1607 "FSTP $dst" %} 37.1608 opcode(0xD9); /* D9 /0 */ 37.1609 ins_encode( OpcP, RMopc_Mem(0x00,mem), 37.1610 - Pop_Reg_F(dst) ); 37.1611 + 
Pop_Reg_FPR(dst) ); 37.1612 ins_pipe( fpu_reg_mem ); 37.1613 %} 37.1614 37.1615 // Load Aligned Packed Byte to XMM register 37.1616 -instruct loadA8B(regXD dst, memory mem) %{ 37.1617 +instruct loadA8B(regD dst, memory mem) %{ 37.1618 predicate(UseSSE>=1); 37.1619 match(Set dst (Load8B mem)); 37.1620 ins_cost(125); 37.1621 format %{ "MOVQ $dst,$mem\t! packed8B" %} 37.1622 - ins_encode( movq_ld(dst, mem)); 37.1623 + ins_encode %{ 37.1624 + __ movq($dst$$XMMRegister, $mem$$Address); 37.1625 + %} 37.1626 ins_pipe( pipe_slow ); 37.1627 %} 37.1628 37.1629 // Load Aligned Packed Short to XMM register 37.1630 -instruct loadA4S(regXD dst, memory mem) %{ 37.1631 +instruct loadA4S(regD dst, memory mem) %{ 37.1632 predicate(UseSSE>=1); 37.1633 match(Set dst (Load4S mem)); 37.1634 ins_cost(125); 37.1635 format %{ "MOVQ $dst,$mem\t! packed4S" %} 37.1636 - ins_encode( movq_ld(dst, mem)); 37.1637 + ins_encode %{ 37.1638 + __ movq($dst$$XMMRegister, $mem$$Address); 37.1639 + %} 37.1640 ins_pipe( pipe_slow ); 37.1641 %} 37.1642 37.1643 // Load Aligned Packed Char to XMM register 37.1644 -instruct loadA4C(regXD dst, memory mem) %{ 37.1645 +instruct loadA4C(regD dst, memory mem) %{ 37.1646 predicate(UseSSE>=1); 37.1647 match(Set dst (Load4C mem)); 37.1648 ins_cost(125); 37.1649 format %{ "MOVQ $dst,$mem\t! packed4C" %} 37.1650 - ins_encode( movq_ld(dst, mem)); 37.1651 + ins_encode %{ 37.1652 + __ movq($dst$$XMMRegister, $mem$$Address); 37.1653 + %} 37.1654 ins_pipe( pipe_slow ); 37.1655 %} 37.1656 37.1657 // Load Aligned Packed Integer to XMM register 37.1658 -instruct load2IU(regXD dst, memory mem) %{ 37.1659 +instruct load2IU(regD dst, memory mem) %{ 37.1660 predicate(UseSSE>=1); 37.1661 match(Set dst (Load2I mem)); 37.1662 ins_cost(125); 37.1663 format %{ "MOVQ $dst,$mem\t! 
packed2I" %} 37.1664 - ins_encode( movq_ld(dst, mem)); 37.1665 + ins_encode %{ 37.1666 + __ movq($dst$$XMMRegister, $mem$$Address); 37.1667 + %} 37.1668 ins_pipe( pipe_slow ); 37.1669 %} 37.1670 37.1671 // Load Aligned Packed Single to XMM 37.1672 -instruct loadA2F(regXD dst, memory mem) %{ 37.1673 +instruct loadA2F(regD dst, memory mem) %{ 37.1674 predicate(UseSSE>=1); 37.1675 match(Set dst (Load2F mem)); 37.1676 ins_cost(145); 37.1677 format %{ "MOVQ $dst,$mem\t! packed2F" %} 37.1678 - ins_encode( movq_ld(dst, mem)); 37.1679 + ins_encode %{ 37.1680 + __ movq($dst$$XMMRegister, $mem$$Address); 37.1681 + %} 37.1682 ins_pipe( pipe_slow ); 37.1683 %} 37.1684 37.1685 @@ -7139,58 +6584,58 @@ 37.1686 ins_pipe( ialu_reg_long ); 37.1687 %} 37.1688 37.1689 +// The instruction usage is guarded by predicate in operand immFPR(). 37.1690 +instruct loadConFPR(regFPR dst, immFPR con) %{ 37.1691 + match(Set dst con); 37.1692 + ins_cost(125); 37.1693 + format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 37.1694 + "FSTP $dst" %} 37.1695 + ins_encode %{ 37.1696 + __ fld_s($constantaddress($con)); 37.1697 + __ fstp_d($dst$$reg); 37.1698 + %} 37.1699 + ins_pipe(fpu_reg_con); 37.1700 +%} 37.1701 + 37.1702 +// The instruction usage is guarded by predicate in operand immFPR0(). 37.1703 +instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 37.1704 + match(Set dst con); 37.1705 + ins_cost(125); 37.1706 + format %{ "FLDZ ST\n\t" 37.1707 + "FSTP $dst" %} 37.1708 + ins_encode %{ 37.1709 + __ fldz(); 37.1710 + __ fstp_d($dst$$reg); 37.1711 + %} 37.1712 + ins_pipe(fpu_reg_con); 37.1713 +%} 37.1714 + 37.1715 +// The instruction usage is guarded by predicate in operand immFPR1(). 
37.1716 +instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 37.1717 + match(Set dst con); 37.1718 + ins_cost(125); 37.1719 + format %{ "FLD1 ST\n\t" 37.1720 + "FSTP $dst" %} 37.1721 + ins_encode %{ 37.1722 + __ fld1(); 37.1723 + __ fstp_d($dst$$reg); 37.1724 + %} 37.1725 + ins_pipe(fpu_reg_con); 37.1726 +%} 37.1727 + 37.1728 // The instruction usage is guarded by predicate in operand immF(). 37.1729 instruct loadConF(regF dst, immF con) %{ 37.1730 match(Set dst con); 37.1731 ins_cost(125); 37.1732 - format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 37.1733 - "FSTP $dst" %} 37.1734 - ins_encode %{ 37.1735 - __ fld_s($constantaddress($con)); 37.1736 - __ fstp_d($dst$$reg); 37.1737 - %} 37.1738 - ins_pipe(fpu_reg_con); 37.1739 + format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 37.1740 + ins_encode %{ 37.1741 + __ movflt($dst$$XMMRegister, $constantaddress($con)); 37.1742 + %} 37.1743 + ins_pipe(pipe_slow); 37.1744 %} 37.1745 37.1746 // The instruction usage is guarded by predicate in operand immF0(). 37.1747 -instruct loadConF0(regF dst, immF0 con) %{ 37.1748 - match(Set dst con); 37.1749 - ins_cost(125); 37.1750 - format %{ "FLDZ ST\n\t" 37.1751 - "FSTP $dst" %} 37.1752 - ins_encode %{ 37.1753 - __ fldz(); 37.1754 - __ fstp_d($dst$$reg); 37.1755 - %} 37.1756 - ins_pipe(fpu_reg_con); 37.1757 -%} 37.1758 - 37.1759 -// The instruction usage is guarded by predicate in operand immF1(). 37.1760 -instruct loadConF1(regF dst, immF1 con) %{ 37.1761 - match(Set dst con); 37.1762 - ins_cost(125); 37.1763 - format %{ "FLD1 ST\n\t" 37.1764 - "FSTP $dst" %} 37.1765 - ins_encode %{ 37.1766 - __ fld1(); 37.1767 - __ fstp_d($dst$$reg); 37.1768 - %} 37.1769 - ins_pipe(fpu_reg_con); 37.1770 -%} 37.1771 - 37.1772 -// The instruction usage is guarded by predicate in operand immXF(). 
37.1773 -instruct loadConX(regX dst, immXF con) %{ 37.1774 - match(Set dst con); 37.1775 - ins_cost(125); 37.1776 - format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 37.1777 - ins_encode %{ 37.1778 - __ movflt($dst$$XMMRegister, $constantaddress($con)); 37.1779 - %} 37.1780 - ins_pipe(pipe_slow); 37.1781 -%} 37.1782 - 37.1783 -// The instruction usage is guarded by predicate in operand immXF0(). 37.1784 -instruct loadConX0(regX dst, immXF0 src) %{ 37.1785 +instruct loadConF0(regF dst, immF0 src) %{ 37.1786 match(Set dst src); 37.1787 ins_cost(100); 37.1788 format %{ "XORPS $dst,$dst\t# float 0.0" %} 37.1789 @@ -7200,65 +6645,67 @@ 37.1790 ins_pipe(pipe_slow); 37.1791 %} 37.1792 37.1793 +// The instruction usage is guarded by predicate in operand immDPR(). 37.1794 +instruct loadConDPR(regDPR dst, immDPR con) %{ 37.1795 + match(Set dst con); 37.1796 + ins_cost(125); 37.1797 + 37.1798 + format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 37.1799 + "FSTP $dst" %} 37.1800 + ins_encode %{ 37.1801 + __ fld_d($constantaddress($con)); 37.1802 + __ fstp_d($dst$$reg); 37.1803 + %} 37.1804 + ins_pipe(fpu_reg_con); 37.1805 +%} 37.1806 + 37.1807 +// The instruction usage is guarded by predicate in operand immDPR0(). 37.1808 +instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 37.1809 + match(Set dst con); 37.1810 + ins_cost(125); 37.1811 + 37.1812 + format %{ "FLDZ ST\n\t" 37.1813 + "FSTP $dst" %} 37.1814 + ins_encode %{ 37.1815 + __ fldz(); 37.1816 + __ fstp_d($dst$$reg); 37.1817 + %} 37.1818 + ins_pipe(fpu_reg_con); 37.1819 +%} 37.1820 + 37.1821 +// The instruction usage is guarded by predicate in operand immDPR1(). 
37.1822 +instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 37.1823 + match(Set dst con); 37.1824 + ins_cost(125); 37.1825 + 37.1826 + format %{ "FLD1 ST\n\t" 37.1827 + "FSTP $dst" %} 37.1828 + ins_encode %{ 37.1829 + __ fld1(); 37.1830 + __ fstp_d($dst$$reg); 37.1831 + %} 37.1832 + ins_pipe(fpu_reg_con); 37.1833 +%} 37.1834 + 37.1835 // The instruction usage is guarded by predicate in operand immD(). 37.1836 instruct loadConD(regD dst, immD con) %{ 37.1837 match(Set dst con); 37.1838 ins_cost(125); 37.1839 - 37.1840 - format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 37.1841 - "FSTP $dst" %} 37.1842 - ins_encode %{ 37.1843 - __ fld_d($constantaddress($con)); 37.1844 - __ fstp_d($dst$$reg); 37.1845 - %} 37.1846 - ins_pipe(fpu_reg_con); 37.1847 + format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 37.1848 + ins_encode %{ 37.1849 + __ movdbl($dst$$XMMRegister, $constantaddress($con)); 37.1850 + %} 37.1851 + ins_pipe(pipe_slow); 37.1852 %} 37.1853 37.1854 // The instruction usage is guarded by predicate in operand immD0(). 37.1855 -instruct loadConD0(regD dst, immD0 con) %{ 37.1856 - match(Set dst con); 37.1857 - ins_cost(125); 37.1858 - 37.1859 - format %{ "FLDZ ST\n\t" 37.1860 - "FSTP $dst" %} 37.1861 - ins_encode %{ 37.1862 - __ fldz(); 37.1863 - __ fstp_d($dst$$reg); 37.1864 - %} 37.1865 - ins_pipe(fpu_reg_con); 37.1866 -%} 37.1867 - 37.1868 -// The instruction usage is guarded by predicate in operand immD1(). 37.1869 -instruct loadConD1(regD dst, immD1 con) %{ 37.1870 - match(Set dst con); 37.1871 - ins_cost(125); 37.1872 - 37.1873 - format %{ "FLD1 ST\n\t" 37.1874 - "FSTP $dst" %} 37.1875 - ins_encode %{ 37.1876 - __ fld1(); 37.1877 - __ fstp_d($dst$$reg); 37.1878 - %} 37.1879 - ins_pipe(fpu_reg_con); 37.1880 -%} 37.1881 - 37.1882 -// The instruction usage is guarded by predicate in operand immXD(). 
37.1883 -instruct loadConXD(regXD dst, immXD con) %{ 37.1884 - match(Set dst con); 37.1885 - ins_cost(125); 37.1886 - format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 37.1887 - ins_encode %{ 37.1888 - __ movdbl($dst$$XMMRegister, $constantaddress($con)); 37.1889 - %} 37.1890 - ins_pipe(pipe_slow); 37.1891 -%} 37.1892 - 37.1893 -// The instruction usage is guarded by predicate in operand immXD0(). 37.1894 -instruct loadConXD0(regXD dst, immXD0 src) %{ 37.1895 +instruct loadConD0(regD dst, immD0 src) %{ 37.1896 match(Set dst src); 37.1897 ins_cost(100); 37.1898 format %{ "XORPD $dst,$dst\t# double 0.0" %} 37.1899 - ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst)); 37.1900 + ins_encode %{ 37.1901 + __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 37.1902 + %} 37.1903 ins_pipe( pipe_slow ); 37.1904 %} 37.1905 37.1906 @@ -7296,7 +6743,7 @@ 37.1907 %} 37.1908 37.1909 // Load Stack Slot 37.1910 -instruct loadSSF(regF dst, stackSlotF src) %{ 37.1911 +instruct loadSSF(regFPR dst, stackSlotF src) %{ 37.1912 match(Set dst src); 37.1913 ins_cost(125); 37.1914 37.1915 @@ -7304,12 +6751,12 @@ 37.1916 "FSTP $dst" %} 37.1917 opcode(0xD9); /* D9 /0, FLD m32real */ 37.1918 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 37.1919 - Pop_Reg_F(dst) ); 37.1920 + Pop_Reg_FPR(dst) ); 37.1921 ins_pipe( fpu_reg_mem ); 37.1922 %} 37.1923 37.1924 // Load Stack Slot 37.1925 -instruct loadSSD(regD dst, stackSlotD src) %{ 37.1926 +instruct loadSSD(regDPR dst, stackSlotD src) %{ 37.1927 match(Set dst src); 37.1928 ins_cost(125); 37.1929 37.1930 @@ -7317,7 +6764,7 @@ 37.1931 "FSTP $dst" %} 37.1932 opcode(0xDD); /* DD /0, FLD m64real */ 37.1933 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 37.1934 - Pop_Reg_D(dst) ); 37.1935 + Pop_Reg_DPR(dst) ); 37.1936 ins_pipe( fpu_reg_mem ); 37.1937 %} 37.1938 37.1939 @@ -7552,7 +6999,7 @@ 37.1940 ins_pipe( fpu_reg_mem ); 37.1941 %} 37.1942 37.1943 -instruct storeLX_volatile(memory mem, stackSlotL src, 
regXD tmp, eFlagsReg cr) %{ 37.1944 +instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 37.1945 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 37.1946 match(Set mem (StoreL mem src)); 37.1947 effect( TEMP tmp, KILL cr ); 37.1948 @@ -7560,12 +7007,15 @@ 37.1949 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 37.1950 "MOVSD $tmp,$src\n\t" 37.1951 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 37.1952 - opcode(0x3B); 37.1953 - ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp)); 37.1954 - ins_pipe( pipe_slow ); 37.1955 -%} 37.1956 - 37.1957 -instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{ 37.1958 + ins_encode %{ 37.1959 + __ cmpl(rax, $mem$$Address); 37.1960 + __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 37.1961 + __ movdbl($mem$$Address, $tmp$$XMMRegister); 37.1962 + %} 37.1963 + ins_pipe( pipe_slow ); 37.1964 +%} 37.1965 + 37.1966 +instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 37.1967 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 37.1968 match(Set mem (StoreL mem src)); 37.1969 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 37.1970 @@ -7575,8 +7025,13 @@ 37.1971 "MOVD $tmp2,$src.hi\n\t" 37.1972 "PUNPCKLDQ $tmp,$tmp2\n\t" 37.1973 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 37.1974 - opcode(0x3B); 37.1975 - ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2)); 37.1976 + ins_encode %{ 37.1977 + __ cmpl(rax, $mem$$Address); 37.1978 + __ movdl($tmp$$XMMRegister, $src$$Register); 37.1979 + __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 37.1980 + __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); 37.1981 + __ movdbl($mem$$Address, $tmp$$XMMRegister); 37.1982 + %} 37.1983 ins_pipe( pipe_slow ); 37.1984 %} 37.1985 37.1986 @@ -7638,32 +7093,38 @@ 37.1987 %} 37.1988 37.1989 // Store Aligned 
Packed Byte XMM register to memory 37.1990 -instruct storeA8B(memory mem, regXD src) %{ 37.1991 +instruct storeA8B(memory mem, regD src) %{ 37.1992 predicate(UseSSE>=1); 37.1993 match(Set mem (Store8B mem src)); 37.1994 ins_cost(145); 37.1995 format %{ "MOVQ $mem,$src\t! packed8B" %} 37.1996 - ins_encode( movq_st(mem, src)); 37.1997 + ins_encode %{ 37.1998 + __ movq($mem$$Address, $src$$XMMRegister); 37.1999 + %} 37.2000 ins_pipe( pipe_slow ); 37.2001 %} 37.2002 37.2003 // Store Aligned Packed Char/Short XMM register to memory 37.2004 -instruct storeA4C(memory mem, regXD src) %{ 37.2005 +instruct storeA4C(memory mem, regD src) %{ 37.2006 predicate(UseSSE>=1); 37.2007 match(Set mem (Store4C mem src)); 37.2008 ins_cost(145); 37.2009 format %{ "MOVQ $mem,$src\t! packed4C" %} 37.2010 - ins_encode( movq_st(mem, src)); 37.2011 + ins_encode %{ 37.2012 + __ movq($mem$$Address, $src$$XMMRegister); 37.2013 + %} 37.2014 ins_pipe( pipe_slow ); 37.2015 %} 37.2016 37.2017 // Store Aligned Packed Integer XMM register to memory 37.2018 -instruct storeA2I(memory mem, regXD src) %{ 37.2019 +instruct storeA2I(memory mem, regD src) %{ 37.2020 predicate(UseSSE>=1); 37.2021 match(Set mem (Store2I mem src)); 37.2022 ins_cost(145); 37.2023 format %{ "MOVQ $mem,$src\t! 
packed2I" %} 37.2024 - ins_encode( movq_st(mem, src)); 37.2025 + ins_encode %{ 37.2026 + __ movq($mem$$Address, $src$$XMMRegister); 37.2027 + %} 37.2028 ins_pipe( pipe_slow ); 37.2029 %} 37.2030 37.2031 @@ -7679,98 +7140,116 @@ 37.2032 %} 37.2033 37.2034 // Store Double 37.2035 -instruct storeD( memory mem, regDPR1 src) %{ 37.2036 +instruct storeDPR( memory mem, regDPR1 src) %{ 37.2037 predicate(UseSSE<=1); 37.2038 match(Set mem (StoreD mem src)); 37.2039 37.2040 ins_cost(100); 37.2041 format %{ "FST_D $mem,$src" %} 37.2042 opcode(0xDD); /* DD /2 */ 37.2043 - ins_encode( enc_FP_store(mem,src) ); 37.2044 + ins_encode( enc_FPR_store(mem,src) ); 37.2045 ins_pipe( fpu_mem_reg ); 37.2046 %} 37.2047 37.2048 // Store double does rounding on x86 37.2049 -instruct storeD_rounded( memory mem, regDPR1 src) %{ 37.2050 +instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 37.2051 predicate(UseSSE<=1); 37.2052 match(Set mem (StoreD mem (RoundDouble src))); 37.2053 37.2054 ins_cost(100); 37.2055 format %{ "FST_D $mem,$src\t# round" %} 37.2056 opcode(0xDD); /* DD /2 */ 37.2057 - ins_encode( enc_FP_store(mem,src) ); 37.2058 + ins_encode( enc_FPR_store(mem,src) ); 37.2059 ins_pipe( fpu_mem_reg ); 37.2060 %} 37.2061 37.2062 // Store XMM register to memory (double-precision floating points) 37.2063 // MOVSD instruction 37.2064 -instruct storeXD(memory mem, regXD src) %{ 37.2065 +instruct storeD(memory mem, regD src) %{ 37.2066 predicate(UseSSE>=2); 37.2067 match(Set mem (StoreD mem src)); 37.2068 ins_cost(95); 37.2069 format %{ "MOVSD $mem,$src" %} 37.2070 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); 37.2071 + ins_encode %{ 37.2072 + __ movdbl($mem$$Address, $src$$XMMRegister); 37.2073 + %} 37.2074 ins_pipe( pipe_slow ); 37.2075 %} 37.2076 37.2077 // Store XMM register to memory (single-precision floating point) 37.2078 // MOVSS instruction 37.2079 -instruct storeX(memory mem, regX src) %{ 37.2080 +instruct storeF(memory mem, regF src) %{ 37.2081 
predicate(UseSSE>=1); 37.2082 match(Set mem (StoreF mem src)); 37.2083 ins_cost(95); 37.2084 format %{ "MOVSS $mem,$src" %} 37.2085 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem)); 37.2086 + ins_encode %{ 37.2087 + __ movflt($mem$$Address, $src$$XMMRegister); 37.2088 + %} 37.2089 ins_pipe( pipe_slow ); 37.2090 %} 37.2091 37.2092 // Store Aligned Packed Single Float XMM register to memory 37.2093 -instruct storeA2F(memory mem, regXD src) %{ 37.2094 +instruct storeA2F(memory mem, regD src) %{ 37.2095 predicate(UseSSE>=1); 37.2096 match(Set mem (Store2F mem src)); 37.2097 ins_cost(145); 37.2098 format %{ "MOVQ $mem,$src\t! packed2F" %} 37.2099 - ins_encode( movq_st(mem, src)); 37.2100 + ins_encode %{ 37.2101 + __ movq($mem$$Address, $src$$XMMRegister); 37.2102 + %} 37.2103 ins_pipe( pipe_slow ); 37.2104 %} 37.2105 37.2106 // Store Float 37.2107 -instruct storeF( memory mem, regFPR1 src) %{ 37.2108 +instruct storeFPR( memory mem, regFPR1 src) %{ 37.2109 predicate(UseSSE==0); 37.2110 match(Set mem (StoreF mem src)); 37.2111 37.2112 ins_cost(100); 37.2113 format %{ "FST_S $mem,$src" %} 37.2114 opcode(0xD9); /* D9 /2 */ 37.2115 - ins_encode( enc_FP_store(mem,src) ); 37.2116 + ins_encode( enc_FPR_store(mem,src) ); 37.2117 ins_pipe( fpu_mem_reg ); 37.2118 %} 37.2119 37.2120 // Store Float does rounding on x86 37.2121 -instruct storeF_rounded( memory mem, regFPR1 src) %{ 37.2122 +instruct storeFPR_rounded( memory mem, regFPR1 src) %{ 37.2123 predicate(UseSSE==0); 37.2124 match(Set mem (StoreF mem (RoundFloat src))); 37.2125 37.2126 ins_cost(100); 37.2127 format %{ "FST_S $mem,$src\t# round" %} 37.2128 opcode(0xD9); /* D9 /2 */ 37.2129 - ins_encode( enc_FP_store(mem,src) ); 37.2130 + ins_encode( enc_FPR_store(mem,src) ); 37.2131 ins_pipe( fpu_mem_reg ); 37.2132 %} 37.2133 37.2134 // Store Float does rounding on x86 37.2135 -instruct storeF_Drounded( memory mem, regDPR1 src) %{ 37.2136 +instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ 37.2137 
predicate(UseSSE<=1); 37.2138 match(Set mem (StoreF mem (ConvD2F src))); 37.2139 37.2140 ins_cost(100); 37.2141 format %{ "FST_S $mem,$src\t# D-round" %} 37.2142 opcode(0xD9); /* D9 /2 */ 37.2143 - ins_encode( enc_FP_store(mem,src) ); 37.2144 + ins_encode( enc_FPR_store(mem,src) ); 37.2145 ins_pipe( fpu_mem_reg ); 37.2146 %} 37.2147 37.2148 // Store immediate Float value (it is faster than store from FPU register) 37.2149 +// The instruction usage is guarded by predicate in operand immFPR(). 37.2150 +instruct storeFPR_imm( memory mem, immFPR src) %{ 37.2151 + match(Set mem (StoreF mem src)); 37.2152 + 37.2153 + ins_cost(50); 37.2154 + format %{ "MOV $mem,$src\t# store float" %} 37.2155 + opcode(0xC7); /* C7 /0 */ 37.2156 + ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 37.2157 + ins_pipe( ialu_mem_imm ); 37.2158 +%} 37.2159 + 37.2160 +// Store immediate Float value (it is faster than store from XMM register) 37.2161 // The instruction usage is guarded by predicate in operand immF(). 37.2162 instruct storeF_imm( memory mem, immF src) %{ 37.2163 match(Set mem (StoreF mem src)); 37.2164 @@ -7782,18 +7261,6 @@ 37.2165 ins_pipe( ialu_mem_imm ); 37.2166 %} 37.2167 37.2168 -// Store immediate Float value (it is faster than store from XMM register) 37.2169 -// The instruction usage is guarded by predicate in operand immXF(). 
37.2170 -instruct storeX_imm( memory mem, immXF src) %{ 37.2171 - match(Set mem (StoreF mem src)); 37.2172 - 37.2173 - ins_cost(50); 37.2174 - format %{ "MOV $mem,$src\t# store float" %} 37.2175 - opcode(0xC7); /* C7 /0 */ 37.2176 - ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src )); 37.2177 - ins_pipe( ialu_mem_imm ); 37.2178 -%} 37.2179 - 37.2180 // Store Integer to stack slot 37.2181 instruct storeSSI(stackSlotI dst, eRegI src) %{ 37.2182 match(Set dst src); 37.2183 @@ -7901,6 +7368,16 @@ 37.2184 ins_pipe(empty); 37.2185 %} 37.2186 37.2187 +instruct membar_storestore() %{ 37.2188 + match(MemBarStoreStore); 37.2189 + ins_cost(0); 37.2190 + 37.2191 + size(0); 37.2192 + format %{ "MEMBAR-storestore (empty encoding)" %} 37.2193 + ins_encode( ); 37.2194 + ins_pipe(empty); 37.2195 +%} 37.2196 + 37.2197 //----------Move Instructions-------------------------------------------------- 37.2198 instruct castX2P(eAXRegP dst, eAXRegI src) %{ 37.2199 match(Set dst (CastX2P src)); 37.2200 @@ -8088,29 +7565,29 @@ 37.2201 //%} 37.2202 37.2203 // Conditional move 37.2204 -instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{ 37.2205 +instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 37.2206 predicate(UseSSE<=1); 37.2207 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 37.2208 ins_cost(200); 37.2209 format %{ "FCMOV$cop $dst,$src\t# double" %} 37.2210 opcode(0xDA); 37.2211 - ins_encode( enc_cmov_d(cop,src) ); 37.2212 - ins_pipe( pipe_cmovD_reg ); 37.2213 + ins_encode( enc_cmov_dpr(cop,src) ); 37.2214 + ins_pipe( pipe_cmovDPR_reg ); 37.2215 %} 37.2216 37.2217 // Conditional move 37.2218 -instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{ 37.2219 +instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 37.2220 predicate(UseSSE==0); 37.2221 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 37.2222 ins_cost(200); 37.2223 format %{ "FCMOV$cop 
$dst,$src\t# float" %} 37.2224 opcode(0xDA); 37.2225 - ins_encode( enc_cmov_d(cop,src) ); 37.2226 - ins_pipe( pipe_cmovD_reg ); 37.2227 + ins_encode( enc_cmov_dpr(cop,src) ); 37.2228 + ins_pipe( pipe_cmovDPR_reg ); 37.2229 %} 37.2230 37.2231 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 37.2232 -instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 37.2233 +instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 37.2234 predicate(UseSSE<=1); 37.2235 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 37.2236 ins_cost(200); 37.2237 @@ -8118,12 +7595,12 @@ 37.2238 "MOV $dst,$src\t# double\n" 37.2239 "skip:" %} 37.2240 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 37.2241 - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) ); 37.2242 - ins_pipe( pipe_cmovD_reg ); 37.2243 + ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 37.2244 + ins_pipe( pipe_cmovDPR_reg ); 37.2245 %} 37.2246 37.2247 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 
37.2248 -instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 37.2249 +instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 37.2250 predicate(UseSSE==0); 37.2251 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 37.2252 ins_cost(200); 37.2253 @@ -8131,12 +7608,12 @@ 37.2254 "MOV $dst,$src\t# float\n" 37.2255 "skip:" %} 37.2256 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 37.2257 - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) ); 37.2258 - ins_pipe( pipe_cmovD_reg ); 37.2259 + ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 37.2260 + ins_pipe( pipe_cmovDPR_reg ); 37.2261 %} 37.2262 37.2263 // No CMOVE with SSE/SSE2 37.2264 -instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{ 37.2265 +instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 37.2266 predicate (UseSSE>=1); 37.2267 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 37.2268 ins_cost(200); 37.2269 @@ -8154,7 +7631,7 @@ 37.2270 %} 37.2271 37.2272 // No CMOVE with SSE/SSE2 37.2273 -instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{ 37.2274 +instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 37.2275 predicate (UseSSE>=2); 37.2276 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 37.2277 ins_cost(200); 37.2278 @@ -8172,7 +7649,7 @@ 37.2279 %} 37.2280 37.2281 // unsigned version 37.2282 -instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{ 37.2283 +instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 37.2284 predicate (UseSSE>=1); 37.2285 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 37.2286 ins_cost(200); 37.2287 @@ -8189,17 +7666,17 @@ 37.2288 ins_pipe( pipe_slow ); 37.2289 %} 37.2290 37.2291 -instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{ 37.2292 +instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 37.2293 predicate 
(UseSSE>=1); 37.2294 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 37.2295 ins_cost(200); 37.2296 expand %{ 37.2297 - fcmovX_regU(cop, cr, dst, src); 37.2298 + fcmovF_regU(cop, cr, dst, src); 37.2299 %} 37.2300 %} 37.2301 37.2302 // unsigned version 37.2303 -instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{ 37.2304 +instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 37.2305 predicate (UseSSE>=2); 37.2306 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 37.2307 ins_cost(200); 37.2308 @@ -8216,12 +7693,12 @@ 37.2309 ins_pipe( pipe_slow ); 37.2310 %} 37.2311 37.2312 -instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{ 37.2313 +instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 37.2314 predicate (UseSSE>=2); 37.2315 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 37.2316 ins_cost(200); 37.2317 expand %{ 37.2318 - fcmovXD_regU(cop, cr, dst, src); 37.2319 + fcmovD_regU(cop, cr, dst, src); 37.2320 %} 37.2321 %} 37.2322 37.2323 @@ -8440,7 +7917,7 @@ 37.2324 %} 37.2325 37.2326 // LoadLong-locked - same as a volatile long load when used with compare-swap 37.2327 -instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{ 37.2328 +instruct loadLLocked(stackSlotL dst, memory mem) %{ 37.2329 predicate(UseSSE<=1); 37.2330 match(Set dst (LoadLLocked mem)); 37.2331 37.2332 @@ -8451,18 +7928,21 @@ 37.2333 ins_pipe( fpu_reg_mem ); 37.2334 %} 37.2335 37.2336 -instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{ 37.2337 +instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{ 37.2338 predicate(UseSSE>=2); 37.2339 match(Set dst (LoadLLocked mem)); 37.2340 effect(TEMP tmp); 37.2341 ins_cost(180); 37.2342 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 37.2343 "MOVSD $dst,$tmp" %} 37.2344 - ins_encode(enc_loadLX_volatile(mem, dst, tmp)); 37.2345 - ins_pipe( pipe_slow ); 37.2346 -%} 37.2347 - 37.2348 -instruct 
loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{ 37.2349 + ins_encode %{ 37.2350 + __ movdbl($tmp$$XMMRegister, $mem$$Address); 37.2351 + __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 37.2352 + %} 37.2353 + ins_pipe( pipe_slow ); 37.2354 +%} 37.2355 + 37.2356 +instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{ 37.2357 predicate(UseSSE>=2); 37.2358 match(Set dst (LoadLLocked mem)); 37.2359 effect(TEMP tmp); 37.2360 @@ -8471,7 +7951,12 @@ 37.2361 "MOVD $dst.lo,$tmp\n\t" 37.2362 "PSRLQ $tmp,32\n\t" 37.2363 "MOVD $dst.hi,$tmp" %} 37.2364 - ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); 37.2365 + ins_encode %{ 37.2366 + __ movdbl($tmp$$XMMRegister, $mem$$Address); 37.2367 + __ movdl($dst$$Register, $tmp$$XMMRegister); 37.2368 + __ psrlq($tmp$$XMMRegister, 32); 37.2369 + __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 37.2370 + %} 37.2371 ins_pipe( pipe_slow ); 37.2372 %} 37.2373 37.2374 @@ -10054,7 +9539,7 @@ 37.2375 // Compare & branch 37.2376 37.2377 // P6 version of float compare, sets condition codes in EFLAGS 37.2378 -instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 37.2379 +instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 37.2380 predicate(VM_Version::supports_cmov() && UseSSE <=1); 37.2381 match(Set cr (CmpD src1 src2)); 37.2382 effect(KILL rax); 37.2383 @@ -10066,26 +9551,26 @@ 37.2384 "SAHF\n" 37.2385 "exit:\tNOP // avoid branch to branch" %} 37.2386 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 37.2387 - ins_encode( Push_Reg_D(src1), 37.2388 + ins_encode( Push_Reg_DPR(src1), 37.2389 OpcP, RegOpc(src2), 37.2390 cmpF_P6_fixup ); 37.2391 ins_pipe( pipe_slow ); 37.2392 %} 37.2393 37.2394 -instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{ 37.2395 +instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 37.2396 predicate(VM_Version::supports_cmov() && UseSSE <=1); 37.2397 match(Set cr (CmpD src1 src2)); 37.2398 ins_cost(150); 37.2399 
format %{ "FLD $src1\n\t" 37.2400 "FUCOMIP ST,$src2 // P6 instruction" %} 37.2401 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 37.2402 - ins_encode( Push_Reg_D(src1), 37.2403 + ins_encode( Push_Reg_DPR(src1), 37.2404 OpcP, RegOpc(src2)); 37.2405 ins_pipe( pipe_slow ); 37.2406 %} 37.2407 37.2408 // Compare & branch 37.2409 -instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{ 37.2410 +instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 37.2411 predicate(UseSSE<=1); 37.2412 match(Set cr (CmpD src1 src2)); 37.2413 effect(KILL rax); 37.2414 @@ -10098,138 +9583,140 @@ 37.2415 "MOV AH,1\t# unordered treat as LT\n" 37.2416 "flags:\tSAHF" %} 37.2417 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 37.2418 - ins_encode( Push_Reg_D(src1), 37.2419 + ins_encode( Push_Reg_DPR(src1), 37.2420 OpcP, RegOpc(src2), 37.2421 fpu_flags); 37.2422 ins_pipe( pipe_slow ); 37.2423 %} 37.2424 37.2425 // Compare vs zero into -1,0,1 37.2426 -instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{ 37.2427 +instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 37.2428 predicate(UseSSE<=1); 37.2429 match(Set dst (CmpD3 src1 zero)); 37.2430 effect(KILL cr, KILL rax); 37.2431 ins_cost(280); 37.2432 format %{ "FTSTD $dst,$src1" %} 37.2433 opcode(0xE4, 0xD9); 37.2434 - ins_encode( Push_Reg_D(src1), 37.2435 + ins_encode( Push_Reg_DPR(src1), 37.2436 OpcS, OpcP, PopFPU, 37.2437 CmpF_Result(dst)); 37.2438 ins_pipe( pipe_slow ); 37.2439 %} 37.2440 37.2441 // Compare into -1,0,1 37.2442 -instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{ 37.2443 +instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 37.2444 predicate(UseSSE<=1); 37.2445 match(Set dst (CmpD3 src1 src2)); 37.2446 effect(KILL cr, KILL rax); 37.2447 ins_cost(300); 37.2448 format %{ "FCMPD $dst,$src1,$src2" %} 37.2449 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 37.2450 - ins_encode( 
Push_Reg_D(src1), 37.2451 + ins_encode( Push_Reg_DPR(src1), 37.2452 OpcP, RegOpc(src2), 37.2453 CmpF_Result(dst)); 37.2454 ins_pipe( pipe_slow ); 37.2455 %} 37.2456 37.2457 // float compare and set condition codes in EFLAGS by XMM regs 37.2458 -instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{ 37.2459 +instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 37.2460 predicate(UseSSE>=2); 37.2461 - match(Set cr (CmpD dst src)); 37.2462 - effect(KILL rax); 37.2463 - ins_cost(125); 37.2464 - format %{ "COMISD $dst,$src\n" 37.2465 - "\tJNP exit\n" 37.2466 - "\tMOV ah,1 // saw a NaN, set CF\n" 37.2467 - "\tSAHF\n" 37.2468 - "exit:\tNOP // avoid branch to branch" %} 37.2469 - opcode(0x66, 0x0F, 0x2F); 37.2470 - ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup); 37.2471 - ins_pipe( pipe_slow ); 37.2472 -%} 37.2473 - 37.2474 -instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{ 37.2475 + match(Set cr (CmpD src1 src2)); 37.2476 + ins_cost(145); 37.2477 + format %{ "UCOMISD $src1,$src2\n\t" 37.2478 + "JNP,s exit\n\t" 37.2479 + "PUSHF\t# saw NaN, set CF\n\t" 37.2480 + "AND [rsp], #0xffffff2b\n\t" 37.2481 + "POPF\n" 37.2482 + "exit:" %} 37.2483 + ins_encode %{ 37.2484 + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 37.2485 + emit_cmpfp_fixup(_masm); 37.2486 + %} 37.2487 + ins_pipe( pipe_slow ); 37.2488 +%} 37.2489 + 37.2490 +instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 37.2491 predicate(UseSSE>=2); 37.2492 - match(Set cr (CmpD dst src)); 37.2493 + match(Set cr (CmpD src1 src2)); 37.2494 ins_cost(100); 37.2495 - format %{ "COMISD $dst,$src" %} 37.2496 - opcode(0x66, 0x0F, 0x2F); 37.2497 - ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 37.2498 + format %{ "UCOMISD $src1,$src2" %} 37.2499 + ins_encode %{ 37.2500 + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 37.2501 + %} 37.2502 ins_pipe( pipe_slow ); 37.2503 %} 37.2504 37.2505 // float compare and set condition codes in EFLAGS by XMM 
regs 37.2506 -instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{ 37.2507 +instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 37.2508 predicate(UseSSE>=2); 37.2509 - match(Set cr (CmpD dst (LoadD src))); 37.2510 - effect(KILL rax); 37.2511 + match(Set cr (CmpD src1 (LoadD src2))); 37.2512 ins_cost(145); 37.2513 - format %{ "COMISD $dst,$src\n" 37.2514 - "\tJNP exit\n" 37.2515 - "\tMOV ah,1 // saw a NaN, set CF\n" 37.2516 - "\tSAHF\n" 37.2517 - "exit:\tNOP // avoid branch to branch" %} 37.2518 - opcode(0x66, 0x0F, 0x2F); 37.2519 - ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup); 37.2520 - ins_pipe( pipe_slow ); 37.2521 -%} 37.2522 - 37.2523 -instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{ 37.2524 + format %{ "UCOMISD $src1,$src2\n\t" 37.2525 + "JNP,s exit\n\t" 37.2526 + "PUSHF\t# saw NaN, set CF\n\t" 37.2527 + "AND [rsp], #0xffffff2b\n\t" 37.2528 + "POPF\n" 37.2529 + "exit:" %} 37.2530 + ins_encode %{ 37.2531 + __ ucomisd($src1$$XMMRegister, $src2$$Address); 37.2532 + emit_cmpfp_fixup(_masm); 37.2533 + %} 37.2534 + ins_pipe( pipe_slow ); 37.2535 +%} 37.2536 + 37.2537 +instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 37.2538 predicate(UseSSE>=2); 37.2539 - match(Set cr (CmpD dst (LoadD src))); 37.2540 + match(Set cr (CmpD src1 (LoadD src2))); 37.2541 ins_cost(100); 37.2542 - format %{ "COMISD $dst,$src" %} 37.2543 - opcode(0x66, 0x0F, 0x2F); 37.2544 - ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src)); 37.2545 + format %{ "UCOMISD $src1,$src2" %} 37.2546 + ins_encode %{ 37.2547 + __ ucomisd($src1$$XMMRegister, $src2$$Address); 37.2548 + %} 37.2549 ins_pipe( pipe_slow ); 37.2550 %} 37.2551 37.2552 // Compare into -1,0,1 in XMM 37.2553 -instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{ 37.2554 +instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 37.2555 predicate(UseSSE>=2); 37.2556 match(Set dst (CmpD3 src1 src2)); 37.2557 
effect(KILL cr); 37.2558 ins_cost(255); 37.2559 - format %{ "XOR $dst,$dst\n" 37.2560 - "\tCOMISD $src1,$src2\n" 37.2561 - "\tJP,s nan\n" 37.2562 - "\tJEQ,s exit\n" 37.2563 - "\tJA,s inc\n" 37.2564 - "nan:\tDEC $dst\n" 37.2565 - "\tJMP,s exit\n" 37.2566 - "inc:\tINC $dst\n" 37.2567 - "exit:" 37.2568 - %} 37.2569 - opcode(0x66, 0x0F, 0x2F); 37.2570 - ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2), 37.2571 - CmpX_Result(dst)); 37.2572 + format %{ "UCOMISD $src1, $src2\n\t" 37.2573 + "MOV $dst, #-1\n\t" 37.2574 + "JP,s done\n\t" 37.2575 + "JB,s done\n\t" 37.2576 + "SETNE $dst\n\t" 37.2577 + "MOVZB $dst, $dst\n" 37.2578 + "done:" %} 37.2579 + ins_encode %{ 37.2580 + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 37.2581 + emit_cmpfp3(_masm, $dst$$Register); 37.2582 + %} 37.2583 ins_pipe( pipe_slow ); 37.2584 %} 37.2585 37.2586 // Compare into -1,0,1 in XMM and memory 37.2587 -instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{ 37.2588 +instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 37.2589 predicate(UseSSE>=2); 37.2590 - match(Set dst (CmpD3 src1 (LoadD mem))); 37.2591 + match(Set dst (CmpD3 src1 (LoadD src2))); 37.2592 effect(KILL cr); 37.2593 ins_cost(275); 37.2594 - format %{ "COMISD $src1,$mem\n" 37.2595 - "\tMOV $dst,0\t\t# do not blow flags\n" 37.2596 - "\tJP,s nan\n" 37.2597 - "\tJEQ,s exit\n" 37.2598 - "\tJA,s inc\n" 37.2599 - "nan:\tDEC $dst\n" 37.2600 - "\tJMP,s exit\n" 37.2601 - "inc:\tINC $dst\n" 37.2602 - "exit:" 37.2603 - %} 37.2604 - opcode(0x66, 0x0F, 0x2F); 37.2605 - ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem), 37.2606 - LdImmI(dst,0x0), CmpX_Result(dst)); 37.2607 - ins_pipe( pipe_slow ); 37.2608 -%} 37.2609 - 37.2610 - 37.2611 -instruct subD_reg(regD dst, regD src) %{ 37.2612 + format %{ "UCOMISD $src1, $src2\n\t" 37.2613 + "MOV $dst, #-1\n\t" 37.2614 + "JP,s done\n\t" 37.2615 + "JB,s done\n\t" 37.2616 + "SETNE $dst\n\t" 37.2617 + "MOVZB $dst, $dst\n" 
37.2618 + "done:" %} 37.2619 + ins_encode %{ 37.2620 + __ ucomisd($src1$$XMMRegister, $src2$$Address); 37.2621 + emit_cmpfp3(_masm, $dst$$Register); 37.2622 + %} 37.2623 + ins_pipe( pipe_slow ); 37.2624 +%} 37.2625 + 37.2626 + 37.2627 +instruct subDPR_reg(regDPR dst, regDPR src) %{ 37.2628 predicate (UseSSE <=1); 37.2629 match(Set dst (SubD dst src)); 37.2630 37.2631 @@ -10237,12 +9724,12 @@ 37.2632 "DSUBp $dst,ST" %} 37.2633 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 37.2634 ins_cost(150); 37.2635 - ins_encode( Push_Reg_D(src), 37.2636 + ins_encode( Push_Reg_DPR(src), 37.2637 OpcP, RegOpc(dst) ); 37.2638 ins_pipe( fpu_reg_reg ); 37.2639 %} 37.2640 37.2641 -instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 37.2642 +instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 37.2643 predicate (UseSSE <=1); 37.2644 match(Set dst (RoundDouble (SubD src1 src2))); 37.2645 ins_cost(250); 37.2646 @@ -10251,13 +9738,13 @@ 37.2647 "DSUB ST,$src1\n\t" 37.2648 "FSTP_D $dst\t# D-round" %} 37.2649 opcode(0xD8, 0x5); 37.2650 - ins_encode( Push_Reg_D(src2), 37.2651 - OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 37.2652 + ins_encode( Push_Reg_DPR(src2), 37.2653 + OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 37.2654 ins_pipe( fpu_mem_reg_reg ); 37.2655 %} 37.2656 37.2657 37.2658 -instruct subD_reg_mem(regD dst, memory src) %{ 37.2659 +instruct subDPR_reg_mem(regDPR dst, memory src) %{ 37.2660 predicate (UseSSE <=1); 37.2661 match(Set dst (SubD dst (LoadD src))); 37.2662 ins_cost(150); 37.2663 @@ -10270,7 +9757,7 @@ 37.2664 ins_pipe( fpu_reg_mem ); 37.2665 %} 37.2666 37.2667 -instruct absD_reg(regDPR1 dst, regDPR1 src) %{ 37.2668 +instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 37.2669 predicate (UseSSE<=1); 37.2670 match(Set dst (AbsD src)); 37.2671 ins_cost(100); 37.2672 @@ -10280,15 +9767,7 @@ 37.2673 ins_pipe( fpu_reg_reg ); 37.2674 %} 37.2675 37.2676 -instruct absXD_reg( regXD dst ) %{ 37.2677 - predicate(UseSSE>=2); 37.2678 - match(Set dst (AbsD dst)); 37.2679 - 
format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %} 37.2680 - ins_encode( AbsXD_encoding(dst)); 37.2681 - ins_pipe( pipe_slow ); 37.2682 -%} 37.2683 - 37.2684 -instruct negD_reg(regDPR1 dst, regDPR1 src) %{ 37.2685 +instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 37.2686 predicate(UseSSE<=1); 37.2687 match(Set dst (NegD src)); 37.2688 ins_cost(100); 37.2689 @@ -10298,18 +9777,7 @@ 37.2690 ins_pipe( fpu_reg_reg ); 37.2691 %} 37.2692 37.2693 -instruct negXD_reg( regXD dst ) %{ 37.2694 - predicate(UseSSE>=2); 37.2695 - match(Set dst (NegD dst)); 37.2696 - format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %} 37.2697 - ins_encode %{ 37.2698 - __ xorpd($dst$$XMMRegister, 37.2699 - ExternalAddress((address)double_signflip_pool)); 37.2700 - %} 37.2701 - ins_pipe( pipe_slow ); 37.2702 -%} 37.2703 - 37.2704 -instruct addD_reg(regD dst, regD src) %{ 37.2705 +instruct addDPR_reg(regDPR dst, regDPR src) %{ 37.2706 predicate(UseSSE<=1); 37.2707 match(Set dst (AddD dst src)); 37.2708 format %{ "FLD $src\n\t" 37.2709 @@ -10317,13 +9785,13 @@ 37.2710 size(4); 37.2711 ins_cost(150); 37.2712 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 37.2713 - ins_encode( Push_Reg_D(src), 37.2714 + ins_encode( Push_Reg_DPR(src), 37.2715 OpcP, RegOpc(dst) ); 37.2716 ins_pipe( fpu_reg_reg ); 37.2717 %} 37.2718 37.2719 37.2720 -instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 37.2721 +instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 37.2722 predicate(UseSSE<=1); 37.2723 match(Set dst (RoundDouble (AddD src1 src2))); 37.2724 ins_cost(250); 37.2725 @@ -10332,13 +9800,13 @@ 37.2726 "DADD ST,$src1\n\t" 37.2727 "FSTP_D $dst\t# D-round" %} 37.2728 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 37.2729 - ins_encode( Push_Reg_D(src2), 37.2730 - OpcP, RegOpc(src1), Pop_Mem_D(dst) ); 37.2731 + ins_encode( Push_Reg_DPR(src2), 37.2732 + OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 37.2733 ins_pipe( fpu_mem_reg_reg ); 37.2734 %} 37.2735 37.2736 
37.2737 -instruct addD_reg_mem(regD dst, memory src) %{ 37.2738 +instruct addDPR_reg_mem(regDPR dst, memory src) %{ 37.2739 predicate(UseSSE<=1); 37.2740 match(Set dst (AddD dst (LoadD src))); 37.2741 ins_cost(150); 37.2742 @@ -10352,7 +9820,7 @@ 37.2743 %} 37.2744 37.2745 // add-to-memory 37.2746 -instruct addD_mem_reg(memory dst, regD src) %{ 37.2747 +instruct addDPR_mem_reg(memory dst, regDPR src) %{ 37.2748 predicate(UseSSE<=1); 37.2749 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 37.2750 ins_cost(150); 37.2751 @@ -10368,7 +9836,7 @@ 37.2752 ins_pipe( fpu_reg_mem ); 37.2753 %} 37.2754 37.2755 -instruct addD_reg_imm1(regD dst, immD1 con) %{ 37.2756 +instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 37.2757 predicate(UseSSE<=1); 37.2758 match(Set dst (AddD dst con)); 37.2759 ins_cost(125); 37.2760 @@ -10381,7 +9849,7 @@ 37.2761 ins_pipe(fpu_reg); 37.2762 %} 37.2763 37.2764 -instruct addD_reg_imm(regD dst, immD con) %{ 37.2765 +instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 37.2766 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 37.2767 match(Set dst (AddD dst con)); 37.2768 ins_cost(200); 37.2769 @@ -10394,7 +9862,7 @@ 37.2770 ins_pipe(fpu_reg_mem); 37.2771 %} 37.2772 37.2773 -instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{ 37.2774 +instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 37.2775 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 37.2776 match(Set dst (RoundDouble (AddD src con))); 37.2777 ins_cost(200); 37.2778 @@ -10409,124 +9877,14 @@ 37.2779 ins_pipe(fpu_mem_reg_con); 37.2780 %} 37.2781 37.2782 -// Add two double precision floating point values in xmm 37.2783 -instruct addXD_reg(regXD dst, regXD src) %{ 37.2784 - predicate(UseSSE>=2); 37.2785 - match(Set dst (AddD dst src)); 37.2786 - format %{ "ADDSD $dst,$src" %} 37.2787 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), 
RegReg(dst, src)); 37.2788 - ins_pipe( pipe_slow ); 37.2789 -%} 37.2790 - 37.2791 -instruct addXD_imm(regXD dst, immXD con) %{ 37.2792 - predicate(UseSSE>=2); 37.2793 - match(Set dst (AddD dst con)); 37.2794 - format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 37.2795 - ins_encode %{ 37.2796 - __ addsd($dst$$XMMRegister, $constantaddress($con)); 37.2797 - %} 37.2798 - ins_pipe(pipe_slow); 37.2799 -%} 37.2800 - 37.2801 -instruct addXD_mem(regXD dst, memory mem) %{ 37.2802 - predicate(UseSSE>=2); 37.2803 - match(Set dst (AddD dst (LoadD mem))); 37.2804 - format %{ "ADDSD $dst,$mem" %} 37.2805 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem)); 37.2806 - ins_pipe( pipe_slow ); 37.2807 -%} 37.2808 - 37.2809 -// Sub two double precision floating point values in xmm 37.2810 -instruct subXD_reg(regXD dst, regXD src) %{ 37.2811 - predicate(UseSSE>=2); 37.2812 - match(Set dst (SubD dst src)); 37.2813 - format %{ "SUBSD $dst,$src" %} 37.2814 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); 37.2815 - ins_pipe( pipe_slow ); 37.2816 -%} 37.2817 - 37.2818 -instruct subXD_imm(regXD dst, immXD con) %{ 37.2819 - predicate(UseSSE>=2); 37.2820 - match(Set dst (SubD dst con)); 37.2821 - format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 37.2822 - ins_encode %{ 37.2823 - __ subsd($dst$$XMMRegister, $constantaddress($con)); 37.2824 - %} 37.2825 - ins_pipe(pipe_slow); 37.2826 -%} 37.2827 - 37.2828 -instruct subXD_mem(regXD dst, memory mem) %{ 37.2829 - predicate(UseSSE>=2); 37.2830 - match(Set dst (SubD dst (LoadD mem))); 37.2831 - format %{ "SUBSD $dst,$mem" %} 37.2832 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); 37.2833 - ins_pipe( pipe_slow ); 37.2834 -%} 37.2835 - 37.2836 -// Mul two double precision floating point values in xmm 37.2837 -instruct mulXD_reg(regXD dst, regXD src) %{ 37.2838 - predicate(UseSSE>=2); 37.2839 - match(Set dst 
(MulD dst src)); 37.2840 - format %{ "MULSD $dst,$src" %} 37.2841 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); 37.2842 - ins_pipe( pipe_slow ); 37.2843 -%} 37.2844 - 37.2845 -instruct mulXD_imm(regXD dst, immXD con) %{ 37.2846 - predicate(UseSSE>=2); 37.2847 - match(Set dst (MulD dst con)); 37.2848 - format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 37.2849 - ins_encode %{ 37.2850 - __ mulsd($dst$$XMMRegister, $constantaddress($con)); 37.2851 - %} 37.2852 - ins_pipe(pipe_slow); 37.2853 -%} 37.2854 - 37.2855 -instruct mulXD_mem(regXD dst, memory mem) %{ 37.2856 - predicate(UseSSE>=2); 37.2857 - match(Set dst (MulD dst (LoadD mem))); 37.2858 - format %{ "MULSD $dst,$mem" %} 37.2859 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); 37.2860 - ins_pipe( pipe_slow ); 37.2861 -%} 37.2862 - 37.2863 -// Div two double precision floating point values in xmm 37.2864 -instruct divXD_reg(regXD dst, regXD src) %{ 37.2865 - predicate(UseSSE>=2); 37.2866 - match(Set dst (DivD dst src)); 37.2867 - format %{ "DIVSD $dst,$src" %} 37.2868 - opcode(0xF2, 0x0F, 0x5E); 37.2869 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); 37.2870 - ins_pipe( pipe_slow ); 37.2871 -%} 37.2872 - 37.2873 -instruct divXD_imm(regXD dst, immXD con) %{ 37.2874 - predicate(UseSSE>=2); 37.2875 - match(Set dst (DivD dst con)); 37.2876 - format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 37.2877 - ins_encode %{ 37.2878 - __ divsd($dst$$XMMRegister, $constantaddress($con)); 37.2879 - %} 37.2880 - ins_pipe(pipe_slow); 37.2881 -%} 37.2882 - 37.2883 -instruct divXD_mem(regXD dst, memory mem) %{ 37.2884 - predicate(UseSSE>=2); 37.2885 - match(Set dst (DivD dst (LoadD mem))); 37.2886 - format %{ "DIVSD $dst,$mem" %} 37.2887 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); 37.2888 - ins_pipe( pipe_slow ); 37.2889 -%} 37.2890 - 37.2891 - 37.2892 
-instruct mulD_reg(regD dst, regD src) %{ 37.2893 +instruct mulDPR_reg(regDPR dst, regDPR src) %{ 37.2894 predicate(UseSSE<=1); 37.2895 match(Set dst (MulD dst src)); 37.2896 format %{ "FLD $src\n\t" 37.2897 "DMULp $dst,ST" %} 37.2898 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 37.2899 ins_cost(150); 37.2900 - ins_encode( Push_Reg_D(src), 37.2901 + ins_encode( Push_Reg_DPR(src), 37.2902 OpcP, RegOpc(dst) ); 37.2903 ins_pipe( fpu_reg_reg ); 37.2904 %} 37.2905 @@ -10539,7 +9897,7 @@ 37.2906 // multiply scaled arg1 by arg2 37.2907 // rescale product by 2^(15360) 37.2908 // 37.2909 -instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{ 37.2910 +instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 37.2911 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 37.2912 match(Set dst (MulD dst src)); 37.2913 ins_cost(1); // Select this instruction for all strict FP double multiplies 37.2914 @@ -10552,13 +9910,13 @@ 37.2915 "DMULp $dst,ST\n\t" %} 37.2916 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 37.2917 ins_encode( strictfp_bias1(dst), 37.2918 - Push_Reg_D(src), 37.2919 + Push_Reg_DPR(src), 37.2920 OpcP, RegOpc(dst), 37.2921 strictfp_bias2(dst) ); 37.2922 ins_pipe( fpu_reg_reg ); 37.2923 %} 37.2924 37.2925 -instruct mulD_reg_imm(regD dst, immD con) %{ 37.2926 +instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 37.2927 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 37.2928 match(Set dst (MulD dst con)); 37.2929 ins_cost(200); 37.2930 @@ -10572,7 +9930,7 @@ 37.2931 %} 37.2932 37.2933 37.2934 -instruct mulD_reg_mem(regD dst, memory src) %{ 37.2935 +instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 37.2936 predicate( UseSSE<=1 ); 37.2937 match(Set dst (MulD dst (LoadD src))); 37.2938 ins_cost(200); 37.2939 @@ -10586,7 +9944,7 @@ 37.2940 37.2941 // 37.2942 // Cisc-alternate to reg-reg multiply 37.2943 -instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{ 37.2944 
+instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 37.2945 predicate( UseSSE<=1 ); 37.2946 match(Set dst (MulD src (LoadD mem))); 37.2947 ins_cost(250); 37.2948 @@ -10595,17 +9953,17 @@ 37.2949 "FSTP_D $dst" %} 37.2950 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 37.2951 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 37.2952 - OpcReg_F(src), 37.2953 - Pop_Reg_D(dst) ); 37.2954 + OpcReg_FPR(src), 37.2955 + Pop_Reg_DPR(dst) ); 37.2956 ins_pipe( fpu_reg_reg_mem ); 37.2957 %} 37.2958 37.2959 37.2960 -// MACRO3 -- addD a mulD 37.2961 +// MACRO3 -- addDPR a mulDPR 37.2962 // This instruction is a '2-address' instruction in that the result goes 37.2963 // back to src2. This eliminates a move from the macro; possibly the 37.2964 // register allocator will have to add it back (and maybe not). 37.2965 -instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{ 37.2966 +instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 37.2967 predicate( UseSSE<=1 ); 37.2968 match(Set src2 (AddD (MulD src0 src1) src2)); 37.2969 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 37.2970 @@ -10613,29 +9971,29 @@ 37.2971 "DADDp $src2,ST" %} 37.2972 ins_cost(250); 37.2973 opcode(0xDD); /* LoadD DD /0 */ 37.2974 - ins_encode( Push_Reg_F(src0), 37.2975 + ins_encode( Push_Reg_FPR(src0), 37.2976 FMul_ST_reg(src1), 37.2977 FAddP_reg_ST(src2) ); 37.2978 ins_pipe( fpu_reg_reg_reg ); 37.2979 %} 37.2980 37.2981 37.2982 -// MACRO3 -- subD a mulD 37.2983 -instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{ 37.2984 +// MACRO3 -- subDPR a mulDPR 37.2985 +instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 37.2986 predicate( UseSSE<=1 ); 37.2987 match(Set src2 (SubD (MulD src0 src1) src2)); 37.2988 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 37.2989 "DMUL ST,$src1\n\t" 37.2990 "DSUBRp $src2,ST" %} 37.2991 ins_cost(250); 37.2992 - ins_encode( Push_Reg_F(src0), 37.2993 + ins_encode( Push_Reg_FPR(src0), 37.2994 FMul_ST_reg(src1), 37.2995 
Opcode(0xDE), Opc_plus(0xE0,src2)); 37.2996 ins_pipe( fpu_reg_reg_reg ); 37.2997 %} 37.2998 37.2999 37.3000 -instruct divD_reg(regD dst, regD src) %{ 37.3001 +instruct divDPR_reg(regDPR dst, regDPR src) %{ 37.3002 predicate( UseSSE<=1 ); 37.3003 match(Set dst (DivD dst src)); 37.3004 37.3005 @@ -10643,7 +10001,7 @@ 37.3006 "FDIVp $dst,ST" %} 37.3007 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 37.3008 ins_cost(150); 37.3009 - ins_encode( Push_Reg_D(src), 37.3010 + ins_encode( Push_Reg_DPR(src), 37.3011 OpcP, RegOpc(dst) ); 37.3012 ins_pipe( fpu_reg_reg ); 37.3013 %} 37.3014 @@ -10656,7 +10014,7 @@ 37.3015 // divide scaled dividend by divisor 37.3016 // rescale quotient by 2^(15360) 37.3017 // 37.3018 -instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{ 37.3019 +instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 37.3020 predicate (UseSSE<=1); 37.3021 match(Set dst (DivD dst src)); 37.3022 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 37.3023 @@ -10670,13 +10028,13 @@ 37.3024 "DMULp $dst,ST\n\t" %} 37.3025 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 37.3026 ins_encode( strictfp_bias1(dst), 37.3027 - Push_Reg_D(src), 37.3028 + Push_Reg_DPR(src), 37.3029 OpcP, RegOpc(dst), 37.3030 strictfp_bias2(dst) ); 37.3031 ins_pipe( fpu_reg_reg ); 37.3032 %} 37.3033 37.3034 -instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{ 37.3035 +instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 37.3036 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 37.3037 match(Set dst (RoundDouble (DivD src1 src2))); 37.3038 37.3039 @@ -10684,27 +10042,27 @@ 37.3040 "FDIV ST,$src2\n\t" 37.3041 "FSTP_D $dst\t# D-round" %} 37.3042 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 37.3043 - ins_encode( Push_Reg_D(src1), 37.3044 - OpcP, RegOpc(src2), Pop_Mem_D(dst) ); 37.3045 + ins_encode( Push_Reg_DPR(src1), 37.3046 + OpcP, RegOpc(src2), Pop_Mem_DPR(dst) 
); 37.3047 ins_pipe( fpu_mem_reg_reg ); 37.3048 %} 37.3049 37.3050 37.3051 -instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{ 37.3052 +instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 37.3053 predicate(UseSSE<=1); 37.3054 match(Set dst (ModD dst src)); 37.3055 - effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 37.3056 + effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 37.3057 37.3058 format %{ "DMOD $dst,$src" %} 37.3059 ins_cost(250); 37.3060 - ins_encode(Push_Reg_Mod_D(dst, src), 37.3061 - emitModD(), 37.3062 - Push_Result_Mod_D(src), 37.3063 - Pop_Reg_D(dst)); 37.3064 - ins_pipe( pipe_slow ); 37.3065 -%} 37.3066 - 37.3067 -instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{ 37.3068 + ins_encode(Push_Reg_Mod_DPR(dst, src), 37.3069 + emitModDPR(), 37.3070 + Push_Result_Mod_DPR(src), 37.3071 + Pop_Reg_DPR(dst)); 37.3072 + ins_pipe( pipe_slow ); 37.3073 +%} 37.3074 + 37.3075 +instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 37.3076 predicate(UseSSE>=2); 37.3077 match(Set dst (ModD src0 src1)); 37.3078 effect(KILL rax, KILL cr); 37.3079 @@ -10725,11 +10083,11 @@ 37.3080 "\tFSTP ST0\t # Restore FPU Stack" 37.3081 %} 37.3082 ins_cost(250); 37.3083 - ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU); 37.3084 - ins_pipe( pipe_slow ); 37.3085 -%} 37.3086 - 37.3087 -instruct sinD_reg(regDPR1 dst, regDPR1 src) %{ 37.3088 + ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 37.3089 + ins_pipe( pipe_slow ); 37.3090 +%} 37.3091 + 37.3092 +instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 37.3093 predicate (UseSSE<=1); 37.3094 match(Set dst (SinD src)); 37.3095 ins_cost(1800); 37.3096 @@ -10739,18 +10097,18 @@ 37.3097 ins_pipe( pipe_slow ); 37.3098 %} 37.3099 37.3100 -instruct sinXD_reg(regXD dst, eFlagsReg cr) %{ 37.3101 +instruct sinD_reg(regD dst, eFlagsReg cr) %{ 37.3102 
predicate (UseSSE>=2); 37.3103 match(Set dst (SinD dst)); 37.3104 - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 37.3105 + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 37.3106 ins_cost(1800); 37.3107 format %{ "DSIN $dst" %} 37.3108 opcode(0xD9, 0xFE); 37.3109 - ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 37.3110 - ins_pipe( pipe_slow ); 37.3111 -%} 37.3112 - 37.3113 -instruct cosD_reg(regDPR1 dst, regDPR1 src) %{ 37.3114 + ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 37.3115 + ins_pipe( pipe_slow ); 37.3116 +%} 37.3117 + 37.3118 +instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 37.3119 predicate (UseSSE<=1); 37.3120 match(Set dst (CosD src)); 37.3121 ins_cost(1800); 37.3122 @@ -10760,18 +10118,18 @@ 37.3123 ins_pipe( pipe_slow ); 37.3124 %} 37.3125 37.3126 -instruct cosXD_reg(regXD dst, eFlagsReg cr) %{ 37.3127 +instruct cosD_reg(regD dst, eFlagsReg cr) %{ 37.3128 predicate (UseSSE>=2); 37.3129 match(Set dst (CosD dst)); 37.3130 - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 37.3131 + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 37.3132 ins_cost(1800); 37.3133 format %{ "DCOS $dst" %} 37.3134 opcode(0xD9, 0xFF); 37.3135 - ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) ); 37.3136 - ins_pipe( pipe_slow ); 37.3137 -%} 37.3138 - 37.3139 -instruct tanD_reg(regDPR1 dst, regDPR1 src) %{ 37.3140 + ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 37.3141 + ins_pipe( pipe_slow ); 37.3142 +%} 37.3143 + 37.3144 +instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 37.3145 predicate (UseSSE<=1); 37.3146 match(Set dst(TanD src)); 37.3147 format %{ "DTAN $dst" %} 37.3148 @@ -10780,50 +10138,50 @@ 37.3149 ins_pipe( pipe_slow ); 37.3150 %} 37.3151 37.3152 -instruct tanXD_reg(regXD dst, eFlagsReg cr) %{ 37.3153 +instruct tanD_reg(regD dst, eFlagsReg cr) %{ 37.3154 predicate (UseSSE>=2); 37.3155 match(Set dst(TanD dst)); 37.3156 - effect(KILL cr); // 
Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 37.3157 + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 37.3158 format %{ "DTAN $dst" %} 37.3159 - ins_encode( Push_SrcXD(dst), 37.3160 + ins_encode( Push_SrcD(dst), 37.3161 Opcode(0xD9), Opcode(0xF2), // fptan 37.3162 Opcode(0xDD), Opcode(0xD8), // fstp st 37.3163 - Push_ResultXD(dst) ); 37.3164 - ins_pipe( pipe_slow ); 37.3165 -%} 37.3166 - 37.3167 -instruct atanD_reg(regD dst, regD src) %{ 37.3168 + Push_ResultD(dst) ); 37.3169 + ins_pipe( pipe_slow ); 37.3170 +%} 37.3171 + 37.3172 +instruct atanDPR_reg(regDPR dst, regDPR src) %{ 37.3173 predicate (UseSSE<=1); 37.3174 match(Set dst(AtanD dst src)); 37.3175 format %{ "DATA $dst,$src" %} 37.3176 opcode(0xD9, 0xF3); 37.3177 - ins_encode( Push_Reg_D(src), 37.3178 + ins_encode( Push_Reg_DPR(src), 37.3179 OpcP, OpcS, RegOpc(dst) ); 37.3180 ins_pipe( pipe_slow ); 37.3181 %} 37.3182 37.3183 -instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 37.3184 +instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 37.3185 predicate (UseSSE>=2); 37.3186 match(Set dst(AtanD dst src)); 37.3187 - effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8" 37.3188 + effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 37.3189 format %{ "DATA $dst,$src" %} 37.3190 opcode(0xD9, 0xF3); 37.3191 - ins_encode( Push_SrcXD(src), 37.3192 - OpcP, OpcS, Push_ResultXD(dst) ); 37.3193 - ins_pipe( pipe_slow ); 37.3194 -%} 37.3195 - 37.3196 -instruct sqrtD_reg(regD dst, regD src) %{ 37.3197 + ins_encode( Push_SrcD(src), 37.3198 + OpcP, OpcS, Push_ResultD(dst) ); 37.3199 + ins_pipe( pipe_slow ); 37.3200 +%} 37.3201 + 37.3202 +instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 37.3203 predicate (UseSSE<=1); 37.3204 match(Set dst (SqrtD src)); 37.3205 format %{ "DSQRT $dst,$src" %} 37.3206 opcode(0xFA, 0xD9); 37.3207 - ins_encode( Push_Reg_D(src), 37.3208 - OpcS, OpcP, Pop_Reg_D(dst) ); 37.3209 - ins_pipe( pipe_slow ); 37.3210 -%} 37.3211 - 37.3212 -instruct 
powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 37.3213 + ins_encode( Push_Reg_DPR(src), 37.3214 + OpcS, OpcP, Pop_Reg_DPR(dst) ); 37.3215 + ins_pipe( pipe_slow ); 37.3216 +%} 37.3217 + 37.3218 +instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 37.3219 predicate (UseSSE<=1); 37.3220 match(Set Y (PowD X Y)); // Raise X to the Yth power 37.3221 effect(KILL rax, KILL rbx, KILL rcx); 37.3222 @@ -10852,14 +10210,14 @@ 37.3223 "ADD ESP,8" 37.3224 %} 37.3225 ins_encode( push_stack_temp_qword, 37.3226 - Push_Reg_D(X), 37.3227 + Push_Reg_DPR(X), 37.3228 Opcode(0xD9), Opcode(0xF1), // fyl2x 37.3229 pow_exp_core_encoding, 37.3230 pop_stack_temp_qword); 37.3231 ins_pipe( pipe_slow ); 37.3232 %} 37.3233 37.3234 -instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ 37.3235 +instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{ 37.3236 predicate (UseSSE>=2); 37.3237 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 37.3238 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx ); 37.3239 @@ -10897,12 +10255,12 @@ 37.3240 push_xmm_to_fpr1(src0), 37.3241 Opcode(0xD9), Opcode(0xF1), // fyl2x 37.3242 pow_exp_core_encoding, 37.3243 - Push_ResultXD(dst) ); 37.3244 - ins_pipe( pipe_slow ); 37.3245 -%} 37.3246 - 37.3247 - 37.3248 -instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 37.3249 + Push_ResultD(dst) ); 37.3250 + ins_pipe( pipe_slow ); 37.3251 +%} 37.3252 + 37.3253 + 37.3254 +instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 37.3255 predicate (UseSSE<=1); 37.3256 match(Set dpr1 (ExpD dpr1)); 37.3257 effect(KILL rax, KILL rbx, KILL rcx); 37.3258 @@ -10938,7 +10296,7 @@ 37.3259 ins_pipe( pipe_slow ); 37.3260 %} 37.3261 37.3262 -instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 37.3263 +instruct expD_reg(regD dst, 
regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{ 37.3264 predicate (UseSSE>=2); 37.3265 match(Set dst (ExpD src)); 37.3266 effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx); 37.3267 @@ -10969,17 +10327,17 @@ 37.3268 "MOVSD $dst,[ESP]\n\t" 37.3269 "ADD ESP,8" 37.3270 %} 37.3271 - ins_encode( Push_SrcXD(src), 37.3272 + ins_encode( Push_SrcD(src), 37.3273 Opcode(0xD9), Opcode(0xEA), // fldl2e 37.3274 Opcode(0xDE), Opcode(0xC9), // fmulp 37.3275 pow_exp_core_encoding, 37.3276 - Push_ResultXD(dst) ); 37.3277 - ins_pipe( pipe_slow ); 37.3278 -%} 37.3279 - 37.3280 - 37.3281 - 37.3282 -instruct log10D_reg(regDPR1 dst, regDPR1 src) %{ 37.3283 + Push_ResultD(dst) ); 37.3284 + ins_pipe( pipe_slow ); 37.3285 +%} 37.3286 + 37.3287 + 37.3288 + 37.3289 +instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 37.3290 predicate (UseSSE<=1); 37.3291 // The source Double operand on FPU stack 37.3292 match(Set dst (Log10D src)); 37.3293 @@ -10997,7 +10355,7 @@ 37.3294 ins_pipe( pipe_slow ); 37.3295 %} 37.3296 37.3297 -instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 37.3298 +instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 37.3299 predicate (UseSSE>=2); 37.3300 effect(KILL cr); 37.3301 match(Set dst (Log10D src)); 37.3302 @@ -11007,14 +10365,14 @@ 37.3303 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 37.3304 %} 37.3305 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 37.3306 - Push_SrcXD(src), 37.3307 + Push_SrcD(src), 37.3308 Opcode(0xD9), Opcode(0xF1), // fyl2x 37.3309 - Push_ResultXD(dst)); 37.3310 - 37.3311 - ins_pipe( pipe_slow ); 37.3312 -%} 37.3313 - 37.3314 -instruct logD_reg(regDPR1 dst, regDPR1 src) %{ 37.3315 + Push_ResultD(dst)); 37.3316 + 37.3317 + ins_pipe( pipe_slow ); 37.3318 +%} 37.3319 + 37.3320 +instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ 37.3321 predicate (UseSSE<=1); 37.3322 // The source Double operand on FPU stack 37.3323 match(Set dst (LogD src)); 37.3324 @@ -11032,7 +10390,7 @@ 37.3325 ins_pipe( pipe_slow ); 37.3326 %} 37.3327 37.3328 
-instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{ 37.3329 +instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ 37.3330 predicate (UseSSE>=2); 37.3331 effect(KILL cr); 37.3332 // The source and result Double operands in XMM registers 37.3333 @@ -11043,9 +10401,9 @@ 37.3334 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 37.3335 %} 37.3336 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 37.3337 - Push_SrcXD(src), 37.3338 + Push_SrcD(src), 37.3339 Opcode(0xD9), Opcode(0xF1), // fyl2x 37.3340 - Push_ResultXD(dst)); 37.3341 + Push_ResultD(dst)); 37.3342 ins_pipe( pipe_slow ); 37.3343 %} 37.3344 37.3345 @@ -11066,7 +10424,7 @@ 37.3346 // exit: 37.3347 37.3348 // P6 version of float compare, sets condition codes in EFLAGS 37.3349 -instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 37.3350 +instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 37.3351 predicate(VM_Version::supports_cmov() && UseSSE == 0); 37.3352 match(Set cr (CmpF src1 src2)); 37.3353 effect(KILL rax); 37.3354 @@ -11078,27 +10436,27 @@ 37.3355 "SAHF\n" 37.3356 "exit:\tNOP // avoid branch to branch" %} 37.3357 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 37.3358 - ins_encode( Push_Reg_D(src1), 37.3359 + ins_encode( Push_Reg_DPR(src1), 37.3360 OpcP, RegOpc(src2), 37.3361 cmpF_P6_fixup ); 37.3362 ins_pipe( pipe_slow ); 37.3363 %} 37.3364 37.3365 -instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{ 37.3366 +instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 37.3367 predicate(VM_Version::supports_cmov() && UseSSE == 0); 37.3368 match(Set cr (CmpF src1 src2)); 37.3369 ins_cost(100); 37.3370 format %{ "FLD $src1\n\t" 37.3371 "FUCOMIP ST,$src2 // P6 instruction" %} 37.3372 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 37.3373 - ins_encode( Push_Reg_D(src1), 37.3374 + ins_encode( Push_Reg_DPR(src1), 37.3375 OpcP, RegOpc(src2)); 37.3376 ins_pipe( pipe_slow ); 37.3377 %} 37.3378 37.3379 37.3380 // Compare & branch 37.3381 -instruct 
cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{ 37.3382 +instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 37.3383 predicate(UseSSE == 0); 37.3384 match(Set cr (CmpF src1 src2)); 37.3385 effect(KILL rax); 37.3386 @@ -11111,328 +10469,190 @@ 37.3387 "MOV AH,1\t# unordered treat as LT\n" 37.3388 "flags:\tSAHF" %} 37.3389 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 37.3390 - ins_encode( Push_Reg_D(src1), 37.3391 + ins_encode( Push_Reg_DPR(src1), 37.3392 OpcP, RegOpc(src2), 37.3393 fpu_flags); 37.3394 ins_pipe( pipe_slow ); 37.3395 %} 37.3396 37.3397 // Compare vs zero into -1,0,1 37.3398 -instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{ 37.3399 +instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 37.3400 predicate(UseSSE == 0); 37.3401 match(Set dst (CmpF3 src1 zero)); 37.3402 effect(KILL cr, KILL rax); 37.3403 ins_cost(280); 37.3404 format %{ "FTSTF $dst,$src1" %} 37.3405 opcode(0xE4, 0xD9); 37.3406 - ins_encode( Push_Reg_D(src1), 37.3407 + ins_encode( Push_Reg_DPR(src1), 37.3408 OpcS, OpcP, PopFPU, 37.3409 CmpF_Result(dst)); 37.3410 ins_pipe( pipe_slow ); 37.3411 %} 37.3412 37.3413 // Compare into -1,0,1 37.3414 -instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 37.3415 +instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 37.3416 predicate(UseSSE == 0); 37.3417 match(Set dst (CmpF3 src1 src2)); 37.3418 effect(KILL cr, KILL rax); 37.3419 ins_cost(300); 37.3420 format %{ "FCMPF $dst,$src1,$src2" %} 37.3421 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 37.3422 - ins_encode( Push_Reg_D(src1), 37.3423 + ins_encode( Push_Reg_DPR(src1), 37.3424 OpcP, RegOpc(src2), 37.3425 CmpF_Result(dst)); 37.3426 ins_pipe( pipe_slow ); 37.3427 %} 37.3428 37.3429 // float compare and set condition codes in EFLAGS by XMM regs 37.3430 -instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{ 37.3431 +instruct 
cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 37.3432 predicate(UseSSE>=1); 37.3433 - match(Set cr (CmpF dst src)); 37.3434 - effect(KILL rax); 37.3435 + match(Set cr (CmpF src1 src2)); 37.3436 ins_cost(145); 37.3437 - format %{ "COMISS $dst,$src\n" 37.3438 - "\tJNP exit\n" 37.3439 - "\tMOV ah,1 // saw a NaN, set CF\n" 37.3440 - "\tSAHF\n" 37.3441 - "exit:\tNOP // avoid branch to branch" %} 37.3442 - opcode(0x0F, 0x2F); 37.3443 - ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup); 37.3444 - ins_pipe( pipe_slow ); 37.3445 -%} 37.3446 - 37.3447 -instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{ 37.3448 + format %{ "UCOMISS $src1,$src2\n\t" 37.3449 + "JNP,s exit\n\t" 37.3450 + "PUSHF\t# saw NaN, set CF\n\t" 37.3451 + "AND [rsp], #0xffffff2b\n\t" 37.3452 + "POPF\n" 37.3453 + "exit:" %} 37.3454 + ins_encode %{ 37.3455 + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 37.3456 + emit_cmpfp_fixup(_masm); 37.3457 + %} 37.3458 + ins_pipe( pipe_slow ); 37.3459 +%} 37.3460 + 37.3461 +instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 37.3462 predicate(UseSSE>=1); 37.3463 - match(Set cr (CmpF dst src)); 37.3464 + match(Set cr (CmpF src1 src2)); 37.3465 ins_cost(100); 37.3466 - format %{ "COMISS $dst,$src" %} 37.3467 - opcode(0x0F, 0x2F); 37.3468 - ins_encode(OpcP, OpcS, RegReg(dst, src)); 37.3469 + format %{ "UCOMISS $src1,$src2" %} 37.3470 + ins_encode %{ 37.3471 + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 37.3472 + %} 37.3473 ins_pipe( pipe_slow ); 37.3474 %} 37.3475 37.3476 // float compare and set condition codes in EFLAGS by XMM regs 37.3477 -instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{ 37.3478 +instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 37.3479 predicate(UseSSE>=1); 37.3480 - match(Set cr (CmpF dst (LoadF src))); 37.3481 - effect(KILL rax); 37.3482 + match(Set cr (CmpF src1 (LoadF src2))); 37.3483 ins_cost(165); 37.3484 - format %{ "COMISS $dst,$src\n" 37.3485 - "\tJNP exit\n" 37.3486 - 
"\tMOV ah,1 // saw a NaN, set CF\n" 37.3487 - "\tSAHF\n" 37.3488 - "exit:\tNOP // avoid branch to branch" %} 37.3489 - opcode(0x0F, 0x2F); 37.3490 - ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup); 37.3491 - ins_pipe( pipe_slow ); 37.3492 -%} 37.3493 - 37.3494 -instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{ 37.3495 + format %{ "UCOMISS $src1,$src2\n\t" 37.3496 + "JNP,s exit\n\t" 37.3497 + "PUSHF\t# saw NaN, set CF\n\t" 37.3498 + "AND [rsp], #0xffffff2b\n\t" 37.3499 + "POPF\n" 37.3500 + "exit:" %} 37.3501 + ins_encode %{ 37.3502 + __ ucomiss($src1$$XMMRegister, $src2$$Address); 37.3503 + emit_cmpfp_fixup(_masm); 37.3504 + %} 37.3505 + ins_pipe( pipe_slow ); 37.3506 +%} 37.3507 + 37.3508 +instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 37.3509 predicate(UseSSE>=1); 37.3510 - match(Set cr (CmpF dst (LoadF src))); 37.3511 + match(Set cr (CmpF src1 (LoadF src2))); 37.3512 ins_cost(100); 37.3513 - format %{ "COMISS $dst,$src" %} 37.3514 - opcode(0x0F, 0x2F); 37.3515 - ins_encode(OpcP, OpcS, RegMem(dst, src)); 37.3516 + format %{ "UCOMISS $src1,$src2" %} 37.3517 + ins_encode %{ 37.3518 + __ ucomiss($src1$$XMMRegister, $src2$$Address); 37.3519 + %} 37.3520 ins_pipe( pipe_slow ); 37.3521 %} 37.3522 37.3523 // Compare into -1,0,1 in XMM 37.3524 -instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{ 37.3525 +instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 37.3526 predicate(UseSSE>=1); 37.3527 match(Set dst (CmpF3 src1 src2)); 37.3528 effect(KILL cr); 37.3529 ins_cost(255); 37.3530 - format %{ "XOR $dst,$dst\n" 37.3531 - "\tCOMISS $src1,$src2\n" 37.3532 - "\tJP,s nan\n" 37.3533 - "\tJEQ,s exit\n" 37.3534 - "\tJA,s inc\n" 37.3535 - "nan:\tDEC $dst\n" 37.3536 - "\tJMP,s exit\n" 37.3537 - "inc:\tINC $dst\n" 37.3538 - "exit:" 37.3539 - %} 37.3540 - opcode(0x0F, 0x2F); 37.3541 - ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst)); 37.3542 + format %{ "UCOMISS $src1, $src2\n\t" 
37.3543 + "MOV $dst, #-1\n\t" 37.3544 + "JP,s done\n\t" 37.3545 + "JB,s done\n\t" 37.3546 + "SETNE $dst\n\t" 37.3547 + "MOVZB $dst, $dst\n" 37.3548 + "done:" %} 37.3549 + ins_encode %{ 37.3550 + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 37.3551 + emit_cmpfp3(_masm, $dst$$Register); 37.3552 + %} 37.3553 ins_pipe( pipe_slow ); 37.3554 %} 37.3555 37.3556 // Compare into -1,0,1 in XMM and memory 37.3557 -instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{ 37.3558 +instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 37.3559 predicate(UseSSE>=1); 37.3560 - match(Set dst (CmpF3 src1 (LoadF mem))); 37.3561 + match(Set dst (CmpF3 src1 (LoadF src2))); 37.3562 effect(KILL cr); 37.3563 ins_cost(275); 37.3564 - format %{ "COMISS $src1,$mem\n" 37.3565 - "\tMOV $dst,0\t\t# do not blow flags\n" 37.3566 - "\tJP,s nan\n" 37.3567 - "\tJEQ,s exit\n" 37.3568 - "\tJA,s inc\n" 37.3569 - "nan:\tDEC $dst\n" 37.3570 - "\tJMP,s exit\n" 37.3571 - "inc:\tINC $dst\n" 37.3572 - "exit:" 37.3573 - %} 37.3574 - opcode(0x0F, 0x2F); 37.3575 - ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst)); 37.3576 + format %{ "UCOMISS $src1, $src2\n\t" 37.3577 + "MOV $dst, #-1\n\t" 37.3578 + "JP,s done\n\t" 37.3579 + "JB,s done\n\t" 37.3580 + "SETNE $dst\n\t" 37.3581 + "MOVZB $dst, $dst\n" 37.3582 + "done:" %} 37.3583 + ins_encode %{ 37.3584 + __ ucomiss($src1$$XMMRegister, $src2$$Address); 37.3585 + emit_cmpfp3(_masm, $dst$$Register); 37.3586 + %} 37.3587 ins_pipe( pipe_slow ); 37.3588 %} 37.3589 37.3590 // Spill to obtain 24-bit precision 37.3591 -instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{ 37.3592 +instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 37.3593 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.3594 match(Set dst (SubF src1 src2)); 37.3595 37.3596 format %{ "FSUB $dst,$src1 - $src2" %} 37.3597 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 37.3598 - 
ins_encode( Push_Reg_F(src1), 37.3599 - OpcReg_F(src2), 37.3600 - Pop_Mem_F(dst) ); 37.3601 + ins_encode( Push_Reg_FPR(src1), 37.3602 + OpcReg_FPR(src2), 37.3603 + Pop_Mem_FPR(dst) ); 37.3604 ins_pipe( fpu_mem_reg_reg ); 37.3605 %} 37.3606 // 37.3607 // This instruction does not round to 24-bits 37.3608 -instruct subF_reg(regF dst, regF src) %{ 37.3609 +instruct subFPR_reg(regFPR dst, regFPR src) %{ 37.3610 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.3611 match(Set dst (SubF dst src)); 37.3612 37.3613 format %{ "FSUB $dst,$src" %} 37.3614 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 37.3615 - ins_encode( Push_Reg_F(src), 37.3616 + ins_encode( Push_Reg_FPR(src), 37.3617 OpcP, RegOpc(dst) ); 37.3618 ins_pipe( fpu_reg_reg ); 37.3619 %} 37.3620 37.3621 // Spill to obtain 24-bit precision 37.3622 -instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{ 37.3623 +instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 37.3624 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.3625 match(Set dst (AddF src1 src2)); 37.3626 37.3627 format %{ "FADD $dst,$src1,$src2" %} 37.3628 opcode(0xD8, 0x0); /* D8 C0+i */ 37.3629 - ins_encode( Push_Reg_F(src2), 37.3630 - OpcReg_F(src1), 37.3631 - Pop_Mem_F(dst) ); 37.3632 + ins_encode( Push_Reg_FPR(src2), 37.3633 + OpcReg_FPR(src1), 37.3634 + Pop_Mem_FPR(dst) ); 37.3635 ins_pipe( fpu_mem_reg_reg ); 37.3636 %} 37.3637 // 37.3638 // This instruction does not round to 24-bits 37.3639 -instruct addF_reg(regF dst, regF src) %{ 37.3640 +instruct addFPR_reg(regFPR dst, regFPR src) %{ 37.3641 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.3642 match(Set dst (AddF dst src)); 37.3643 37.3644 format %{ "FLD $src\n\t" 37.3645 "FADDp $dst,ST" %} 37.3646 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 37.3647 - ins_encode( Push_Reg_F(src), 37.3648 + ins_encode( Push_Reg_FPR(src), 37.3649 OpcP, RegOpc(dst) ); 37.3650 ins_pipe( fpu_reg_reg ); 37.3651 %} 37.3652 37.3653 -// Add 
two single precision floating point values in xmm 37.3654 -instruct addX_reg(regX dst, regX src) %{ 37.3655 - predicate(UseSSE>=1); 37.3656 - match(Set dst (AddF dst src)); 37.3657 - format %{ "ADDSS $dst,$src" %} 37.3658 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src)); 37.3659 - ins_pipe( pipe_slow ); 37.3660 -%} 37.3661 - 37.3662 -instruct addX_imm(regX dst, immXF con) %{ 37.3663 - predicate(UseSSE>=1); 37.3664 - match(Set dst (AddF dst con)); 37.3665 - format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 37.3666 - ins_encode %{ 37.3667 - __ addss($dst$$XMMRegister, $constantaddress($con)); 37.3668 - %} 37.3669 - ins_pipe(pipe_slow); 37.3670 -%} 37.3671 - 37.3672 -instruct addX_mem(regX dst, memory mem) %{ 37.3673 - predicate(UseSSE>=1); 37.3674 - match(Set dst (AddF dst (LoadF mem))); 37.3675 - format %{ "ADDSS $dst,$mem" %} 37.3676 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem)); 37.3677 - ins_pipe( pipe_slow ); 37.3678 -%} 37.3679 - 37.3680 -// Subtract two single precision floating point values in xmm 37.3681 -instruct subX_reg(regX dst, regX src) %{ 37.3682 - predicate(UseSSE>=1); 37.3683 - match(Set dst (SubF dst src)); 37.3684 - format %{ "SUBSS $dst,$src" %} 37.3685 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src)); 37.3686 - ins_pipe( pipe_slow ); 37.3687 -%} 37.3688 - 37.3689 -instruct subX_imm(regX dst, immXF con) %{ 37.3690 - predicate(UseSSE>=1); 37.3691 - match(Set dst (SubF dst con)); 37.3692 - format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 37.3693 - ins_encode %{ 37.3694 - __ subss($dst$$XMMRegister, $constantaddress($con)); 37.3695 - %} 37.3696 - ins_pipe(pipe_slow); 37.3697 -%} 37.3698 - 37.3699 -instruct subX_mem(regX dst, memory mem) %{ 37.3700 - predicate(UseSSE>=1); 37.3701 - match(Set dst (SubF dst (LoadF mem))); 37.3702 - format %{ "SUBSS $dst,$mem" %} 37.3703 - ins_encode( Opcode(0xF3), 
Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem)); 37.3704 - ins_pipe( pipe_slow ); 37.3705 -%} 37.3706 - 37.3707 -// Multiply two single precision floating point values in xmm 37.3708 -instruct mulX_reg(regX dst, regX src) %{ 37.3709 - predicate(UseSSE>=1); 37.3710 - match(Set dst (MulF dst src)); 37.3711 - format %{ "MULSS $dst,$src" %} 37.3712 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src)); 37.3713 - ins_pipe( pipe_slow ); 37.3714 -%} 37.3715 - 37.3716 -instruct mulX_imm(regX dst, immXF con) %{ 37.3717 - predicate(UseSSE>=1); 37.3718 - match(Set dst (MulF dst con)); 37.3719 - format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 37.3720 - ins_encode %{ 37.3721 - __ mulss($dst$$XMMRegister, $constantaddress($con)); 37.3722 - %} 37.3723 - ins_pipe(pipe_slow); 37.3724 -%} 37.3725 - 37.3726 -instruct mulX_mem(regX dst, memory mem) %{ 37.3727 - predicate(UseSSE>=1); 37.3728 - match(Set dst (MulF dst (LoadF mem))); 37.3729 - format %{ "MULSS $dst,$mem" %} 37.3730 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem)); 37.3731 - ins_pipe( pipe_slow ); 37.3732 -%} 37.3733 - 37.3734 -// Divide two single precision floating point values in xmm 37.3735 -instruct divX_reg(regX dst, regX src) %{ 37.3736 - predicate(UseSSE>=1); 37.3737 - match(Set dst (DivF dst src)); 37.3738 - format %{ "DIVSS $dst,$src" %} 37.3739 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src)); 37.3740 - ins_pipe( pipe_slow ); 37.3741 -%} 37.3742 - 37.3743 -instruct divX_imm(regX dst, immXF con) %{ 37.3744 - predicate(UseSSE>=1); 37.3745 - match(Set dst (DivF dst con)); 37.3746 - format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 37.3747 - ins_encode %{ 37.3748 - __ divss($dst$$XMMRegister, $constantaddress($con)); 37.3749 - %} 37.3750 - ins_pipe(pipe_slow); 37.3751 -%} 37.3752 - 37.3753 -instruct divX_mem(regX dst, memory mem) %{ 37.3754 - predicate(UseSSE>=1); 37.3755 - 
match(Set dst (DivF dst (LoadF mem))); 37.3756 - format %{ "DIVSS $dst,$mem" %} 37.3757 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem)); 37.3758 - ins_pipe( pipe_slow ); 37.3759 -%} 37.3760 - 37.3761 -// Get the square root of a single precision floating point values in xmm 37.3762 -instruct sqrtX_reg(regX dst, regX src) %{ 37.3763 - predicate(UseSSE>=1); 37.3764 - match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 37.3765 - format %{ "SQRTSS $dst,$src" %} 37.3766 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); 37.3767 - ins_pipe( pipe_slow ); 37.3768 -%} 37.3769 - 37.3770 -instruct sqrtX_mem(regX dst, memory mem) %{ 37.3771 - predicate(UseSSE>=1); 37.3772 - match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem))))); 37.3773 - format %{ "SQRTSS $dst,$mem" %} 37.3774 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); 37.3775 - ins_pipe( pipe_slow ); 37.3776 -%} 37.3777 - 37.3778 -// Get the square root of a double precision floating point values in xmm 37.3779 -instruct sqrtXD_reg(regXD dst, regXD src) %{ 37.3780 - predicate(UseSSE>=2); 37.3781 - match(Set dst (SqrtD src)); 37.3782 - format %{ "SQRTSD $dst,$src" %} 37.3783 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src)); 37.3784 - ins_pipe( pipe_slow ); 37.3785 -%} 37.3786 - 37.3787 -instruct sqrtXD_mem(regXD dst, memory mem) %{ 37.3788 - predicate(UseSSE>=2); 37.3789 - match(Set dst (SqrtD (LoadD mem))); 37.3790 - format %{ "SQRTSD $dst,$mem" %} 37.3791 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem)); 37.3792 - ins_pipe( pipe_slow ); 37.3793 -%} 37.3794 - 37.3795 -instruct absF_reg(regFPR1 dst, regFPR1 src) %{ 37.3796 +instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 37.3797 predicate(UseSSE==0); 37.3798 match(Set dst (AbsF src)); 37.3799 ins_cost(100); 37.3800 @@ -11442,15 +10662,7 @@ 37.3801 ins_pipe( fpu_reg_reg ); 37.3802 %} 37.3803 37.3804 -instruct absX_reg(regX dst ) %{ 37.3805 - 
predicate(UseSSE>=1); 37.3806 - match(Set dst (AbsF dst)); 37.3807 - format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %} 37.3808 - ins_encode( AbsXF_encoding(dst)); 37.3809 - ins_pipe( pipe_slow ); 37.3810 -%} 37.3811 - 37.3812 -instruct negF_reg(regFPR1 dst, regFPR1 src) %{ 37.3813 +instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 37.3814 predicate(UseSSE==0); 37.3815 match(Set dst (NegF src)); 37.3816 ins_cost(100); 37.3817 @@ -11460,17 +10672,9 @@ 37.3818 ins_pipe( fpu_reg_reg ); 37.3819 %} 37.3820 37.3821 -instruct negX_reg( regX dst ) %{ 37.3822 - predicate(UseSSE>=1); 37.3823 - match(Set dst (NegF dst)); 37.3824 - format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %} 37.3825 - ins_encode( NegXF_encoding(dst)); 37.3826 - ins_pipe( pipe_slow ); 37.3827 -%} 37.3828 - 37.3829 -// Cisc-alternate to addF_reg 37.3830 +// Cisc-alternate to addFPR_reg 37.3831 // Spill to obtain 24-bit precision 37.3832 -instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 37.3833 +instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 37.3834 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.3835 match(Set dst (AddF src1 (LoadF src2))); 37.3836 37.3837 @@ -11479,14 +10683,14 @@ 37.3838 "FSTP_S $dst" %} 37.3839 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 37.3840 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 37.3841 - OpcReg_F(src1), 37.3842 - Pop_Mem_F(dst) ); 37.3843 + OpcReg_FPR(src1), 37.3844 + Pop_Mem_FPR(dst) ); 37.3845 ins_pipe( fpu_mem_reg_mem ); 37.3846 %} 37.3847 // 37.3848 -// Cisc-alternate to addF_reg 37.3849 +// Cisc-alternate to addFPR_reg 37.3850 // This instruction does not round to 24-bits 37.3851 -instruct addF_reg_mem(regF dst, memory src) %{ 37.3852 +instruct addFPR_reg_mem(regFPR dst, memory src) %{ 37.3853 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.3854 match(Set dst (AddF dst (LoadF src))); 37.3855 37.3856 @@ -11499,21 +10703,21 @@ 37.3857 
37.3858 // // Following two instructions for _222_mpegaudio 37.3859 // Spill to obtain 24-bit precision 37.3860 -instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{ 37.3861 +instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 37.3862 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.3863 match(Set dst (AddF src1 src2)); 37.3864 37.3865 format %{ "FADD $dst,$src1,$src2" %} 37.3866 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 37.3867 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 37.3868 - OpcReg_F(src2), 37.3869 - Pop_Mem_F(dst) ); 37.3870 + OpcReg_FPR(src2), 37.3871 + Pop_Mem_FPR(dst) ); 37.3872 ins_pipe( fpu_mem_reg_mem ); 37.3873 %} 37.3874 37.3875 // Cisc-spill variant 37.3876 // Spill to obtain 24-bit precision 37.3877 -instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 37.3878 +instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 37.3879 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.3880 match(Set dst (AddF src1 (LoadF src2))); 37.3881 37.3882 @@ -11522,12 +10726,12 @@ 37.3883 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 37.3884 set_instruction_start, 37.3885 OpcP, RMopc_Mem(secondary,src1), 37.3886 - Pop_Mem_F(dst) ); 37.3887 + Pop_Mem_FPR(dst) ); 37.3888 ins_pipe( fpu_mem_mem_mem ); 37.3889 %} 37.3890 37.3891 // Spill to obtain 24-bit precision 37.3892 -instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 37.3893 +instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 37.3894 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.3895 match(Set dst (AddF src1 src2)); 37.3896 37.3897 @@ -11536,13 +10740,13 @@ 37.3898 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 37.3899 set_instruction_start, 37.3900 OpcP, RMopc_Mem(secondary,src1), 37.3901 - Pop_Mem_F(dst) ); 37.3902 + Pop_Mem_FPR(dst) ); 37.3903 ins_pipe( fpu_mem_mem_mem ); 37.3904 %} 37.3905 37.3906 37.3907 // 
Spill to obtain 24-bit precision 37.3908 -instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 37.3909 +instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 37.3910 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.3911 match(Set dst (AddF src con)); 37.3912 format %{ "FLD $src\n\t" 37.3913 @@ -11557,7 +10761,7 @@ 37.3914 %} 37.3915 // 37.3916 // This instruction does not round to 24-bits 37.3917 -instruct addF_reg_imm(regF dst, regF src, immF con) %{ 37.3918 +instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 37.3919 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.3920 match(Set dst (AddF src con)); 37.3921 format %{ "FLD $src\n\t" 37.3922 @@ -11572,7 +10776,7 @@ 37.3923 %} 37.3924 37.3925 // Spill to obtain 24-bit precision 37.3926 -instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{ 37.3927 +instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 37.3928 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.3929 match(Set dst (MulF src1 src2)); 37.3930 37.3931 @@ -11580,14 +10784,14 @@ 37.3932 "FMUL $src2\n\t" 37.3933 "FSTP_S $dst" %} 37.3934 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 37.3935 - ins_encode( Push_Reg_F(src1), 37.3936 - OpcReg_F(src2), 37.3937 - Pop_Mem_F(dst) ); 37.3938 + ins_encode( Push_Reg_FPR(src1), 37.3939 + OpcReg_FPR(src2), 37.3940 + Pop_Mem_FPR(dst) ); 37.3941 ins_pipe( fpu_mem_reg_reg ); 37.3942 %} 37.3943 // 37.3944 // This instruction does not round to 24-bits 37.3945 -instruct mulF_reg(regF dst, regF src1, regF src2) %{ 37.3946 +instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 37.3947 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.3948 match(Set dst (MulF src1 src2)); 37.3949 37.3950 @@ -11595,16 +10799,16 @@ 37.3951 "FMUL $src2\n\t" 37.3952 "FSTP_S $dst" %} 37.3953 opcode(0xD8, 0x1); /* D8 C8+i */ 37.3954 - ins_encode( Push_Reg_F(src2), 37.3955 - OpcReg_F(src1), 
37.3956 - Pop_Reg_F(dst) ); 37.3957 + ins_encode( Push_Reg_FPR(src2), 37.3958 + OpcReg_FPR(src1), 37.3959 + Pop_Reg_FPR(dst) ); 37.3960 ins_pipe( fpu_reg_reg_reg ); 37.3961 %} 37.3962 37.3963 37.3964 // Spill to obtain 24-bit precision 37.3965 // Cisc-alternate to reg-reg multiply 37.3966 -instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{ 37.3967 +instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 37.3968 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.3969 match(Set dst (MulF src1 (LoadF src2))); 37.3970 37.3971 @@ -11613,27 +10817,27 @@ 37.3972 "FSTP_S $dst" %} 37.3973 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 37.3974 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 37.3975 - OpcReg_F(src1), 37.3976 - Pop_Mem_F(dst) ); 37.3977 + OpcReg_FPR(src1), 37.3978 + Pop_Mem_FPR(dst) ); 37.3979 ins_pipe( fpu_mem_reg_mem ); 37.3980 %} 37.3981 // 37.3982 // This instruction does not round to 24-bits 37.3983 // Cisc-alternate to reg-reg multiply 37.3984 -instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 37.3985 +instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 37.3986 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.3987 match(Set dst (MulF src1 (LoadF src2))); 37.3988 37.3989 format %{ "FMUL $dst,$src1,$src2" %} 37.3990 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 37.3991 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 37.3992 - OpcReg_F(src1), 37.3993 - Pop_Reg_F(dst) ); 37.3994 + OpcReg_FPR(src1), 37.3995 + Pop_Reg_FPR(dst) ); 37.3996 ins_pipe( fpu_reg_reg_mem ); 37.3997 %} 37.3998 37.3999 // Spill to obtain 24-bit precision 37.4000 -instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 37.4001 +instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 37.4002 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.4003 match(Set dst (MulF src1 src2)); 37.4004 37.4005 @@ -11642,12 
+10846,12 @@ 37.4006 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 37.4007 set_instruction_start, 37.4008 OpcP, RMopc_Mem(secondary,src1), 37.4009 - Pop_Mem_F(dst) ); 37.4010 + Pop_Mem_FPR(dst) ); 37.4011 ins_pipe( fpu_mem_mem_mem ); 37.4012 %} 37.4013 37.4014 // Spill to obtain 24-bit precision 37.4015 -instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{ 37.4016 +instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 37.4017 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.4018 match(Set dst (MulF src con)); 37.4019 37.4020 @@ -11663,7 +10867,7 @@ 37.4021 %} 37.4022 // 37.4023 // This instruction does not round to 24-bits 37.4024 -instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 37.4025 +instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 37.4026 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.4027 match(Set dst (MulF src con)); 37.4028 37.4029 @@ -11680,9 +10884,9 @@ 37.4030 37.4031 37.4032 // 37.4033 -// MACRO1 -- subsume unshared load into mulF 37.4034 +// MACRO1 -- subsume unshared load into mulFPR 37.4035 // This instruction does not round to 24-bits 37.4036 -instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{ 37.4037 +instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 37.4038 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.4039 match(Set dst (MulF (LoadF mem1) src)); 37.4040 37.4041 @@ -11691,36 +10895,36 @@ 37.4042 "FSTP $dst" %} 37.4043 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 37.4044 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 37.4045 - OpcReg_F(src), 37.4046 - Pop_Reg_F(dst) ); 37.4047 + OpcReg_FPR(src), 37.4048 + Pop_Reg_FPR(dst) ); 37.4049 ins_pipe( fpu_reg_reg_mem ); 37.4050 %} 37.4051 // 37.4052 -// MACRO2 -- addF a mulF which subsumed an unshared load 37.4053 +// MACRO2 -- addFPR a mulFPR which subsumed an unshared load 37.4054 // This instruction does not round to 24-bits 
37.4055 -instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{ 37.4056 +instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 37.4057 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.4058 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 37.4059 ins_cost(95); 37.4060 37.4061 format %{ "FLD $mem1 ===MACRO2===\n\t" 37.4062 - "FMUL ST,$src1 subsume mulF left load\n\t" 37.4063 + "FMUL ST,$src1 subsume mulFPR left load\n\t" 37.4064 "FADD ST,$src2\n\t" 37.4065 "FSTP $dst" %} 37.4066 opcode(0xD9); /* LoadF D9 /0 */ 37.4067 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 37.4068 FMul_ST_reg(src1), 37.4069 FAdd_ST_reg(src2), 37.4070 - Pop_Reg_F(dst) ); 37.4071 + Pop_Reg_FPR(dst) ); 37.4072 ins_pipe( fpu_reg_mem_reg_reg ); 37.4073 %} 37.4074 37.4075 -// MACRO3 -- addF a mulF 37.4076 +// MACRO3 -- addFPR a mulFPR 37.4077 // This instruction does not round to 24-bits. It is a '2-address' 37.4078 // instruction in that the result goes back to src2. This eliminates 37.4079 // a move from the macro; possibly the register allocator will have 37.4080 // to add it back (and maybe not). 
37.4081 -instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{ 37.4082 +instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 37.4083 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.4084 match(Set src2 (AddF (MulF src0 src1) src2)); 37.4085 37.4086 @@ -11728,15 +10932,15 @@ 37.4087 "FMUL ST,$src1\n\t" 37.4088 "FADDP $src2,ST" %} 37.4089 opcode(0xD9); /* LoadF D9 /0 */ 37.4090 - ins_encode( Push_Reg_F(src0), 37.4091 + ins_encode( Push_Reg_FPR(src0), 37.4092 FMul_ST_reg(src1), 37.4093 FAddP_reg_ST(src2) ); 37.4094 ins_pipe( fpu_reg_reg_reg ); 37.4095 %} 37.4096 37.4097 -// MACRO4 -- divF subF 37.4098 +// MACRO4 -- divFPR subFPR 37.4099 // This instruction does not round to 24-bits 37.4100 -instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{ 37.4101 +instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 37.4102 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.4103 match(Set dst (DivF (SubF src2 src1) src3)); 37.4104 37.4105 @@ -11745,67 +10949,67 @@ 37.4106 "FDIV ST,$src3\n\t" 37.4107 "FSTP $dst" %} 37.4108 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 37.4109 - ins_encode( Push_Reg_F(src2), 37.4110 - subF_divF_encode(src1,src3), 37.4111 - Pop_Reg_F(dst) ); 37.4112 + ins_encode( Push_Reg_FPR(src2), 37.4113 + subFPR_divFPR_encode(src1,src3), 37.4114 + Pop_Reg_FPR(dst) ); 37.4115 ins_pipe( fpu_reg_reg_reg_reg ); 37.4116 %} 37.4117 37.4118 // Spill to obtain 24-bit precision 37.4119 -instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{ 37.4120 +instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 37.4121 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.4122 match(Set dst (DivF src1 src2)); 37.4123 37.4124 format %{ "FDIV $dst,$src1,$src2" %} 37.4125 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 37.4126 - ins_encode( Push_Reg_F(src1), 37.4127 - OpcReg_F(src2), 37.4128 - Pop_Mem_F(dst) ); 37.4129 + ins_encode( Push_Reg_FPR(src1), 
37.4130 + OpcReg_FPR(src2), 37.4131 + Pop_Mem_FPR(dst) ); 37.4132 ins_pipe( fpu_mem_reg_reg ); 37.4133 %} 37.4134 // 37.4135 // This instruction does not round to 24-bits 37.4136 -instruct divF_reg(regF dst, regF src) %{ 37.4137 +instruct divFPR_reg(regFPR dst, regFPR src) %{ 37.4138 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.4139 match(Set dst (DivF dst src)); 37.4140 37.4141 format %{ "FDIV $dst,$src" %} 37.4142 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 37.4143 - ins_encode( Push_Reg_F(src), 37.4144 + ins_encode( Push_Reg_FPR(src), 37.4145 OpcP, RegOpc(dst) ); 37.4146 ins_pipe( fpu_reg_reg ); 37.4147 %} 37.4148 37.4149 37.4150 // Spill to obtain 24-bit precision 37.4151 -instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{ 37.4152 +instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 37.4153 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.4154 match(Set dst (ModF src1 src2)); 37.4155 - effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 37.4156 + effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 37.4157 37.4158 format %{ "FMOD $dst,$src1,$src2" %} 37.4159 - ins_encode( Push_Reg_Mod_D(src1, src2), 37.4160 - emitModD(), 37.4161 - Push_Result_Mod_D(src2), 37.4162 - Pop_Mem_F(dst)); 37.4163 + ins_encode( Push_Reg_Mod_DPR(src1, src2), 37.4164 + emitModDPR(), 37.4165 + Push_Result_Mod_DPR(src2), 37.4166 + Pop_Mem_FPR(dst)); 37.4167 ins_pipe( pipe_slow ); 37.4168 %} 37.4169 // 37.4170 // This instruction does not round to 24-bits 37.4171 -instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{ 37.4172 +instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 37.4173 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.4174 match(Set dst (ModF dst src)); 37.4175 - effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS 37.4176 + effect(KILL rax, KILL cr); // emitModDPR() 
uses EAX and EFLAGS 37.4177 37.4178 format %{ "FMOD $dst,$src" %} 37.4179 - ins_encode(Push_Reg_Mod_D(dst, src), 37.4180 - emitModD(), 37.4181 - Push_Result_Mod_D(src), 37.4182 - Pop_Reg_F(dst)); 37.4183 - ins_pipe( pipe_slow ); 37.4184 -%} 37.4185 - 37.4186 -instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{ 37.4187 + ins_encode(Push_Reg_Mod_DPR(dst, src), 37.4188 + emitModDPR(), 37.4189 + Push_Result_Mod_DPR(src), 37.4190 + Pop_Reg_FPR(dst)); 37.4191 + ins_pipe( pipe_slow ); 37.4192 +%} 37.4193 + 37.4194 +instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 37.4195 predicate(UseSSE>=1); 37.4196 match(Set dst (ModF src0 src1)); 37.4197 effect(KILL rax, KILL cr); 37.4198 @@ -11825,7 +11029,7 @@ 37.4199 "\tFSTP ST0\t # Restore FPU Stack" 37.4200 %} 37.4201 ins_cost(250); 37.4202 - ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU); 37.4203 + ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 37.4204 ins_pipe( pipe_slow ); 37.4205 %} 37.4206 37.4207 @@ -11833,26 +11037,26 @@ 37.4208 //----------Arithmetic Conversion Instructions--------------------------------- 37.4209 // The conversions operations are all Alpha sorted. Please keep it that way! 
37.4210 37.4211 -instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{ 37.4212 +instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 37.4213 predicate(UseSSE==0); 37.4214 match(Set dst (RoundFloat src)); 37.4215 ins_cost(125); 37.4216 format %{ "FST_S $dst,$src\t# F-round" %} 37.4217 - ins_encode( Pop_Mem_Reg_F(dst, src) ); 37.4218 + ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 37.4219 ins_pipe( fpu_mem_reg ); 37.4220 %} 37.4221 37.4222 -instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{ 37.4223 +instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 37.4224 predicate(UseSSE<=1); 37.4225 match(Set dst (RoundDouble src)); 37.4226 ins_cost(125); 37.4227 format %{ "FST_D $dst,$src\t# D-round" %} 37.4228 - ins_encode( Pop_Mem_Reg_D(dst, src) ); 37.4229 + ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 37.4230 ins_pipe( fpu_mem_reg ); 37.4231 %} 37.4232 37.4233 // Force rounding to 24-bit precision and 6-bit exponent 37.4234 -instruct convD2F_reg(stackSlotF dst, regD src) %{ 37.4235 +instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 37.4236 predicate(UseSSE==0); 37.4237 match(Set dst (ConvD2F src)); 37.4238 format %{ "FST_S $dst,$src\t# F-round" %} 37.4239 @@ -11862,7 +11066,7 @@ 37.4240 %} 37.4241 37.4242 // Force rounding to 24-bit precision and 6-bit exponent 37.4243 -instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{ 37.4244 +instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 37.4245 predicate(UseSSE==1); 37.4246 match(Set dst (ConvD2F src)); 37.4247 effect( KILL cr ); 37.4248 @@ -11870,29 +11074,40 @@ 37.4249 "FST_S [ESP],$src\t# F-round\n\t" 37.4250 "MOVSS $dst,[ESP]\n\t" 37.4251 "ADD ESP,4" %} 37.4252 - ins_encode( D2X_encoding(dst, src) ); 37.4253 + ins_encode %{ 37.4254 + __ subptr(rsp, 4); 37.4255 + if ($src$$reg != FPR1L_enc) { 37.4256 + __ fld_s($src$$reg-1); 37.4257 + __ fstp_s(Address(rsp, 0)); 37.4258 + } else { 37.4259 + __ fst_s(Address(rsp, 0)); 37.4260 + } 37.4261 + __ movflt($dst$$XMMRegister, Address(rsp, 0)); 
37.4262 + __ addptr(rsp, 4); 37.4263 + %} 37.4264 ins_pipe( pipe_slow ); 37.4265 %} 37.4266 37.4267 // Force rounding double precision to single precision 37.4268 -instruct convXD2X_reg(regX dst, regXD src) %{ 37.4269 +instruct convD2F_reg(regF dst, regD src) %{ 37.4270 predicate(UseSSE>=2); 37.4271 match(Set dst (ConvD2F src)); 37.4272 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 37.4273 - opcode(0xF2, 0x0F, 0x5A); 37.4274 - ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 37.4275 - ins_pipe( pipe_slow ); 37.4276 -%} 37.4277 - 37.4278 -instruct convF2D_reg_reg(regD dst, regF src) %{ 37.4279 + ins_encode %{ 37.4280 + __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 37.4281 + %} 37.4282 + ins_pipe( pipe_slow ); 37.4283 +%} 37.4284 + 37.4285 +instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 37.4286 predicate(UseSSE==0); 37.4287 match(Set dst (ConvF2D src)); 37.4288 format %{ "FST_S $dst,$src\t# D-round" %} 37.4289 - ins_encode( Pop_Reg_Reg_D(dst, src)); 37.4290 + ins_encode( Pop_Reg_Reg_DPR(dst, src)); 37.4291 ins_pipe( fpu_reg_reg ); 37.4292 %} 37.4293 37.4294 -instruct convF2D_reg(stackSlotD dst, regF src) %{ 37.4295 +instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 37.4296 predicate(UseSSE==1); 37.4297 match(Set dst (ConvF2D src)); 37.4298 format %{ "FST_D $dst,$src\t# D-round" %} 37.4299 @@ -11901,7 +11116,7 @@ 37.4300 %} 37.4301 %} 37.4302 37.4303 -instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{ 37.4304 +instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 37.4305 predicate(UseSSE==1); 37.4306 match(Set dst (ConvF2D src)); 37.4307 effect( KILL cr ); 37.4308 @@ -11910,21 +11125,28 @@ 37.4309 "FLD_S [ESP]\n\t" 37.4310 "ADD ESP,4\n\t" 37.4311 "FSTP $dst\t# D-round" %} 37.4312 - ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst)); 37.4313 - ins_pipe( pipe_slow ); 37.4314 -%} 37.4315 - 37.4316 -instruct convX2XD_reg(regXD dst, regX src) %{ 37.4317 + ins_encode %{ 37.4318 + __ subptr(rsp, 4); 37.4319 + __ 
movflt(Address(rsp, 0), $src$$XMMRegister); 37.4320 + __ fld_s(Address(rsp, 0)); 37.4321 + __ addptr(rsp, 4); 37.4322 + __ fstp_d($dst$$reg); 37.4323 + %} 37.4324 + ins_pipe( pipe_slow ); 37.4325 +%} 37.4326 + 37.4327 +instruct convF2D_reg(regD dst, regF src) %{ 37.4328 predicate(UseSSE>=2); 37.4329 match(Set dst (ConvF2D src)); 37.4330 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 37.4331 - opcode(0xF3, 0x0F, 0x5A); 37.4332 - ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 37.4333 + ins_encode %{ 37.4334 + __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 37.4335 + %} 37.4336 ins_pipe( pipe_slow ); 37.4337 %} 37.4338 37.4339 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 37.4340 -instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 37.4341 +instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 37.4342 predicate(UseSSE<=1); 37.4343 match(Set dst (ConvD2I src)); 37.4344 effect( KILL tmp, KILL cr ); 37.4345 @@ -11939,12 +11161,12 @@ 37.4346 "FLD_D $src\n\t" 37.4347 "CALL d2i_wrapper\n" 37.4348 "fast:" %} 37.4349 - ins_encode( Push_Reg_D(src), D2I_encoding(src) ); 37.4350 + ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 37.4351 ins_pipe( pipe_slow ); 37.4352 %} 37.4353 37.4354 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
37.4355 -instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{ 37.4356 +instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 37.4357 predicate(UseSSE>=2); 37.4358 match(Set dst (ConvD2I src)); 37.4359 effect( KILL tmp, KILL cr ); 37.4360 @@ -11957,12 +11179,22 @@ 37.4361 "ADD ESP, 8\n\t" 37.4362 "CALL d2i_wrapper\n" 37.4363 "fast:" %} 37.4364 - opcode(0x1); // double-precision conversion 37.4365 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); 37.4366 - ins_pipe( pipe_slow ); 37.4367 -%} 37.4368 - 37.4369 -instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 37.4370 + ins_encode %{ 37.4371 + Label fast; 37.4372 + __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 37.4373 + __ cmpl($dst$$Register, 0x80000000); 37.4374 + __ jccb(Assembler::notEqual, fast); 37.4375 + __ subptr(rsp, 8); 37.4376 + __ movdbl(Address(rsp, 0), $src$$XMMRegister); 37.4377 + __ fld_d(Address(rsp, 0)); 37.4378 + __ addptr(rsp, 8); 37.4379 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 37.4380 + __ bind(fast); 37.4381 + %} 37.4382 + ins_pipe( pipe_slow ); 37.4383 +%} 37.4384 + 37.4385 +instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 37.4386 predicate(UseSSE<=1); 37.4387 match(Set dst (ConvD2L src)); 37.4388 effect( KILL cr ); 37.4389 @@ -11980,12 +11212,12 @@ 37.4390 "FLD $src\n\t" 37.4391 "CALL d2l_wrapper\n" 37.4392 "fast:" %} 37.4393 - ins_encode( Push_Reg_D(src), D2L_encoding(src) ); 37.4394 + ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 37.4395 ins_pipe( pipe_slow ); 37.4396 %} 37.4397 37.4398 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
37.4399 -instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{ 37.4400 +instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 37.4401 predicate (UseSSE>=2); 37.4402 match(Set dst (ConvD2L src)); 37.4403 effect( KILL cr ); 37.4404 @@ -12004,9 +11236,36 @@ 37.4405 "SUB ESP,8\n\t" 37.4406 "MOVSD [ESP],$src\n\t" 37.4407 "FLD_D [ESP]\n\t" 37.4408 + "ADD ESP,8\n\t" 37.4409 "CALL d2l_wrapper\n" 37.4410 "fast:" %} 37.4411 - ins_encode( XD2L_encoding(src) ); 37.4412 + ins_encode %{ 37.4413 + Label fast; 37.4414 + __ subptr(rsp, 8); 37.4415 + __ movdbl(Address(rsp, 0), $src$$XMMRegister); 37.4416 + __ fld_d(Address(rsp, 0)); 37.4417 + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 37.4418 + __ fistp_d(Address(rsp, 0)); 37.4419 + // Restore the rounding mode, mask the exception 37.4420 + if (Compile::current()->in_24_bit_fp_mode()) { 37.4421 + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 37.4422 + } else { 37.4423 + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 37.4424 + } 37.4425 + // Load the converted long, adjust CPU stack 37.4426 + __ pop(rax); 37.4427 + __ pop(rdx); 37.4428 + __ cmpl(rdx, 0x80000000); 37.4429 + __ jccb(Assembler::notEqual, fast); 37.4430 + __ testl(rax, rax); 37.4431 + __ jccb(Assembler::notEqual, fast); 37.4432 + __ subptr(rsp, 8); 37.4433 + __ movdbl(Address(rsp, 0), $src$$XMMRegister); 37.4434 + __ fld_d(Address(rsp, 0)); 37.4435 + __ addptr(rsp, 8); 37.4436 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 37.4437 + __ bind(fast); 37.4438 + %} 37.4439 ins_pipe( pipe_slow ); 37.4440 %} 37.4441 37.4442 @@ -12016,7 +11275,7 @@ 37.4443 // rounding mode to 'nearest'. The hardware stores a flag value down 37.4444 // if we would overflow or converted a NAN; we check for this and 37.4445 // and go the slow path if needed. 
37.4446 -instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 37.4447 +instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 37.4448 predicate(UseSSE==0); 37.4449 match(Set dst (ConvF2I src)); 37.4450 effect( KILL tmp, KILL cr ); 37.4451 @@ -12031,13 +11290,13 @@ 37.4452 "FLD $src\n\t" 37.4453 "CALL d2i_wrapper\n" 37.4454 "fast:" %} 37.4455 - // D2I_encoding works for F2I 37.4456 - ins_encode( Push_Reg_F(src), D2I_encoding(src) ); 37.4457 + // DPR2I_encoding works for FPR2I 37.4458 + ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 37.4459 ins_pipe( pipe_slow ); 37.4460 %} 37.4461 37.4462 // Convert a float in xmm to an int reg. 37.4463 -instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{ 37.4464 +instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 37.4465 predicate(UseSSE>=1); 37.4466 match(Set dst (ConvF2I src)); 37.4467 effect( KILL tmp, KILL cr ); 37.4468 @@ -12050,12 +11309,22 @@ 37.4469 "ADD ESP, 4\n\t" 37.4470 "CALL d2i_wrapper\n" 37.4471 "fast:" %} 37.4472 - opcode(0x0); // single-precision conversion 37.4473 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst)); 37.4474 - ins_pipe( pipe_slow ); 37.4475 -%} 37.4476 - 37.4477 -instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 37.4478 + ins_encode %{ 37.4479 + Label fast; 37.4480 + __ cvttss2sil($dst$$Register, $src$$XMMRegister); 37.4481 + __ cmpl($dst$$Register, 0x80000000); 37.4482 + __ jccb(Assembler::notEqual, fast); 37.4483 + __ subptr(rsp, 4); 37.4484 + __ movflt(Address(rsp, 0), $src$$XMMRegister); 37.4485 + __ fld_s(Address(rsp, 0)); 37.4486 + __ addptr(rsp, 4); 37.4487 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); 37.4488 + __ bind(fast); 37.4489 + %} 37.4490 + ins_pipe( pipe_slow ); 37.4491 +%} 37.4492 + 37.4493 +instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 37.4494 predicate(UseSSE==0); 
37.4495 match(Set dst (ConvF2L src)); 37.4496 effect( KILL cr ); 37.4497 @@ -12073,13 +11342,13 @@ 37.4498 "FLD $src\n\t" 37.4499 "CALL d2l_wrapper\n" 37.4500 "fast:" %} 37.4501 - // D2L_encoding works for F2L 37.4502 - ins_encode( Push_Reg_F(src), D2L_encoding(src) ); 37.4503 + // DPR2L_encoding works for FPR2L 37.4504 + ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 37.4505 ins_pipe( pipe_slow ); 37.4506 %} 37.4507 37.4508 // XMM lacks a float/double->long conversion, so use the old FPU stack. 37.4509 -instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{ 37.4510 +instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 37.4511 predicate (UseSSE>=1); 37.4512 match(Set dst (ConvF2L src)); 37.4513 effect( KILL cr ); 37.4514 @@ -12101,39 +11370,67 @@ 37.4515 "ADD ESP,4\n\t" 37.4516 "CALL d2l_wrapper\n" 37.4517 "fast:" %} 37.4518 - ins_encode( X2L_encoding(src) ); 37.4519 - ins_pipe( pipe_slow ); 37.4520 -%} 37.4521 - 37.4522 -instruct convI2D_reg(regD dst, stackSlotI src) %{ 37.4523 + ins_encode %{ 37.4524 + Label fast; 37.4525 + __ subptr(rsp, 8); 37.4526 + __ movflt(Address(rsp, 0), $src$$XMMRegister); 37.4527 + __ fld_s(Address(rsp, 0)); 37.4528 + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); 37.4529 + __ fistp_d(Address(rsp, 0)); 37.4530 + // Restore the rounding mode, mask the exception 37.4531 + if (Compile::current()->in_24_bit_fp_mode()) { 37.4532 + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 37.4533 + } else { 37.4534 + __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 37.4535 + } 37.4536 + // Load the converted long, adjust CPU stack 37.4537 + __ pop(rax); 37.4538 + __ pop(rdx); 37.4539 + __ cmpl(rdx, 0x80000000); 37.4540 + __ jccb(Assembler::notEqual, fast); 37.4541 + __ testl(rax, rax); 37.4542 + __ jccb(Assembler::notEqual, fast); 37.4543 + __ subptr(rsp, 4); 37.4544 + __ movflt(Address(rsp, 0), $src$$XMMRegister); 37.4545 + __ fld_s(Address(rsp, 0)); 37.4546 + 
__ addptr(rsp, 4); 37.4547 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); 37.4548 + __ bind(fast); 37.4549 + %} 37.4550 + ins_pipe( pipe_slow ); 37.4551 +%} 37.4552 + 37.4553 +instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 37.4554 predicate( UseSSE<=1 ); 37.4555 match(Set dst (ConvI2D src)); 37.4556 format %{ "FILD $src\n\t" 37.4557 "FSTP $dst" %} 37.4558 opcode(0xDB, 0x0); /* DB /0 */ 37.4559 - ins_encode(Push_Mem_I(src), Pop_Reg_D(dst)); 37.4560 + ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 37.4561 ins_pipe( fpu_reg_mem ); 37.4562 %} 37.4563 37.4564 -instruct convI2XD_reg(regXD dst, eRegI src) %{ 37.4565 +instruct convI2D_reg(regD dst, eRegI src) %{ 37.4566 predicate( UseSSE>=2 && !UseXmmI2D ); 37.4567 match(Set dst (ConvI2D src)); 37.4568 format %{ "CVTSI2SD $dst,$src" %} 37.4569 - opcode(0xF2, 0x0F, 0x2A); 37.4570 - ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 37.4571 - ins_pipe( pipe_slow ); 37.4572 -%} 37.4573 - 37.4574 -instruct convI2XD_mem(regXD dst, memory mem) %{ 37.4575 + ins_encode %{ 37.4576 + __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 37.4577 + %} 37.4578 + ins_pipe( pipe_slow ); 37.4579 +%} 37.4580 + 37.4581 +instruct convI2D_mem(regD dst, memory mem) %{ 37.4582 predicate( UseSSE>=2 ); 37.4583 match(Set dst (ConvI2D (LoadI mem))); 37.4584 format %{ "CVTSI2SD $dst,$mem" %} 37.4585 - opcode(0xF2, 0x0F, 0x2A); 37.4586 - ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem)); 37.4587 - ins_pipe( pipe_slow ); 37.4588 -%} 37.4589 - 37.4590 -instruct convXI2XD_reg(regXD dst, eRegI src) 37.4591 + ins_encode %{ 37.4592 + __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 37.4593 + %} 37.4594 + ins_pipe( pipe_slow ); 37.4595 +%} 37.4596 + 37.4597 +instruct convXI2D_reg(regD dst, eRegI src) 37.4598 %{ 37.4599 predicate( UseSSE>=2 && UseXmmI2D ); 37.4600 match(Set dst (ConvI2D src)); 37.4601 @@ -12147,31 +11444,31 @@ 37.4602 ins_pipe(pipe_slow); // XXX 37.4603 %} 37.4604 37.4605 -instruct 
convI2D_mem(regD dst, memory mem) %{ 37.4606 +instruct convI2DPR_mem(regDPR dst, memory mem) %{ 37.4607 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 37.4608 match(Set dst (ConvI2D (LoadI mem))); 37.4609 format %{ "FILD $mem\n\t" 37.4610 "FSTP $dst" %} 37.4611 opcode(0xDB); /* DB /0 */ 37.4612 ins_encode( OpcP, RMopc_Mem(0x00,mem), 37.4613 - Pop_Reg_D(dst)); 37.4614 + Pop_Reg_DPR(dst)); 37.4615 ins_pipe( fpu_reg_mem ); 37.4616 %} 37.4617 37.4618 // Convert a byte to a float; no rounding step needed. 37.4619 -instruct conv24I2F_reg(regF dst, stackSlotI src) %{ 37.4620 +instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 37.4621 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 37.4622 match(Set dst (ConvI2F src)); 37.4623 format %{ "FILD $src\n\t" 37.4624 "FSTP $dst" %} 37.4625 37.4626 opcode(0xDB, 0x0); /* DB /0 */ 37.4627 - ins_encode(Push_Mem_I(src), Pop_Reg_F(dst)); 37.4628 + ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 37.4629 ins_pipe( fpu_reg_mem ); 37.4630 %} 37.4631 37.4632 // In 24-bit mode, force exponent rounding by storing back out 37.4633 -instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{ 37.4634 +instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 37.4635 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.4636 match(Set dst (ConvI2F src)); 37.4637 ins_cost(200); 37.4638 @@ -12179,12 +11476,12 @@ 37.4639 "FSTP_S $dst" %} 37.4640 opcode(0xDB, 0x0); /* DB /0 */ 37.4641 ins_encode( Push_Mem_I(src), 37.4642 - Pop_Mem_F(dst)); 37.4643 + Pop_Mem_FPR(dst)); 37.4644 ins_pipe( fpu_mem_mem ); 37.4645 %} 37.4646 37.4647 // In 24-bit mode, force exponent rounding by storing back out 37.4648 -instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{ 37.4649 +instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 37.4650 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 37.4651 match(Set dst (ConvI2F (LoadI mem))); 
37.4652 ins_cost(200); 37.4653 @@ -12192,46 +11489,46 @@ 37.4654 "FSTP_S $dst" %} 37.4655 opcode(0xDB); /* DB /0 */ 37.4656 ins_encode( OpcP, RMopc_Mem(0x00,mem), 37.4657 - Pop_Mem_F(dst)); 37.4658 + Pop_Mem_FPR(dst)); 37.4659 ins_pipe( fpu_mem_mem ); 37.4660 %} 37.4661 37.4662 // This instruction does not round to 24-bits 37.4663 -instruct convI2F_reg(regF dst, stackSlotI src) %{ 37.4664 +instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 37.4665 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.4666 match(Set dst (ConvI2F src)); 37.4667 format %{ "FILD $src\n\t" 37.4668 "FSTP $dst" %} 37.4669 opcode(0xDB, 0x0); /* DB /0 */ 37.4670 ins_encode( Push_Mem_I(src), 37.4671 - Pop_Reg_F(dst)); 37.4672 + Pop_Reg_FPR(dst)); 37.4673 ins_pipe( fpu_reg_mem ); 37.4674 %} 37.4675 37.4676 // This instruction does not round to 24-bits 37.4677 -instruct convI2F_mem(regF dst, memory mem) %{ 37.4678 +instruct convI2FPR_mem(regFPR dst, memory mem) %{ 37.4679 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 37.4680 match(Set dst (ConvI2F (LoadI mem))); 37.4681 format %{ "FILD $mem\n\t" 37.4682 "FSTP $dst" %} 37.4683 opcode(0xDB); /* DB /0 */ 37.4684 ins_encode( OpcP, RMopc_Mem(0x00,mem), 37.4685 - Pop_Reg_F(dst)); 37.4686 + Pop_Reg_FPR(dst)); 37.4687 ins_pipe( fpu_reg_mem ); 37.4688 %} 37.4689 37.4690 // Convert an int to a float in xmm; no rounding step needed. 
37.4691 -instruct convI2X_reg(regX dst, eRegI src) %{ 37.4692 +instruct convI2F_reg(regF dst, eRegI src) %{ 37.4693 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 37.4694 match(Set dst (ConvI2F src)); 37.4695 format %{ "CVTSI2SS $dst, $src" %} 37.4696 - 37.4697 - opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */ 37.4698 - ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src)); 37.4699 - ins_pipe( pipe_slow ); 37.4700 -%} 37.4701 - 37.4702 - instruct convXI2X_reg(regX dst, eRegI src) 37.4703 + ins_encode %{ 37.4704 + __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 37.4705 + %} 37.4706 + ins_pipe( pipe_slow ); 37.4707 +%} 37.4708 + 37.4709 + instruct convXI2F_reg(regF dst, eRegI src) 37.4710 %{ 37.4711 predicate( UseSSE>=2 && UseXmmI2F ); 37.4712 match(Set dst (ConvI2F src)); 37.4713 @@ -12280,7 +11577,7 @@ 37.4714 ins_pipe( ialu_reg_reg_long ); 37.4715 %} 37.4716 37.4717 -instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 37.4718 +instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 37.4719 predicate (UseSSE<=1); 37.4720 match(Set dst (ConvL2D src)); 37.4721 effect( KILL cr ); 37.4722 @@ -12290,11 +11587,11 @@ 37.4723 "ADD ESP,8\n\t" 37.4724 "FSTP_D $dst\t# D-round" %} 37.4725 opcode(0xDF, 0x5); /* DF /5 */ 37.4726 - ins_encode(convert_long_double(src), Pop_Mem_D(dst)); 37.4727 - ins_pipe( pipe_slow ); 37.4728 -%} 37.4729 - 37.4730 -instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{ 37.4731 + ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 37.4732 + ins_pipe( pipe_slow ); 37.4733 +%} 37.4734 + 37.4735 +instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 37.4736 predicate (UseSSE>=2); 37.4737 match(Set dst (ConvL2D src)); 37.4738 effect( KILL cr ); 37.4739 @@ -12305,11 +11602,11 @@ 37.4740 "MOVSD $dst,[ESP]\n\t" 37.4741 "ADD ESP,8" %} 37.4742 opcode(0xDF, 0x5); /* DF /5 */ 37.4743 - ins_encode(convert_long_double2(src), Push_ResultXD(dst)); 37.4744 - ins_pipe( pipe_slow ); 37.4745 -%} 37.4746 - 37.4747 
-instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{ 37.4748 + ins_encode(convert_long_double2(src), Push_ResultD(dst)); 37.4749 + ins_pipe( pipe_slow ); 37.4750 +%} 37.4751 + 37.4752 +instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 37.4753 predicate (UseSSE>=1); 37.4754 match(Set dst (ConvL2F src)); 37.4755 effect( KILL cr ); 37.4756 @@ -12320,11 +11617,11 @@ 37.4757 "MOVSS $dst,[ESP]\n\t" 37.4758 "ADD ESP,8" %} 37.4759 opcode(0xDF, 0x5); /* DF /5 */ 37.4760 - ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8)); 37.4761 - ins_pipe( pipe_slow ); 37.4762 -%} 37.4763 - 37.4764 -instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 37.4765 + ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 37.4766 + ins_pipe( pipe_slow ); 37.4767 +%} 37.4768 + 37.4769 +instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 37.4770 match(Set dst (ConvL2F src)); 37.4771 effect( KILL cr ); 37.4772 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 37.4773 @@ -12333,7 +11630,7 @@ 37.4774 "ADD ESP,8\n\t" 37.4775 "FSTP_S $dst\t# F-round" %} 37.4776 opcode(0xDF, 0x5); /* DF /5 */ 37.4777 - ins_encode(convert_long_double(src), Pop_Mem_F(dst)); 37.4778 + ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 37.4779 ins_pipe( pipe_slow ); 37.4780 %} 37.4781 37.4782 @@ -12351,40 +11648,45 @@ 37.4783 effect( DEF dst, USE src ); 37.4784 ins_cost(100); 37.4785 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 37.4786 - opcode(0x8B); 37.4787 - ins_encode( OpcP, RegMem(dst,src)); 37.4788 + ins_encode %{ 37.4789 + __ movl($dst$$Register, Address(rsp, $src$$disp)); 37.4790 + %} 37.4791 ins_pipe( ialu_reg_mem ); 37.4792 %} 37.4793 37.4794 -instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ 37.4795 +instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 37.4796 predicate(UseSSE==0); 37.4797 match(Set dst (MoveF2I src)); 37.4798 effect( DEF dst, USE src ); 37.4799 37.4800 ins_cost(125); 37.4801 format %{ "FST_S 
$dst,$src\t# MoveF2I_reg_stack" %} 37.4802 - ins_encode( Pop_Mem_Reg_F(dst, src) ); 37.4803 + ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 37.4804 ins_pipe( fpu_mem_reg ); 37.4805 %} 37.4806 37.4807 -instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{ 37.4808 +instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 37.4809 predicate(UseSSE>=1); 37.4810 match(Set dst (MoveF2I src)); 37.4811 effect( DEF dst, USE src ); 37.4812 37.4813 ins_cost(95); 37.4814 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 37.4815 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst)); 37.4816 - ins_pipe( pipe_slow ); 37.4817 -%} 37.4818 - 37.4819 -instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{ 37.4820 + ins_encode %{ 37.4821 + __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 37.4822 + %} 37.4823 + ins_pipe( pipe_slow ); 37.4824 +%} 37.4825 + 37.4826 +instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{ 37.4827 predicate(UseSSE>=2); 37.4828 match(Set dst (MoveF2I src)); 37.4829 effect( DEF dst, USE src ); 37.4830 ins_cost(85); 37.4831 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 37.4832 - ins_encode( MovX2I_reg(dst, src)); 37.4833 + ins_encode %{ 37.4834 + __ movdl($dst$$Register, $src$$XMMRegister); 37.4835 + %} 37.4836 ins_pipe( pipe_slow ); 37.4837 %} 37.4838 37.4839 @@ -12394,13 +11696,14 @@ 37.4840 37.4841 ins_cost(100); 37.4842 format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} 37.4843 - opcode(0x89); 37.4844 - ins_encode( OpcPRegSS( dst, src ) ); 37.4845 + ins_encode %{ 37.4846 + __ movl(Address(rsp, $dst$$disp), $src$$Register); 37.4847 + %} 37.4848 ins_pipe( ialu_mem_reg ); 37.4849 %} 37.4850 37.4851 37.4852 -instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ 37.4853 +instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 37.4854 predicate(UseSSE==0); 37.4855 match(Set dst (MoveI2F src)); 37.4856 effect(DEF dst, USE src); 37.4857 @@ -12410,29 +11713,33 @@ 37.4858 "FSTP $dst\t# MoveI2F_stack_reg" %} 37.4859 
opcode(0xD9); /* D9 /0, FLD m32real */ 37.4860 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 37.4861 - Pop_Reg_F(dst) ); 37.4862 + Pop_Reg_FPR(dst) ); 37.4863 ins_pipe( fpu_reg_mem ); 37.4864 %} 37.4865 37.4866 -instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{ 37.4867 +instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 37.4868 predicate(UseSSE>=1); 37.4869 match(Set dst (MoveI2F src)); 37.4870 effect( DEF dst, USE src ); 37.4871 37.4872 ins_cost(95); 37.4873 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 37.4874 - ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); 37.4875 - ins_pipe( pipe_slow ); 37.4876 -%} 37.4877 - 37.4878 -instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{ 37.4879 + ins_encode %{ 37.4880 + __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 37.4881 + %} 37.4882 + ins_pipe( pipe_slow ); 37.4883 +%} 37.4884 + 37.4885 +instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{ 37.4886 predicate(UseSSE>=2); 37.4887 match(Set dst (MoveI2F src)); 37.4888 effect( DEF dst, USE src ); 37.4889 37.4890 ins_cost(85); 37.4891 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 37.4892 - ins_encode( MovI2X_reg(dst, src) ); 37.4893 + ins_encode %{ 37.4894 + __ movdl($dst$$XMMRegister, $src$$Register); 37.4895 + %} 37.4896 ins_pipe( pipe_slow ); 37.4897 %} 37.4898 37.4899 @@ -12448,29 +11755,30 @@ 37.4900 ins_pipe( ialu_mem_long_reg ); 37.4901 %} 37.4902 37.4903 -instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ 37.4904 +instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 37.4905 predicate(UseSSE<=1); 37.4906 match(Set dst (MoveD2L src)); 37.4907 effect(DEF dst, USE src); 37.4908 37.4909 ins_cost(125); 37.4910 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 37.4911 - ins_encode( Pop_Mem_Reg_D(dst, src) ); 37.4912 + ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 37.4913 ins_pipe( fpu_mem_reg ); 37.4914 %} 37.4915 37.4916 -instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{ 37.4917 +instruct 
MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 37.4918 predicate(UseSSE>=2); 37.4919 match(Set dst (MoveD2L src)); 37.4920 effect(DEF dst, USE src); 37.4921 ins_cost(95); 37.4922 - 37.4923 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 37.4924 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst)); 37.4925 - ins_pipe( pipe_slow ); 37.4926 -%} 37.4927 - 37.4928 -instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{ 37.4929 + ins_encode %{ 37.4930 + __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 37.4931 + %} 37.4932 + ins_pipe( pipe_slow ); 37.4933 +%} 37.4934 + 37.4935 +instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 37.4936 predicate(UseSSE>=2); 37.4937 match(Set dst (MoveD2L src)); 37.4938 effect(DEF dst, USE src, TEMP tmp); 37.4939 @@ -12478,7 +11786,11 @@ 37.4940 format %{ "MOVD $dst.lo,$src\n\t" 37.4941 "PSHUFLW $tmp,$src,0x4E\n\t" 37.4942 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 37.4943 - ins_encode( MovXD2L_reg(dst, src, tmp) ); 37.4944 + ins_encode %{ 37.4945 + __ movdl($dst$$Register, $src$$XMMRegister); 37.4946 + __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 37.4947 + __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 37.4948 + %} 37.4949 ins_pipe( pipe_slow ); 37.4950 %} 37.4951 37.4952 @@ -12495,7 +11807,7 @@ 37.4953 %} 37.4954 37.4955 37.4956 -instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ 37.4957 +instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 37.4958 predicate(UseSSE<=1); 37.4959 match(Set dst (MoveL2D src)); 37.4960 effect(DEF dst, USE src); 37.4961 @@ -12505,34 +11817,38 @@ 37.4962 "FSTP $dst\t# MoveL2D_stack_reg" %} 37.4963 opcode(0xDD); /* DD /0, FLD m64real */ 37.4964 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 37.4965 - Pop_Reg_D(dst) ); 37.4966 + Pop_Reg_DPR(dst) ); 37.4967 ins_pipe( fpu_reg_mem ); 37.4968 %} 37.4969 37.4970 37.4971 -instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{ 37.4972 +instruct 
MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 37.4973 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 37.4974 match(Set dst (MoveL2D src)); 37.4975 effect(DEF dst, USE src); 37.4976 37.4977 ins_cost(95); 37.4978 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 37.4979 - ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src)); 37.4980 - ins_pipe( pipe_slow ); 37.4981 -%} 37.4982 - 37.4983 -instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{ 37.4984 + ins_encode %{ 37.4985 + __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 37.4986 + %} 37.4987 + ins_pipe( pipe_slow ); 37.4988 +%} 37.4989 + 37.4990 +instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 37.4991 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 37.4992 match(Set dst (MoveL2D src)); 37.4993 effect(DEF dst, USE src); 37.4994 37.4995 ins_cost(95); 37.4996 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 37.4997 - ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src)); 37.4998 - ins_pipe( pipe_slow ); 37.4999 -%} 37.5000 - 37.5001 -instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{ 37.5002 + ins_encode %{ 37.5003 + __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 37.5004 + %} 37.5005 + ins_pipe( pipe_slow ); 37.5006 +%} 37.5007 + 37.5008 +instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 37.5009 predicate(UseSSE>=2); 37.5010 match(Set dst (MoveL2D src)); 37.5011 effect(TEMP dst, USE src, TEMP tmp); 37.5012 @@ -12540,149 +11856,192 @@ 37.5013 format %{ "MOVD $dst,$src.lo\n\t" 37.5014 "MOVD $tmp,$src.hi\n\t" 37.5015 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 37.5016 - ins_encode( MovL2XD_reg(dst, src, tmp) ); 37.5017 + ins_encode %{ 37.5018 + __ movdl($dst$$XMMRegister, $src$$Register); 37.5019 + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 37.5020 + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 37.5021 + %} 37.5022 ins_pipe( pipe_slow ); 37.5023 %} 37.5024 
37.5025 // Replicate scalar to packed byte (1 byte) values in xmm 37.5026 -instruct Repl8B_reg(regXD dst, regXD src) %{ 37.5027 +instruct Repl8B_reg(regD dst, regD src) %{ 37.5028 predicate(UseSSE>=2); 37.5029 match(Set dst (Replicate8B src)); 37.5030 format %{ "MOVDQA $dst,$src\n\t" 37.5031 "PUNPCKLBW $dst,$dst\n\t" 37.5032 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} 37.5033 - ins_encode( pshufd_8x8(dst, src)); 37.5034 + ins_encode %{ 37.5035 + if ($dst$$reg != $src$$reg) { 37.5036 + __ movdqa($dst$$XMMRegister, $src$$XMMRegister); 37.5037 + } 37.5038 + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 37.5039 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 37.5040 + %} 37.5041 ins_pipe( pipe_slow ); 37.5042 %} 37.5043 37.5044 // Replicate scalar to packed byte (1 byte) values in xmm 37.5045 -instruct Repl8B_eRegI(regXD dst, eRegI src) %{ 37.5046 +instruct Repl8B_eRegI(regD dst, eRegI src) %{ 37.5047 predicate(UseSSE>=2); 37.5048 match(Set dst (Replicate8B src)); 37.5049 format %{ "MOVD $dst,$src\n\t" 37.5050 "PUNPCKLBW $dst,$dst\n\t" 37.5051 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} 37.5052 - ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); 37.5053 + ins_encode %{ 37.5054 + __ movdl($dst$$XMMRegister, $src$$Register); 37.5055 + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 37.5056 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 37.5057 + %} 37.5058 ins_pipe( pipe_slow ); 37.5059 %} 37.5060 37.5061 // Replicate scalar zero to packed byte (1 byte) values in xmm 37.5062 -instruct Repl8B_immI0(regXD dst, immI0 zero) %{ 37.5063 +instruct Repl8B_immI0(regD dst, immI0 zero) %{ 37.5064 predicate(UseSSE>=2); 37.5065 match(Set dst (Replicate8B zero)); 37.5066 format %{ "PXOR $dst,$dst\t! 
replicate8B" %} 37.5067 - ins_encode( pxor(dst, dst)); 37.5068 + ins_encode %{ 37.5069 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 37.5070 + %} 37.5071 ins_pipe( fpu_reg_reg ); 37.5072 %} 37.5073 37.5074 // Replicate scalar to packed shore (2 byte) values in xmm 37.5075 -instruct Repl4S_reg(regXD dst, regXD src) %{ 37.5076 +instruct Repl4S_reg(regD dst, regD src) %{ 37.5077 predicate(UseSSE>=2); 37.5078 match(Set dst (Replicate4S src)); 37.5079 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} 37.5080 - ins_encode( pshufd_4x16(dst, src)); 37.5081 + ins_encode %{ 37.5082 + __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); 37.5083 + %} 37.5084 ins_pipe( fpu_reg_reg ); 37.5085 %} 37.5086 37.5087 // Replicate scalar to packed shore (2 byte) values in xmm 37.5088 -instruct Repl4S_eRegI(regXD dst, eRegI src) %{ 37.5089 +instruct Repl4S_eRegI(regD dst, eRegI src) %{ 37.5090 predicate(UseSSE>=2); 37.5091 match(Set dst (Replicate4S src)); 37.5092 format %{ "MOVD $dst,$src\n\t" 37.5093 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} 37.5094 - ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); 37.5095 + ins_encode %{ 37.5096 + __ movdl($dst$$XMMRegister, $src$$Register); 37.5097 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 37.5098 + %} 37.5099 ins_pipe( fpu_reg_reg ); 37.5100 %} 37.5101 37.5102 // Replicate scalar zero to packed short (2 byte) values in xmm 37.5103 -instruct Repl4S_immI0(regXD dst, immI0 zero) %{ 37.5104 +instruct Repl4S_immI0(regD dst, immI0 zero) %{ 37.5105 predicate(UseSSE>=2); 37.5106 match(Set dst (Replicate4S zero)); 37.5107 format %{ "PXOR $dst,$dst\t! 
replicate4S" %} 37.5108 - ins_encode( pxor(dst, dst)); 37.5109 + ins_encode %{ 37.5110 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 37.5111 + %} 37.5112 ins_pipe( fpu_reg_reg ); 37.5113 %} 37.5114 37.5115 // Replicate scalar to packed char (2 byte) values in xmm 37.5116 -instruct Repl4C_reg(regXD dst, regXD src) %{ 37.5117 +instruct Repl4C_reg(regD dst, regD src) %{ 37.5118 predicate(UseSSE>=2); 37.5119 match(Set dst (Replicate4C src)); 37.5120 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} 37.5121 - ins_encode( pshufd_4x16(dst, src)); 37.5122 + ins_encode %{ 37.5123 + __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); 37.5124 + %} 37.5125 ins_pipe( fpu_reg_reg ); 37.5126 %} 37.5127 37.5128 // Replicate scalar to packed char (2 byte) values in xmm 37.5129 -instruct Repl4C_eRegI(regXD dst, eRegI src) %{ 37.5130 +instruct Repl4C_eRegI(regD dst, eRegI src) %{ 37.5131 predicate(UseSSE>=2); 37.5132 match(Set dst (Replicate4C src)); 37.5133 format %{ "MOVD $dst,$src\n\t" 37.5134 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} 37.5135 - ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); 37.5136 + ins_encode %{ 37.5137 + __ movdl($dst$$XMMRegister, $src$$Register); 37.5138 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 37.5139 + %} 37.5140 ins_pipe( fpu_reg_reg ); 37.5141 %} 37.5142 37.5143 // Replicate scalar zero to packed char (2 byte) values in xmm 37.5144 -instruct Repl4C_immI0(regXD dst, immI0 zero) %{ 37.5145 +instruct Repl4C_immI0(regD dst, immI0 zero) %{ 37.5146 predicate(UseSSE>=2); 37.5147 match(Set dst (Replicate4C zero)); 37.5148 format %{ "PXOR $dst,$dst\t! 
replicate4C" %} 37.5149 - ins_encode( pxor(dst, dst)); 37.5150 + ins_encode %{ 37.5151 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 37.5152 + %} 37.5153 ins_pipe( fpu_reg_reg ); 37.5154 %} 37.5155 37.5156 // Replicate scalar to packed integer (4 byte) values in xmm 37.5157 -instruct Repl2I_reg(regXD dst, regXD src) %{ 37.5158 +instruct Repl2I_reg(regD dst, regD src) %{ 37.5159 predicate(UseSSE>=2); 37.5160 match(Set dst (Replicate2I src)); 37.5161 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} 37.5162 - ins_encode( pshufd(dst, src, 0x00)); 37.5163 + ins_encode %{ 37.5164 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 37.5165 + %} 37.5166 ins_pipe( fpu_reg_reg ); 37.5167 %} 37.5168 37.5169 // Replicate scalar to packed integer (4 byte) values in xmm 37.5170 -instruct Repl2I_eRegI(regXD dst, eRegI src) %{ 37.5171 +instruct Repl2I_eRegI(regD dst, eRegI src) %{ 37.5172 predicate(UseSSE>=2); 37.5173 match(Set dst (Replicate2I src)); 37.5174 format %{ "MOVD $dst,$src\n\t" 37.5175 "PSHUFD $dst,$dst,0x00\t! replicate2I" %} 37.5176 - ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); 37.5177 + ins_encode %{ 37.5178 + __ movdl($dst$$XMMRegister, $src$$Register); 37.5179 + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 37.5180 + %} 37.5181 ins_pipe( fpu_reg_reg ); 37.5182 %} 37.5183 37.5184 // Replicate scalar zero to packed integer (2 byte) values in xmm 37.5185 -instruct Repl2I_immI0(regXD dst, immI0 zero) %{ 37.5186 +instruct Repl2I_immI0(regD dst, immI0 zero) %{ 37.5187 predicate(UseSSE>=2); 37.5188 match(Set dst (Replicate2I zero)); 37.5189 format %{ "PXOR $dst,$dst\t! 
replicate2I" %} 37.5190 - ins_encode( pxor(dst, dst)); 37.5191 + ins_encode %{ 37.5192 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 37.5193 + %} 37.5194 ins_pipe( fpu_reg_reg ); 37.5195 %} 37.5196 37.5197 // Replicate scalar to packed single precision floating point values in xmm 37.5198 -instruct Repl2F_reg(regXD dst, regXD src) %{ 37.5199 +instruct Repl2F_reg(regD dst, regD src) %{ 37.5200 predicate(UseSSE>=2); 37.5201 match(Set dst (Replicate2F src)); 37.5202 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 37.5203 - ins_encode( pshufd(dst, src, 0xe0)); 37.5204 + ins_encode %{ 37.5205 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); 37.5206 + %} 37.5207 ins_pipe( fpu_reg_reg ); 37.5208 %} 37.5209 37.5210 // Replicate scalar to packed single precision floating point values in xmm 37.5211 -instruct Repl2F_regX(regXD dst, regX src) %{ 37.5212 +instruct Repl2F_regF(regD dst, regF src) %{ 37.5213 predicate(UseSSE>=2); 37.5214 match(Set dst (Replicate2F src)); 37.5215 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 37.5216 - ins_encode( pshufd(dst, src, 0xe0)); 37.5217 + ins_encode %{ 37.5218 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); 37.5219 + %} 37.5220 ins_pipe( fpu_reg_reg ); 37.5221 %} 37.5222 37.5223 // Replicate scalar to packed single precision floating point values in xmm 37.5224 -instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ 37.5225 +instruct Repl2F_immF0(regD dst, immF0 zero) %{ 37.5226 predicate(UseSSE>=2); 37.5227 match(Set dst (Replicate2F zero)); 37.5228 format %{ "PXOR $dst,$dst\t! 
replicate2F" %} 37.5229 - ins_encode( pxor(dst, dst)); 37.5230 + ins_encode %{ 37.5231 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 37.5232 + %} 37.5233 ins_pipe( fpu_reg_reg ); 37.5234 %} 37.5235 37.5236 @@ -12702,7 +12061,7 @@ 37.5237 %} 37.5238 37.5239 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 37.5240 - eAXRegI result, regXD tmp1, eFlagsReg cr) %{ 37.5241 + eAXRegI result, regD tmp1, eFlagsReg cr) %{ 37.5242 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 37.5243 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 37.5244 37.5245 @@ -12717,7 +12076,7 @@ 37.5246 37.5247 // fast string equals 37.5248 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 37.5249 - regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 37.5250 + regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 37.5251 match(Set result (StrEquals (Binary str1 str2) cnt)); 37.5252 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 37.5253 37.5254 @@ -12732,7 +12091,7 @@ 37.5255 37.5256 // fast search of substring with known size. 
37.5257 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 37.5258 - eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 37.5259 + eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 37.5260 predicate(UseSSE42Intrinsics); 37.5261 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 37.5262 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 37.5263 @@ -12759,7 +12118,7 @@ 37.5264 %} 37.5265 37.5266 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 37.5267 - eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{ 37.5268 + eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 37.5269 predicate(UseSSE42Intrinsics); 37.5270 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 37.5271 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 37.5272 @@ -12776,7 +12135,7 @@ 37.5273 37.5274 // fast array equals 37.5275 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 37.5276 - regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 37.5277 + regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 37.5278 %{ 37.5279 match(Set result (AryEq ary1 ary2)); 37.5280 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 37.5281 @@ -13602,40 +12961,40 @@ 37.5282 %} 37.5283 37.5284 // Compare 2 longs and CMOVE doubles 37.5285 -instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 37.5286 +instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ 37.5287 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 37.5288 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 37.5289 ins_cost(200); 37.5290 expand %{ 37.5291 - 
fcmovD_regS(cmp,flags,dst,src); 37.5292 + fcmovDPR_regS(cmp,flags,dst,src); 37.5293 %} 37.5294 %} 37.5295 37.5296 // Compare 2 longs and CMOVE doubles 37.5297 -instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{ 37.5298 +instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 37.5299 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 37.5300 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 37.5301 ins_cost(200); 37.5302 expand %{ 37.5303 - fcmovXD_regS(cmp,flags,dst,src); 37.5304 - %} 37.5305 -%} 37.5306 - 37.5307 -instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ 37.5308 + fcmovD_regS(cmp,flags,dst,src); 37.5309 + %} 37.5310 +%} 37.5311 + 37.5312 +instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ 37.5313 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 37.5314 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 37.5315 ins_cost(200); 37.5316 expand %{ 37.5317 - fcmovF_regS(cmp,flags,dst,src); 37.5318 - %} 37.5319 -%} 37.5320 - 37.5321 -instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{ 37.5322 + fcmovFPR_regS(cmp,flags,dst,src); 37.5323 + %} 37.5324 +%} 37.5325 + 37.5326 +instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ 37.5327 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 37.5328 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 37.5329 ins_cost(200); 37.5330 expand %{ 37.5331 - fcmovX_regS(cmp,flags,dst,src); 37.5332 + fcmovF_regS(cmp,flags,dst,src); 37.5333 %} 37.5334 %} 37.5335 37.5336 @@ 
-13730,40 +13089,40 @@ 37.5337 %} 37.5338 37.5339 // Compare 2 longs and CMOVE doubles 37.5340 -instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 37.5341 +instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ 37.5342 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 37.5343 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 37.5344 ins_cost(200); 37.5345 expand %{ 37.5346 - fcmovD_regS(cmp,flags,dst,src); 37.5347 + fcmovDPR_regS(cmp,flags,dst,src); 37.5348 %} 37.5349 %} 37.5350 37.5351 // Compare 2 longs and CMOVE doubles 37.5352 -instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{ 37.5353 +instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 37.5354 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 37.5355 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 37.5356 ins_cost(200); 37.5357 expand %{ 37.5358 - fcmovXD_regS(cmp,flags,dst,src); 37.5359 - %} 37.5360 -%} 37.5361 - 37.5362 -instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 37.5363 + fcmovD_regS(cmp,flags,dst,src); 37.5364 + %} 37.5365 +%} 37.5366 + 37.5367 +instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ 37.5368 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 37.5369 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 37.5370 ins_cost(200); 37.5371 expand %{ 37.5372 - fcmovF_regS(cmp,flags,dst,src); 37.5373 - %} 37.5374 -%} 37.5375 - 37.5376 -instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{ 
37.5377 + fcmovFPR_regS(cmp,flags,dst,src); 37.5378 + %} 37.5379 +%} 37.5380 + 37.5381 +instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ 37.5382 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 37.5383 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 37.5384 ins_cost(200); 37.5385 expand %{ 37.5386 - fcmovX_regS(cmp,flags,dst,src); 37.5387 + fcmovF_regS(cmp,flags,dst,src); 37.5388 %} 37.5389 %} 37.5390 37.5391 @@ -13863,41 +13222,41 @@ 37.5392 %} 37.5393 37.5394 // Compare 2 longs and CMOVE doubles 37.5395 -instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ 37.5396 +instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{ 37.5397 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 37.5398 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 37.5399 ins_cost(200); 37.5400 expand %{ 37.5401 - fcmovD_regS(cmp,flags,dst,src); 37.5402 + fcmovDPR_regS(cmp,flags,dst,src); 37.5403 %} 37.5404 %} 37.5405 37.5406 // Compare 2 longs and CMOVE doubles 37.5407 -instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{ 37.5408 +instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ 37.5409 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 37.5410 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 37.5411 ins_cost(200); 37.5412 expand %{ 37.5413 - fcmovXD_regS(cmp,flags,dst,src); 37.5414 - %} 37.5415 -%} 37.5416 - 37.5417 -instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ 37.5418 + 
fcmovD_regS(cmp,flags,dst,src); 37.5419 + %} 37.5420 +%} 37.5421 + 37.5422 +instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{ 37.5423 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 37.5424 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 37.5425 ins_cost(200); 37.5426 expand %{ 37.5427 - fcmovF_regS(cmp,flags,dst,src); 37.5428 - %} 37.5429 -%} 37.5430 - 37.5431 - 37.5432 -instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{ 37.5433 + fcmovFPR_regS(cmp,flags,dst,src); 37.5434 + %} 37.5435 +%} 37.5436 + 37.5437 + 37.5438 +instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ 37.5439 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); 37.5440 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 37.5441 ins_cost(200); 37.5442 expand %{ 37.5443 - fcmovX_regS(cmp,flags,dst,src); 37.5444 + fcmovF_regS(cmp,flags,dst,src); 37.5445 %} 37.5446 %} 37.5447
38.1 --- a/src/cpu/x86/vm/x86_64.ad Tue Dec 27 12:38:49 2011 -0800 38.2 +++ b/src/cpu/x86/vm/x86_64.ad Thu Dec 29 11:37:50 2011 -0800 38.3 @@ -552,7 +552,7 @@ 38.4 #define __ _masm. 38.5 38.6 static int preserve_SP_size() { 38.7 - return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg) 38.8 + return 3; // rex.w, op, rm(reg/reg) 38.9 } 38.10 38.11 // !!!!! Special hack to get all types of calls to specify the byte offset 38.12 @@ -797,48 +797,35 @@ 38.13 } 38.14 } 38.15 38.16 -void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc) 38.17 -{ 38.18 - if (dstenc != srcenc) { 38.19 - if (dstenc < 8) { 38.20 - if (srcenc >= 8) { 38.21 - emit_opcode(cbuf, Assembler::REX_B); 38.22 - srcenc -= 8; 38.23 - } 38.24 - } else { 38.25 - if (srcenc < 8) { 38.26 - emit_opcode(cbuf, Assembler::REX_R); 38.27 - } else { 38.28 - emit_opcode(cbuf, Assembler::REX_RB); 38.29 - srcenc -= 8; 38.30 - } 38.31 - dstenc -= 8; 38.32 - } 38.33 - 38.34 - emit_opcode(cbuf, 0x8B); 38.35 - emit_rm(cbuf, 0x3, dstenc, srcenc); 38.36 - } 38.37 -} 38.38 - 38.39 -void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 38.40 - if( dst_encoding == src_encoding ) { 38.41 - // reg-reg copy, use an empty encoding 38.42 - } else { 38.43 - MacroAssembler _masm(&cbuf); 38.44 - 38.45 - __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding)); 38.46 - } 38.47 -} 38.48 - 38.49 // This could be in MacroAssembler but it's fairly C2 specific 38.50 void emit_cmpfp_fixup(MacroAssembler& _masm) { 38.51 Label exit; 38.52 __ jccb(Assembler::noParity, exit); 38.53 __ pushf(); 38.54 + // 38.55 + // comiss/ucomiss instructions set ZF,PF,CF flags and 38.56 + // zero OF,AF,SF for NaN values. 38.57 + // Fixup flags by zeroing ZF,PF so that compare of NaN 38.58 + // values returns 'less than' result (CF is set). 38.59 + // Leave the rest of flags unchanged. 
38.60 + // 38.61 + // 7 6 5 4 3 2 1 0 38.62 + // |S|Z|r|A|r|P|r|C| (r - reserved bit) 38.63 + // 0 0 1 0 1 0 1 1 (0x2B) 38.64 + // 38.65 __ andq(Address(rsp, 0), 0xffffff2b); 38.66 __ popf(); 38.67 __ bind(exit); 38.68 - __ nop(); // (target for branch to avoid branch to branch) 38.69 +} 38.70 + 38.71 +void emit_cmpfp3(MacroAssembler& _masm, Register dst) { 38.72 + Label done; 38.73 + __ movl(dst, -1); 38.74 + __ jcc(Assembler::parity, done); 38.75 + __ jcc(Assembler::below, done); 38.76 + __ setb(Assembler::notEqual, dst); 38.77 + __ movzbl(dst, dst); 38.78 + __ bind(done); 38.79 } 38.80 38.81 38.82 @@ -1274,16 +1261,8 @@ 38.83 // 64-bit 38.84 int offset = ra_->reg2offset(src_first); 38.85 if (cbuf) { 38.86 - emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); 38.87 - if (Matcher::_regEncode[dst_first] >= 8) { 38.88 - emit_opcode(*cbuf, Assembler::REX_R); 38.89 - } 38.90 - emit_opcode(*cbuf, 0x0F); 38.91 - emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12); 38.92 - encode_RegMem(*cbuf, 38.93 - Matcher::_regEncode[dst_first], 38.94 - RSP_enc, 0x4, 0, offset, 38.95 - false); 38.96 + MacroAssembler _masm(cbuf); 38.97 + __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 38.98 #ifndef PRODUCT 38.99 } else if (!do_size) { 38.100 st->print("%s %s, [rsp + #%d]\t# spill", 38.101 @@ -1294,25 +1273,17 @@ 38.102 } 38.103 return 38.104 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 38.105 - ((Matcher::_regEncode[dst_first] < 8) 38.106 - ? 5 38.107 - : 6); // REX 38.108 + ((Matcher::_regEncode[dst_first] >= 8) 38.109 + ? 
6 38.110 + : (5 + ((UseAVX>0)?1:0))); // REX 38.111 } else { 38.112 // 32-bit 38.113 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 38.114 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 38.115 int offset = ra_->reg2offset(src_first); 38.116 if (cbuf) { 38.117 - emit_opcode(*cbuf, 0xF3); 38.118 - if (Matcher::_regEncode[dst_first] >= 8) { 38.119 - emit_opcode(*cbuf, Assembler::REX_R); 38.120 - } 38.121 - emit_opcode(*cbuf, 0x0F); 38.122 - emit_opcode(*cbuf, 0x10); 38.123 - encode_RegMem(*cbuf, 38.124 - Matcher::_regEncode[dst_first], 38.125 - RSP_enc, 0x4, 0, offset, 38.126 - false); 38.127 + MacroAssembler _masm(cbuf); 38.128 + __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 38.129 #ifndef PRODUCT 38.130 } else if (!do_size) { 38.131 st->print("movss %s, [rsp + #%d]\t# spill", 38.132 @@ -1322,9 +1293,9 @@ 38.133 } 38.134 return 38.135 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 38.136 - ((Matcher::_regEncode[dst_first] < 8) 38.137 - ? 5 38.138 - : 6); // REX 38.139 + ((Matcher::_regEncode[dst_first] >= 8) 38.140 + ? 
6 38.141 + : (5 + ((UseAVX>0)?1:0))); // REX 38.142 } 38.143 } 38.144 } else if (src_first_rc == rc_int) { 38.145 @@ -1450,25 +1421,8 @@ 38.146 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 38.147 // 64-bit 38.148 if (cbuf) { 38.149 - emit_opcode(*cbuf, 0x66); 38.150 - if (Matcher::_regEncode[dst_first] < 8) { 38.151 - if (Matcher::_regEncode[src_first] < 8) { 38.152 - emit_opcode(*cbuf, Assembler::REX_W); 38.153 - } else { 38.154 - emit_opcode(*cbuf, Assembler::REX_WB); 38.155 - } 38.156 - } else { 38.157 - if (Matcher::_regEncode[src_first] < 8) { 38.158 - emit_opcode(*cbuf, Assembler::REX_WR); 38.159 - } else { 38.160 - emit_opcode(*cbuf, Assembler::REX_WRB); 38.161 - } 38.162 - } 38.163 - emit_opcode(*cbuf, 0x0F); 38.164 - emit_opcode(*cbuf, 0x6E); 38.165 - emit_rm(*cbuf, 0x3, 38.166 - Matcher::_regEncode[dst_first] & 7, 38.167 - Matcher::_regEncode[src_first] & 7); 38.168 + MacroAssembler _masm(cbuf); 38.169 + __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); 38.170 #ifndef PRODUCT 38.171 } else if (!do_size) { 38.172 st->print("movdq %s, %s\t# spill", 38.173 @@ -1482,23 +1436,8 @@ 38.174 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 38.175 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 38.176 if (cbuf) { 38.177 - emit_opcode(*cbuf, 0x66); 38.178 - if (Matcher::_regEncode[dst_first] < 8) { 38.179 - if (Matcher::_regEncode[src_first] >= 8) { 38.180 - emit_opcode(*cbuf, Assembler::REX_B); 38.181 - } 38.182 - } else { 38.183 - if (Matcher::_regEncode[src_first] < 8) { 38.184 - emit_opcode(*cbuf, Assembler::REX_R); 38.185 - } else { 38.186 - emit_opcode(*cbuf, Assembler::REX_RB); 38.187 - } 38.188 - } 38.189 - emit_opcode(*cbuf, 0x0F); 38.190 - emit_opcode(*cbuf, 0x6E); 38.191 - emit_rm(*cbuf, 0x3, 38.192 - Matcher::_regEncode[dst_first] & 7, 38.193 - Matcher::_regEncode[src_first] & 7); 38.194 + MacroAssembler _masm(cbuf); 38.195 + __ 
movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); 38.196 #ifndef PRODUCT 38.197 } else if (!do_size) { 38.198 st->print("movdl %s, %s\t# spill", 38.199 @@ -1507,9 +1446,9 @@ 38.200 #endif 38.201 } 38.202 return 38.203 - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) 38.204 - ? 4 38.205 - : 5; // REX 38.206 + (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) 38.207 + ? 5 38.208 + : (4 + ((UseAVX>0)?1:0)); // REX 38.209 } 38.210 } 38.211 } else if (src_first_rc == rc_float) { 38.212 @@ -1521,16 +1460,8 @@ 38.213 // 64-bit 38.214 int offset = ra_->reg2offset(dst_first); 38.215 if (cbuf) { 38.216 - emit_opcode(*cbuf, 0xF2); 38.217 - if (Matcher::_regEncode[src_first] >= 8) { 38.218 - emit_opcode(*cbuf, Assembler::REX_R); 38.219 - } 38.220 - emit_opcode(*cbuf, 0x0F); 38.221 - emit_opcode(*cbuf, 0x11); 38.222 - encode_RegMem(*cbuf, 38.223 - Matcher::_regEncode[src_first], 38.224 - RSP_enc, 0x4, 0, offset, 38.225 - false); 38.226 + MacroAssembler _masm(cbuf); 38.227 + __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); 38.228 #ifndef PRODUCT 38.229 } else if (!do_size) { 38.230 st->print("movsd [rsp + #%d], %s\t# spill", 38.231 @@ -1540,25 +1471,17 @@ 38.232 } 38.233 return 38.234 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 38.235 - ((Matcher::_regEncode[src_first] < 8) 38.236 - ? 5 38.237 - : 6); // REX 38.238 + ((Matcher::_regEncode[src_first] >= 8) 38.239 + ? 
6 38.240 + : (5 + ((UseAVX>0)?1:0))); // REX 38.241 } else { 38.242 // 32-bit 38.243 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 38.244 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 38.245 int offset = ra_->reg2offset(dst_first); 38.246 if (cbuf) { 38.247 - emit_opcode(*cbuf, 0xF3); 38.248 - if (Matcher::_regEncode[src_first] >= 8) { 38.249 - emit_opcode(*cbuf, Assembler::REX_R); 38.250 - } 38.251 - emit_opcode(*cbuf, 0x0F); 38.252 - emit_opcode(*cbuf, 0x11); 38.253 - encode_RegMem(*cbuf, 38.254 - Matcher::_regEncode[src_first], 38.255 - RSP_enc, 0x4, 0, offset, 38.256 - false); 38.257 + MacroAssembler _masm(cbuf); 38.258 + __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); 38.259 #ifndef PRODUCT 38.260 } else if (!do_size) { 38.261 st->print("movss [rsp + #%d], %s\t# spill", 38.262 @@ -1568,9 +1491,9 @@ 38.263 } 38.264 return 38.265 ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 38.266 - ((Matcher::_regEncode[src_first] < 8) 38.267 - ? 5 38.268 - : 6); // REX 38.269 + ((Matcher::_regEncode[src_first] >=8) 38.270 + ? 6 38.271 + : (5 + ((UseAVX>0)?1:0))); // REX 38.272 } 38.273 } else if (dst_first_rc == rc_int) { 38.274 // xmm -> gpr 38.275 @@ -1578,25 +1501,8 @@ 38.276 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 38.277 // 64-bit 38.278 if (cbuf) { 38.279 - emit_opcode(*cbuf, 0x66); 38.280 - if (Matcher::_regEncode[dst_first] < 8) { 38.281 - if (Matcher::_regEncode[src_first] < 8) { 38.282 - emit_opcode(*cbuf, Assembler::REX_W); 38.283 - } else { 38.284 - emit_opcode(*cbuf, Assembler::REX_WR); // attention! 38.285 - } 38.286 - } else { 38.287 - if (Matcher::_regEncode[src_first] < 8) { 38.288 - emit_opcode(*cbuf, Assembler::REX_WB); // attention! 
38.289 - } else { 38.290 - emit_opcode(*cbuf, Assembler::REX_WRB); 38.291 - } 38.292 - } 38.293 - emit_opcode(*cbuf, 0x0F); 38.294 - emit_opcode(*cbuf, 0x7E); 38.295 - emit_rm(*cbuf, 0x3, 38.296 - Matcher::_regEncode[src_first] & 7, 38.297 - Matcher::_regEncode[dst_first] & 7); 38.298 + MacroAssembler _masm(cbuf); 38.299 + __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); 38.300 #ifndef PRODUCT 38.301 } else if (!do_size) { 38.302 st->print("movdq %s, %s\t# spill", 38.303 @@ -1610,23 +1516,8 @@ 38.304 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 38.305 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 38.306 if (cbuf) { 38.307 - emit_opcode(*cbuf, 0x66); 38.308 - if (Matcher::_regEncode[dst_first] < 8) { 38.309 - if (Matcher::_regEncode[src_first] >= 8) { 38.310 - emit_opcode(*cbuf, Assembler::REX_R); // attention! 38.311 - } 38.312 - } else { 38.313 - if (Matcher::_regEncode[src_first] < 8) { 38.314 - emit_opcode(*cbuf, Assembler::REX_B); // attention! 38.315 - } else { 38.316 - emit_opcode(*cbuf, Assembler::REX_RB); 38.317 - } 38.318 - } 38.319 - emit_opcode(*cbuf, 0x0F); 38.320 - emit_opcode(*cbuf, 0x7E); 38.321 - emit_rm(*cbuf, 0x3, 38.322 - Matcher::_regEncode[src_first] & 7, 38.323 - Matcher::_regEncode[dst_first] & 7); 38.324 + MacroAssembler _masm(cbuf); 38.325 + __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); 38.326 #ifndef PRODUCT 38.327 } else if (!do_size) { 38.328 st->print("movdl %s, %s\t# spill", 38.329 @@ -1635,9 +1526,9 @@ 38.330 #endif 38.331 } 38.332 return 38.333 - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) 38.334 - ? 4 38.335 - : 5; // REX 38.336 + (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) 38.337 + ? 
5 38.338 + : (4 + ((UseAVX>0)?1:0)); // REX 38.339 } 38.340 } else if (dst_first_rc == rc_float) { 38.341 // xmm -> xmm 38.342 @@ -1645,23 +1536,8 @@ 38.343 (dst_first & 1) == 0 && dst_first + 1 == dst_second) { 38.344 // 64-bit 38.345 if (cbuf) { 38.346 - emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2); 38.347 - if (Matcher::_regEncode[dst_first] < 8) { 38.348 - if (Matcher::_regEncode[src_first] >= 8) { 38.349 - emit_opcode(*cbuf, Assembler::REX_B); 38.350 - } 38.351 - } else { 38.352 - if (Matcher::_regEncode[src_first] < 8) { 38.353 - emit_opcode(*cbuf, Assembler::REX_R); 38.354 - } else { 38.355 - emit_opcode(*cbuf, Assembler::REX_RB); 38.356 - } 38.357 - } 38.358 - emit_opcode(*cbuf, 0x0F); 38.359 - emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10); 38.360 - emit_rm(*cbuf, 0x3, 38.361 - Matcher::_regEncode[dst_first] & 7, 38.362 - Matcher::_regEncode[src_first] & 7); 38.363 + MacroAssembler _masm(cbuf); 38.364 + __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); 38.365 #ifndef PRODUCT 38.366 } else if (!do_size) { 38.367 st->print("%s %s, %s\t# spill", 38.368 @@ -1671,32 +1547,16 @@ 38.369 #endif 38.370 } 38.371 return 38.372 - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) 38.373 - ? 4 38.374 - : 5; // REX 38.375 + (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) 38.376 + ? 
5 38.377 + : (4 + ((UseAVX>0)?1:0)); // REX 38.378 } else { 38.379 // 32-bit 38.380 assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); 38.381 assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); 38.382 if (cbuf) { 38.383 - if (!UseXmmRegToRegMoveAll) 38.384 - emit_opcode(*cbuf, 0xF3); 38.385 - if (Matcher::_regEncode[dst_first] < 8) { 38.386 - if (Matcher::_regEncode[src_first] >= 8) { 38.387 - emit_opcode(*cbuf, Assembler::REX_B); 38.388 - } 38.389 - } else { 38.390 - if (Matcher::_regEncode[src_first] < 8) { 38.391 - emit_opcode(*cbuf, Assembler::REX_R); 38.392 - } else { 38.393 - emit_opcode(*cbuf, Assembler::REX_RB); 38.394 - } 38.395 - } 38.396 - emit_opcode(*cbuf, 0x0F); 38.397 - emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10); 38.398 - emit_rm(*cbuf, 0x3, 38.399 - Matcher::_regEncode[dst_first] & 7, 38.400 - Matcher::_regEncode[src_first] & 7); 38.401 + MacroAssembler _masm(cbuf); 38.402 + __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); 38.403 #ifndef PRODUCT 38.404 } else if (!do_size) { 38.405 st->print("%s %s, %s\t# spill", 38.406 @@ -1705,10 +1565,10 @@ 38.407 Matcher::regName[src_first]); 38.408 #endif 38.409 } 38.410 - return 38.411 - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) 38.412 - ? (UseXmmRegToRegMoveAll ? 3 : 4) 38.413 - : (UseXmmRegToRegMoveAll ? 4 : 5); // REX 38.414 + return ((UseAVX>0) ? 5: 38.415 + ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) 38.416 + ? (UseXmmRegToRegMoveAll ? 4 : 5) 38.417 + : (UseXmmRegToRegMoveAll ? 
3 : 4))); // REX 38.418 } 38.419 } 38.420 } 38.421 @@ -2205,47 +2065,6 @@ 38.422 emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); 38.423 %} 38.424 38.425 - enc_class cmpfp_fixup() %{ 38.426 - MacroAssembler _masm(&cbuf); 38.427 - emit_cmpfp_fixup(_masm); 38.428 - %} 38.429 - 38.430 - enc_class cmpfp3(rRegI dst) 38.431 - %{ 38.432 - int dstenc = $dst$$reg; 38.433 - 38.434 - // movl $dst, -1 38.435 - if (dstenc >= 8) { 38.436 - emit_opcode(cbuf, Assembler::REX_B); 38.437 - } 38.438 - emit_opcode(cbuf, 0xB8 | (dstenc & 7)); 38.439 - emit_d32(cbuf, -1); 38.440 - 38.441 - // jp,s done 38.442 - emit_opcode(cbuf, 0x7A); 38.443 - emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A); 38.444 - 38.445 - // jb,s done 38.446 - emit_opcode(cbuf, 0x72); 38.447 - emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08); 38.448 - 38.449 - // setne $dst 38.450 - if (dstenc >= 4) { 38.451 - emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B); 38.452 - } 38.453 - emit_opcode(cbuf, 0x0F); 38.454 - emit_opcode(cbuf, 0x95); 38.455 - emit_opcode(cbuf, 0xC0 | (dstenc & 7)); 38.456 - 38.457 - // movzbl $dst, $dst 38.458 - if (dstenc >= 4) { 38.459 - emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB); 38.460 - } 38.461 - emit_opcode(cbuf, 0x0F); 38.462 - emit_opcode(cbuf, 0xB6); 38.463 - emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7); 38.464 - %} 38.465 - 38.466 enc_class cdql_enc(no_rax_rdx_RegI div) 38.467 %{ 38.468 // Full implementation of Java idiv and irem; checks for 38.469 @@ -2472,55 +2291,6 @@ 38.470 emit_cc(cbuf, $secondary, $cop$$cmpcode); 38.471 %} 38.472 38.473 - enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src) 38.474 - %{ 38.475 - // Invert sense of branch from sense of cmov 38.476 - emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1); 38.477 - emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8) 38.478 - ? (UseXmmRegToRegMoveAll ? 3 : 4) 38.479 - : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX 38.480 - // UseXmmRegToRegMoveAll ? 
movaps(dst, src) : movss(dst, src) 38.481 - if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3); 38.482 - if ($dst$$reg < 8) { 38.483 - if ($src$$reg >= 8) { 38.484 - emit_opcode(cbuf, Assembler::REX_B); 38.485 - } 38.486 - } else { 38.487 - if ($src$$reg < 8) { 38.488 - emit_opcode(cbuf, Assembler::REX_R); 38.489 - } else { 38.490 - emit_opcode(cbuf, Assembler::REX_RB); 38.491 - } 38.492 - } 38.493 - emit_opcode(cbuf, 0x0F); 38.494 - emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10); 38.495 - emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); 38.496 - %} 38.497 - 38.498 - enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src) 38.499 - %{ 38.500 - // Invert sense of branch from sense of cmov 38.501 - emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1); 38.502 - emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX 38.503 - 38.504 - // UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src) 38.505 - emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2); 38.506 - if ($dst$$reg < 8) { 38.507 - if ($src$$reg >= 8) { 38.508 - emit_opcode(cbuf, Assembler::REX_B); 38.509 - } 38.510 - } else { 38.511 - if ($src$$reg < 8) { 38.512 - emit_opcode(cbuf, Assembler::REX_R); 38.513 - } else { 38.514 - emit_opcode(cbuf, Assembler::REX_RB); 38.515 - } 38.516 - } 38.517 - emit_opcode(cbuf, 0x0F); 38.518 - emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10); 38.519 - emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7); 38.520 - %} 38.521 - 38.522 enc_class enc_PartialSubtypeCheck() 38.523 %{ 38.524 Register Rrdi = as_Register(RDI_enc); // result register 38.525 @@ -2751,68 +2521,6 @@ 38.526 } 38.527 %} 38.528 38.529 - // Encode a reg-reg copy. If it is useless, then empty encoding. 38.530 - enc_class enc_copy(rRegI dst, rRegI src) 38.531 - %{ 38.532 - encode_copy(cbuf, $dst$$reg, $src$$reg); 38.533 - %} 38.534 - 38.535 - // Encode xmm reg-reg copy. If it is useless, then empty encoding. 
38.536 - enc_class enc_CopyXD( RegD dst, RegD src ) %{ 38.537 - encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); 38.538 - %} 38.539 - 38.540 - enc_class enc_copy_always(rRegI dst, rRegI src) 38.541 - %{ 38.542 - int srcenc = $src$$reg; 38.543 - int dstenc = $dst$$reg; 38.544 - 38.545 - if (dstenc < 8) { 38.546 - if (srcenc >= 8) { 38.547 - emit_opcode(cbuf, Assembler::REX_B); 38.548 - srcenc -= 8; 38.549 - } 38.550 - } else { 38.551 - if (srcenc < 8) { 38.552 - emit_opcode(cbuf, Assembler::REX_R); 38.553 - } else { 38.554 - emit_opcode(cbuf, Assembler::REX_RB); 38.555 - srcenc -= 8; 38.556 - } 38.557 - dstenc -= 8; 38.558 - } 38.559 - 38.560 - emit_opcode(cbuf, 0x8B); 38.561 - emit_rm(cbuf, 0x3, dstenc, srcenc); 38.562 - %} 38.563 - 38.564 - enc_class enc_copy_wide(rRegL dst, rRegL src) 38.565 - %{ 38.566 - int srcenc = $src$$reg; 38.567 - int dstenc = $dst$$reg; 38.568 - 38.569 - if (dstenc != srcenc) { 38.570 - if (dstenc < 8) { 38.571 - if (srcenc < 8) { 38.572 - emit_opcode(cbuf, Assembler::REX_W); 38.573 - } else { 38.574 - emit_opcode(cbuf, Assembler::REX_WB); 38.575 - srcenc -= 8; 38.576 - } 38.577 - } else { 38.578 - if (srcenc < 8) { 38.579 - emit_opcode(cbuf, Assembler::REX_WR); 38.580 - } else { 38.581 - emit_opcode(cbuf, Assembler::REX_WRB); 38.582 - srcenc -= 8; 38.583 - } 38.584 - dstenc -= 8; 38.585 - } 38.586 - emit_opcode(cbuf, 0x8B); 38.587 - emit_rm(cbuf, 0x3, dstenc, srcenc); 38.588 - } 38.589 - %} 38.590 - 38.591 enc_class Con32(immI src) 38.592 %{ 38.593 // Output immediate 38.594 @@ -3212,92 +2920,19 @@ 38.595 %} 38.596 38.597 enc_class Push_ResultXD(regD dst) %{ 38.598 - int dstenc = $dst$$reg; 38.599 - 38.600 - store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP] 38.601 - 38.602 - // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp] 38.603 - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 
0xF2 : 0x66); 38.604 - if (dstenc >= 8) { 38.605 - emit_opcode(cbuf, Assembler::REX_R); 38.606 - } 38.607 - emit_opcode (cbuf, 0x0F ); 38.608 - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 ); 38.609 - encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false); 38.610 - 38.611 - // add rsp,8 38.612 - emit_opcode(cbuf, Assembler::REX_W); 38.613 - emit_opcode(cbuf,0x83); 38.614 - emit_rm(cbuf,0x3, 0x0, RSP_enc); 38.615 - emit_d8(cbuf,0x08); 38.616 + MacroAssembler _masm(&cbuf); 38.617 + __ fstp_d(Address(rsp, 0)); 38.618 + __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 38.619 + __ addptr(rsp, 8); 38.620 %} 38.621 38.622 enc_class Push_SrcXD(regD src) %{ 38.623 - int srcenc = $src$$reg; 38.624 - 38.625 - // subq rsp,#8 38.626 - emit_opcode(cbuf, Assembler::REX_W); 38.627 - emit_opcode(cbuf, 0x83); 38.628 - emit_rm(cbuf, 0x3, 0x5, RSP_enc); 38.629 - emit_d8(cbuf, 0x8); 38.630 - 38.631 - // movsd [rsp],src 38.632 - emit_opcode(cbuf, 0xF2); 38.633 - if (srcenc >= 8) { 38.634 - emit_opcode(cbuf, Assembler::REX_R); 38.635 - } 38.636 - emit_opcode(cbuf, 0x0F); 38.637 - emit_opcode(cbuf, 0x11); 38.638 - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); 38.639 - 38.640 - // fldd [rsp] 38.641 - emit_opcode(cbuf, 0x66); 38.642 - emit_opcode(cbuf, 0xDD); 38.643 - encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false); 38.644 - %} 38.645 - 38.646 - 38.647 - enc_class movq_ld(regD dst, memory mem) %{ 38.648 MacroAssembler _masm(&cbuf); 38.649 - __ movq($dst$$XMMRegister, $mem$$Address); 38.650 - %} 38.651 - 38.652 - enc_class movq_st(memory mem, regD src) %{ 38.653 - MacroAssembler _masm(&cbuf); 38.654 - __ movq($mem$$Address, $src$$XMMRegister); 38.655 - %} 38.656 - 38.657 - enc_class pshufd_8x8(regF dst, regF src) %{ 38.658 - MacroAssembler _masm(&cbuf); 38.659 - 38.660 - encode_CopyXD(cbuf, $dst$$reg, $src$$reg); 38.661 - __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg)); 38.662 - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00); 
38.663 - %} 38.664 - 38.665 - enc_class pshufd_4x16(regF dst, regF src) %{ 38.666 - MacroAssembler _masm(&cbuf); 38.667 - 38.668 - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00); 38.669 - %} 38.670 - 38.671 - enc_class pshufd(regD dst, regD src, int mode) %{ 38.672 - MacroAssembler _masm(&cbuf); 38.673 - 38.674 - __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode); 38.675 - %} 38.676 - 38.677 - enc_class pxor(regD dst, regD src) %{ 38.678 - MacroAssembler _masm(&cbuf); 38.679 - 38.680 - __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg)); 38.681 - %} 38.682 - 38.683 - enc_class mov_i2x(regD dst, rRegI src) %{ 38.684 - MacroAssembler _masm(&cbuf); 38.685 - 38.686 - __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg)); 38.687 - %} 38.688 + __ subptr(rsp, 8); 38.689 + __ movdbl(Address(rsp, 0), $src$$XMMRegister); 38.690 + __ fld_d(Address(rsp, 0)); 38.691 + %} 38.692 + 38.693 38.694 // obj: object to lock 38.695 // box: box address (header location) -- killed 38.696 @@ -3534,303 +3169,6 @@ 38.697 RELOC_DISP32); 38.698 %} 38.699 38.700 - enc_class absF_encoding(regF dst) 38.701 - %{ 38.702 - int dstenc = $dst$$reg; 38.703 - address signmask_address = (address) StubRoutines::x86::float_sign_mask(); 38.704 - 38.705 - cbuf.set_insts_mark(); 38.706 - if (dstenc >= 8) { 38.707 - emit_opcode(cbuf, Assembler::REX_R); 38.708 - dstenc -= 8; 38.709 - } 38.710 - // XXX reg_mem doesn't support RIP-relative addressing yet 38.711 - emit_opcode(cbuf, 0x0F); 38.712 - emit_opcode(cbuf, 0x54); 38.713 - emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101 38.714 - emit_d32_reloc(cbuf, signmask_address); 38.715 - %} 38.716 - 38.717 - enc_class absD_encoding(regD dst) 38.718 - %{ 38.719 - int dstenc = $dst$$reg; 38.720 - address signmask_address = (address) StubRoutines::x86::double_sign_mask(); 38.721 - 38.722 - cbuf.set_insts_mark(); 38.723 - emit_opcode(cbuf, 0x66); 38.724 - if (dstenc >= 8) { 38.725 - emit_opcode(cbuf, 
Assembler::REX_R); 38.726 - dstenc -= 8; 38.727 - } 38.728 - // XXX reg_mem doesn't support RIP-relative addressing yet 38.729 - emit_opcode(cbuf, 0x0F); 38.730 - emit_opcode(cbuf, 0x54); 38.731 - emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101 38.732 - emit_d32_reloc(cbuf, signmask_address); 38.733 - %} 38.734 - 38.735 - enc_class negF_encoding(regF dst) 38.736 - %{ 38.737 - int dstenc = $dst$$reg; 38.738 - address signflip_address = (address) StubRoutines::x86::float_sign_flip(); 38.739 - 38.740 - cbuf.set_insts_mark(); 38.741 - if (dstenc >= 8) { 38.742 - emit_opcode(cbuf, Assembler::REX_R); 38.743 - dstenc -= 8; 38.744 - } 38.745 - // XXX reg_mem doesn't support RIP-relative addressing yet 38.746 - emit_opcode(cbuf, 0x0F); 38.747 - emit_opcode(cbuf, 0x57); 38.748 - emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101 38.749 - emit_d32_reloc(cbuf, signflip_address); 38.750 - %} 38.751 - 38.752 - enc_class negD_encoding(regD dst) 38.753 - %{ 38.754 - int dstenc = $dst$$reg; 38.755 - address signflip_address = (address) StubRoutines::x86::double_sign_flip(); 38.756 - 38.757 - cbuf.set_insts_mark(); 38.758 - emit_opcode(cbuf, 0x66); 38.759 - if (dstenc >= 8) { 38.760 - emit_opcode(cbuf, Assembler::REX_R); 38.761 - dstenc -= 8; 38.762 - } 38.763 - // XXX reg_mem doesn't support RIP-relative addressing yet 38.764 - emit_opcode(cbuf, 0x0F); 38.765 - emit_opcode(cbuf, 0x57); 38.766 - emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101 38.767 - emit_d32_reloc(cbuf, signflip_address); 38.768 - %} 38.769 - 38.770 - enc_class f2i_fixup(rRegI dst, regF src) 38.771 - %{ 38.772 - int dstenc = $dst$$reg; 38.773 - int srcenc = $src$$reg; 38.774 - 38.775 - // cmpl $dst, #0x80000000 38.776 - if (dstenc >= 8) { 38.777 - emit_opcode(cbuf, Assembler::REX_B); 38.778 - } 38.779 - emit_opcode(cbuf, 0x81); 38.780 - emit_rm(cbuf, 0x3, 0x7, dstenc & 7); 38.781 - emit_d32(cbuf, 0x80000000); 38.782 - 38.783 - // jne,s done 38.784 - emit_opcode(cbuf, 0x75); 38.785 - if (srcenc < 8 && dstenc < 8) { 
38.786 - emit_d8(cbuf, 0xF); 38.787 - } else if (srcenc >= 8 && dstenc >= 8) { 38.788 - emit_d8(cbuf, 0x11); 38.789 - } else { 38.790 - emit_d8(cbuf, 0x10); 38.791 - } 38.792 - 38.793 - // subq rsp, #8 38.794 - emit_opcode(cbuf, Assembler::REX_W); 38.795 - emit_opcode(cbuf, 0x83); 38.796 - emit_rm(cbuf, 0x3, 0x5, RSP_enc); 38.797 - emit_d8(cbuf, 8); 38.798 - 38.799 - // movss [rsp], $src 38.800 - emit_opcode(cbuf, 0xF3); 38.801 - if (srcenc >= 8) { 38.802 - emit_opcode(cbuf, Assembler::REX_R); 38.803 - } 38.804 - emit_opcode(cbuf, 0x0F); 38.805 - emit_opcode(cbuf, 0x11); 38.806 - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes 38.807 - 38.808 - // call f2i_fixup 38.809 - cbuf.set_insts_mark(); 38.810 - emit_opcode(cbuf, 0xE8); 38.811 - emit_d32_reloc(cbuf, 38.812 - (int) 38.813 - (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4), 38.814 - runtime_call_Relocation::spec(), 38.815 - RELOC_DISP32); 38.816 - 38.817 - // popq $dst 38.818 - if (dstenc >= 8) { 38.819 - emit_opcode(cbuf, Assembler::REX_B); 38.820 - } 38.821 - emit_opcode(cbuf, 0x58 | (dstenc & 7)); 38.822 - 38.823 - // done: 38.824 - %} 38.825 - 38.826 - enc_class f2l_fixup(rRegL dst, regF src) 38.827 - %{ 38.828 - int dstenc = $dst$$reg; 38.829 - int srcenc = $src$$reg; 38.830 - address const_address = (address) StubRoutines::x86::double_sign_flip(); 38.831 - 38.832 - // cmpq $dst, [0x8000000000000000] 38.833 - cbuf.set_insts_mark(); 38.834 - emit_opcode(cbuf, dstenc < 8 ? 
Assembler::REX_W : Assembler::REX_WR); 38.835 - emit_opcode(cbuf, 0x39); 38.836 - // XXX reg_mem doesn't support RIP-relative addressing yet 38.837 - emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101 38.838 - emit_d32_reloc(cbuf, const_address); 38.839 - 38.840 - 38.841 - // jne,s done 38.842 - emit_opcode(cbuf, 0x75); 38.843 - if (srcenc < 8 && dstenc < 8) { 38.844 - emit_d8(cbuf, 0xF); 38.845 - } else if (srcenc >= 8 && dstenc >= 8) { 38.846 - emit_d8(cbuf, 0x11); 38.847 - } else { 38.848 - emit_d8(cbuf, 0x10); 38.849 - } 38.850 - 38.851 - // subq rsp, #8 38.852 - emit_opcode(cbuf, Assembler::REX_W); 38.853 - emit_opcode(cbuf, 0x83); 38.854 - emit_rm(cbuf, 0x3, 0x5, RSP_enc); 38.855 - emit_d8(cbuf, 8); 38.856 - 38.857 - // movss [rsp], $src 38.858 - emit_opcode(cbuf, 0xF3); 38.859 - if (srcenc >= 8) { 38.860 - emit_opcode(cbuf, Assembler::REX_R); 38.861 - } 38.862 - emit_opcode(cbuf, 0x0F); 38.863 - emit_opcode(cbuf, 0x11); 38.864 - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes 38.865 - 38.866 - // call f2l_fixup 38.867 - cbuf.set_insts_mark(); 38.868 - emit_opcode(cbuf, 0xE8); 38.869 - emit_d32_reloc(cbuf, 38.870 - (int) 38.871 - (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4), 38.872 - runtime_call_Relocation::spec(), 38.873 - RELOC_DISP32); 38.874 - 38.875 - // popq $dst 38.876 - if (dstenc >= 8) { 38.877 - emit_opcode(cbuf, Assembler::REX_B); 38.878 - } 38.879 - emit_opcode(cbuf, 0x58 | (dstenc & 7)); 38.880 - 38.881 - // done: 38.882 - %} 38.883 - 38.884 - enc_class d2i_fixup(rRegI dst, regD src) 38.885 - %{ 38.886 - int dstenc = $dst$$reg; 38.887 - int srcenc = $src$$reg; 38.888 - 38.889 - // cmpl $dst, #0x80000000 38.890 - if (dstenc >= 8) { 38.891 - emit_opcode(cbuf, Assembler::REX_B); 38.892 - } 38.893 - emit_opcode(cbuf, 0x81); 38.894 - emit_rm(cbuf, 0x3, 0x7, dstenc & 7); 38.895 - emit_d32(cbuf, 0x80000000); 38.896 - 38.897 - // jne,s done 38.898 - emit_opcode(cbuf, 0x75); 38.899 - if (srcenc < 8 && dstenc < 8) { 38.900 - 
emit_d8(cbuf, 0xF); 38.901 - } else if (srcenc >= 8 && dstenc >= 8) { 38.902 - emit_d8(cbuf, 0x11); 38.903 - } else { 38.904 - emit_d8(cbuf, 0x10); 38.905 - } 38.906 - 38.907 - // subq rsp, #8 38.908 - emit_opcode(cbuf, Assembler::REX_W); 38.909 - emit_opcode(cbuf, 0x83); 38.910 - emit_rm(cbuf, 0x3, 0x5, RSP_enc); 38.911 - emit_d8(cbuf, 8); 38.912 - 38.913 - // movsd [rsp], $src 38.914 - emit_opcode(cbuf, 0xF2); 38.915 - if (srcenc >= 8) { 38.916 - emit_opcode(cbuf, Assembler::REX_R); 38.917 - } 38.918 - emit_opcode(cbuf, 0x0F); 38.919 - emit_opcode(cbuf, 0x11); 38.920 - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes 38.921 - 38.922 - // call d2i_fixup 38.923 - cbuf.set_insts_mark(); 38.924 - emit_opcode(cbuf, 0xE8); 38.925 - emit_d32_reloc(cbuf, 38.926 - (int) 38.927 - (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4), 38.928 - runtime_call_Relocation::spec(), 38.929 - RELOC_DISP32); 38.930 - 38.931 - // popq $dst 38.932 - if (dstenc >= 8) { 38.933 - emit_opcode(cbuf, Assembler::REX_B); 38.934 - } 38.935 - emit_opcode(cbuf, 0x58 | (dstenc & 7)); 38.936 - 38.937 - // done: 38.938 - %} 38.939 - 38.940 - enc_class d2l_fixup(rRegL dst, regD src) 38.941 - %{ 38.942 - int dstenc = $dst$$reg; 38.943 - int srcenc = $src$$reg; 38.944 - address const_address = (address) StubRoutines::x86::double_sign_flip(); 38.945 - 38.946 - // cmpq $dst, [0x8000000000000000] 38.947 - cbuf.set_insts_mark(); 38.948 - emit_opcode(cbuf, dstenc < 8 ? 
Assembler::REX_W : Assembler::REX_WR); 38.949 - emit_opcode(cbuf, 0x39); 38.950 - // XXX reg_mem doesn't support RIP-relative addressing yet 38.951 - emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101 38.952 - emit_d32_reloc(cbuf, const_address); 38.953 - 38.954 - 38.955 - // jne,s done 38.956 - emit_opcode(cbuf, 0x75); 38.957 - if (srcenc < 8 && dstenc < 8) { 38.958 - emit_d8(cbuf, 0xF); 38.959 - } else if (srcenc >= 8 && dstenc >= 8) { 38.960 - emit_d8(cbuf, 0x11); 38.961 - } else { 38.962 - emit_d8(cbuf, 0x10); 38.963 - } 38.964 - 38.965 - // subq rsp, #8 38.966 - emit_opcode(cbuf, Assembler::REX_W); 38.967 - emit_opcode(cbuf, 0x83); 38.968 - emit_rm(cbuf, 0x3, 0x5, RSP_enc); 38.969 - emit_d8(cbuf, 8); 38.970 - 38.971 - // movsd [rsp], $src 38.972 - emit_opcode(cbuf, 0xF2); 38.973 - if (srcenc >= 8) { 38.974 - emit_opcode(cbuf, Assembler::REX_R); 38.975 - } 38.976 - emit_opcode(cbuf, 0x0F); 38.977 - emit_opcode(cbuf, 0x11); 38.978 - encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes 38.979 - 38.980 - // call d2l_fixup 38.981 - cbuf.set_insts_mark(); 38.982 - emit_opcode(cbuf, 0xE8); 38.983 - emit_d32_reloc(cbuf, 38.984 - (int) 38.985 - (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4), 38.986 - runtime_call_Relocation::spec(), 38.987 - RELOC_DISP32); 38.988 - 38.989 - // popq $dst 38.990 - if (dstenc >= 8) { 38.991 - emit_opcode(cbuf, Assembler::REX_B); 38.992 - } 38.993 - emit_opcode(cbuf, 0x58 | (dstenc & 7)); 38.994 - 38.995 - // done: 38.996 - %} 38.997 %} 38.998 38.999 38.1000 @@ -6156,8 +5494,9 @@ 38.1001 38.1002 ins_cost(145); // XXX 38.1003 format %{ "movss $dst, $mem\t# float" %} 38.1004 - opcode(0xF3, 0x0F, 0x10); 38.1005 - ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); 38.1006 + ins_encode %{ 38.1007 + __ movflt($dst$$XMMRegister, $mem$$Address); 38.1008 + %} 38.1009 ins_pipe(pipe_slow); // XXX 38.1010 %} 38.1011 38.1012 @@ -6169,8 +5508,9 @@ 38.1013 38.1014 ins_cost(145); // XXX 38.1015 format %{ "movlpd 
$dst, $mem\t# double" %} 38.1016 - opcode(0x66, 0x0F, 0x12); 38.1017 - ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); 38.1018 + ins_encode %{ 38.1019 + __ movdbl($dst$$XMMRegister, $mem$$Address); 38.1020 + %} 38.1021 ins_pipe(pipe_slow); // XXX 38.1022 %} 38.1023 38.1024 @@ -6181,8 +5521,9 @@ 38.1025 38.1026 ins_cost(145); // XXX 38.1027 format %{ "movsd $dst, $mem\t# double" %} 38.1028 - opcode(0xF2, 0x0F, 0x10); 38.1029 - ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem)); 38.1030 + ins_encode %{ 38.1031 + __ movdbl($dst$$XMMRegister, $mem$$Address); 38.1032 + %} 38.1033 ins_pipe(pipe_slow); // XXX 38.1034 %} 38.1035 38.1036 @@ -6191,7 +5532,9 @@ 38.1037 match(Set dst (Load8B mem)); 38.1038 ins_cost(125); 38.1039 format %{ "MOVQ $dst,$mem\t! packed8B" %} 38.1040 - ins_encode( movq_ld(dst, mem)); 38.1041 + ins_encode %{ 38.1042 + __ movq($dst$$XMMRegister, $mem$$Address); 38.1043 + %} 38.1044 ins_pipe( pipe_slow ); 38.1045 %} 38.1046 38.1047 @@ -6200,7 +5543,9 @@ 38.1048 match(Set dst (Load4S mem)); 38.1049 ins_cost(125); 38.1050 format %{ "MOVQ $dst,$mem\t! packed4S" %} 38.1051 - ins_encode( movq_ld(dst, mem)); 38.1052 + ins_encode %{ 38.1053 + __ movq($dst$$XMMRegister, $mem$$Address); 38.1054 + %} 38.1055 ins_pipe( pipe_slow ); 38.1056 %} 38.1057 38.1058 @@ -6209,7 +5554,9 @@ 38.1059 match(Set dst (Load4C mem)); 38.1060 ins_cost(125); 38.1061 format %{ "MOVQ $dst,$mem\t! packed4C" %} 38.1062 - ins_encode( movq_ld(dst, mem)); 38.1063 + ins_encode %{ 38.1064 + __ movq($dst$$XMMRegister, $mem$$Address); 38.1065 + %} 38.1066 ins_pipe( pipe_slow ); 38.1067 %} 38.1068 38.1069 @@ -6218,16 +5565,20 @@ 38.1070 match(Set dst (Load2I mem)); 38.1071 ins_cost(125); 38.1072 format %{ "MOVQ $dst,$mem\t! 
packed2I" %} 38.1073 - ins_encode( movq_ld(dst, mem)); 38.1074 + ins_encode %{ 38.1075 + __ movq($dst$$XMMRegister, $mem$$Address); 38.1076 + %} 38.1077 ins_pipe( pipe_slow ); 38.1078 %} 38.1079 38.1080 // Load Aligned Packed Single to XMM 38.1081 instruct loadA2F(regD dst, memory mem) %{ 38.1082 match(Set dst (Load2F mem)); 38.1083 - ins_cost(145); 38.1084 + ins_cost(125); 38.1085 format %{ "MOVQ $dst,$mem\t! packed2F" %} 38.1086 - ins_encode( movq_ld(dst, mem)); 38.1087 + ins_encode %{ 38.1088 + __ movq($dst$$XMMRegister, $mem$$Address); 38.1089 + %} 38.1090 ins_pipe( pipe_slow ); 38.1091 %} 38.1092 38.1093 @@ -6540,8 +5891,9 @@ 38.1094 ins_cost(100); 38.1095 38.1096 format %{ "xorps $dst, $dst\t# float 0.0" %} 38.1097 - opcode(0x0F, 0x57); 38.1098 - ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst)); 38.1099 + ins_encode %{ 38.1100 + __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 38.1101 + %} 38.1102 ins_pipe(pipe_slow); 38.1103 %} 38.1104 38.1105 @@ -6562,8 +5914,9 @@ 38.1106 ins_cost(100); 38.1107 38.1108 format %{ "xorpd $dst, $dst\t# double 0.0" %} 38.1109 - opcode(0x66, 0x0F, 0x57); 38.1110 - ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst)); 38.1111 + ins_encode %{ 38.1112 + __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 38.1113 + %} 38.1114 ins_pipe(pipe_slow); 38.1115 %} 38.1116 38.1117 @@ -6606,8 +5959,9 @@ 38.1118 38.1119 ins_cost(125); 38.1120 format %{ "movss $dst, $src\t# float stk" %} 38.1121 - opcode(0xF3, 0x0F, 0x10); 38.1122 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1123 + ins_encode %{ 38.1124 + __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 38.1125 + %} 38.1126 ins_pipe(pipe_slow); // XXX 38.1127 %} 38.1128 38.1129 @@ -6972,7 +6326,9 @@ 38.1130 match(Set mem (Store8B mem src)); 38.1131 ins_cost(145); 38.1132 format %{ "MOVQ $mem,$src\t! 
packed8B" %} 38.1133 - ins_encode( movq_st(mem, src)); 38.1134 + ins_encode %{ 38.1135 + __ movq($mem$$Address, $src$$XMMRegister); 38.1136 + %} 38.1137 ins_pipe( pipe_slow ); 38.1138 %} 38.1139 38.1140 @@ -6981,7 +6337,9 @@ 38.1141 match(Set mem (Store4C mem src)); 38.1142 ins_cost(145); 38.1143 format %{ "MOVQ $mem,$src\t! packed4C" %} 38.1144 - ins_encode( movq_st(mem, src)); 38.1145 + ins_encode %{ 38.1146 + __ movq($mem$$Address, $src$$XMMRegister); 38.1147 + %} 38.1148 ins_pipe( pipe_slow ); 38.1149 %} 38.1150 38.1151 @@ -6990,7 +6348,9 @@ 38.1152 match(Set mem (Store2I mem src)); 38.1153 ins_cost(145); 38.1154 format %{ "MOVQ $mem,$src\t! packed2I" %} 38.1155 - ins_encode( movq_st(mem, src)); 38.1156 + ins_encode %{ 38.1157 + __ movq($mem$$Address, $src$$XMMRegister); 38.1158 + %} 38.1159 ins_pipe( pipe_slow ); 38.1160 %} 38.1161 38.1162 @@ -7024,7 +6384,9 @@ 38.1163 match(Set mem (Store2F mem src)); 38.1164 ins_cost(145); 38.1165 format %{ "MOVQ $mem,$src\t! packed2F" %} 38.1166 - ins_encode( movq_st(mem, src)); 38.1167 + ins_encode %{ 38.1168 + __ movq($mem$$Address, $src$$XMMRegister); 38.1169 + %} 38.1170 ins_pipe( pipe_slow ); 38.1171 %} 38.1172 38.1173 @@ -7035,8 +6397,9 @@ 38.1174 38.1175 ins_cost(95); // XXX 38.1176 format %{ "movss $mem, $src\t# float" %} 38.1177 - opcode(0xF3, 0x0F, 0x11); 38.1178 - ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem)); 38.1179 + ins_encode %{ 38.1180 + __ movflt($mem$$Address, $src$$XMMRegister); 38.1181 + %} 38.1182 ins_pipe(pipe_slow); // XXX 38.1183 %} 38.1184 38.1185 @@ -7072,8 +6435,9 @@ 38.1186 38.1187 ins_cost(95); // XXX 38.1188 format %{ "movsd $mem, $src\t# double" %} 38.1189 - opcode(0xF2, 0x0F, 0x11); 38.1190 - ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem)); 38.1191 + ins_encode %{ 38.1192 + __ movdbl($mem$$Address, $src$$XMMRegister); 38.1193 + %} 38.1194 ins_pipe(pipe_slow); // XXX 38.1195 %} 38.1196 38.1197 @@ -7142,8 +6506,9 @@ 38.1198 38.1199 ins_cost(95); 
// XXX 38.1200 format %{ "movss $dst, $src\t# float stk" %} 38.1201 - opcode(0xF3, 0x0F, 0x11); 38.1202 - ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); 38.1203 + ins_encode %{ 38.1204 + __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 38.1205 + %} 38.1206 ins_pipe(pipe_slow); // XXX 38.1207 %} 38.1208 38.1209 @@ -7153,8 +6518,9 @@ 38.1210 38.1211 ins_cost(95); // XXX 38.1212 format %{ "movsd $dst, $src\t# double stk" %} 38.1213 - opcode(0xF2, 0x0F, 0x11); 38.1214 - ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); 38.1215 + ins_encode %{ 38.1216 + __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 38.1217 + %} 38.1218 ins_pipe(pipe_slow); // XXX 38.1219 %} 38.1220 38.1221 @@ -7444,6 +6810,16 @@ 38.1222 ins_pipe(empty); 38.1223 %} 38.1224 38.1225 +instruct membar_storestore() %{ 38.1226 + match(MemBarStoreStore); 38.1227 + ins_cost(0); 38.1228 + 38.1229 + size(0); 38.1230 + format %{ "MEMBAR-storestore (empty encoding)" %} 38.1231 + ins_encode( ); 38.1232 + ins_pipe(empty); 38.1233 +%} 38.1234 + 38.1235 //----------Move Instructions-------------------------------------------------- 38.1236 38.1237 instruct castX2P(rRegP dst, rRegL src) 38.1238 @@ -7451,7 +6827,11 @@ 38.1239 match(Set dst (CastX2P src)); 38.1240 38.1241 format %{ "movq $dst, $src\t# long->ptr" %} 38.1242 - ins_encode(enc_copy_wide(dst, src)); 38.1243 + ins_encode %{ 38.1244 + if ($dst$$reg != $src$$reg) { 38.1245 + __ movptr($dst$$Register, $src$$Register); 38.1246 + } 38.1247 + %} 38.1248 ins_pipe(ialu_reg_reg); // XXX 38.1249 %} 38.1250 38.1251 @@ -7460,7 +6840,11 @@ 38.1252 match(Set dst (CastP2X src)); 38.1253 38.1254 format %{ "movq $dst, $src\t# ptr -> long" %} 38.1255 - ins_encode(enc_copy_wide(dst, src)); 38.1256 + ins_encode %{ 38.1257 + if ($dst$$reg != $src$$reg) { 38.1258 + __ movptr($dst$$Register, $src$$Register); 38.1259 + } 38.1260 + %} 38.1261 ins_pipe(ialu_reg_reg); // XXX 38.1262 %} 38.1263 38.1264 @@ -7813,7 +7197,13 @@ 
38.1265 format %{ "jn$cop skip\t# signed cmove float\n\t" 38.1266 "movss $dst, $src\n" 38.1267 "skip:" %} 38.1268 - ins_encode(enc_cmovf_branch(cop, dst, src)); 38.1269 + ins_encode %{ 38.1270 + Label Lskip; 38.1271 + // Invert sense of branch from sense of CMOV 38.1272 + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 38.1273 + __ movflt($dst$$XMMRegister, $src$$XMMRegister); 38.1274 + __ bind(Lskip); 38.1275 + %} 38.1276 ins_pipe(pipe_slow); 38.1277 %} 38.1278 38.1279 @@ -7837,7 +7227,13 @@ 38.1280 format %{ "jn$cop skip\t# unsigned cmove float\n\t" 38.1281 "movss $dst, $src\n" 38.1282 "skip:" %} 38.1283 - ins_encode(enc_cmovf_branch(cop, dst, src)); 38.1284 + ins_encode %{ 38.1285 + Label Lskip; 38.1286 + // Invert sense of branch from sense of CMOV 38.1287 + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 38.1288 + __ movflt($dst$$XMMRegister, $src$$XMMRegister); 38.1289 + __ bind(Lskip); 38.1290 + %} 38.1291 ins_pipe(pipe_slow); 38.1292 %} 38.1293 38.1294 @@ -7857,7 +7253,13 @@ 38.1295 format %{ "jn$cop skip\t# signed cmove double\n\t" 38.1296 "movsd $dst, $src\n" 38.1297 "skip:" %} 38.1298 - ins_encode(enc_cmovd_branch(cop, dst, src)); 38.1299 + ins_encode %{ 38.1300 + Label Lskip; 38.1301 + // Invert sense of branch from sense of CMOV 38.1302 + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 38.1303 + __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 38.1304 + __ bind(Lskip); 38.1305 + %} 38.1306 ins_pipe(pipe_slow); 38.1307 %} 38.1308 38.1309 @@ -7869,7 +7271,13 @@ 38.1310 format %{ "jn$cop skip\t# unsigned cmove double\n\t" 38.1311 "movsd $dst, $src\n" 38.1312 "skip:" %} 38.1313 - ins_encode(enc_cmovd_branch(cop, dst, src)); 38.1314 + ins_encode %{ 38.1315 + Label Lskip; 38.1316 + // Invert sense of branch from sense of CMOV 38.1317 + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 38.1318 + __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 38.1319 + __ bind(Lskip); 38.1320 + %} 38.1321 ins_pipe(pipe_slow); 38.1322 %} 
38.1323 38.1324 @@ -10191,17 +9599,18 @@ 38.1325 "pushfq\t# saw NaN, set CF\n\t" 38.1326 "andq [rsp], #0xffffff2b\n\t" 38.1327 "popfq\n" 38.1328 - "exit: nop\t# avoid branch to branch" %} 38.1329 - opcode(0x0F, 0x2E); 38.1330 - ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2), 38.1331 - cmpfp_fixup); 38.1332 + "exit:" %} 38.1333 + ins_encode %{ 38.1334 + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 38.1335 + emit_cmpfp_fixup(_masm); 38.1336 + %} 38.1337 ins_pipe(pipe_slow); 38.1338 %} 38.1339 38.1340 instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{ 38.1341 match(Set cr (CmpF src1 src2)); 38.1342 38.1343 - ins_cost(145); 38.1344 + ins_cost(100); 38.1345 format %{ "ucomiss $src1, $src2" %} 38.1346 ins_encode %{ 38.1347 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 38.1348 @@ -10219,10 +9628,11 @@ 38.1349 "pushfq\t# saw NaN, set CF\n\t" 38.1350 "andq [rsp], #0xffffff2b\n\t" 38.1351 "popfq\n" 38.1352 - "exit: nop\t# avoid branch to branch" %} 38.1353 - opcode(0x0F, 0x2E); 38.1354 - ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2), 38.1355 - cmpfp_fixup); 38.1356 + "exit:" %} 38.1357 + ins_encode %{ 38.1358 + __ ucomiss($src1$$XMMRegister, $src2$$Address); 38.1359 + emit_cmpfp_fixup(_masm); 38.1360 + %} 38.1361 ins_pipe(pipe_slow); 38.1362 %} 38.1363 38.1364 @@ -10231,8 +9641,9 @@ 38.1365 38.1366 ins_cost(100); 38.1367 format %{ "ucomiss $src1, $src2" %} 38.1368 - opcode(0x0F, 0x2E); 38.1369 - ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2)); 38.1370 + ins_encode %{ 38.1371 + __ ucomiss($src1$$XMMRegister, $src2$$Address); 38.1372 + %} 38.1373 ins_pipe(pipe_slow); 38.1374 %} 38.1375 38.1376 @@ -10245,7 +9656,7 @@ 38.1377 "pushfq\t# saw NaN, set CF\n\t" 38.1378 "andq [rsp], #0xffffff2b\n\t" 38.1379 "popfq\n" 38.1380 - "exit: nop\t# avoid branch to branch" %} 38.1381 + "exit:" %} 38.1382 ins_encode %{ 38.1383 __ ucomiss($src$$XMMRegister, $constantaddress($con)); 38.1384 
emit_cmpfp_fixup(_masm); 38.1385 @@ -10273,10 +9684,11 @@ 38.1386 "pushfq\t# saw NaN, set CF\n\t" 38.1387 "andq [rsp], #0xffffff2b\n\t" 38.1388 "popfq\n" 38.1389 - "exit: nop\t# avoid branch to branch" %} 38.1390 - opcode(0x66, 0x0F, 0x2E); 38.1391 - ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2), 38.1392 - cmpfp_fixup); 38.1393 + "exit:" %} 38.1394 + ins_encode %{ 38.1395 + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 38.1396 + emit_cmpfp_fixup(_masm); 38.1397 + %} 38.1398 ins_pipe(pipe_slow); 38.1399 %} 38.1400 38.1401 @@ -10301,10 +9713,11 @@ 38.1402 "pushfq\t# saw NaN, set CF\n\t" 38.1403 "andq [rsp], #0xffffff2b\n\t" 38.1404 "popfq\n" 38.1405 - "exit: nop\t# avoid branch to branch" %} 38.1406 - opcode(0x66, 0x0F, 0x2E); 38.1407 - ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2), 38.1408 - cmpfp_fixup); 38.1409 + "exit:" %} 38.1410 + ins_encode %{ 38.1411 + __ ucomisd($src1$$XMMRegister, $src2$$Address); 38.1412 + emit_cmpfp_fixup(_masm); 38.1413 + %} 38.1414 ins_pipe(pipe_slow); 38.1415 %} 38.1416 38.1417 @@ -10313,8 +9726,9 @@ 38.1418 38.1419 ins_cost(100); 38.1420 format %{ "ucomisd $src1, $src2" %} 38.1421 - opcode(0x66, 0x0F, 0x2E); 38.1422 - ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2)); 38.1423 + ins_encode %{ 38.1424 + __ ucomisd($src1$$XMMRegister, $src2$$Address); 38.1425 + %} 38.1426 ins_pipe(pipe_slow); 38.1427 %} 38.1428 38.1429 @@ -10327,7 +9741,7 @@ 38.1430 "pushfq\t# saw NaN, set CF\n\t" 38.1431 "andq [rsp], #0xffffff2b\n\t" 38.1432 "popfq\n" 38.1433 - "exit: nop\t# avoid branch to branch" %} 38.1434 + "exit:" %} 38.1435 ins_encode %{ 38.1436 __ ucomisd($src$$XMMRegister, $constantaddress($con)); 38.1437 emit_cmpfp_fixup(_masm); 38.1438 @@ -10359,10 +9773,10 @@ 38.1439 "setne $dst\n\t" 38.1440 "movzbl $dst, $dst\n" 38.1441 "done:" %} 38.1442 - 38.1443 - opcode(0x0F, 0x2E); 38.1444 - ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2), 38.1445 
- cmpfp3(dst)); 38.1446 + ins_encode %{ 38.1447 + __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 38.1448 + emit_cmpfp3(_masm, $dst$$Register); 38.1449 + %} 38.1450 ins_pipe(pipe_slow); 38.1451 %} 38.1452 38.1453 @@ -10380,10 +9794,10 @@ 38.1454 "setne $dst\n\t" 38.1455 "movzbl $dst, $dst\n" 38.1456 "done:" %} 38.1457 - 38.1458 - opcode(0x0F, 0x2E); 38.1459 - ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2), 38.1460 - cmpfp3(dst)); 38.1461 + ins_encode %{ 38.1462 + __ ucomiss($src1$$XMMRegister, $src2$$Address); 38.1463 + emit_cmpfp3(_masm, $dst$$Register); 38.1464 + %} 38.1465 ins_pipe(pipe_slow); 38.1466 %} 38.1467 38.1468 @@ -10401,15 +9815,8 @@ 38.1469 "movzbl $dst, $dst\n" 38.1470 "done:" %} 38.1471 ins_encode %{ 38.1472 - Label L_done; 38.1473 - Register Rdst = $dst$$Register; 38.1474 __ ucomiss($src$$XMMRegister, $constantaddress($con)); 38.1475 - __ movl(Rdst, -1); 38.1476 - __ jcc(Assembler::parity, L_done); 38.1477 - __ jcc(Assembler::below, L_done); 38.1478 - __ setb(Assembler::notEqual, Rdst); 38.1479 - __ movzbl(Rdst, Rdst); 38.1480 - __ bind(L_done); 38.1481 + emit_cmpfp3(_masm, $dst$$Register); 38.1482 %} 38.1483 ins_pipe(pipe_slow); 38.1484 %} 38.1485 @@ -10428,10 +9835,10 @@ 38.1486 "setne $dst\n\t" 38.1487 "movzbl $dst, $dst\n" 38.1488 "done:" %} 38.1489 - 38.1490 - opcode(0x66, 0x0F, 0x2E); 38.1491 - ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2), 38.1492 - cmpfp3(dst)); 38.1493 + ins_encode %{ 38.1494 + __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 38.1495 + emit_cmpfp3(_masm, $dst$$Register); 38.1496 + %} 38.1497 ins_pipe(pipe_slow); 38.1498 %} 38.1499 38.1500 @@ -10449,10 +9856,10 @@ 38.1501 "setne $dst\n\t" 38.1502 "movzbl $dst, $dst\n" 38.1503 "done:" %} 38.1504 - 38.1505 - opcode(0x66, 0x0F, 0x2E); 38.1506 - ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2), 38.1507 - cmpfp3(dst)); 38.1508 + ins_encode %{ 38.1509 + __ ucomisd($src1$$XMMRegister, $src2$$Address); 
38.1510 + emit_cmpfp3(_masm, $dst$$Register); 38.1511 + %} 38.1512 ins_pipe(pipe_slow); 38.1513 %} 38.1514 38.1515 @@ -10470,374 +9877,9 @@ 38.1516 "movzbl $dst, $dst\n" 38.1517 "done:" %} 38.1518 ins_encode %{ 38.1519 - Register Rdst = $dst$$Register; 38.1520 - Label L_done; 38.1521 __ ucomisd($src$$XMMRegister, $constantaddress($con)); 38.1522 - __ movl(Rdst, -1); 38.1523 - __ jcc(Assembler::parity, L_done); 38.1524 - __ jcc(Assembler::below, L_done); 38.1525 - __ setb(Assembler::notEqual, Rdst); 38.1526 - __ movzbl(Rdst, Rdst); 38.1527 - __ bind(L_done); 38.1528 - %} 38.1529 - ins_pipe(pipe_slow); 38.1530 -%} 38.1531 - 38.1532 -instruct addF_reg(regF dst, regF src) 38.1533 -%{ 38.1534 - match(Set dst (AddF dst src)); 38.1535 - 38.1536 - format %{ "addss $dst, $src" %} 38.1537 - ins_cost(150); // XXX 38.1538 - opcode(0xF3, 0x0F, 0x58); 38.1539 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1540 - ins_pipe(pipe_slow); 38.1541 -%} 38.1542 - 38.1543 -instruct addF_mem(regF dst, memory src) 38.1544 -%{ 38.1545 - match(Set dst (AddF dst (LoadF src))); 38.1546 - 38.1547 - format %{ "addss $dst, $src" %} 38.1548 - ins_cost(150); // XXX 38.1549 - opcode(0xF3, 0x0F, 0x58); 38.1550 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1551 - ins_pipe(pipe_slow); 38.1552 -%} 38.1553 - 38.1554 -instruct addF_imm(regF dst, immF con) %{ 38.1555 - match(Set dst (AddF dst con)); 38.1556 - format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 38.1557 - ins_cost(150); // XXX 38.1558 - ins_encode %{ 38.1559 - __ addss($dst$$XMMRegister, $constantaddress($con)); 38.1560 - %} 38.1561 - ins_pipe(pipe_slow); 38.1562 -%} 38.1563 - 38.1564 -instruct addD_reg(regD dst, regD src) 38.1565 -%{ 38.1566 - match(Set dst (AddD dst src)); 38.1567 - 38.1568 - format %{ "addsd $dst, $src" %} 38.1569 - ins_cost(150); // XXX 38.1570 - opcode(0xF2, 0x0F, 0x58); 38.1571 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, 
OpcT, reg_reg(dst, src)); 38.1572 - ins_pipe(pipe_slow); 38.1573 -%} 38.1574 - 38.1575 -instruct addD_mem(regD dst, memory src) 38.1576 -%{ 38.1577 - match(Set dst (AddD dst (LoadD src))); 38.1578 - 38.1579 - format %{ "addsd $dst, $src" %} 38.1580 - ins_cost(150); // XXX 38.1581 - opcode(0xF2, 0x0F, 0x58); 38.1582 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1583 - ins_pipe(pipe_slow); 38.1584 -%} 38.1585 - 38.1586 -instruct addD_imm(regD dst, immD con) %{ 38.1587 - match(Set dst (AddD dst con)); 38.1588 - format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 38.1589 - ins_cost(150); // XXX 38.1590 - ins_encode %{ 38.1591 - __ addsd($dst$$XMMRegister, $constantaddress($con)); 38.1592 - %} 38.1593 - ins_pipe(pipe_slow); 38.1594 -%} 38.1595 - 38.1596 -instruct subF_reg(regF dst, regF src) 38.1597 -%{ 38.1598 - match(Set dst (SubF dst src)); 38.1599 - 38.1600 - format %{ "subss $dst, $src" %} 38.1601 - ins_cost(150); // XXX 38.1602 - opcode(0xF3, 0x0F, 0x5C); 38.1603 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1604 - ins_pipe(pipe_slow); 38.1605 -%} 38.1606 - 38.1607 -instruct subF_mem(regF dst, memory src) 38.1608 -%{ 38.1609 - match(Set dst (SubF dst (LoadF src))); 38.1610 - 38.1611 - format %{ "subss $dst, $src" %} 38.1612 - ins_cost(150); // XXX 38.1613 - opcode(0xF3, 0x0F, 0x5C); 38.1614 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1615 - ins_pipe(pipe_slow); 38.1616 -%} 38.1617 - 38.1618 -instruct subF_imm(regF dst, immF con) %{ 38.1619 - match(Set dst (SubF dst con)); 38.1620 - format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 38.1621 - ins_cost(150); // XXX 38.1622 - ins_encode %{ 38.1623 - __ subss($dst$$XMMRegister, $constantaddress($con)); 38.1624 - %} 38.1625 - ins_pipe(pipe_slow); 38.1626 -%} 38.1627 - 38.1628 -instruct subD_reg(regD dst, regD src) 38.1629 -%{ 38.1630 - match(Set dst (SubD 
dst src)); 38.1631 - 38.1632 - format %{ "subsd $dst, $src" %} 38.1633 - ins_cost(150); // XXX 38.1634 - opcode(0xF2, 0x0F, 0x5C); 38.1635 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1636 - ins_pipe(pipe_slow); 38.1637 -%} 38.1638 - 38.1639 -instruct subD_mem(regD dst, memory src) 38.1640 -%{ 38.1641 - match(Set dst (SubD dst (LoadD src))); 38.1642 - 38.1643 - format %{ "subsd $dst, $src" %} 38.1644 - ins_cost(150); // XXX 38.1645 - opcode(0xF2, 0x0F, 0x5C); 38.1646 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1647 - ins_pipe(pipe_slow); 38.1648 -%} 38.1649 - 38.1650 -instruct subD_imm(regD dst, immD con) %{ 38.1651 - match(Set dst (SubD dst con)); 38.1652 - format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 38.1653 - ins_cost(150); // XXX 38.1654 - ins_encode %{ 38.1655 - __ subsd($dst$$XMMRegister, $constantaddress($con)); 38.1656 - %} 38.1657 - ins_pipe(pipe_slow); 38.1658 -%} 38.1659 - 38.1660 -instruct mulF_reg(regF dst, regF src) 38.1661 -%{ 38.1662 - match(Set dst (MulF dst src)); 38.1663 - 38.1664 - format %{ "mulss $dst, $src" %} 38.1665 - ins_cost(150); // XXX 38.1666 - opcode(0xF3, 0x0F, 0x59); 38.1667 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1668 - ins_pipe(pipe_slow); 38.1669 -%} 38.1670 - 38.1671 -instruct mulF_mem(regF dst, memory src) 38.1672 -%{ 38.1673 - match(Set dst (MulF dst (LoadF src))); 38.1674 - 38.1675 - format %{ "mulss $dst, $src" %} 38.1676 - ins_cost(150); // XXX 38.1677 - opcode(0xF3, 0x0F, 0x59); 38.1678 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1679 - ins_pipe(pipe_slow); 38.1680 -%} 38.1681 - 38.1682 -instruct mulF_imm(regF dst, immF con) %{ 38.1683 - match(Set dst (MulF dst con)); 38.1684 - format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 38.1685 - ins_cost(150); // XXX 38.1686 - ins_encode %{ 38.1687 - __ 
mulss($dst$$XMMRegister, $constantaddress($con)); 38.1688 - %} 38.1689 - ins_pipe(pipe_slow); 38.1690 -%} 38.1691 - 38.1692 -instruct mulD_reg(regD dst, regD src) 38.1693 -%{ 38.1694 - match(Set dst (MulD dst src)); 38.1695 - 38.1696 - format %{ "mulsd $dst, $src" %} 38.1697 - ins_cost(150); // XXX 38.1698 - opcode(0xF2, 0x0F, 0x59); 38.1699 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1700 - ins_pipe(pipe_slow); 38.1701 -%} 38.1702 - 38.1703 -instruct mulD_mem(regD dst, memory src) 38.1704 -%{ 38.1705 - match(Set dst (MulD dst (LoadD src))); 38.1706 - 38.1707 - format %{ "mulsd $dst, $src" %} 38.1708 - ins_cost(150); // XXX 38.1709 - opcode(0xF2, 0x0F, 0x59); 38.1710 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1711 - ins_pipe(pipe_slow); 38.1712 -%} 38.1713 - 38.1714 -instruct mulD_imm(regD dst, immD con) %{ 38.1715 - match(Set dst (MulD dst con)); 38.1716 - format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 38.1717 - ins_cost(150); // XXX 38.1718 - ins_encode %{ 38.1719 - __ mulsd($dst$$XMMRegister, $constantaddress($con)); 38.1720 - %} 38.1721 - ins_pipe(pipe_slow); 38.1722 -%} 38.1723 - 38.1724 -instruct divF_reg(regF dst, regF src) 38.1725 -%{ 38.1726 - match(Set dst (DivF dst src)); 38.1727 - 38.1728 - format %{ "divss $dst, $src" %} 38.1729 - ins_cost(150); // XXX 38.1730 - opcode(0xF3, 0x0F, 0x5E); 38.1731 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1732 - ins_pipe(pipe_slow); 38.1733 -%} 38.1734 - 38.1735 -instruct divF_mem(regF dst, memory src) 38.1736 -%{ 38.1737 - match(Set dst (DivF dst (LoadF src))); 38.1738 - 38.1739 - format %{ "divss $dst, $src" %} 38.1740 - ins_cost(150); // XXX 38.1741 - opcode(0xF3, 0x0F, 0x5E); 38.1742 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1743 - ins_pipe(pipe_slow); 38.1744 -%} 38.1745 - 38.1746 -instruct divF_imm(regF dst, immF con) %{ 38.1747 - match(Set 
dst (DivF dst con)); 38.1748 - format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 38.1749 - ins_cost(150); // XXX 38.1750 - ins_encode %{ 38.1751 - __ divss($dst$$XMMRegister, $constantaddress($con)); 38.1752 - %} 38.1753 - ins_pipe(pipe_slow); 38.1754 -%} 38.1755 - 38.1756 -instruct divD_reg(regD dst, regD src) 38.1757 -%{ 38.1758 - match(Set dst (DivD dst src)); 38.1759 - 38.1760 - format %{ "divsd $dst, $src" %} 38.1761 - ins_cost(150); // XXX 38.1762 - opcode(0xF2, 0x0F, 0x5E); 38.1763 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1764 - ins_pipe(pipe_slow); 38.1765 -%} 38.1766 - 38.1767 -instruct divD_mem(regD dst, memory src) 38.1768 -%{ 38.1769 - match(Set dst (DivD dst (LoadD src))); 38.1770 - 38.1771 - format %{ "divsd $dst, $src" %} 38.1772 - ins_cost(150); // XXX 38.1773 - opcode(0xF2, 0x0F, 0x5E); 38.1774 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1775 - ins_pipe(pipe_slow); 38.1776 -%} 38.1777 - 38.1778 -instruct divD_imm(regD dst, immD con) %{ 38.1779 - match(Set dst (DivD dst con)); 38.1780 - format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 38.1781 - ins_cost(150); // XXX 38.1782 - ins_encode %{ 38.1783 - __ divsd($dst$$XMMRegister, $constantaddress($con)); 38.1784 - %} 38.1785 - ins_pipe(pipe_slow); 38.1786 -%} 38.1787 - 38.1788 -instruct sqrtF_reg(regF dst, regF src) 38.1789 -%{ 38.1790 - match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 38.1791 - 38.1792 - format %{ "sqrtss $dst, $src" %} 38.1793 - ins_cost(150); // XXX 38.1794 - opcode(0xF3, 0x0F, 0x51); 38.1795 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1796 - ins_pipe(pipe_slow); 38.1797 -%} 38.1798 - 38.1799 -instruct sqrtF_mem(regF dst, memory src) 38.1800 -%{ 38.1801 - match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 38.1802 - 38.1803 - format %{ "sqrtss $dst, $src" %} 38.1804 - ins_cost(150); // XXX 38.1805 - 
opcode(0xF3, 0x0F, 0x51); 38.1806 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1807 - ins_pipe(pipe_slow); 38.1808 -%} 38.1809 - 38.1810 -instruct sqrtF_imm(regF dst, immF con) %{ 38.1811 - match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 38.1812 - format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 38.1813 - ins_cost(150); // XXX 38.1814 - ins_encode %{ 38.1815 - __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 38.1816 - %} 38.1817 - ins_pipe(pipe_slow); 38.1818 -%} 38.1819 - 38.1820 -instruct sqrtD_reg(regD dst, regD src) 38.1821 -%{ 38.1822 - match(Set dst (SqrtD src)); 38.1823 - 38.1824 - format %{ "sqrtsd $dst, $src" %} 38.1825 - ins_cost(150); // XXX 38.1826 - opcode(0xF2, 0x0F, 0x51); 38.1827 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1828 - ins_pipe(pipe_slow); 38.1829 -%} 38.1830 - 38.1831 -instruct sqrtD_mem(regD dst, memory src) 38.1832 -%{ 38.1833 - match(Set dst (SqrtD (LoadD src))); 38.1834 - 38.1835 - format %{ "sqrtsd $dst, $src" %} 38.1836 - ins_cost(150); // XXX 38.1837 - opcode(0xF2, 0x0F, 0x51); 38.1838 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1839 - ins_pipe(pipe_slow); 38.1840 -%} 38.1841 - 38.1842 -instruct sqrtD_imm(regD dst, immD con) %{ 38.1843 - match(Set dst (SqrtD con)); 38.1844 - format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 38.1845 - ins_cost(150); // XXX 38.1846 - ins_encode %{ 38.1847 - __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 38.1848 - %} 38.1849 - ins_pipe(pipe_slow); 38.1850 -%} 38.1851 - 38.1852 -instruct absF_reg(regF dst) 38.1853 -%{ 38.1854 - match(Set dst (AbsF dst)); 38.1855 - 38.1856 - format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 38.1857 - ins_encode(absF_encoding(dst)); 38.1858 - ins_pipe(pipe_slow); 38.1859 -%} 38.1860 - 38.1861 -instruct absD_reg(regD dst) 38.1862 -%{ 38.1863 - match(Set dst (AbsD dst)); 
38.1864 - 38.1865 - format %{ "andpd $dst, [0x7fffffffffffffff]\t" 38.1866 - "# abs double by sign masking" %} 38.1867 - ins_encode(absD_encoding(dst)); 38.1868 - ins_pipe(pipe_slow); 38.1869 -%} 38.1870 - 38.1871 -instruct negF_reg(regF dst) 38.1872 -%{ 38.1873 - match(Set dst (NegF dst)); 38.1874 - 38.1875 - format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 38.1876 - ins_encode(negF_encoding(dst)); 38.1877 - ins_pipe(pipe_slow); 38.1878 -%} 38.1879 - 38.1880 -instruct negD_reg(regD dst) 38.1881 -%{ 38.1882 - match(Set dst (NegD dst)); 38.1883 - 38.1884 - format %{ "xorpd $dst, [0x8000000000000000]\t" 38.1885 - "# neg double by sign flipping" %} 38.1886 - ins_encode(negD_encoding(dst)); 38.1887 + emit_cmpfp3(_masm, $dst$$Register); 38.1888 + %} 38.1889 ins_pipe(pipe_slow); 38.1890 %} 38.1891 38.1892 @@ -10929,8 +9971,9 @@ 38.1893 match(Set dst (ConvF2D src)); 38.1894 38.1895 format %{ "cvtss2sd $dst, $src" %} 38.1896 - opcode(0xF3, 0x0F, 0x5A); 38.1897 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1898 + ins_encode %{ 38.1899 + __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 38.1900 + %} 38.1901 ins_pipe(pipe_slow); // XXX 38.1902 %} 38.1903 38.1904 @@ -10939,8 +9982,9 @@ 38.1905 match(Set dst (ConvF2D (LoadF src))); 38.1906 38.1907 format %{ "cvtss2sd $dst, $src" %} 38.1908 - opcode(0xF3, 0x0F, 0x5A); 38.1909 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1910 + ins_encode %{ 38.1911 + __ cvtss2sd ($dst$$XMMRegister, $src$$Address); 38.1912 + %} 38.1913 ins_pipe(pipe_slow); // XXX 38.1914 %} 38.1915 38.1916 @@ -10949,8 +9993,9 @@ 38.1917 match(Set dst (ConvD2F src)); 38.1918 38.1919 format %{ "cvtsd2ss $dst, $src" %} 38.1920 - opcode(0xF2, 0x0F, 0x5A); 38.1921 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.1922 + ins_encode %{ 38.1923 + __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 38.1924 + %} 38.1925 ins_pipe(pipe_slow); // XXX 38.1926 %} 
38.1927 38.1928 @@ -10959,8 +10004,9 @@ 38.1929 match(Set dst (ConvD2F (LoadD src))); 38.1930 38.1931 format %{ "cvtsd2ss $dst, $src" %} 38.1932 - opcode(0xF2, 0x0F, 0x5A); 38.1933 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.1934 + ins_encode %{ 38.1935 + __ cvtsd2ss ($dst$$XMMRegister, $src$$Address); 38.1936 + %} 38.1937 ins_pipe(pipe_slow); // XXX 38.1938 %} 38.1939 38.1940 @@ -10978,9 +10024,17 @@ 38.1941 "call f2i_fixup\n\t" 38.1942 "popq $dst\n" 38.1943 "done: "%} 38.1944 - opcode(0xF3, 0x0F, 0x2C); 38.1945 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src), 38.1946 - f2i_fixup(dst, src)); 38.1947 + ins_encode %{ 38.1948 + Label done; 38.1949 + __ cvttss2sil($dst$$Register, $src$$XMMRegister); 38.1950 + __ cmpl($dst$$Register, 0x80000000); 38.1951 + __ jccb(Assembler::notEqual, done); 38.1952 + __ subptr(rsp, 8); 38.1953 + __ movflt(Address(rsp, 0), $src$$XMMRegister); 38.1954 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup()))); 38.1955 + __ pop($dst$$Register); 38.1956 + __ bind(done); 38.1957 + %} 38.1958 ins_pipe(pipe_slow); 38.1959 %} 38.1960 38.1961 @@ -10997,9 +10051,18 @@ 38.1962 "call f2l_fixup\n\t" 38.1963 "popq $dst\n" 38.1964 "done: "%} 38.1965 - opcode(0xF3, 0x0F, 0x2C); 38.1966 - ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src), 38.1967 - f2l_fixup(dst, src)); 38.1968 + ins_encode %{ 38.1969 + Label done; 38.1970 + __ cvttss2siq($dst$$Register, $src$$XMMRegister); 38.1971 + __ cmp64($dst$$Register, 38.1972 + ExternalAddress((address) StubRoutines::x86::double_sign_flip())); 38.1973 + __ jccb(Assembler::notEqual, done); 38.1974 + __ subptr(rsp, 8); 38.1975 + __ movflt(Address(rsp, 0), $src$$XMMRegister); 38.1976 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup()))); 38.1977 + __ pop($dst$$Register); 38.1978 + __ bind(done); 38.1979 + %} 38.1980 ins_pipe(pipe_slow); 38.1981 %} 38.1982 38.1983 @@ -11016,9 
+10079,17 @@ 38.1984 "call d2i_fixup\n\t" 38.1985 "popq $dst\n" 38.1986 "done: "%} 38.1987 - opcode(0xF2, 0x0F, 0x2C); 38.1988 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src), 38.1989 - d2i_fixup(dst, src)); 38.1990 + ins_encode %{ 38.1991 + Label done; 38.1992 + __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 38.1993 + __ cmpl($dst$$Register, 0x80000000); 38.1994 + __ jccb(Assembler::notEqual, done); 38.1995 + __ subptr(rsp, 8); 38.1996 + __ movdbl(Address(rsp, 0), $src$$XMMRegister); 38.1997 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup()))); 38.1998 + __ pop($dst$$Register); 38.1999 + __ bind(done); 38.2000 + %} 38.2001 ins_pipe(pipe_slow); 38.2002 %} 38.2003 38.2004 @@ -11035,9 +10106,18 @@ 38.2005 "call d2l_fixup\n\t" 38.2006 "popq $dst\n" 38.2007 "done: "%} 38.2008 - opcode(0xF2, 0x0F, 0x2C); 38.2009 - ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src), 38.2010 - d2l_fixup(dst, src)); 38.2011 + ins_encode %{ 38.2012 + Label done; 38.2013 + __ cvttsd2siq($dst$$Register, $src$$XMMRegister); 38.2014 + __ cmp64($dst$$Register, 38.2015 + ExternalAddress((address) StubRoutines::x86::double_sign_flip())); 38.2016 + __ jccb(Assembler::notEqual, done); 38.2017 + __ subptr(rsp, 8); 38.2018 + __ movdbl(Address(rsp, 0), $src$$XMMRegister); 38.2019 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup()))); 38.2020 + __ pop($dst$$Register); 38.2021 + __ bind(done); 38.2022 + %} 38.2023 ins_pipe(pipe_slow); 38.2024 %} 38.2025 38.2026 @@ -11047,8 +10127,9 @@ 38.2027 match(Set dst (ConvI2F src)); 38.2028 38.2029 format %{ "cvtsi2ssl $dst, $src\t# i2f" %} 38.2030 - opcode(0xF3, 0x0F, 0x2A); 38.2031 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.2032 + ins_encode %{ 38.2033 + __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 38.2034 + %} 38.2035 ins_pipe(pipe_slow); // XXX 38.2036 %} 38.2037 38.2038 @@ -11057,8 +10138,9 @@ 38.2039 
match(Set dst (ConvI2F (LoadI src))); 38.2040 38.2041 format %{ "cvtsi2ssl $dst, $src\t# i2f" %} 38.2042 - opcode(0xF3, 0x0F, 0x2A); 38.2043 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.2044 + ins_encode %{ 38.2045 + __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address); 38.2046 + %} 38.2047 ins_pipe(pipe_slow); // XXX 38.2048 %} 38.2049 38.2050 @@ -11068,8 +10150,9 @@ 38.2051 match(Set dst (ConvI2D src)); 38.2052 38.2053 format %{ "cvtsi2sdl $dst, $src\t# i2d" %} 38.2054 - opcode(0xF2, 0x0F, 0x2A); 38.2055 - ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.2056 + ins_encode %{ 38.2057 + __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 38.2058 + %} 38.2059 ins_pipe(pipe_slow); // XXX 38.2060 %} 38.2061 38.2062 @@ -11078,8 +10161,9 @@ 38.2063 match(Set dst (ConvI2D (LoadI src))); 38.2064 38.2065 format %{ "cvtsi2sdl $dst, $src\t# i2d" %} 38.2066 - opcode(0xF2, 0x0F, 0x2A); 38.2067 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.2068 + ins_encode %{ 38.2069 + __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address); 38.2070 + %} 38.2071 ins_pipe(pipe_slow); // XXX 38.2072 %} 38.2073 38.2074 @@ -11116,8 +10200,9 @@ 38.2075 match(Set dst (ConvL2F src)); 38.2076 38.2077 format %{ "cvtsi2ssq $dst, $src\t# l2f" %} 38.2078 - opcode(0xF3, 0x0F, 0x2A); 38.2079 - ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.2080 + ins_encode %{ 38.2081 + __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register); 38.2082 + %} 38.2083 ins_pipe(pipe_slow); // XXX 38.2084 %} 38.2085 38.2086 @@ -11126,8 +10211,9 @@ 38.2087 match(Set dst (ConvL2F (LoadL src))); 38.2088 38.2089 format %{ "cvtsi2ssq $dst, $src\t# l2f" %} 38.2090 - opcode(0xF3, 0x0F, 0x2A); 38.2091 - ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.2092 + ins_encode %{ 38.2093 + __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address); 38.2094 + %} 38.2095 ins_pipe(pipe_slow); // XXX 38.2096 %} 38.2097 38.2098 @@ 
-11136,8 +10222,9 @@ 38.2099 match(Set dst (ConvL2D src)); 38.2100 38.2101 format %{ "cvtsi2sdq $dst, $src\t# l2d" %} 38.2102 - opcode(0xF2, 0x0F, 0x2A); 38.2103 - ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src)); 38.2104 + ins_encode %{ 38.2105 + __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register); 38.2106 + %} 38.2107 ins_pipe(pipe_slow); // XXX 38.2108 %} 38.2109 38.2110 @@ -11146,8 +10233,9 @@ 38.2111 match(Set dst (ConvL2D (LoadL src))); 38.2112 38.2113 format %{ "cvtsi2sdq $dst, $src\t# l2d" %} 38.2114 - opcode(0xF2, 0x0F, 0x2A); 38.2115 - ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.2116 + ins_encode %{ 38.2117 + __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address); 38.2118 + %} 38.2119 ins_pipe(pipe_slow); // XXX 38.2120 %} 38.2121 38.2122 @@ -11186,7 +10274,11 @@ 38.2123 match(Set dst (AndL (ConvI2L src) mask)); 38.2124 38.2125 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %} 38.2126 - ins_encode(enc_copy(dst, src)); 38.2127 + ins_encode %{ 38.2128 + if ($dst$$reg != $src$$reg) { 38.2129 + __ movl($dst$$Register, $src$$Register); 38.2130 + } 38.2131 + %} 38.2132 ins_pipe(ialu_reg_reg); 38.2133 %} 38.2134 38.2135 @@ -11196,8 +10288,9 @@ 38.2136 match(Set dst (AndL (ConvI2L (LoadI src)) mask)); 38.2137 38.2138 format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %} 38.2139 - opcode(0x8B); 38.2140 - ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src)); 38.2141 + ins_encode %{ 38.2142 + __ movl($dst$$Register, $src$$Address); 38.2143 + %} 38.2144 ins_pipe(ialu_reg_mem); 38.2145 %} 38.2146 38.2147 @@ -11206,7 +10299,9 @@ 38.2148 match(Set dst (AndL src mask)); 38.2149 38.2150 format %{ "movl $dst, $src\t# zero-extend long" %} 38.2151 - ins_encode(enc_copy_always(dst, src)); 38.2152 + ins_encode %{ 38.2153 + __ movl($dst$$Register, $src$$Register); 38.2154 + %} 38.2155 ins_pipe(ialu_reg_reg); 38.2156 %} 38.2157 38.2158 @@ -11215,7 +10310,9 @@ 38.2159 match(Set dst (ConvL2I src)); 38.2160 38.2161 
format %{ "movl $dst, $src\t# l2i" %} 38.2162 - ins_encode(enc_copy_always(dst, src)); 38.2163 + ins_encode %{ 38.2164 + __ movl($dst$$Register, $src$$Register); 38.2165 + %} 38.2166 ins_pipe(ialu_reg_reg); 38.2167 %} 38.2168 38.2169 @@ -11226,8 +10323,9 @@ 38.2170 38.2171 ins_cost(125); 38.2172 format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %} 38.2173 - opcode(0x8B); 38.2174 - ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src)); 38.2175 + ins_encode %{ 38.2176 + __ movl($dst$$Register, Address(rsp, $src$$disp)); 38.2177 + %} 38.2178 ins_pipe(ialu_reg_mem); 38.2179 %} 38.2180 38.2181 @@ -11237,8 +10335,9 @@ 38.2182 38.2183 ins_cost(125); 38.2184 format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %} 38.2185 - opcode(0xF3, 0x0F, 0x10); 38.2186 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.2187 + ins_encode %{ 38.2188 + __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 38.2189 + %} 38.2190 ins_pipe(pipe_slow); 38.2191 %} 38.2192 38.2193 @@ -11248,8 +10347,9 @@ 38.2194 38.2195 ins_cost(125); 38.2196 format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %} 38.2197 - opcode(0x8B); 38.2198 - ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src)); 38.2199 + ins_encode %{ 38.2200 + __ movq($dst$$Register, Address(rsp, $src$$disp)); 38.2201 + %} 38.2202 ins_pipe(ialu_reg_mem); 38.2203 %} 38.2204 38.2205 @@ -11260,8 +10360,9 @@ 38.2206 38.2207 ins_cost(125); 38.2208 format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %} 38.2209 - opcode(0x66, 0x0F, 0x12); 38.2210 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.2211 + ins_encode %{ 38.2212 + __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 38.2213 + %} 38.2214 ins_pipe(pipe_slow); 38.2215 %} 38.2216 38.2217 @@ -11272,8 +10373,9 @@ 38.2218 38.2219 ins_cost(125); 38.2220 format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %} 38.2221 - opcode(0xF2, 0x0F, 0x10); 38.2222 - ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src)); 38.2223 
+ ins_encode %{ 38.2224 + __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 38.2225 + %} 38.2226 ins_pipe(pipe_slow); 38.2227 %} 38.2228 38.2229 @@ -11284,8 +10386,9 @@ 38.2230 38.2231 ins_cost(95); // XXX 38.2232 format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %} 38.2233 - opcode(0xF3, 0x0F, 0x11); 38.2234 - ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); 38.2235 + ins_encode %{ 38.2236 + __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 38.2237 + %} 38.2238 ins_pipe(pipe_slow); 38.2239 %} 38.2240 38.2241 @@ -11295,8 +10398,9 @@ 38.2242 38.2243 ins_cost(100); 38.2244 format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %} 38.2245 - opcode(0x89); 38.2246 - ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst)); 38.2247 + ins_encode %{ 38.2248 + __ movl(Address(rsp, $dst$$disp), $src$$Register); 38.2249 + %} 38.2250 ins_pipe( ialu_mem_reg ); 38.2251 %} 38.2252 38.2253 @@ -11306,8 +10410,9 @@ 38.2254 38.2255 ins_cost(95); // XXX 38.2256 format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %} 38.2257 - opcode(0xF2, 0x0F, 0x11); 38.2258 - ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst)); 38.2259 + ins_encode %{ 38.2260 + __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 38.2261 + %} 38.2262 ins_pipe(pipe_slow); 38.2263 %} 38.2264 38.2265 @@ -11317,8 +10422,9 @@ 38.2266 38.2267 ins_cost(100); 38.2268 format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %} 38.2269 - opcode(0x89); 38.2270 - ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst)); 38.2271 + ins_encode %{ 38.2272 + __ movq(Address(rsp, $dst$$disp), $src$$Register); 38.2273 + %} 38.2274 ins_pipe(ialu_mem_reg); 38.2275 %} 38.2276 38.2277 @@ -11327,7 +10433,9 @@ 38.2278 effect(DEF dst, USE src); 38.2279 ins_cost(85); 38.2280 format %{ "movd $dst,$src\t# MoveF2I" %} 38.2281 - ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %} 38.2282 + ins_encode %{ 38.2283 + __ movdl($dst$$Register, $src$$XMMRegister); 38.2284 + %} 38.2285 ins_pipe( 
pipe_slow ); 38.2286 %} 38.2287 38.2288 @@ -11336,7 +10444,9 @@ 38.2289 effect(DEF dst, USE src); 38.2290 ins_cost(85); 38.2291 format %{ "movd $dst,$src\t# MoveD2L" %} 38.2292 - ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %} 38.2293 + ins_encode %{ 38.2294 + __ movdq($dst$$Register, $src$$XMMRegister); 38.2295 + %} 38.2296 ins_pipe( pipe_slow ); 38.2297 %} 38.2298 38.2299 @@ -11346,7 +10456,9 @@ 38.2300 effect(DEF dst, USE src); 38.2301 ins_cost(300); 38.2302 format %{ "movd $dst,$src\t# MoveI2F" %} 38.2303 - ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %} 38.2304 + ins_encode %{ 38.2305 + __ movdl($dst$$XMMRegister, $src$$Register); 38.2306 + %} 38.2307 ins_pipe( pipe_slow ); 38.2308 %} 38.2309 38.2310 @@ -11355,7 +10467,9 @@ 38.2311 effect(DEF dst, USE src); 38.2312 ins_cost(300); 38.2313 format %{ "movd $dst,$src\t# MoveL2D" %} 38.2314 - ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %} 38.2315 + ins_encode %{ 38.2316 + __ movdq($dst$$XMMRegister, $src$$Register); 38.2317 + %} 38.2318 ins_pipe( pipe_slow ); 38.2319 %} 38.2320 38.2321 @@ -11365,7 +10479,13 @@ 38.2322 format %{ "MOVDQA $dst,$src\n\t" 38.2323 "PUNPCKLBW $dst,$dst\n\t" 38.2324 "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} 38.2325 - ins_encode( pshufd_8x8(dst, src)); 38.2326 + ins_encode %{ 38.2327 + if ($dst$$reg != $src$$reg) { 38.2328 + __ movdqa($dst$$XMMRegister, $src$$XMMRegister); 38.2329 + } 38.2330 + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 38.2331 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 38.2332 + %} 38.2333 ins_pipe( pipe_slow ); 38.2334 %} 38.2335 38.2336 @@ -11375,7 +10495,11 @@ 38.2337 format %{ "MOVD $dst,$src\n\t" 38.2338 "PUNPCKLBW $dst,$dst\n\t" 38.2339 "PSHUFLW $dst,$dst,0x00\t! 
replicate8B" %} 38.2340 - ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst)); 38.2341 + ins_encode %{ 38.2342 + __ movdl($dst$$XMMRegister, $src$$Register); 38.2343 + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 38.2344 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 38.2345 + %} 38.2346 ins_pipe( pipe_slow ); 38.2347 %} 38.2348 38.2349 @@ -11383,7 +10507,9 @@ 38.2350 instruct Repl8B_immI0(regD dst, immI0 zero) %{ 38.2351 match(Set dst (Replicate8B zero)); 38.2352 format %{ "PXOR $dst,$dst\t! replicate8B" %} 38.2353 - ins_encode( pxor(dst, dst)); 38.2354 + ins_encode %{ 38.2355 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 38.2356 + %} 38.2357 ins_pipe( fpu_reg_reg ); 38.2358 %} 38.2359 38.2360 @@ -11391,7 +10517,9 @@ 38.2361 instruct Repl4S_reg(regD dst, regD src) %{ 38.2362 match(Set dst (Replicate4S src)); 38.2363 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} 38.2364 - ins_encode( pshufd_4x16(dst, src)); 38.2365 + ins_encode %{ 38.2366 + __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); 38.2367 + %} 38.2368 ins_pipe( fpu_reg_reg ); 38.2369 %} 38.2370 38.2371 @@ -11400,7 +10528,10 @@ 38.2372 match(Set dst (Replicate4S src)); 38.2373 format %{ "MOVD $dst,$src\n\t" 38.2374 "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} 38.2375 - ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); 38.2376 + ins_encode %{ 38.2377 + __ movdl($dst$$XMMRegister, $src$$Register); 38.2378 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 38.2379 + %} 38.2380 ins_pipe( fpu_reg_reg ); 38.2381 %} 38.2382 38.2383 @@ -11408,7 +10539,9 @@ 38.2384 instruct Repl4S_immI0(regD dst, immI0 zero) %{ 38.2385 match(Set dst (Replicate4S zero)); 38.2386 format %{ "PXOR $dst,$dst\t! 
replicate4S" %} 38.2387 - ins_encode( pxor(dst, dst)); 38.2388 + ins_encode %{ 38.2389 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 38.2390 + %} 38.2391 ins_pipe( fpu_reg_reg ); 38.2392 %} 38.2393 38.2394 @@ -11416,7 +10549,9 @@ 38.2395 instruct Repl4C_reg(regD dst, regD src) %{ 38.2396 match(Set dst (Replicate4C src)); 38.2397 format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} 38.2398 - ins_encode( pshufd_4x16(dst, src)); 38.2399 + ins_encode %{ 38.2400 + __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); 38.2401 + %} 38.2402 ins_pipe( fpu_reg_reg ); 38.2403 %} 38.2404 38.2405 @@ -11425,7 +10560,10 @@ 38.2406 match(Set dst (Replicate4C src)); 38.2407 format %{ "MOVD $dst,$src\n\t" 38.2408 "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} 38.2409 - ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst)); 38.2410 + ins_encode %{ 38.2411 + __ movdl($dst$$XMMRegister, $src$$Register); 38.2412 + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 38.2413 + %} 38.2414 ins_pipe( fpu_reg_reg ); 38.2415 %} 38.2416 38.2417 @@ -11433,7 +10571,9 @@ 38.2418 instruct Repl4C_immI0(regD dst, immI0 zero) %{ 38.2419 match(Set dst (Replicate4C zero)); 38.2420 format %{ "PXOR $dst,$dst\t! replicate4C" %} 38.2421 - ins_encode( pxor(dst, dst)); 38.2422 + ins_encode %{ 38.2423 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 38.2424 + %} 38.2425 ins_pipe( fpu_reg_reg ); 38.2426 %} 38.2427 38.2428 @@ -11441,7 +10581,9 @@ 38.2429 instruct Repl2I_reg(regD dst, regD src) %{ 38.2430 match(Set dst (Replicate2I src)); 38.2431 format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} 38.2432 - ins_encode( pshufd(dst, src, 0x00)); 38.2433 + ins_encode %{ 38.2434 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 38.2435 + %} 38.2436 ins_pipe( fpu_reg_reg ); 38.2437 %} 38.2438 38.2439 @@ -11450,7 +10592,10 @@ 38.2440 match(Set dst (Replicate2I src)); 38.2441 format %{ "MOVD $dst,$src\n\t" 38.2442 "PSHUFD $dst,$dst,0x00\t! 
replicate2I" %} 38.2443 - ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00)); 38.2444 + ins_encode %{ 38.2445 + __ movdl($dst$$XMMRegister, $src$$Register); 38.2446 + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 38.2447 + %} 38.2448 ins_pipe( fpu_reg_reg ); 38.2449 %} 38.2450 38.2451 @@ -11458,7 +10603,9 @@ 38.2452 instruct Repl2I_immI0(regD dst, immI0 zero) %{ 38.2453 match(Set dst (Replicate2I zero)); 38.2454 format %{ "PXOR $dst,$dst\t! replicate2I" %} 38.2455 - ins_encode( pxor(dst, dst)); 38.2456 + ins_encode %{ 38.2457 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 38.2458 + %} 38.2459 ins_pipe( fpu_reg_reg ); 38.2460 %} 38.2461 38.2462 @@ -11466,7 +10613,9 @@ 38.2463 instruct Repl2F_reg(regD dst, regD src) %{ 38.2464 match(Set dst (Replicate2F src)); 38.2465 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 38.2466 - ins_encode( pshufd(dst, src, 0xe0)); 38.2467 + ins_encode %{ 38.2468 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); 38.2469 + %} 38.2470 ins_pipe( fpu_reg_reg ); 38.2471 %} 38.2472 38.2473 @@ -11474,7 +10623,9 @@ 38.2474 instruct Repl2F_regF(regD dst, regF src) %{ 38.2475 match(Set dst (Replicate2F src)); 38.2476 format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} 38.2477 - ins_encode( pshufd(dst, src, 0xe0)); 38.2478 + ins_encode %{ 38.2479 + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); 38.2480 + %} 38.2481 ins_pipe( fpu_reg_reg ); 38.2482 %} 38.2483 38.2484 @@ -11482,7 +10633,9 @@ 38.2485 instruct Repl2F_immF0(regD dst, immF0 zero) %{ 38.2486 match(Set dst (Replicate2F zero)); 38.2487 format %{ "PXOR $dst,$dst\t! 
replicate2F" %} 38.2488 - ins_encode( pxor(dst, dst)); 38.2489 + ins_encode %{ 38.2490 + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 38.2491 + %} 38.2492 ins_pipe( fpu_reg_reg ); 38.2493 %} 38.2494 38.2495 @@ -12162,12 +11315,12 @@ 38.2496 effect(KILL rcx, KILL cr); 38.2497 38.2498 ins_cost(1100); // slightly larger than the next version 38.2499 - format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 38.2500 + format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t" 38.2501 "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" 38.2502 "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" 38.2503 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t" 38.2504 "jne,s miss\t\t# Missed: rdi not-zero\n\t" 38.2505 - "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t" 38.2506 + "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t" 38.2507 "xorq $result, $result\t\t Hit: rdi zero\n\t" 38.2508 "miss:\t" %} 38.2509 38.2510 @@ -12185,12 +11338,12 @@ 38.2511 effect(KILL rcx, KILL result); 38.2512 38.2513 ins_cost(1000); 38.2514 - format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 38.2515 + format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t" 38.2516 "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" 38.2517 "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" 38.2518 "repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t" 38.2519 "jne,s miss\t\t# Missed: flags nz\n\t" 38.2520 - "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t" 38.2521 + "movq [$sub + 
in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t" 38.2522 "miss:\t" %} 38.2523 38.2524 opcode(0x0); // No need to XOR RDI
39.1 --- a/src/share/vm/adlc/formssel.cpp Tue Dec 27 12:38:49 2011 -0800 39.2 +++ b/src/share/vm/adlc/formssel.cpp Thu Dec 29 11:37:50 2011 -0800 39.3 @@ -627,6 +627,7 @@ 39.4 if( strcmp(_matrule->_opType,"MemBarAcquire") == 0 ) return true; 39.5 if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true; 39.6 if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true; 39.7 + if( strcmp(_matrule->_opType,"MemBarStoreStore") == 0 ) return true; 39.8 39.9 return false; 39.10 } 39.11 @@ -3978,7 +3979,8 @@ 39.12 !strcmp(_opType,"MemBarAcquireLock") || 39.13 !strcmp(_opType,"MemBarReleaseLock") || 39.14 !strcmp(_opType,"MemBarVolatile" ) || 39.15 - !strcmp(_opType,"MemBarCPUOrder" ) ; 39.16 + !strcmp(_opType,"MemBarCPUOrder" ) || 39.17 + !strcmp(_opType,"MemBarStoreStore" ); 39.18 } 39.19 39.20 bool MatchRule::is_ideal_loadPC() const {
40.1 --- a/src/share/vm/asm/assembler.cpp Tue Dec 27 12:38:49 2011 -0800 40.2 +++ b/src/share/vm/asm/assembler.cpp Thu Dec 29 11:37:50 2011 -0800 40.3 @@ -61,6 +61,7 @@ 40.4 _code_limit = cs->limit(); 40.5 _code_pos = cs->end(); 40.6 _oop_recorder= code->oop_recorder(); 40.7 + DEBUG_ONLY( _short_branch_delta = 0; ) 40.8 if (_code_begin == NULL) { 40.9 vm_exit_out_of_memory(0, err_msg("CodeCache: no room for %s", 40.10 code->name()));
41.1 --- a/src/share/vm/asm/assembler.hpp Tue Dec 27 12:38:49 2011 -0800 41.2 +++ b/src/share/vm/asm/assembler.hpp Thu Dec 29 11:37:50 2011 -0800 41.3 @@ -241,6 +241,33 @@ 41.4 // Make it return true on platforms which need to verify 41.5 // instruction boundaries for some operations. 41.6 inline static bool pd_check_instruction_mark(); 41.7 + 41.8 + // Add delta to short branch distance to verify that it still fits into imm8. 41.9 + int _short_branch_delta; 41.10 + 41.11 + int short_branch_delta() const { return _short_branch_delta; } 41.12 + void set_short_branch_delta() { _short_branch_delta = 32; } 41.13 + void clear_short_branch_delta() { _short_branch_delta = 0; } 41.14 + 41.15 + class ShortBranchVerifier: public StackObj { 41.16 + private: 41.17 + AbstractAssembler* _assm; 41.18 + 41.19 + public: 41.20 + ShortBranchVerifier(AbstractAssembler* assm) : _assm(assm) { 41.21 + assert(assm->short_branch_delta() == 0, "overlapping instructions"); 41.22 + _assm->set_short_branch_delta(); 41.23 + } 41.24 + ~ShortBranchVerifier() { 41.25 + _assm->clear_short_branch_delta(); 41.26 + } 41.27 + }; 41.28 + #else 41.29 + // Dummy in product. 41.30 + class ShortBranchVerifier: public StackObj { 41.31 + public: 41.32 + ShortBranchVerifier(AbstractAssembler* assm) {} 41.33 + }; 41.34 #endif 41.35 41.36 // Label functions
42.1 --- a/src/share/vm/c1/c1_LIR.cpp Tue Dec 27 12:38:49 2011 -0800 42.2 +++ b/src/share/vm/c1/c1_LIR.cpp Thu Dec 29 11:37:50 2011 -0800 42.3 @@ -854,6 +854,9 @@ 42.4 if (opTypeCheck->_info_for_exception) do_info(opTypeCheck->_info_for_exception); 42.5 if (opTypeCheck->_info_for_patch) do_info(opTypeCheck->_info_for_patch); 42.6 if (opTypeCheck->_object->is_valid()) do_input(opTypeCheck->_object); 42.7 + if (op->code() == lir_store_check && opTypeCheck->_object->is_valid()) { 42.8 + do_temp(opTypeCheck->_object); 42.9 + } 42.10 if (opTypeCheck->_array->is_valid()) do_input(opTypeCheck->_array); 42.11 if (opTypeCheck->_tmp1->is_valid()) do_temp(opTypeCheck->_tmp1); 42.12 if (opTypeCheck->_tmp2->is_valid()) do_temp(opTypeCheck->_tmp2);
43.1 --- a/src/share/vm/c1/c1_LIRGenerator.cpp Tue Dec 27 12:38:49 2011 -0800 43.2 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Thu Dec 29 11:37:50 2011 -0800 43.3 @@ -1256,8 +1256,7 @@ 43.4 info = state_for(x); 43.5 } 43.6 __ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), result, info); 43.7 - __ move_wide(new LIR_Address(result, Klass::java_mirror_offset_in_bytes() + 43.8 - klassOopDesc::klass_part_offset_in_bytes(), T_OBJECT), result); 43.9 + __ move_wide(new LIR_Address(result, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result); 43.10 } 43.11 43.12
44.1 --- a/src/share/vm/c1/c1_Optimizer.cpp Tue Dec 27 12:38:49 2011 -0800 44.2 +++ b/src/share/vm/c1/c1_Optimizer.cpp Thu Dec 29 11:37:50 2011 -0800 44.3 @@ -122,18 +122,32 @@ 44.4 if (sux != f_goto->default_sux()) return; 44.5 44.6 // check if at least one word was pushed on sux_state 44.7 + // inlining depths must match 44.8 + ValueStack* if_state = if_->state(); 44.9 ValueStack* sux_state = sux->state(); 44.10 - if (sux_state->stack_size() <= if_->state()->stack_size()) return; 44.11 + if (if_state->scope()->level() > sux_state->scope()->level()) { 44.12 + while (sux_state->scope() != if_state->scope()) { 44.13 + if_state = if_state->caller_state(); 44.14 + assert(if_state != NULL, "states do not match up"); 44.15 + } 44.16 + } else if (if_state->scope()->level() < sux_state->scope()->level()) { 44.17 + while (sux_state->scope() != if_state->scope()) { 44.18 + sux_state = sux_state->caller_state(); 44.19 + assert(sux_state != NULL, "states do not match up"); 44.20 + } 44.21 + } 44.22 + 44.23 + if (sux_state->stack_size() <= if_state->stack_size()) return; 44.24 44.25 // check if phi function is present at end of successor stack and that 44.26 // only this phi was pushed on the stack 44.27 - Value sux_phi = sux_state->stack_at(if_->state()->stack_size()); 44.28 + Value sux_phi = sux_state->stack_at(if_state->stack_size()); 44.29 if (sux_phi == NULL || sux_phi->as_Phi() == NULL || sux_phi->as_Phi()->block() != sux) return; 44.30 - if (sux_phi->type()->size() != sux_state->stack_size() - if_->state()->stack_size()) return; 44.31 + if (sux_phi->type()->size() != sux_state->stack_size() - if_state->stack_size()) return; 44.32 44.33 // get the values that were pushed in the true- and false-branch 44.34 - Value t_value = t_goto->state()->stack_at(if_->state()->stack_size()); 44.35 - Value f_value = f_goto->state()->stack_at(if_->state()->stack_size()); 44.36 + Value t_value = t_goto->state()->stack_at(if_state->stack_size()); 44.37 + Value f_value = 
f_goto->state()->stack_at(if_state->stack_size()); 44.38 44.39 // backend does not support floats 44.40 assert(t_value->type()->base() == f_value->type()->base(), "incompatible types"); 44.41 @@ -180,11 +194,7 @@ 44.42 Goto* goto_ = new Goto(sux, state_before, if_->is_safepoint() || t_goto->is_safepoint() || f_goto->is_safepoint()); 44.43 44.44 // prepare state for Goto 44.45 - ValueStack* goto_state = if_->state(); 44.46 - while (sux_state->scope() != goto_state->scope()) { 44.47 - goto_state = goto_state->caller_state(); 44.48 - assert(goto_state != NULL, "states do not match up"); 44.49 - } 44.50 + ValueStack* goto_state = if_state; 44.51 goto_state = goto_state->copy(ValueStack::StateAfter, goto_state->bci()); 44.52 goto_state->push(result->type(), result); 44.53 assert(goto_state->is_same(sux_state), "states must match now");
45.1 --- a/src/share/vm/oops/arrayKlass.hpp Tue Dec 27 12:38:49 2011 -0800 45.2 +++ b/src/share/vm/oops/arrayKlass.hpp Thu Dec 29 11:37:50 2011 -0800 45.3 @@ -73,7 +73,7 @@ 45.4 oop* adr_component_mirror() { return (oop*)&this->_component_mirror;} 45.5 45.6 // Compiler/Interpreter offset 45.7 - static ByteSize component_mirror_offset() { return byte_offset_of(arrayKlass, _component_mirror); } 45.8 + static ByteSize component_mirror_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(arrayKlass, _component_mirror)); } 45.9 45.10 virtual klassOop java_super() const;//{ return SystemDictionary::Object_klass(); } 45.11
46.1 --- a/src/share/vm/oops/instanceKlass.hpp Tue Dec 27 12:38:49 2011 -0800 46.2 +++ b/src/share/vm/oops/instanceKlass.hpp Thu Dec 29 11:37:50 2011 -0800 46.3 @@ -405,7 +405,7 @@ 46.4 ReferenceType reference_type() const { return _reference_type; } 46.5 void set_reference_type(ReferenceType t) { _reference_type = t; } 46.6 46.7 - static int reference_type_offset_in_bytes() { return offset_of(instanceKlass, _reference_type); } 46.8 + static ByteSize reference_type_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _reference_type)); } 46.9 46.10 // find local field, returns true if found 46.11 bool find_local_field(Symbol* name, Symbol* sig, fieldDescriptor* fd) const; 46.12 @@ -616,8 +616,8 @@ 46.13 void set_breakpoints(BreakpointInfo* bps) { _breakpoints = bps; }; 46.14 46.15 // support for stub routines 46.16 - static int init_state_offset_in_bytes() { return offset_of(instanceKlass, _init_state); } 46.17 - static int init_thread_offset_in_bytes() { return offset_of(instanceKlass, _init_thread); } 46.18 + static ByteSize init_state_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_state)); } 46.19 + static ByteSize init_thread_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(instanceKlass, _init_thread)); } 46.20 46.21 // subclass/subinterface checks 46.22 bool implements_interface(klassOop k) const;
47.1 --- a/src/share/vm/oops/klass.cpp Tue Dec 27 12:38:49 2011 -0800 47.2 +++ b/src/share/vm/oops/klass.cpp Thu Dec 29 11:37:50 2011 -0800 47.3 @@ -144,7 +144,7 @@ 47.4 } 47.5 kl->set_secondary_supers(NULL); 47.6 oop_store_without_check((oop*) &kl->_primary_supers[0], k); 47.7 - kl->set_super_check_offset(primary_supers_offset_in_bytes() + sizeof(oopDesc)); 47.8 + kl->set_super_check_offset(in_bytes(primary_supers_offset())); 47.9 } 47.10 47.11 kl->set_java_mirror(NULL);
48.1 --- a/src/share/vm/oops/klass.hpp Tue Dec 27 12:38:49 2011 -0800 48.2 +++ b/src/share/vm/oops/klass.hpp Thu Dec 29 11:37:50 2011 -0800 48.3 @@ -313,7 +313,7 @@ 48.4 // Can this klass be a primary super? False for interfaces and arrays of 48.5 // interfaces. False also for arrays or classes with long super chains. 48.6 bool can_be_primary_super() const { 48.7 - const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc); 48.8 + const juint secondary_offset = in_bytes(secondary_super_cache_offset()); 48.9 return super_check_offset() != secondary_offset; 48.10 } 48.11 virtual bool can_be_primary_super_slow() const; 48.12 @@ -323,7 +323,7 @@ 48.13 if (!can_be_primary_super()) { 48.14 return primary_super_limit(); 48.15 } else { 48.16 - juint d = (super_check_offset() - (primary_supers_offset_in_bytes() + sizeof(oopDesc))) / sizeof(klassOop); 48.17 + juint d = (super_check_offset() - in_bytes(primary_supers_offset())) / sizeof(klassOop); 48.18 assert(d < primary_super_limit(), "oob"); 48.19 assert(_primary_supers[d] == as_klassOop(), "proper init"); 48.20 return d; 48.21 @@ -373,15 +373,15 @@ 48.22 virtual void set_alloc_size(juint n) = 0; 48.23 48.24 // Compiler support 48.25 - static int super_offset_in_bytes() { return offset_of(Klass, _super); } 48.26 - static int super_check_offset_offset_in_bytes() { return offset_of(Klass, _super_check_offset); } 48.27 - static int primary_supers_offset_in_bytes(){ return offset_of(Klass, _primary_supers); } 48.28 - static int secondary_super_cache_offset_in_bytes() { return offset_of(Klass, _secondary_super_cache); } 48.29 - static int secondary_supers_offset_in_bytes() { return offset_of(Klass, _secondary_supers); } 48.30 - static int java_mirror_offset_in_bytes() { return offset_of(Klass, _java_mirror); } 48.31 - static int modifier_flags_offset_in_bytes(){ return offset_of(Klass, _modifier_flags); } 48.32 - static int layout_helper_offset_in_bytes() { return offset_of(Klass, _layout_helper); 
} 48.33 - static int access_flags_offset_in_bytes() { return offset_of(Klass, _access_flags); } 48.34 + static ByteSize super_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super)); } 48.35 + static ByteSize super_check_offset_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _super_check_offset)); } 48.36 + static ByteSize primary_supers_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _primary_supers)); } 48.37 + static ByteSize secondary_super_cache_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_super_cache)); } 48.38 + static ByteSize secondary_supers_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _secondary_supers)); } 48.39 + static ByteSize java_mirror_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _java_mirror)); } 48.40 + static ByteSize modifier_flags_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _modifier_flags)); } 48.41 + static ByteSize layout_helper_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _layout_helper)); } 48.42 + static ByteSize access_flags_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _access_flags)); } 48.43 48.44 // Unpacking layout_helper: 48.45 enum { 48.46 @@ -478,7 +478,7 @@ 48.47 bool is_subtype_of(klassOop k) const { 48.48 juint off = k->klass_part()->super_check_offset(); 48.49 klassOop sup = *(klassOop*)( (address)as_klassOop() + off ); 48.50 - const juint secondary_offset = secondary_super_cache_offset_in_bytes() + sizeof(oopDesc); 48.51 + const juint secondary_offset = in_bytes(secondary_super_cache_offset()); 48.52 if (sup == k) { 48.53 return true; 48.54 } else if (off != secondary_offset) { 48.55 @@ -674,7 +674,7 @@ 48.56 // are potential problems in setting the bias pattern for 48.57 // JVM-internal oops. 
48.58 inline void set_prototype_header(markOop header); 48.59 - static int prototype_header_offset_in_bytes() { return offset_of(Klass, _prototype_header); } 48.60 + static ByteSize prototype_header_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(Klass, _prototype_header)); } 48.61 48.62 int biased_lock_revocation_count() const { return (int) _biased_lock_revocation_count; } 48.63 // Atomically increments biased_lock_revocation_count and returns updated value
49.1 --- a/src/share/vm/oops/klassOop.hpp Tue Dec 27 12:38:49 2011 -0800 49.2 +++ b/src/share/vm/oops/klassOop.hpp Thu Dec 29 11:37:50 2011 -0800 49.3 @@ -38,14 +38,8 @@ 49.4 49.5 class klassOopDesc : public oopDesc { 49.6 public: 49.7 - // size operation 49.8 - static int header_size() { return sizeof(klassOopDesc)/HeapWordSize; } 49.9 - 49.10 - // support for code generation 49.11 - static int klass_part_offset_in_bytes() { return sizeof(klassOopDesc); } 49.12 - 49.13 // returns the Klass part containing dispatching behavior 49.14 - Klass* klass_part() const { return (Klass*)((address)this + klass_part_offset_in_bytes()); } 49.15 + Klass* klass_part() const { return (Klass*)((address)this + sizeof(klassOopDesc)); } 49.16 49.17 // Convenience wrapper 49.18 inline oop java_mirror() const;
50.1 --- a/src/share/vm/oops/objArrayKlass.hpp Tue Dec 27 12:38:49 2011 -0800 50.2 +++ b/src/share/vm/oops/objArrayKlass.hpp Thu Dec 29 11:37:50 2011 -0800 50.3 @@ -47,7 +47,7 @@ 50.4 oop* bottom_klass_addr() { return (oop*)&_bottom_klass; } 50.5 50.6 // Compiler/Interpreter offset 50.7 - static int element_klass_offset_in_bytes() { return offset_of(objArrayKlass, _element_klass); } 50.8 + static ByteSize element_klass_offset() { return in_ByteSize(sizeof(klassOopDesc) + offset_of(objArrayKlass, _element_klass)); } 50.9 50.10 // Dispatched operation 50.11 bool can_be_primary_super_slow() const;
51.1 --- a/src/share/vm/opto/callnode.hpp Tue Dec 27 12:38:49 2011 -0800 51.2 +++ b/src/share/vm/opto/callnode.hpp Thu Dec 29 11:37:50 2011 -0800 51.3 @@ -791,6 +791,10 @@ 51.4 // are defined in graphKit.cpp, which sets up the bidirectional relation.) 51.5 InitializeNode* initialization(); 51.6 51.7 + // Return the corresponding storestore barrier (or null if none). 51.8 + // Walks out edges to find it... 51.9 + MemBarStoreStoreNode* storestore(); 51.10 + 51.11 // Convenience for initialization->maybe_set_complete(phase) 51.12 bool maybe_set_complete(PhaseGVN* phase); 51.13 };
52.1 --- a/src/share/vm/opto/classes.hpp Tue Dec 27 12:38:49 2011 -0800 52.2 +++ b/src/share/vm/opto/classes.hpp Thu Dec 29 11:37:50 2011 -0800 52.3 @@ -166,6 +166,7 @@ 52.4 macro(MemBarRelease) 52.5 macro(MemBarReleaseLock) 52.6 macro(MemBarVolatile) 52.7 +macro(MemBarStoreStore) 52.8 macro(MergeMem) 52.9 macro(MinI) 52.10 macro(ModD)
53.1 --- a/src/share/vm/opto/compile.cpp Tue Dec 27 12:38:49 2011 -0800 53.2 +++ b/src/share/vm/opto/compile.cpp Thu Dec 29 11:37:50 2011 -0800 53.3 @@ -1282,12 +1282,11 @@ 53.4 if( tk ) { 53.5 // If we are referencing a field within a Klass, we need 53.6 // to assume the worst case of an Object. Both exact and 53.7 - // inexact types must flatten to the same alias class. 53.8 - // Since the flattened result for a klass is defined to be 53.9 - // precisely java.lang.Object, use a constant ptr. 53.10 + // inexact types must flatten to the same alias class so 53.11 + // use NotNull as the PTR. 53.12 if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) { 53.13 53.14 - tj = tk = TypeKlassPtr::make(TypePtr::Constant, 53.15 + tj = tk = TypeKlassPtr::make(TypePtr::NotNull, 53.16 TypeKlassPtr::OBJECT->klass(), 53.17 offset); 53.18 } 53.19 @@ -1307,10 +1306,12 @@ 53.20 // these 2 disparate memories into the same alias class. Since the 53.21 // primary supertype array is read-only, there's no chance of confusion 53.22 // where we bypass an array load and an array store. 
53.23 - uint off2 = offset - Klass::primary_supers_offset_in_bytes(); 53.24 - if( offset == Type::OffsetBot || 53.25 - off2 < Klass::primary_super_limit()*wordSize ) { 53.26 - offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes(); 53.27 + int primary_supers_offset = in_bytes(Klass::primary_supers_offset()); 53.28 + if (offset == Type::OffsetBot || 53.29 + (offset >= primary_supers_offset && 53.30 + offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) || 53.31 + offset == (int)in_bytes(Klass::secondary_super_cache_offset())) { 53.32 + offset = in_bytes(Klass::secondary_super_cache_offset()); 53.33 tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset ); 53.34 } 53.35 } 53.36 @@ -1489,13 +1490,13 @@ 53.37 alias_type(idx)->set_rewritable(false); 53.38 } 53.39 if (flat->isa_klassptr()) { 53.40 - if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) 53.41 + if (flat->offset() == in_bytes(Klass::super_check_offset_offset())) 53.42 alias_type(idx)->set_rewritable(false); 53.43 - if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) 53.44 + if (flat->offset() == in_bytes(Klass::modifier_flags_offset())) 53.45 alias_type(idx)->set_rewritable(false); 53.46 - if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) 53.47 + if (flat->offset() == in_bytes(Klass::access_flags_offset())) 53.48 alias_type(idx)->set_rewritable(false); 53.49 - if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) 53.50 + if (flat->offset() == in_bytes(Klass::java_mirror_offset())) 53.51 alias_type(idx)->set_rewritable(false); 53.52 } 53.53 // %%% (We would like to finalize JavaThread::threadObj_offset(), 53.54 @@ -2521,7 +2522,7 @@ 53.55 break; 53.56 } 53.57 } 53.58 - assert(p != NULL, "must be found"); 53.59 + assert(proj != NULL, "must be found"); 53.60 p->subsume_by(proj); 53.61 } 53.62 }
54.1 --- a/src/share/vm/opto/escape.cpp Tue Dec 27 12:38:49 2011 -0800 54.2 +++ b/src/share/vm/opto/escape.cpp Thu Dec 29 11:37:50 2011 -0800 54.3 @@ -1595,6 +1595,7 @@ 54.4 GrowableArray<Node*> alloc_worklist; 54.5 GrowableArray<Node*> addp_worklist; 54.6 GrowableArray<Node*> ptr_cmp_worklist; 54.7 + GrowableArray<Node*> storestore_worklist; 54.8 PhaseGVN* igvn = _igvn; 54.9 54.10 // Push all useful nodes onto CG list and set their type. 54.11 @@ -1618,6 +1619,11 @@ 54.12 (n->Opcode() == Op_CmpP || n->Opcode() == Op_CmpN)) { 54.13 // Compare pointers nodes 54.14 ptr_cmp_worklist.append(n); 54.15 + } else if (n->is_MemBarStoreStore()) { 54.16 + // Collect all MemBarStoreStore nodes so that depending on the 54.17 + // escape status of the associated Allocate node some of them 54.18 + // may be eliminated. 54.19 + storestore_worklist.append(n); 54.20 } 54.21 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { 54.22 Node* m = n->fast_out(i); // Get user 54.23 @@ -1724,11 +1730,20 @@ 54.24 uint alloc_length = alloc_worklist.length(); 54.25 for (uint next = 0; next < alloc_length; ++next) { 54.26 Node* n = alloc_worklist.at(next); 54.27 - if (ptnode_adr(n->_idx)->escape_state() == PointsToNode::NoEscape) { 54.28 + PointsToNode::EscapeState es = ptnode_adr(n->_idx)->escape_state(); 54.29 + if (es == PointsToNode::NoEscape) { 54.30 has_non_escaping_obj = true; 54.31 if (n->is_Allocate()) { 54.32 find_init_values(n, &visited, igvn); 54.33 + // The object allocated by this Allocate node will never be 54.34 + // seen by another thread. Mark it so that when it is 54.35 + // expanded no MemBarStoreStore is added. 54.36 + n->as_Allocate()->initialization()->set_does_not_escape(); 54.37 } 54.38 + } else if ((es == PointsToNode::ArgEscape) && n->is_Allocate()) { 54.39 + // Same as above. Mark this Allocate node so that when it is 54.40 + // expanded no MemBarStoreStore is added. 
54.41 + n->as_Allocate()->initialization()->set_does_not_escape(); 54.42 } 54.43 } 54.44 54.45 @@ -1874,6 +1889,25 @@ 54.46 igvn->hash_delete(_pcmp_eq); 54.47 } 54.48 54.49 + // For MemBarStoreStore nodes added in library_call.cpp, check 54.50 + // escape status of associated AllocateNode and optimize out 54.51 + // MemBarStoreStore node if the allocated object never escapes. 54.52 + while (storestore_worklist.length() != 0) { 54.53 + Node *n = storestore_worklist.pop(); 54.54 + MemBarStoreStoreNode *storestore = n ->as_MemBarStoreStore(); 54.55 + Node *alloc = storestore->in(MemBarNode::Precedent)->in(0); 54.56 + assert (alloc->is_Allocate(), "storestore should point to AllocateNode"); 54.57 + PointsToNode::EscapeState es = ptnode_adr(alloc->_idx)->escape_state(); 54.58 + if (es == PointsToNode::NoEscape || es == PointsToNode::ArgEscape) { 54.59 + MemBarNode* mb = MemBarNode::make(C, Op_MemBarCPUOrder, Compile::AliasIdxBot); 54.60 + mb->init_req(TypeFunc::Memory, storestore->in(TypeFunc::Memory)); 54.61 + mb->init_req(TypeFunc::Control, storestore->in(TypeFunc::Control)); 54.62 + 54.63 + _igvn->register_new_node_with_optimizer(mb); 54.64 + _igvn->replace_node(storestore, mb); 54.65 + } 54.66 + } 54.67 + 54.68 #ifndef PRODUCT 54.69 if (PrintEscapeAnalysis) { 54.70 dump(); // Dump ConnectionGraph
55.1 --- a/src/share/vm/opto/graphKit.cpp Tue Dec 27 12:38:49 2011 -0800 55.2 +++ b/src/share/vm/opto/graphKit.cpp Thu Dec 29 11:37:50 2011 -0800 55.3 @@ -2304,9 +2304,9 @@ 55.4 // will always succeed. We could leave a dependency behind to ensure this. 55.5 55.6 // First load the super-klass's check-offset 55.7 - Node *p1 = basic_plus_adr( superklass, superklass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ); 55.8 + Node *p1 = basic_plus_adr( superklass, superklass, in_bytes(Klass::super_check_offset_offset()) ); 55.9 Node *chk_off = _gvn.transform( new (C, 3) LoadINode( NULL, memory(p1), p1, _gvn.type(p1)->is_ptr() ) ); 55.10 - int cacheoff_con = sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes(); 55.11 + int cacheoff_con = in_bytes(Klass::secondary_super_cache_offset()); 55.12 bool might_be_cache = (find_int_con(chk_off, cacheoff_con) == cacheoff_con); 55.13 55.14 // Load from the sub-klass's super-class display list, or a 1-word cache of 55.15 @@ -2934,7 +2934,7 @@ 55.16 } 55.17 } 55.18 constant_value = Klass::_lh_neutral_value; // put in a known value 55.19 - Node* lhp = basic_plus_adr(klass_node, klass_node, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc)); 55.20 + Node* lhp = basic_plus_adr(klass_node, klass_node, in_bytes(Klass::layout_helper_offset())); 55.21 return make_load(NULL, lhp, TypeInt::INT, T_INT); 55.22 } 55.23 55.24 @@ -3337,6 +3337,19 @@ 55.25 return NULL; 55.26 } 55.27 55.28 +// Trace Allocate -> Proj[Parm] -> MemBarStoreStore 55.29 +MemBarStoreStoreNode* AllocateNode::storestore() { 55.30 + ProjNode* rawoop = proj_out(AllocateNode::RawAddress); 55.31 + if (rawoop == NULL) return NULL; 55.32 + for (DUIterator_Fast imax, i = rawoop->fast_outs(imax); i < imax; i++) { 55.33 + Node* storestore = rawoop->fast_out(i); 55.34 + if (storestore->is_MemBarStoreStore()) { 55.35 + return storestore->as_MemBarStoreStore(); 55.36 + } 55.37 + } 55.38 + return NULL; 55.39 +} 55.40 + 55.41 //----------------------------- 
loop predicates --------------------------- 55.42 55.43 //------------------------------add_predicate_impl----------------------------
56.1 --- a/src/share/vm/opto/library_call.cpp Tue Dec 27 12:38:49 2011 -0800 56.2 +++ b/src/share/vm/opto/library_call.cpp Thu Dec 29 11:37:50 2011 -0800 56.3 @@ -2165,8 +2165,7 @@ 56.4 IdealKit ideal(this); 56.5 #define __ ideal. 56.6 56.7 - const int reference_type_offset = instanceKlass::reference_type_offset_in_bytes() + 56.8 - sizeof(oopDesc); 56.9 + const int reference_type_offset = in_bytes(instanceKlass::reference_type_offset()); 56.10 56.11 Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset); 56.12 56.13 @@ -2806,7 +2805,7 @@ 56.14 // Note: The argument might still be an illegal value like 56.15 // Serializable.class or Object[].class. The runtime will handle it. 56.16 // But we must make an explicit check for initialization. 56.17 - Node* insp = basic_plus_adr(kls, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)); 56.18 + Node* insp = basic_plus_adr(kls, in_bytes(instanceKlass::init_state_offset())); 56.19 Node* inst = make_load(NULL, insp, TypeInt::INT, T_INT); 56.20 Node* bits = intcon(instanceKlass::fully_initialized); 56.21 Node* test = _gvn.transform( new (C, 3) SubINode(inst, bits) ); 56.22 @@ -2954,7 +2953,7 @@ 56.23 //---------------------------load_mirror_from_klass---------------------------- 56.24 // Given a klass oop, load its java mirror (a java.lang.Class oop). 56.25 Node* LibraryCallKit::load_mirror_from_klass(Node* klass) { 56.26 - Node* p = basic_plus_adr(klass, Klass::java_mirror_offset_in_bytes() + sizeof(oopDesc)); 56.27 + Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset())); 56.28 return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT); 56.29 } 56.30 56.31 @@ -2994,7 +2993,7 @@ 56.32 Node* LibraryCallKit::generate_access_flags_guard(Node* kls, int modifier_mask, int modifier_bits, RegionNode* region) { 56.33 // Branch around if the given klass has the given modifier bit set. 56.34 // Like generate_guard, adds a new path onto the region. 
56.35 - Node* modp = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)); 56.36 + Node* modp = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset())); 56.37 Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT); 56.38 Node* mask = intcon(modifier_mask); 56.39 Node* bits = intcon(modifier_bits); 56.40 @@ -3115,7 +3114,7 @@ 56.41 break; 56.42 56.43 case vmIntrinsics::_getModifiers: 56.44 - p = basic_plus_adr(kls, Klass::modifier_flags_offset_in_bytes() + sizeof(oopDesc)); 56.45 + p = basic_plus_adr(kls, in_bytes(Klass::modifier_flags_offset())); 56.46 query_value = make_load(NULL, p, TypeInt::INT, T_INT); 56.47 break; 56.48 56.49 @@ -3155,7 +3154,7 @@ 56.50 // A guard was added. If the guard is taken, it was an array. 56.51 phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror()))); 56.52 // If we fall through, it's a plain class. Get its _super. 56.53 - p = basic_plus_adr(kls, Klass::super_offset_in_bytes() + sizeof(oopDesc)); 56.54 + p = basic_plus_adr(kls, in_bytes(Klass::super_offset())); 56.55 kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL) ); 56.56 null_ctl = top(); 56.57 kls = null_check_oop(kls, &null_ctl); 56.58 @@ -3173,7 +3172,7 @@ 56.59 if (generate_array_guard(kls, region) != NULL) { 56.60 // Be sure to pin the oop load to the guard edge just created: 56.61 Node* is_array_ctrl = region->in(region->req()-1); 56.62 - Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()) + sizeof(oopDesc)); 56.63 + Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset())); 56.64 Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT); 56.65 phi->add_req(cmo); 56.66 } 56.67 @@ -3181,7 +3180,7 @@ 56.68 break; 56.69 56.70 case vmIntrinsics::_getClassAccessFlags: 56.71 - p = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)); 56.72 + p = basic_plus_adr(kls, 
in_bytes(Klass::access_flags_offset())); 56.73 query_value = make_load(NULL, p, TypeInt::INT, T_INT); 56.74 break; 56.75 56.76 @@ -4194,12 +4193,17 @@ 56.77 Node* raw_obj = alloc_obj->in(1); 56.78 assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), ""); 56.79 56.80 + AllocateNode* alloc = NULL; 56.81 if (ReduceBulkZeroing) { 56.82 // We will be completely responsible for initializing this object - 56.83 // mark Initialize node as complete. 56.84 - AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn); 56.85 + alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn); 56.86 // The object was just allocated - there should be no any stores! 56.87 guarantee(alloc != NULL && alloc->maybe_set_complete(&_gvn), ""); 56.88 + // Mark as complete_with_arraycopy so that on AllocateNode 56.89 + // expansion, we know this AllocateNode is initialized by an array 56.90 + // copy and a StoreStore barrier exists after the array copy. 56.91 + alloc->initialization()->set_complete_with_arraycopy(); 56.92 } 56.93 56.94 // Copy the fastest available way. 56.95 @@ -4261,7 +4265,18 @@ 56.96 } 56.97 56.98 // Do not let reads from the cloned object float above the arraycopy. 56.99 - insert_mem_bar(Op_MemBarCPUOrder); 56.100 + if (alloc != NULL) { 56.101 + // Do not let stores that initialize this object be reordered with 56.102 + // a subsequent store that would make this object accessible by 56.103 + // other threads. 56.104 + // Record what AllocateNode this StoreStore protects so that 56.105 + // escape analysis can go from the MemBarStoreStoreNode to the 56.106 + // AllocateNode and eliminate the MemBarStoreStoreNode if possible 56.107 + // based on the escape status of the AllocateNode. 
56.108 + insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress)); 56.109 + } else { 56.110 + insert_mem_bar(Op_MemBarCPUOrder); 56.111 + } 56.112 } 56.113 56.114 //------------------------inline_native_clone---------------------------- 56.115 @@ -4857,7 +4872,7 @@ 56.116 PreserveJVMState pjvms(this); 56.117 set_control(not_subtype_ctrl); 56.118 // (At this point we can assume disjoint_bases, since types differ.) 56.119 - int ek_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc); 56.120 + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); 56.121 Node* p1 = basic_plus_adr(dest_klass, ek_offset); 56.122 Node* n1 = LoadKlassNode::make(_gvn, immutable_memory(), p1, TypeRawPtr::BOTTOM); 56.123 Node* dest_elem_klass = _gvn.transform(n1); 56.124 @@ -5004,7 +5019,16 @@ 56.125 // the membar also. 56.126 // 56.127 // Do not let reads from the cloned object float above the arraycopy. 56.128 - if (InsertMemBarAfterArraycopy || alloc != NULL) 56.129 + if (alloc != NULL) { 56.130 + // Do not let stores that initialize this object be reordered with 56.131 + // a subsequent store that would make this object accessible by 56.132 + // other threads. 56.133 + // Record what AllocateNode this StoreStore protects so that 56.134 + // escape analysis can go from the MemBarStoreStoreNode to the 56.135 + // AllocateNode and eliminate the MemBarStoreStoreNode if possible 56.136 + // based on the escape status of the AllocateNode. 56.137 + insert_mem_bar(Op_MemBarStoreStore, alloc->proj_out(AllocateNode::RawAddress)); 56.138 + } else if (InsertMemBarAfterArraycopy) 56.139 insert_mem_bar(Op_MemBarCPUOrder); 56.140 } 56.141 56.142 @@ -5308,7 +5332,7 @@ 56.143 // for the target array. This is an optimistic check. It will 56.144 // look in each non-null element's class, at the desired klass's 56.145 // super_check_offset, for the desired klass. 
56.146 - int sco_offset = Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc); 56.147 + int sco_offset = in_bytes(Klass::super_check_offset_offset()); 56.148 Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset); 56.149 Node* n3 = new(C, 3) LoadINode(NULL, memory(p3), p3, _gvn.type(p3)->is_ptr()); 56.150 Node* check_offset = ConvI2X(_gvn.transform(n3));
57.1 --- a/src/share/vm/opto/macro.cpp Tue Dec 27 12:38:49 2011 -0800 57.2 +++ b/src/share/vm/opto/macro.cpp Thu Dec 29 11:37:50 2011 -0800 57.3 @@ -1088,6 +1088,12 @@ 57.4 Node* klass_node = alloc->in(AllocateNode::KlassNode); 57.5 Node* initial_slow_test = alloc->in(AllocateNode::InitialTest); 57.6 57.7 + Node* storestore = alloc->storestore(); 57.8 + if (storestore != NULL) { 57.9 + // Break this link that is no longer useful and confuses register allocation 57.10 + storestore->set_req(MemBarNode::Precedent, top()); 57.11 + } 57.12 + 57.13 assert(ctrl != NULL, "must have control"); 57.14 // We need a Region and corresponding Phi's to merge the slow-path and fast-path results. 57.15 // they will not be used if "always_slow" is set 57.16 @@ -1289,10 +1295,66 @@ 57.17 0, new_alloc_bytes, T_LONG); 57.18 } 57.19 57.20 + InitializeNode* init = alloc->initialization(); 57.21 fast_oop_rawmem = initialize_object(alloc, 57.22 fast_oop_ctrl, fast_oop_rawmem, fast_oop, 57.23 klass_node, length, size_in_bytes); 57.24 57.25 + // If initialization is performed by an array copy, any required 57.26 + // MemBarStoreStore was already added. If the object does not 57.27 + // escape no need for a MemBarStoreStore. Otherwise we need a 57.28 + // MemBarStoreStore so that stores that initialize this object 57.29 + // can't be reordered with a subsequent store that makes this 57.30 + // object accessible by other threads. 57.31 + if (init == NULL || (!init->is_complete_with_arraycopy() && !init->does_not_escape())) { 57.32 + if (init == NULL || init->req() < InitializeNode::RawStores) { 57.33 + // No InitializeNode or no stores captured by zeroing 57.34 + // elimination. Simply add the MemBarStoreStore after object 57.35 + // initialization. 
57.36 + MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot, fast_oop_rawmem); 57.37 + transform_later(mb); 57.38 + 57.39 + mb->init_req(TypeFunc::Memory, fast_oop_rawmem); 57.40 + mb->init_req(TypeFunc::Control, fast_oop_ctrl); 57.41 + fast_oop_ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control); 57.42 + transform_later(fast_oop_ctrl); 57.43 + fast_oop_rawmem = new (C, 1) ProjNode(mb,TypeFunc::Memory); 57.44 + transform_later(fast_oop_rawmem); 57.45 + } else { 57.46 + // Add the MemBarStoreStore after the InitializeNode so that 57.47 + // all stores performing the initialization that were moved 57.48 + // before the InitializeNode happen before the storestore 57.49 + // barrier. 57.50 + 57.51 + Node* init_ctrl = init->proj_out(TypeFunc::Control); 57.52 + Node* init_mem = init->proj_out(TypeFunc::Memory); 57.53 + 57.54 + MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot); 57.55 + transform_later(mb); 57.56 + 57.57 + Node* ctrl = new (C, 1) ProjNode(init,TypeFunc::Control); 57.58 + transform_later(ctrl); 57.59 + Node* mem = new (C, 1) ProjNode(init,TypeFunc::Memory); 57.60 + transform_later(mem); 57.61 + 57.62 + // The MemBarStoreStore depends on control and memory coming 57.63 + // from the InitializeNode 57.64 + mb->init_req(TypeFunc::Memory, mem); 57.65 + mb->init_req(TypeFunc::Control, ctrl); 57.66 + 57.67 + ctrl = new (C, 1) ProjNode(mb,TypeFunc::Control); 57.68 + transform_later(ctrl); 57.69 + mem = new (C, 1) ProjNode(mb,TypeFunc::Memory); 57.70 + transform_later(mem); 57.71 + 57.72 + // All nodes that depended on the InitializeNode for control 57.73 + // and memory must now depend on the MemBarNode that itself 57.74 + // depends on the InitializeNode 57.75 + _igvn.replace_node(init_ctrl, ctrl); 57.76 + _igvn.replace_node(init_mem, mem); 57.77 + } 57.78 + } 57.79 + 57.80 if (C->env()->dtrace_extended_probes()) { 57.81 // Slow-path call 57.82 int size = TypeFunc::Parms + 2; 57.83 @@ -1326,6 +1388,7 @@ 57.84 
result_phi_rawmem->init_req(fast_result_path, fast_oop_rawmem); 57.85 } else { 57.86 slow_region = ctrl; 57.87 + result_phi_i_o = i_o; // Rename it to use in the following code. 57.88 } 57.89 57.90 // Generate slow-path call 57.91 @@ -1350,6 +1413,10 @@ 57.92 copy_call_debug_info((CallNode *) alloc, call); 57.93 if (!always_slow) { 57.94 call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON. 57.95 + } else { 57.96 + // Hook i_o projection to avoid its elimination during allocation 57.97 + // replacement (when only a slow call is generated). 57.98 + call->set_req(TypeFunc::I_O, result_phi_i_o); 57.99 } 57.100 _igvn.replace_node(alloc, call); 57.101 transform_later(call); 57.102 @@ -1366,8 +1433,10 @@ 57.103 // 57.104 extract_call_projections(call); 57.105 57.106 - // An allocate node has separate memory projections for the uses on the control and i_o paths 57.107 - // Replace uses of the control memory projection with result_phi_rawmem (unless we are only generating a slow call) 57.108 + // An allocate node has separate memory projections for the uses on 57.109 + // the control and i_o paths. Replace the control memory projection with 57.110 + // result_phi_rawmem (unless we are only generating a slow call when 57.111 + // both memory projections are combined) 57.112 if (!always_slow && _memproj_fallthrough != NULL) { 57.113 for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) { 57.114 Node *use = _memproj_fallthrough->fast_out(i); 57.115 @@ -1378,8 +1447,8 @@ 57.116 --i; 57.117 } 57.118 } 57.119 - // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete _memproj_catchall so 57.120 - // we end up with a call that has only 1 memory projection 57.121 + // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete 57.122 + // _memproj_catchall so we end up with a call that has only 1 memory projection. 
57.123 if (_memproj_catchall != NULL ) { 57.124 if (_memproj_fallthrough == NULL) { 57.125 _memproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::Memory); 57.126 @@ -1393,17 +1462,18 @@ 57.127 // back up iterator 57.128 --i; 57.129 } 57.130 + assert(_memproj_catchall->outcnt() == 0, "all uses must be deleted"); 57.131 + _igvn.remove_dead_node(_memproj_catchall); 57.132 } 57.133 57.134 - // An allocate node has separate i_o projections for the uses on the control and i_o paths 57.135 - // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call) 57.136 - if (_ioproj_fallthrough == NULL) { 57.137 - _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O); 57.138 - transform_later(_ioproj_fallthrough); 57.139 - } else if (!always_slow) { 57.140 + // An allocate node has separate i_o projections for the uses on the control 57.141 + // and i_o paths. Always replace the control i_o projection with result i_o 57.142 + // otherwise incoming i_o become dead when only a slow call is generated 57.143 + // (it is different from memory projections where both projections are 57.144 + // combined in such case). 57.145 + if (_ioproj_fallthrough != NULL) { 57.146 for (DUIterator_Fast imax, i = _ioproj_fallthrough->fast_outs(imax); i < imax; i++) { 57.147 Node *use = _ioproj_fallthrough->fast_out(i); 57.148 - 57.149 _igvn.hash_delete(use); 57.150 imax -= replace_input(use, _ioproj_fallthrough, result_phi_i_o); 57.151 _igvn._worklist.push(use); 57.152 @@ -1411,9 +1481,13 @@ 57.153 --i; 57.154 } 57.155 } 57.156 - // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete _ioproj_catchall so 57.157 - // we end up with a call that has only 1 control projection 57.158 + // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete 57.159 + // _ioproj_catchall so we end up with a call that has only 1 i_o projection. 
57.160 if (_ioproj_catchall != NULL ) { 57.161 + if (_ioproj_fallthrough == NULL) { 57.162 + _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O); 57.163 + transform_later(_ioproj_fallthrough); 57.164 + } 57.165 for (DUIterator_Fast imax, i = _ioproj_catchall->fast_outs(imax); i < imax; i++) { 57.166 Node *use = _ioproj_catchall->fast_out(i); 57.167 _igvn.hash_delete(use); 57.168 @@ -1422,11 +1496,18 @@ 57.169 // back up iterator 57.170 --i; 57.171 } 57.172 + assert(_ioproj_catchall->outcnt() == 0, "all uses must be deleted"); 57.173 + _igvn.remove_dead_node(_ioproj_catchall); 57.174 } 57.175 57.176 // if we generated only a slow call, we are done 57.177 - if (always_slow) 57.178 + if (always_slow) { 57.179 + // Now we can unhook i_o. 57.180 + call->set_req(TypeFunc::I_O, top()); 57.181 + if (result_phi_i_o->outcnt() == 0) 57.182 + _igvn.remove_dead_node(result_phi_i_o); 57.183 return; 57.184 + } 57.185 57.186 57.187 if (_fallthroughcatchproj != NULL) { 57.188 @@ -1470,7 +1551,7 @@ 57.189 Node* mark_node = NULL; 57.190 // For now only enable fast locking for non-array types 57.191 if (UseBiasedLocking && (length == NULL)) { 57.192 - mark_node = make_load(control, rawmem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeRawPtr::BOTTOM, T_ADDRESS); 57.193 + mark_node = make_load(control, rawmem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeRawPtr::BOTTOM, T_ADDRESS); 57.194 } else { 57.195 mark_node = makecon(TypeRawPtr::make((address)markOopDesc::prototype())); 57.196 } 57.197 @@ -1958,7 +2039,7 @@ 57.198 #endif 57.199 klass_node->init_req(0, ctrl); 57.200 } 57.201 - Node *proto_node = make_load(ctrl, mem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeX_X, TypeX_X->basic_type()); 57.202 + Node *proto_node = make_load(ctrl, mem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeX_X, TypeX_X->basic_type()); 57.203 57.204 Node* thread = transform_later(new (C, 1) 
ThreadLocalNode()); 57.205 Node* cast_thread = transform_later(new (C, 2) CastP2XNode(ctrl, thread));
58.1 --- a/src/share/vm/opto/matcher.cpp Tue Dec 27 12:38:49 2011 -0800 58.2 +++ b/src/share/vm/opto/matcher.cpp Thu Dec 29 11:37:50 2011 -0800 58.3 @@ -1365,31 +1365,36 @@ 58.4 58.5 const Type *t = m->bottom_type(); 58.6 58.7 - if( t->singleton() ) { 58.8 + if (t->singleton()) { 58.9 // Never force constants into registers. Allow them to match as 58.10 // constants or registers. Copies of the same value will share 58.11 // the same register. See find_shared_node. 58.12 return false; 58.13 } else { // Not a constant 58.14 // Stop recursion if they have different Controls. 58.15 - // Slot 0 of constants is not really a Control. 58.16 - if( control && m->in(0) && control != m->in(0) ) { 58.17 + Node* m_control = m->in(0); 58.18 + // Control of load's memory can post-dominates load's control. 58.19 + // So use it since load can't float above its memory. 58.20 + Node* mem_control = (m->is_Load()) ? m->in(MemNode::Memory)->in(0) : NULL; 58.21 + if (control && m_control && control != m_control && control != mem_control) { 58.22 58.23 // Actually, we can live with the most conservative control we 58.24 // find, if it post-dominates the others. This allows us to 58.25 // pick up load/op/store trees where the load can float a little 58.26 // above the store. 58.27 Node *x = control; 58.28 - const uint max_scan = 6; // Arbitrary scan cutoff 58.29 + const uint max_scan = 6; // Arbitrary scan cutoff 58.30 uint j; 58.31 - for( j=0; j<max_scan; j++ ) { 58.32 - if( x->is_Region() ) // Bail out at merge points 58.33 + for (j=0; j<max_scan; j++) { 58.34 + if (x->is_Region()) // Bail out at merge points 58.35 return true; 58.36 x = x->in(0); 58.37 - if( x == m->in(0) ) // Does 'control' post-dominate 58.38 + if (x == m_control) // Does 'control' post-dominate 58.39 break; // m->in(0)? If so, we can use it 58.40 + if (x == mem_control) // Does 'control' post-dominate 58.41 + break; // mem_control? 
If so, we can use it 58.42 } 58.43 - if( j == max_scan ) // No post-domination before scan end? 58.44 + if (j == max_scan) // No post-domination before scan end? 58.45 return true; // Then break the match tree up 58.46 } 58.47 if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) {
59.1 --- a/src/share/vm/opto/memnode.cpp Tue Dec 27 12:38:49 2011 -0800 59.2 +++ b/src/share/vm/opto/memnode.cpp Thu Dec 29 11:37:50 2011 -0800 59.3 @@ -1473,19 +1473,19 @@ 59.4 const Type* 59.5 LoadNode::load_array_final_field(const TypeKlassPtr *tkls, 59.6 ciKlass* klass) const { 59.7 - if (tkls->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) { 59.8 + if (tkls->offset() == in_bytes(Klass::modifier_flags_offset())) { 59.9 // The field is Klass::_modifier_flags. Return its (constant) value. 59.10 // (Folds up the 2nd indirection in aClassConstant.getModifiers().) 59.11 assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags"); 59.12 return TypeInt::make(klass->modifier_flags()); 59.13 } 59.14 - if (tkls->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) { 59.15 + if (tkls->offset() == in_bytes(Klass::access_flags_offset())) { 59.16 // The field is Klass::_access_flags. Return its (constant) value. 59.17 // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).) 59.18 assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags"); 59.19 return TypeInt::make(klass->access_flags()); 59.20 } 59.21 - if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)) { 59.22 + if (tkls->offset() == in_bytes(Klass::layout_helper_offset())) { 59.23 // The field is Klass::_layout_helper. Return its constant value if known. 59.24 assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper"); 59.25 return TypeInt::make(klass->layout_helper()); 59.26 @@ -1636,14 +1636,14 @@ 59.27 // We are loading a field from a Klass metaobject whose identity 59.28 // is known at compile time (the type is "exact" or "precise"). 59.29 // Check for fields we know are maintained as constants by the VM. 
59.30 - if (tkls->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) { 59.31 + if (tkls->offset() == in_bytes(Klass::super_check_offset_offset())) { 59.32 // The field is Klass::_super_check_offset. Return its (constant) value. 59.33 // (Folds up type checking code.) 59.34 assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset"); 59.35 return TypeInt::make(klass->super_check_offset()); 59.36 } 59.37 // Compute index into primary_supers array 59.38 - juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop); 59.39 + juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop); 59.40 // Check for overflowing; use unsigned compare to handle the negative case. 59.41 if( depth < ciKlass::primary_super_limit() ) { 59.42 // The field is an element of Klass::_primary_supers. Return its (constant) value. 59.43 @@ -1654,14 +1654,14 @@ 59.44 } 59.45 const Type* aift = load_array_final_field(tkls, klass); 59.46 if (aift != NULL) return aift; 59.47 - if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) + (int)sizeof(oopDesc) 59.48 + if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) 59.49 && klass->is_array_klass()) { 59.50 // The field is arrayKlass::_component_mirror. Return its (constant) value. 59.51 // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.) 59.52 assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror"); 59.53 return TypeInstPtr::make(klass->as_array_klass()->component_mirror()); 59.54 } 59.55 - if (tkls->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) { 59.56 + if (tkls->offset() == in_bytes(Klass::java_mirror_offset())) { 59.57 // The field is Klass::_java_mirror. Return its (constant) value. 59.58 // (Folds up the 2nd indirection in anObjConstant.getClass().) 
59.59 assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror"); 59.60 @@ -1679,7 +1679,7 @@ 59.61 if( inner->is_instance_klass() && 59.62 !inner->as_instance_klass()->flags().is_interface() ) { 59.63 // Compute index into primary_supers array 59.64 - juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop); 59.65 + juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(klassOop); 59.66 // Check for overflowing; use unsigned compare to handle the negative case. 59.67 if( depth < ciKlass::primary_super_limit() && 59.68 depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case 59.69 @@ -1695,7 +1695,7 @@ 59.70 // If the type is enough to determine that the thing is not an array, 59.71 // we can give the layout_helper a positive interval type. 59.72 // This will help short-circuit some reflective code. 59.73 - if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc) 59.74 + if (tkls->offset() == in_bytes(Klass::layout_helper_offset()) 59.75 && !klass->is_array_klass() // not directly typed as an array 59.76 && !klass->is_interface() // specifically not Serializable & Cloneable 59.77 && !klass->is_java_lang_Object() // not the supertype of all T[] 59.78 @@ -1938,7 +1938,7 @@ 59.79 if( !klass->is_loaded() ) 59.80 return _type; // Bail out if not loaded 59.81 if( klass->is_obj_array_klass() && 59.82 - (uint)tkls->offset() == objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)) { 59.83 + tkls->offset() == in_bytes(objArrayKlass::element_klass_offset())) { 59.84 ciKlass* elem = klass->as_obj_array_klass()->element_klass(); 59.85 // // Always returning precise element type is incorrect, 59.86 // // e.g., element type could be object and array may contain strings 59.87 @@ -1949,7 +1949,7 @@ 59.88 return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/); 59.89 } 59.90 if( klass->is_instance_klass() && 
tkls->klass_is_exact() && 59.91 - (uint)tkls->offset() == Klass::super_offset_in_bytes() + sizeof(oopDesc)) { 59.92 + tkls->offset() == in_bytes(Klass::super_offset())) { 59.93 ciKlass* sup = klass->as_instance_klass()->super(); 59.94 // The field is Klass::_super. Return its (constant) value. 59.95 // (Folds up the 2nd indirection in aClassConstant.getSuperClass().) 59.96 @@ -2013,11 +2013,11 @@ 59.97 tkls->klass()->is_array_klass()) 59.98 && adr2->is_AddP() 59.99 ) { 59.100 - int mirror_field = Klass::java_mirror_offset_in_bytes(); 59.101 + int mirror_field = in_bytes(Klass::java_mirror_offset()); 59.102 if (offset == java_lang_Class::array_klass_offset_in_bytes()) { 59.103 mirror_field = in_bytes(arrayKlass::component_mirror_offset()); 59.104 } 59.105 - if (tkls->offset() == mirror_field + (int)sizeof(oopDesc)) { 59.106 + if (tkls->offset() == mirror_field) { 59.107 return adr2->in(AddPNode::Base); 59.108 } 59.109 } 59.110 @@ -2721,6 +2721,7 @@ 59.111 case Op_MemBarVolatile: return new(C, len) MemBarVolatileNode(C, atp, pn); 59.112 case Op_MemBarCPUOrder: return new(C, len) MemBarCPUOrderNode(C, atp, pn); 59.113 case Op_Initialize: return new(C, len) InitializeNode(C, atp, pn); 59.114 + case Op_MemBarStoreStore: return new(C, len) MemBarStoreStoreNode(C, atp, pn); 59.115 default: ShouldNotReachHere(); return NULL; 59.116 } 59.117 } 59.118 @@ -2870,7 +2871,7 @@ 59.119 59.120 //---------------------------InitializeNode------------------------------------ 59.121 InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop) 59.122 - : _is_complete(Incomplete), 59.123 + : _is_complete(Incomplete), _does_not_escape(false), 59.124 MemBarNode(C, adr_type, rawoop) 59.125 { 59.126 init_class_id(Class_Initialize);
60.1 --- a/src/share/vm/opto/memnode.hpp Tue Dec 27 12:38:49 2011 -0800 60.2 +++ b/src/share/vm/opto/memnode.hpp Thu Dec 29 11:37:50 2011 -0800 60.3 @@ -918,6 +918,15 @@ 60.4 virtual int Opcode() const; 60.5 }; 60.6 60.7 +class MemBarStoreStoreNode: public MemBarNode { 60.8 +public: 60.9 + MemBarStoreStoreNode(Compile* C, int alias_idx, Node* precedent) 60.10 + : MemBarNode(C, alias_idx, precedent) { 60.11 + init_class_id(Class_MemBarStoreStore); 60.12 + } 60.13 + virtual int Opcode() const; 60.14 +}; 60.15 + 60.16 // Ordering between a volatile store and a following volatile load. 60.17 // Requires multi-CPU visibility? 60.18 class MemBarVolatileNode: public MemBarNode { 60.19 @@ -950,6 +959,8 @@ 60.20 }; 60.21 int _is_complete; 60.22 60.23 + bool _does_not_escape; 60.24 + 60.25 public: 60.26 enum { 60.27 Control = TypeFunc::Control, 60.28 @@ -989,6 +1000,9 @@ 60.29 void set_complete(PhaseGVN* phase); 60.30 void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; } 60.31 60.32 + bool does_not_escape() { return _does_not_escape; } 60.33 + void set_does_not_escape() { _does_not_escape = true; } 60.34 + 60.35 #ifdef ASSERT 60.36 // ensure all non-degenerate stores are ordered and non-overlapping 60.37 bool stores_are_sane(PhaseTransform* phase);
61.1 --- a/src/share/vm/opto/node.hpp Tue Dec 27 12:38:49 2011 -0800 61.2 +++ b/src/share/vm/opto/node.hpp Thu Dec 29 11:37:50 2011 -0800 61.3 @@ -97,6 +97,7 @@ 61.4 class MachTempNode; 61.5 class Matcher; 61.6 class MemBarNode; 61.7 +class MemBarStoreStoreNode; 61.8 class MemNode; 61.9 class MergeMemNode; 61.10 class MultiNode; 61.11 @@ -564,7 +565,8 @@ 61.12 DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2) 61.13 DEFINE_CLASS_ID(Start, Multi, 2) 61.14 DEFINE_CLASS_ID(MemBar, Multi, 3) 61.15 - DEFINE_CLASS_ID(Initialize, MemBar, 0) 61.16 + DEFINE_CLASS_ID(Initialize, MemBar, 0) 61.17 + DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1) 61.18 61.19 DEFINE_CLASS_ID(Mach, Node, 1) 61.20 DEFINE_CLASS_ID(MachReturn, Mach, 0) 61.21 @@ -744,6 +746,7 @@ 61.22 DEFINE_CLASS_QUERY(MachTemp) 61.23 DEFINE_CLASS_QUERY(Mem) 61.24 DEFINE_CLASS_QUERY(MemBar) 61.25 + DEFINE_CLASS_QUERY(MemBarStoreStore) 61.26 DEFINE_CLASS_QUERY(MergeMem) 61.27 DEFINE_CLASS_QUERY(Multi) 61.28 DEFINE_CLASS_QUERY(MultiBranch)
62.1 --- a/src/share/vm/opto/parse1.cpp Tue Dec 27 12:38:49 2011 -0800 62.2 +++ b/src/share/vm/opto/parse1.cpp Thu Dec 29 11:37:50 2011 -0800 62.3 @@ -1911,7 +1911,7 @@ 62.4 Node* klass_addr = basic_plus_adr( receiver, receiver, oopDesc::klass_offset_in_bytes() ); 62.5 Node* klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), klass_addr, TypeInstPtr::KLASS) ); 62.6 62.7 - Node* access_flags_addr = basic_plus_adr(klass, klass, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)); 62.8 + Node* access_flags_addr = basic_plus_adr(klass, klass, in_bytes(Klass::access_flags_offset())); 62.9 Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT); 62.10 62.11 Node* mask = _gvn.transform(new (C, 3) AndINode(access_flags, intcon(JVM_ACC_HAS_FINALIZER)));
63.1 --- a/src/share/vm/opto/parseHelper.cpp Tue Dec 27 12:38:49 2011 -0800 63.2 +++ b/src/share/vm/opto/parseHelper.cpp Thu Dec 29 11:37:50 2011 -0800 63.3 @@ -200,7 +200,7 @@ 63.4 // Come here for polymorphic array klasses 63.5 63.6 // Extract the array element class 63.7 - int element_klass_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc); 63.8 + int element_klass_offset = in_bytes(objArrayKlass::element_klass_offset()); 63.9 Node *p2 = basic_plus_adr(array_klass, array_klass, element_klass_offset); 63.10 Node *a_e_klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p2, tak) ); 63.11 63.12 @@ -220,7 +220,7 @@ 63.13 _gvn.set_type(merge, Type::CONTROL); 63.14 Node* kls = makecon(TypeKlassPtr::make(klass)); 63.15 63.16 - Node* init_thread_offset = _gvn.MakeConX(instanceKlass::init_thread_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()); 63.17 + Node* init_thread_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_thread_offset())); 63.18 Node* adr_node = basic_plus_adr(kls, kls, init_thread_offset); 63.19 Node* init_thread = make_load(NULL, adr_node, TypeRawPtr::BOTTOM, T_ADDRESS); 63.20 Node *tst = Bool( CmpP( init_thread, cur_thread), BoolTest::eq); 63.21 @@ -228,7 +228,7 @@ 63.22 set_control(IfTrue(iff)); 63.23 merge->set_req(1, IfFalse(iff)); 63.24 63.25 - Node* init_state_offset = _gvn.MakeConX(instanceKlass::init_state_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()); 63.26 + Node* init_state_offset = _gvn.MakeConX(in_bytes(instanceKlass::init_state_offset())); 63.27 adr_node = basic_plus_adr(kls, kls, init_state_offset); 63.28 Node* init_state = make_load(NULL, adr_node, TypeInt::INT, T_INT); 63.29 Node* being_init = _gvn.intcon(instanceKlass::being_initialized);
64.1 --- a/src/share/vm/runtime/advancedThresholdPolicy.cpp Tue Dec 27 12:38:49 2011 -0800 64.2 +++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp Thu Dec 29 11:37:50 2011 -0800 64.3 @@ -156,20 +156,19 @@ 64.4 // Called with the queue locked and with at least one element 64.5 CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) { 64.6 CompileTask *max_task = NULL; 64.7 - methodOop max_method; 64.8 + methodHandle max_method; 64.9 jlong t = os::javaTimeMillis(); 64.10 // Iterate through the queue and find a method with a maximum rate. 64.11 for (CompileTask* task = compile_queue->first(); task != NULL;) { 64.12 CompileTask* next_task = task->next(); 64.13 - methodOop method = (methodOop)JNIHandles::resolve(task->method_handle()); 64.14 - methodDataOop mdo = method->method_data(); 64.15 - update_rate(t, method); 64.16 + methodHandle method = (methodOop)JNIHandles::resolve(task->method_handle()); 64.17 + update_rate(t, method()); 64.18 if (max_task == NULL) { 64.19 max_task = task; 64.20 max_method = method; 64.21 } else { 64.22 // If a method has been stale for some time, remove it from the queue. 
64.23 - if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) { 64.24 + if (is_stale(t, TieredCompileTaskTimeout, method()) && !is_old(method())) { 64.25 if (PrintTieredEvents) { 64.26 print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level()); 64.27 } 64.28 @@ -181,7 +180,7 @@ 64.29 } 64.30 64.31 // Select a method with a higher rate 64.32 - if (compare_methods(method, max_method)) { 64.33 + if (compare_methods(method(), max_method())) { 64.34 max_task = task; 64.35 max_method = method; 64.36 } 64.37 @@ -190,7 +189,7 @@ 64.38 } 64.39 64.40 if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile 64.41 - && is_method_profiled(max_method)) { 64.42 + && is_method_profiled(max_method())) { 64.43 max_task->set_comp_level(CompLevel_limited_profile); 64.44 if (PrintTieredEvents) { 64.45 print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
65.1 --- a/src/share/vm/runtime/arguments.cpp Tue Dec 27 12:38:49 2011 -0800 65.2 +++ b/src/share/vm/runtime/arguments.cpp Thu Dec 29 11:37:50 2011 -0800 65.3 @@ -1000,6 +1000,13 @@ 65.4 UseInterpreter = false; 65.5 BackgroundCompilation = false; 65.6 ClipInlining = false; 65.7 + // Be much more aggressive in tiered mode with -Xcomp and exercise C2 more. 65.8 + // We will first compile a level 3 version (C1 with full profiling), then do one invocation of it and 65.9 + // compile a level 4 (C2) and then continue executing it. 65.10 + if (TieredCompilation) { 65.11 + Tier3InvokeNotifyFreqLog = 0; 65.12 + Tier4InvocationThreshold = 0; 65.13 + } 65.14 break; 65.15 } 65.16 }
66.1 --- a/src/share/vm/runtime/globals.hpp Tue Dec 27 12:38:49 2011 -0800 66.2 +++ b/src/share/vm/runtime/globals.hpp Thu Dec 29 11:37:50 2011 -0800 66.3 @@ -527,6 +527,9 @@ 66.4 product(intx, UseSSE, 99, \ 66.5 "Highest supported SSE instructions set on x86/x64") \ 66.6 \ 66.7 + product(intx, UseAVX, 99, \ 66.8 + "Highest supported AVX instructions set on x86/x64") \ 66.9 + \ 66.10 product(intx, UseVIS, 99, \ 66.11 "Highest supported VIS instructions set on Sparc") \ 66.12 \
67.1 --- a/src/share/vm/shark/sharkIntrinsics.cpp Tue Dec 27 12:38:49 2011 -0800 67.2 +++ b/src/share/vm/shark/sharkIntrinsics.cpp Thu Dec 29 11:37:50 2011 -0800 67.3 @@ -213,17 +213,11 @@ 67.4 SharkType::oop_type(), 67.5 "klass"); 67.6 67.7 - Value *klass_part = builder()->CreateAddressOfStructEntry( 67.8 - klass, 67.9 - in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()), 67.10 - SharkType::klass_type(), 67.11 - "klass_part"); 67.12 - 67.13 state()->push( 67.14 SharkValue::create_jobject( 67.15 builder()->CreateValueOfStructEntry( 67.16 - klass_part, 67.17 - in_ByteSize(Klass::java_mirror_offset_in_bytes()), 67.18 + klass, 67.19 + Klass::java_mirror_offset(), 67.20 SharkType::oop_type(), 67.21 "java_mirror"), 67.22 true));
68.1 --- a/src/share/vm/shark/sharkTopLevelBlock.cpp Tue Dec 27 12:38:49 2011 -0800 68.2 +++ b/src/share/vm/shark/sharkTopLevelBlock.cpp Thu Dec 29 11:37:50 2011 -0800 68.3 @@ -745,15 +745,9 @@ 68.4 SharkType::oop_type(), 68.5 "klass"); 68.6 68.7 - Value *klass_part = builder()->CreateAddressOfStructEntry( 68.8 + Value *access_flags = builder()->CreateValueOfStructEntry( 68.9 klass, 68.10 - in_ByteSize(klassOopDesc::klass_part_offset_in_bytes()), 68.11 - SharkType::klass_type(), 68.12 - "klass_part"); 68.13 - 68.14 - Value *access_flags = builder()->CreateValueOfStructEntry( 68.15 - klass_part, 68.16 - in_ByteSize(Klass::access_flags_offset_in_bytes()), 68.17 + Klass::access_flags_offset(), 68.18 SharkType::jint_type(), 68.19 "access_flags"); 68.20
69.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 69.2 +++ b/test/compiler/7123108/Test7123108.java Thu Dec 29 11:37:50 2011 -0800 69.3 @@ -0,0 +1,60 @@ 69.4 +/* 69.5 + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 69.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 69.7 + * 69.8 + * This code is free software; you can redistribute it and/or modify it 69.9 + * under the terms of the GNU General Public License version 2 only, as 69.10 + * published by the Free Software Foundation. 69.11 + * 69.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 69.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 69.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 69.15 + * version 2 for more details (a copy is included in the LICENSE file that 69.16 + * accompanied this code). 69.17 + * 69.18 + * You should have received a copy of the GNU General Public License version 69.19 + * 2 along with this work; if not, write to the Free Software Foundation, 69.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 69.21 + * 69.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 69.23 + * or visit www.oracle.com if you need additional information or have any 69.24 + * questions. 
69.25 + * 69.26 + */ 69.27 + 69.28 +/** 69.29 + * @test 69.30 + * @bug 7123108 69.31 + * @summary C1 crashes with assert(if_state != NULL) failed: states do not match up 69.32 + * 69.33 + * @run main/othervm -Xcomp Test7123108 69.34 + */ 69.35 + 69.36 +public class Test7123108 { 69.37 + 69.38 + static class Test_Class_0 { 69.39 + final static byte var_2 = 67; 69.40 + byte var_3; 69.41 + } 69.42 + 69.43 + Object var_25 = "kgfpyhcms"; 69.44 + static long var_27 = 6899666748616086528L; 69.45 + 69.46 + static float func_1() 69.47 + { 69.48 + return 0.0F; 69.49 + } 69.50 + 69.51 + private void test() 69.52 + { 69.53 + "dlwq".charAt(((short)'x' > var_27 | func_1() <= (((Test_Class_0)var_25).var_3) ? true : true) ? Test_Class_0.var_2 & (short)-1.1173839E38F : 'Y'); 69.54 + } 69.55 + 69.56 + public static void main(String[] args) 69.57 + { 69.58 + Test7123108 t = new Test7123108(); 69.59 + try { 69.60 + t.test(); 69.61 + } catch (Throwable e) { } 69.62 + } 69.63 +}