Tue, 26 Nov 2013 18:38:19 -0800
8028515: PPC64 (part 113.2): opto: Introduce LoadFence/StoreFence.
Summary: Use new nodes for loadFence/storeFence intrinsics in C2.
Reviewed-by: kvn, dholmes
1.1 --- a/make/jprt.properties Fri Nov 22 12:14:09 2013 -0800 1.2 +++ b/make/jprt.properties Tue Nov 26 18:38:19 2013 -0800 1.3 @@ -329,9 +329,81 @@ 1.4 1.5 # The complete list of test targets for jprt 1.6 # Note: no PPC or ARM tests at this stage 1.7 +jprt.my.linux.armvfpsflt.test.targets.embedded = \ 1.8 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-scimark, \ 1.9 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_default, \ 1.10 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_SerialGC, \ 1.11 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParallelGC, \ 1.12 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParNewGC, \ 1.13 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_CMS, \ 1.14 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_G1, \ 1.15 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParOldGC, \ 1.16 + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_default, \ 1.17 + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_SerialGC, \ 1.18 + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_ParallelGC, \ 1.19 + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_ParNewGC, \ 1.20 + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_CMS, \ 1.21 + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_G1, \ 1.22 + linux_armvfpsflt_2.6-productEmb-{c1|c2}-GCOld_ParOldGC, \ 1.23 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_default, \ 1.24 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c2-jbb_default_nontiered, \ 1.25 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParallelGC, \ 1.26 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_CMS, \ 1.27 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_G1, \ 1.28 + linux_armvfpsflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParOldGC 1.29 + 1.30 +# QEMU Emulators for ARM VFP HFLT 1.31 +jprt.my.linux.armvfphflt.test.targets.embedded = \ 1.32 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-scimark, 
\ 1.33 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_default, \ 1.34 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_SerialGC, \ 1.35 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParallelGC, \ 1.36 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParNewGC, \ 1.37 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_CMS, \ 1.38 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_G1, \ 1.39 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParOldGC, \ 1.40 + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_default, \ 1.41 + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_SerialGC, \ 1.42 + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_ParallelGC, \ 1.43 + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_ParNewGC, \ 1.44 + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_CMS, \ 1.45 + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_G1, \ 1.46 + linux_armvfphflt_2.6-productEmb-{c1|c2}-GCOld_ParOldGC, \ 1.47 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_default, \ 1.48 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c2-jbb_default_nontiered, \ 1.49 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParallelGC, \ 1.50 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_CMS, \ 1.51 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_G1, \ 1.52 + linux_armvfphflt_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParOldGC 1.53 + 1.54 +jprt.my.linux.ppc.test.targets.embedded = \ 1.55 + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-scimark, \ 1.56 + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_default, \ 1.57 + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_SerialGC, \ 1.58 + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParallelGC, \ 1.59 + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParNewGC, \ 1.60 + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_CMS, \ 1.61 + 
linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_G1, \ 1.62 + linux_ppc_2.6-{productEmb|fastdebugEmb}-{c1|c2}-GCBasher_ParOldGC, \ 1.63 + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_default, \ 1.64 + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_SerialGC, \ 1.65 + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_ParallelGC, \ 1.66 + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_ParNewGC, \ 1.67 + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_CMS, \ 1.68 + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_G1, \ 1.69 + linux_ppc_2.6-productEmb-{c1|c2}-GCOld_ParOldGC, \ 1.70 + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_default, \ 1.71 + linux_ppc_2.6-{productEmb|fastdebugEmb}-c2-jbb_default_nontiered, \ 1.72 + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParallelGC, \ 1.73 + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_CMS, \ 1.74 + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_G1, \ 1.75 + linux_ppc_2.6-{productEmb|fastdebugEmb}-c1-jbb_ParOldGC 1.76 1.77 jprt.test.targets.standard = \ 1.78 ${jprt.my.linux.i586.test.targets.embedded}, \ 1.79 + ${jprt.my.linux.armvfpsflt.test.targets.embedded}, \ 1.80 + ${jprt.my.linux.armvfphflt.test.targets.embedded}, \ 1.81 + ${jprt.my.linux.ppc.test.targets.embedded}, \ 1.82 ${jprt.my.solaris.sparcv9.test.targets}, \ 1.83 ${jprt.my.solaris.x64.test.targets}, \ 1.84 ${jprt.my.linux.i586.test.targets}, \
2.1 --- a/src/cpu/sparc/vm/sparc.ad Fri Nov 22 12:14:09 2013 -0800 2.2 +++ b/src/cpu/sparc/vm/sparc.ad Tue Nov 26 18:38:19 2013 -0800 2.3 @@ -6651,6 +6651,7 @@ 2.4 2.5 instruct membar_acquire() %{ 2.6 match(MemBarAcquire); 2.7 + match(LoadFence); 2.8 ins_cost(4*MEMORY_REF_COST); 2.9 2.10 size(0); 2.11 @@ -6671,6 +6672,7 @@ 2.12 2.13 instruct membar_release() %{ 2.14 match(MemBarRelease); 2.15 + match(StoreFence); 2.16 ins_cost(4*MEMORY_REF_COST); 2.17 2.18 size(0);
3.1 --- a/src/cpu/x86/vm/x86_32.ad Fri Nov 22 12:14:09 2013 -0800 3.2 +++ b/src/cpu/x86/vm/x86_32.ad Tue Nov 26 18:38:19 2013 -0800 3.3 @@ -7096,6 +7096,7 @@ 3.4 3.5 instruct membar_acquire() %{ 3.6 match(MemBarAcquire); 3.7 + match(LoadFence); 3.8 ins_cost(400); 3.9 3.10 size(0); 3.11 @@ -7116,6 +7117,7 @@ 3.12 3.13 instruct membar_release() %{ 3.14 match(MemBarRelease); 3.15 + match(StoreFence); 3.16 ins_cost(400); 3.17 3.18 size(0);
4.1 --- a/src/cpu/x86/vm/x86_64.ad Fri Nov 22 12:14:09 2013 -0800 4.2 +++ b/src/cpu/x86/vm/x86_64.ad Tue Nov 26 18:38:19 2013 -0800 4.3 @@ -6345,6 +6345,7 @@ 4.4 instruct membar_acquire() 4.5 %{ 4.6 match(MemBarAcquire); 4.7 + match(LoadFence); 4.8 ins_cost(0); 4.9 4.10 size(0); 4.11 @@ -6367,6 +6368,7 @@ 4.12 instruct membar_release() 4.13 %{ 4.14 match(MemBarRelease); 4.15 + match(StoreFence); 4.16 ins_cost(0); 4.17 4.18 size(0);
5.1 --- a/src/share/vm/adlc/formssel.cpp Fri Nov 22 12:14:09 2013 -0800 5.2 +++ b/src/share/vm/adlc/formssel.cpp Tue Nov 26 18:38:19 2013 -0800 5.3 @@ -648,6 +648,8 @@ 5.4 if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true; 5.5 if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true; 5.6 if( strcmp(_matrule->_opType,"MemBarStoreStore") == 0 ) return true; 5.7 + if( strcmp(_matrule->_opType,"StoreFence") == 0 ) return true; 5.8 + if( strcmp(_matrule->_opType,"LoadFence") == 0 ) return true; 5.9 5.10 return false; 5.11 } 5.12 @@ -4054,13 +4056,15 @@ 5.13 bool MatchRule::is_ideal_membar() const { 5.14 if( !_opType ) return false; 5.15 return 5.16 - !strcmp(_opType,"MemBarAcquire" ) || 5.17 - !strcmp(_opType,"MemBarRelease" ) || 5.18 + !strcmp(_opType,"MemBarAcquire") || 5.19 + !strcmp(_opType,"MemBarRelease") || 5.20 !strcmp(_opType,"MemBarAcquireLock") || 5.21 !strcmp(_opType,"MemBarReleaseLock") || 5.22 - !strcmp(_opType,"MemBarVolatile" ) || 5.23 - !strcmp(_opType,"MemBarCPUOrder" ) || 5.24 - !strcmp(_opType,"MemBarStoreStore" ); 5.25 + !strcmp(_opType,"LoadFence" ) || 5.26 + !strcmp(_opType,"StoreFence") || 5.27 + !strcmp(_opType,"MemBarVolatile") || 5.28 + !strcmp(_opType,"MemBarCPUOrder") || 5.29 + !strcmp(_opType,"MemBarStoreStore"); 5.30 } 5.31 5.32 bool MatchRule::is_ideal_loadPC() const {
6.1 --- a/src/share/vm/opto/classes.hpp Fri Nov 22 12:14:09 2013 -0800 6.2 +++ b/src/share/vm/opto/classes.hpp Tue Nov 26 18:38:19 2013 -0800 6.3 @@ -175,9 +175,11 @@ 6.4 macro(MathExactL) 6.5 macro(MaxI) 6.6 macro(MemBarAcquire) 6.7 +macro(LoadFence) 6.8 macro(MemBarAcquireLock) 6.9 macro(MemBarCPUOrder) 6.10 macro(MemBarRelease) 6.11 +macro(StoreFence) 6.12 macro(MemBarReleaseLock) 6.13 macro(MemBarVolatile) 6.14 macro(MemBarStoreStore)
7.1 --- a/src/share/vm/opto/library_call.cpp Fri Nov 22 12:14:09 2013 -0800 7.2 +++ b/src/share/vm/opto/library_call.cpp Tue Nov 26 18:38:19 2013 -0800 7.3 @@ -3105,10 +3105,10 @@ 7.4 insert_mem_bar(Op_MemBarCPUOrder); 7.5 switch(id) { 7.6 case vmIntrinsics::_loadFence: 7.7 - insert_mem_bar(Op_MemBarAcquire); 7.8 + insert_mem_bar(Op_LoadFence); 7.9 return true; 7.10 case vmIntrinsics::_storeFence: 7.11 - insert_mem_bar(Op_MemBarRelease); 7.12 + insert_mem_bar(Op_StoreFence); 7.13 return true; 7.14 case vmIntrinsics::_fullFence: 7.15 insert_mem_bar(Op_MemBarVolatile);
8.1 --- a/src/share/vm/opto/matcher.cpp Fri Nov 22 12:14:09 2013 -0800 8.2 +++ b/src/share/vm/opto/matcher.cpp Tue Nov 26 18:38:19 2013 -0800 8.3 @@ -2333,7 +2333,7 @@ 8.4 bool Matcher::post_store_load_barrier(const Node* vmb) { 8.5 Compile* C = Compile::current(); 8.6 assert(vmb->is_MemBar(), ""); 8.7 - assert(vmb->Opcode() != Op_MemBarAcquire, ""); 8.8 + assert(vmb->Opcode() != Op_MemBarAcquire && vmb->Opcode() != Op_LoadFence, ""); 8.9 const MemBarNode* membar = vmb->as_MemBar(); 8.10 8.11 // Get the Ideal Proj node, ctrl, that can be used to iterate forward 8.12 @@ -2378,7 +2378,7 @@ 8.13 if (x->is_MemBar()) { 8.14 // We must retain this membar if there is an upcoming volatile 8.15 // load, which will be followed by acquire membar. 8.16 - if (xop == Op_MemBarAcquire) { 8.17 + if (xop == Op_MemBarAcquire || xop == Op_LoadFence) { 8.18 return false; 8.19 } else { 8.20 // For other kinds of barriers, check by pretending we
9.1 --- a/src/share/vm/opto/memnode.cpp Fri Nov 22 12:14:09 2013 -0800 9.2 +++ b/src/share/vm/opto/memnode.cpp Tue Nov 26 18:38:19 2013 -0800 9.3 @@ -1002,9 +1002,13 @@ 9.4 // a synchronized region. 9.5 while (current->is_Proj()) { 9.6 int opc = current->in(0)->Opcode(); 9.7 - if ((final && (opc == Op_MemBarAcquire || opc == Op_MemBarAcquireLock)) || 9.8 - opc == Op_MemBarRelease || opc == Op_MemBarCPUOrder || 9.9 - opc == Op_MemBarReleaseLock) { 9.10 + if ((final && (opc == Op_MemBarAcquire || 9.11 + opc == Op_MemBarAcquireLock || 9.12 + opc == Op_LoadFence)) || 9.13 + opc == Op_MemBarRelease || 9.14 + opc == Op_StoreFence || 9.15 + opc == Op_MemBarReleaseLock || 9.16 + opc == Op_MemBarCPUOrder) { 9.17 Node* mem = current->in(0)->in(TypeFunc::Memory); 9.18 if (mem->is_MergeMem()) { 9.19 MergeMemNode* merge = mem->as_MergeMem(); 9.20 @@ -2973,15 +2977,17 @@ 9.21 //------------------------------make------------------------------------------- 9.22 MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) { 9.23 switch (opcode) { 9.24 - case Op_MemBarAcquire: return new(C) MemBarAcquireNode(C, atp, pn); 9.25 - case Op_MemBarRelease: return new(C) MemBarReleaseNode(C, atp, pn); 9.26 - case Op_MemBarAcquireLock: return new(C) MemBarAcquireLockNode(C, atp, pn); 9.27 - case Op_MemBarReleaseLock: return new(C) MemBarReleaseLockNode(C, atp, pn); 9.28 - case Op_MemBarVolatile: return new(C) MemBarVolatileNode(C, atp, pn); 9.29 - case Op_MemBarCPUOrder: return new(C) MemBarCPUOrderNode(C, atp, pn); 9.30 - case Op_Initialize: return new(C) InitializeNode(C, atp, pn); 9.31 - case Op_MemBarStoreStore: return new(C) MemBarStoreStoreNode(C, atp, pn); 9.32 - default: ShouldNotReachHere(); return NULL; 9.33 + case Op_MemBarAcquire: return new(C) MemBarAcquireNode(C, atp, pn); 9.34 + case Op_LoadFence: return new(C) LoadFenceNode(C, atp, pn); 9.35 + case Op_MemBarRelease: return new(C) MemBarReleaseNode(C, atp, pn); 9.36 + case Op_StoreFence: return new(C) 
StoreFenceNode(C, atp, pn); 9.37 + case Op_MemBarAcquireLock: return new(C) MemBarAcquireLockNode(C, atp, pn); 9.38 + case Op_MemBarReleaseLock: return new(C) MemBarReleaseLockNode(C, atp, pn); 9.39 + case Op_MemBarVolatile: return new(C) MemBarVolatileNode(C, atp, pn); 9.40 + case Op_MemBarCPUOrder: return new(C) MemBarCPUOrderNode(C, atp, pn); 9.41 + case Op_Initialize: return new(C) InitializeNode(C, atp, pn); 9.42 + case Op_MemBarStoreStore: return new(C) MemBarStoreStoreNode(C, atp, pn); 9.43 + default: ShouldNotReachHere(); return NULL; 9.44 } 9.45 } 9.46
10.1 --- a/src/share/vm/opto/memnode.hpp Fri Nov 22 12:14:09 2013 -0800 10.2 +++ b/src/share/vm/opto/memnode.hpp Tue Nov 26 18:38:19 2013 -0800 10.3 @@ -994,6 +994,17 @@ 10.4 virtual int Opcode() const; 10.5 }; 10.6 10.7 +// "Acquire" - no following ref can move before (but earlier refs can 10.8 +// follow, like an early Load stalled in cache). Requires multi-cpu 10.9 +// visibility. Inserted independent of any load, as required 10.10 +// for intrinsic sun.misc.Unsafe.loadFence(). 10.11 +class LoadFenceNode: public MemBarNode { 10.12 +public: 10.13 + LoadFenceNode(Compile* C, int alias_idx, Node* precedent) 10.14 + : MemBarNode(C, alias_idx, precedent) {} 10.15 + virtual int Opcode() const; 10.16 +}; 10.17 + 10.18 // "Release" - no earlier ref can move after (but later refs can move 10.19 // up, like a speculative pipelined cache-hitting Load). Requires 10.20 // multi-cpu visibility. Inserted before a volatile store. 10.21 @@ -1004,6 +1015,17 @@ 10.22 virtual int Opcode() const; 10.23 }; 10.24 10.25 +// "Release" - no earlier ref can move after (but later refs can move 10.26 +// up, like a speculative pipelined cache-hitting Load). Requires 10.27 +// multi-cpu visibility. Inserted independent of any store, as required 10.28 +// for intrinsic sun.misc.Unsafe.storeFence(). 10.29 +class StoreFenceNode: public MemBarNode { 10.30 +public: 10.31 + StoreFenceNode(Compile* C, int alias_idx, Node* precedent) 10.32 + : MemBarNode(C, alias_idx, precedent) {} 10.33 + virtual int Opcode() const; 10.34 +}; 10.35 + 10.36 // "Acquire" - no following ref can move before (but earlier refs can 10.37 // follow, like an early Load stalled in cache). Requires multi-cpu 10.38 // visibility. Inserted after a FastLock.
11.1 --- a/src/share/vm/runtime/vmStructs.cpp Fri Nov 22 12:14:09 2013 -0800 11.2 +++ b/src/share/vm/runtime/vmStructs.cpp Tue Nov 26 18:38:19 2013 -0800 11.3 @@ -1820,6 +1820,8 @@ 11.4 declare_c2_type(MemBarNode, MultiNode) \ 11.5 declare_c2_type(MemBarAcquireNode, MemBarNode) \ 11.6 declare_c2_type(MemBarReleaseNode, MemBarNode) \ 11.7 + declare_c2_type(LoadFenceNode, MemBarNode) \ 11.8 + declare_c2_type(StoreFenceNode, MemBarNode) \ 11.9 declare_c2_type(MemBarVolatileNode, MemBarNode) \ 11.10 declare_c2_type(MemBarCPUOrderNode, MemBarNode) \ 11.11 declare_c2_type(InitializeNode, MemBarNode) \