Tue, 08 Oct 2013 17:35:51 +0200
8014555: G1: Memory ordering problem with Conc refinement and card marking
Summary: Add a StoreLoad barrier in the G1 post-barrier to fix a race with concurrent refinement. Also-reviewed-by: martin.doerr@sap.com
Reviewed-by: iveresov, tschatzl, brutisso, roland, kvn
1.1 --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Fri Oct 04 13:33:02 2013 +0200 1.2 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Tue Oct 08 17:35:51 2013 +0200 1.3 @@ -37,6 +37,9 @@ 1.4 #include "runtime/vframeArray.hpp" 1.5 #include "utilities/macros.hpp" 1.6 #include "vmreg_sparc.inline.hpp" 1.7 +#if INCLUDE_ALL_GCS 1.8 +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 1.9 +#endif 1.10 1.11 // Implementation of StubAssembler 1.12 1.13 @@ -912,7 +915,7 @@ 1.14 Register tmp2 = G3_scratch; 1.15 jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base; 1.16 1.17 - Label not_already_dirty, restart, refill; 1.18 + Label not_already_dirty, restart, refill, young_card; 1.19 1.20 #ifdef _LP64 1.21 __ srlx(addr, CardTableModRefBS::card_shift, addr); 1.22 @@ -924,9 +927,15 @@ 1.23 __ set(rs, cardtable); // cardtable := <card table base> 1.24 __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] 1.25 1.26 + __ cmp_and_br_short(tmp, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card); 1.27 + 1.28 + __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); 1.29 + __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] 1.30 + 1.31 assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); 1.32 __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); 1.33 1.34 + __ bind(young_card); 1.35 // We didn't take the branch, so we're already dirty: return. 1.36 // Use return-from-leaf 1.37 __ retl();
2.1 --- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp Fri Oct 04 13:33:02 2013 +0200 2.2 +++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp Tue Oct 08 17:35:51 2013 +0200 2.3 @@ -3752,7 +3752,7 @@ 2.4 #define __ masm. 2.5 address start = __ pc(); 2.6 2.7 - Label not_already_dirty, restart, refill; 2.8 + Label not_already_dirty, restart, refill, young_card; 2.9 2.10 #ifdef _LP64 2.11 __ srlx(O0, CardTableModRefBS::card_shift, O0); 2.12 @@ -3763,9 +3763,15 @@ 2.13 __ set(addrlit, O1); // O1 := <card table base> 2.14 __ ldub(O0, O1, O2); // O2 := [O0 + O1] 2.15 2.16 + __ cmp_and_br_short(O2, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card); 2.17 + 2.18 + __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); 2.19 + __ ldub(O0, O1, O2); // O2 := [O0 + O1] 2.20 + 2.21 assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); 2.22 __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); 2.23 2.24 + __ bind(young_card); 2.25 // We didn't take the branch, so we're already dirty: return. 2.26 // Use return-from-leaf 2.27 __ retl();
3.1 --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Fri Oct 04 13:33:02 2013 +0200 3.2 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Tue Oct 08 17:35:51 2013 +0200 3.3 @@ -38,6 +38,9 @@ 3.4 #include "runtime/vframeArray.hpp" 3.5 #include "utilities/macros.hpp" 3.6 #include "vmreg_x86.inline.hpp" 3.7 +#if INCLUDE_ALL_GCS 3.8 +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 3.9 +#endif 3.10 3.11 3.12 // Implementation of StubAssembler 3.13 @@ -1753,13 +1756,17 @@ 3.14 __ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index))); 3.15 #endif 3.16 3.17 - __ cmpb(Address(card_addr, 0), 0); 3.18 + __ cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val()); 3.19 + __ jcc(Assembler::equal, done); 3.20 + 3.21 + __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); 3.22 + __ cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val()); 3.23 __ jcc(Assembler::equal, done); 3.24 3.25 // storing region crossing non-NULL, card is clean. 3.26 // dirty card and log. 3.27 3.28 - __ movb(Address(card_addr, 0), 0); 3.29 + __ movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val()); 3.30 3.31 __ cmpl(queue_index, 0); 3.32 __ jcc(Assembler::equal, runtime);
4.1 --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Fri Oct 04 13:33:02 2013 +0200 4.2 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Oct 08 17:35:51 2013 +0200 4.3 @@ -3389,13 +3389,18 @@ 4.4 const Register card_addr = tmp; 4.5 lea(card_addr, as_Address(ArrayAddress(cardtable, index))); 4.6 #endif 4.7 - cmpb(Address(card_addr, 0), 0); 4.8 + cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val()); 4.9 jcc(Assembler::equal, done); 4.10 4.11 + membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); 4.12 + cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val()); 4.13 + jcc(Assembler::equal, done); 4.14 + 4.15 + 4.16 // storing a region crossing, non-NULL oop, card is clean. 4.17 // dirty card and log. 4.18 4.19 - movb(Address(card_addr, 0), 0); 4.20 + movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val()); 4.21 4.22 cmpl(queue_index, 0); 4.23 jcc(Assembler::equal, runtime);
5.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Oct 04 13:33:02 2013 +0200 5.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Oct 08 17:35:51 2013 +0200 5.3 @@ -6035,7 +6035,11 @@ 5.4 // is dirty. 5.5 G1SATBCardTableModRefBS* ct_bs = g1_barrier_set(); 5.6 MemRegion mr(hr->bottom(), hr->pre_dummy_top()); 5.7 - ct_bs->verify_dirty_region(mr); 5.8 + if (hr->is_young()) { 5.9 + ct_bs->verify_g1_young_region(mr); 5.10 + } else { 5.11 + ct_bs->verify_dirty_region(mr); 5.12 + } 5.13 } 5.14 5.15 void G1CollectedHeap::verify_dirty_young_list(HeapRegion* head) {
6.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Fri Oct 04 13:33:02 2013 +0200 6.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Tue Oct 08 17:35:51 2013 +0200 6.3 @@ -29,6 +29,7 @@ 6.4 #include "gc_implementation/g1/g1CollectedHeap.hpp" 6.5 #include "gc_implementation/g1/g1AllocRegion.inline.hpp" 6.6 #include "gc_implementation/g1/g1CollectorPolicy.hpp" 6.7 +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 6.8 #include "gc_implementation/g1/heapRegionSeq.inline.hpp" 6.9 #include "utilities/taskqueue.hpp" 6.10 6.11 @@ -134,7 +135,7 @@ 6.12 assert(containing_hr->is_in(end - 1), "it should also contain end - 1"); 6.13 6.14 MemRegion mr(start, end); 6.15 - g1_barrier_set()->dirty(mr); 6.16 + g1_barrier_set()->g1_mark_as_young(mr); 6.17 } 6.18 6.19 inline RefToScanQueue* G1CollectedHeap::task_queue(int i) const {
7.1 --- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp Fri Oct 04 13:33:02 2013 +0200 7.2 +++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp Tue Oct 08 17:35:51 2013 +0200 7.3 @@ -70,6 +70,12 @@ 7.4 if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) { 7.5 return false; 7.6 } 7.7 + 7.8 + if (val == g1_young_gen) { 7.9 + // the card is for a young gen region. We don't need to keep track of all pointers into young 7.10 + return false; 7.11 + } 7.12 + 7.13 // Cached bit can be installed either on a clean card or on a claimed card. 7.14 jbyte new_val = val; 7.15 if (val == clean_card_val()) { 7.16 @@ -85,6 +91,19 @@ 7.17 return true; 7.18 } 7.19 7.20 +void G1SATBCardTableModRefBS::g1_mark_as_young(const MemRegion& mr) { 7.21 + jbyte *const first = byte_for(mr.start()); 7.22 + jbyte *const last = byte_after(mr.last()); 7.23 + 7.24 + memset(first, g1_young_gen, last - first); 7.25 +} 7.26 + 7.27 +#ifndef PRODUCT 7.28 +void G1SATBCardTableModRefBS::verify_g1_young_region(MemRegion mr) { 7.29 + verify_region(mr, g1_young_gen, true); 7.30 +} 7.31 +#endif 7.32 + 7.33 G1SATBCardTableLoggingModRefBS:: 7.34 G1SATBCardTableLoggingModRefBS(MemRegion whole_heap, 7.35 int max_covered_regions) : 7.36 @@ -97,7 +116,11 @@ 7.37 void 7.38 G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field, 7.39 oop new_val) { 7.40 - jbyte* byte = byte_for(field); 7.41 + volatile jbyte* byte = byte_for(field); 7.42 + if (*byte == g1_young_gen) { 7.43 + return; 7.44 + } 7.45 + OrderAccess::storeload(); 7.46 if (*byte != dirty_card) { 7.47 *byte = dirty_card; 7.48 Thread* thr = Thread::current(); 7.49 @@ -129,7 +152,7 @@ 7.50 7.51 void 7.52 G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) { 7.53 - jbyte* byte = byte_for(mr.start()); 7.54 + volatile jbyte* byte = byte_for(mr.start()); 7.55 jbyte* last_byte = byte_for(mr.last()); 7.56 Thread* thr = Thread::current(); 7.57 if (whole_heap) { 7.58 @@ -138,25 +161,35 @@ 7.59 byte++; 7.60 } 7.61 } else { 7.62 - // Enqueue if necessary. 7.63 - if (thr->is_Java_thread()) { 7.64 - JavaThread* jt = (JavaThread*)thr; 7.65 - while (byte <= last_byte) { 7.66 - if (*byte != dirty_card) { 7.67 - *byte = dirty_card; 7.68 - jt->dirty_card_queue().enqueue(byte); 7.69 + // skip all consecutive young cards 7.70 + for (; byte <= last_byte && *byte == g1_young_gen; byte++); 7.71 + 7.72 + if (byte <= last_byte) { 7.73 + OrderAccess::storeload(); 7.74 + // Enqueue if necessary. 7.75 + if (thr->is_Java_thread()) { 7.76 + JavaThread* jt = (JavaThread*)thr; 7.77 + for (; byte <= last_byte; byte++) { 7.78 + if (*byte == g1_young_gen) { 7.79 + continue; 7.80 + } 7.81 + if (*byte != dirty_card) { 7.82 + *byte = dirty_card; 7.83 + jt->dirty_card_queue().enqueue(byte); 7.84 + } 7.85 } 7.86 - byte++; 7.87 - } 7.88 - } else { 7.89 - MutexLockerEx x(Shared_DirtyCardQ_lock, 7.90 - Mutex::_no_safepoint_check_flag); 7.91 - while (byte <= last_byte) { 7.92 - if (*byte != dirty_card) { 7.93 - *byte = dirty_card; 7.94 - _dcqs.shared_dirty_card_queue()->enqueue(byte); 7.95 + } else { 7.96 + MutexLockerEx x(Shared_DirtyCardQ_lock, 7.97 + Mutex::_no_safepoint_check_flag); 7.98 + for (; byte <= last_byte; byte++) { 7.99 + if (*byte == g1_young_gen) { 7.100 + continue; 7.101 + } 7.102 + if (*byte != dirty_card) { 7.103 + *byte = dirty_card; 7.104 + _dcqs.shared_dirty_card_queue()->enqueue(byte); 7.105 + } 7.106 } 7.107 - byte++; 7.108 } 7.109 } 7.110 }
8.1 --- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp Fri Oct 04 13:33:02 2013 +0200 8.2 +++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp Tue Oct 08 17:35:51 2013 +0200 8.3 @@ -38,7 +38,14 @@ 8.4 // snapshot-at-the-beginning marking. 8.5 8.6 class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS { 8.7 +protected: 8.8 + enum G1CardValues { 8.9 + g1_young_gen = CT_MR_BS_last_reserved << 1 8.10 + }; 8.11 + 8.12 public: 8.13 + static int g1_young_card_val() { return g1_young_gen; } 8.14 + 8.15 // Add "pre_val" to a set of objects that may have been disconnected from the 8.16 // pre-marking object graph. 8.17 static void enqueue(oop pre_val); 8.18 @@ -118,6 +125,9 @@ 8.19 _byte_map[card_index] = val; 8.20 } 8.21 8.22 + void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN; 8.23 + void g1_mark_as_young(const MemRegion& mr); 8.24 + 8.25 bool mark_card_deferred(size_t card_index); 8.26 8.27 bool is_card_deferred(size_t card_index) {
9.1 --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp Fri Oct 04 13:33:02 2013 +0200 9.2 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Tue Oct 08 17:35:51 2013 +0200 9.3 @@ -80,6 +80,10 @@ 9.4 9.5 void reset() { if (_buf != NULL) _index = _sz; } 9.6 9.7 + void enqueue(volatile void* ptr) { 9.8 + enqueue((void*)(ptr)); 9.9 + } 9.10 + 9.11 // Enqueues the given "obj". 9.12 void enqueue(void* ptr) { 9.13 if (!_active) return;
10.1 --- a/src/share/vm/opto/graphKit.cpp Fri Oct 04 13:33:02 2013 +0200 10.2 +++ b/src/share/vm/opto/graphKit.cpp Tue Oct 08 17:35:51 2013 +0200 10.3 @@ -3713,7 +3713,8 @@ 10.4 Node* no_base = __ top(); 10.5 float likely = PROB_LIKELY(0.999); 10.6 float unlikely = PROB_UNLIKELY(0.999); 10.7 - Node* zero = __ ConI(0); 10.8 + Node* young_card = __ ConI((jint)G1SATBCardTableModRefBS::g1_young_card_val()); 10.9 + Node* dirty_card = __ ConI((jint)CardTableModRefBS::dirty_card_val()); 10.10 Node* zeroX = __ ConX(0); 10.11 10.12 // Get the alias_index for raw card-mark memory 10.13 @@ -3769,8 +3770,16 @@ 10.14 // load the original value of the card 10.15 Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); 10.16 10.17 - __ if_then(card_val, BoolTest::ne, zero); { 10.18 - g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); 10.19 + __ if_then(card_val, BoolTest::ne, young_card); { 10.20 + sync_kit(ideal); 10.21 + // Use Op_MemBarVolatile to achieve the effect of a StoreLoad barrier. 10.22 + insert_mem_bar(Op_MemBarVolatile, oop_store); 10.23 + __ sync_kit(this); 10.24 + 10.25 + Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); 10.26 + __ if_then(card_val_reload, BoolTest::ne, dirty_card); { 10.27 + g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); 10.28 + } __ end_if(); 10.29 } __ end_if(); 10.30 } __ end_if(); 10.31 } __ end_if();