Wed, 13 Jan 2010 15:26:39 -0800
6896647: card marks can be deferred too long
Summary: Deferred card marks are now flushed during the gc prologue. Parallel[Scavenge,OldGC] and SerialGC no longer defer card marks generated by COMPILER2 as a result of ReduceInitialCardMarks. For these cases, a diagnostic option has been introduced to defer the card marks, solely for testing and diagnostic purposes. CMS and G1 continue to defer card marks. A potential performance concern related to the single-threaded flushing of deferred card marks in the gc prologue will be addressed in the future.
Reviewed-by: never, johnc
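
In outline (a condensed paraphrase of the collectedHeap.cpp hunks below, not a verbatim excerpt), the change hinges on two pieces of logic:

    // Decided once, at heap initialization (CollectedHeap::pre_initialize):
    // defer only when the compiler elides initial card marks AND either the
    // heap needs the mark to strictly follow the initializing stores (CMS, G1)
    // or the new diagnostic flag forces deferral.
    #ifdef COMPILER2
      _defer_initial_card_mark = ReduceInitialCardMarks &&
          (DeferInitialCardMark || card_mark_must_follow_store());
    #endif

    // Applied at each slow-path allocation (CollectedHeap::new_store_pre_barrier):
    if (_defer_initial_card_mark) {
      thread->set_deferred_card_mark(mr);  // flushed later, in the gc prologue
    } else {
      barrier_set()->write_region(mr);     // card-mark the whole object now
    }

Deferred marks are then flushed per-thread by CollectedHeap::ensure_parsability(), which runs in the gc prologue, so they can no longer outlive the start of a collection.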
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -1441,6 +1441,7 @@
 }
 
 jint G1CollectedHeap::initialize() {
+  CollectedHeap::pre_initialize();
   os::enable_vtime();
 
   // Necessary to satisfy locking discipline assertions.
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -1007,6 +1007,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return true;
+  }
+
   bool is_in_young(oop obj) {
     HeapRegion* hr = heap_region_containing(obj);
     return hr != NULL && hr->is_young();
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -51,6 +51,8 @@
 }
 
 jint ParallelScavengeHeap::initialize() {
+  CollectedHeap::pre_initialize();
+
   // Cannot be initialized until after the flags are parsed
   GenerationSizer flag_parser;
 
@@ -717,10 +719,6 @@
   return young_gen()->allocate(size, true);
 }
 
-void ParallelScavengeHeap::fill_all_tlabs(bool retire) {
-  CollectedHeap::fill_all_tlabs(retire);
-}
-
 void ParallelScavengeHeap::accumulate_statistics_all_tlabs() {
   CollectedHeap::accumulate_statistics_all_tlabs();
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -54,7 +54,6 @@
  protected:
   static inline size_t total_invocations();
   HeapWord* allocate_new_tlab(size_t size);
-  void fill_all_tlabs(bool retire);
 
  public:
   ParallelScavengeHeap() : CollectedHeap() {
@@ -191,6 +190,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return false;
+  }
+
   // Return true if we don't we need a store barrier for
   // initializing stores to an object at this address.
   virtual bool can_elide_initializing_store_barrier(oop new_obj);
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -59,8 +59,18 @@
     PerfDataManager::create_string_variable(SUN_GC, "lastCause",
                                80, GCCause::to_string(_gc_lastcause), CHECK);
   }
+  _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below.
 }
 
+void CollectedHeap::pre_initialize() {
+  // Used for ReduceInitialCardMarks (when COMPILER2 is used);
+  // otherwise remains unused.
+#ifdef COMPILER2
+  _defer_initial_card_mark = ReduceInitialCardMarks && (DeferInitialCardMark || card_mark_must_follow_store());
+#else
+  assert(_defer_initial_card_mark == false, "Who would set it?");
+#endif
+}
 
 #ifndef PRODUCT
 void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) {
@@ -140,12 +150,13 @@
 void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
   MemRegion deferred = thread->deferred_card_mark();
   if (!deferred.is_empty()) {
+    assert(_defer_initial_card_mark, "Otherwise should be empty");
     {
       // Verify that the storage points to a parsable object in heap
       DEBUG_ONLY(oop old_obj = oop(deferred.start());)
       assert(is_in(old_obj), "Not in allocated heap");
       assert(!can_elide_initializing_store_barrier(old_obj),
-             "Else should have been filtered in defer_store_barrier()");
+             "Else should have been filtered in new_store_pre_barrier()");
       assert(!is_in_permanent(old_obj), "Sanity: not expected");
       assert(old_obj->is_oop(true), "Not an oop");
       assert(old_obj->is_parsable(), "Will not be concurrently parsable");
@@ -174,9 +185,7 @@
 // so long as the card-mark is completed before the next
 // scavenge. For all these cases, we can do a card mark
 // at the point at which we do a slow path allocation
-// in the old gen. For uniformity, however, we end
-// up using the same scheme (see below) for all three
-// cases (deferring the card-mark appropriately).
+// in the old gen, i.e. in this call.
 // (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
 //     in addition that the card-mark for an old gen allocated
 //     object strictly follow any associated initializing stores.
@@ -199,12 +208,13 @@
 // but, like in CMS, because of the presence of concurrent refinement
 // (much like CMS' precleaning), must strictly follow the oop-store.
 // Thus, using the same protocol for maintaining the intended
-// invariants turns out, serendepitously, to be the same for all
-// three collectors/heap types above.
+// invariants turns out, serendipitously, to be the same for both
+// G1 and CMS.
 //
-// For each future collector, this should be reexamined with
-// that specific collector in mind.
-oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) {
+// For any future collector, this code should be reexamined with
+// that specific collector in mind, and the documentation above suitably
+// extended and updated.
+oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) {
   // If a previous card-mark was deferred, flush it now.
   flush_deferred_store_barrier(thread);
   if (can_elide_initializing_store_barrier(new_obj)) {
@@ -212,10 +222,17 @@
     // following the flush above.
     assert(thread->deferred_card_mark().is_empty(), "Error");
   } else {
-    // Remember info for the newly deferred store barrier
-    MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size());
-    assert(!deferred.is_empty(), "Error");
-    thread->set_deferred_card_mark(deferred);
+    MemRegion mr((HeapWord*)new_obj, new_obj->size());
+    assert(!mr.is_empty(), "Error");
+    if (_defer_initial_card_mark) {
+      // Defer the card mark
+      thread->set_deferred_card_mark(mr);
+    } else {
+      // Do the card mark
+      BarrierSet* bs = barrier_set();
+      assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
+      bs->write_region(mr);
+    }
   }
   return new_obj;
 }
@@ -313,22 +330,6 @@
   return NULL;
 }
 
-void CollectedHeap::fill_all_tlabs(bool retire) {
-  assert(UseTLAB, "should not reach here");
-  // See note in ensure_parsability() below.
-  assert(SafepointSynchronize::is_at_safepoint() ||
-         !is_init_completed(),
-         "should only fill tlabs at safepoint");
-  // The main thread starts allocating via a TLAB even before it
-  // has added itself to the threads list at vm boot-up.
-  assert(Threads::first() != NULL,
-         "Attempt to fill tlabs before main thread has been added"
-         " to threads list is doomed to failure!");
-  for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
-    thread->tlab().make_parsable(retire);
-  }
-}
-
 void CollectedHeap::ensure_parsability(bool retire_tlabs) {
   // The second disjunct in the assertion below makes a concession
   // for the start-up verification done while the VM is being
@@ -343,8 +344,24 @@
          "Should only be called at a safepoint or at start-up"
          " otherwise concurrent mutator activity may make heap "
          " unparsable again");
-  if (UseTLAB) {
-    fill_all_tlabs(retire_tlabs);
+  const bool use_tlab = UseTLAB;
+  const bool deferred = _defer_initial_card_mark;
+  // The main thread starts allocating via a TLAB even before it
+  // has added itself to the threads list at vm boot-up.
+  assert(!use_tlab || Threads::first() != NULL,
+         "Attempt to fill tlabs before main thread has been added"
+         " to threads list is doomed to failure!");
+  for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
+    if (use_tlab) thread->tlab().make_parsable(retire_tlabs);
+#ifdef COMPILER2
+    // The deferred store barriers must all have been flushed to the
+    // card-table (or other remembered set structure) before GC starts
+    // processing the card-table (or other remembered set).
+    if (deferred) flush_deferred_store_barrier(thread);
+#else
+    assert(!deferred, "Should be false");
+    assert(thread->deferred_card_mark().is_empty(), "Should be empty");
+#endif
   }
 }
 
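
The ordering constraint motivating deferral for CMS and G1 (see the block comment revised above) can be made concrete with a hypothetical interleaving; the names below (slow_path_allocate, card_mark) are illustrative, not code from this change:

    // Mutator thread                      // CMS precleaning / G1 refinement
    obj = slow_path_allocate();
    card_mark(obj);   // mark issued too early
                                           // scans the dirty card, finds no
                                           // interesting pointers, re-cleans it
    obj->field = x;   // initializing store lands after the card was cleaned,
                      // so the pointer is missed by the remembered set

The stop-world collectors have no concurrent card cleaner, so the mark can safely be issued immediately at slow-path allocation time; hence card_mark_must_follow_store() answers false for them and true for CMS and G1.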
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -51,6 +51,9 @@
   // Used for filler objects (static, but initialized in ctor).
   static size_t _filler_array_max_size;
 
+  // Used in support of ReduceInitialCardMarks; only consulted if COMPILER2 is being used
+  bool _defer_initial_card_mark;
+
  protected:
   MemRegion _reserved;
   BarrierSet* _barrier_set;
@@ -70,13 +73,16 @@
   // Constructor
   CollectedHeap();
 
+  // Do common initializations that must follow instance construction,
+  // for example, those needing virtual calls.
+  // This code could perhaps be moved into initialize() but would
+  // be slightly more awkward because we want the latter to be a
+  // pure virtual.
+  void pre_initialize();
+
   // Create a new tlab
   virtual HeapWord* allocate_new_tlab(size_t size);
 
-  // Fix up tlabs to make the heap well-formed again,
-  // optionally retiring the tlabs.
-  virtual void fill_all_tlabs(bool retire);
-
   // Accumulate statistics on all tlabs.
   virtual void accumulate_statistics_all_tlabs();
 
@@ -431,14 +437,25 @@
   // promises to call this function on such a slow-path-allocated
   // object before performing initializations that have elided
   // store barriers. Returns new_obj, or maybe a safer copy thereof.
-  virtual oop defer_store_barrier(JavaThread* thread, oop new_obj);
+  virtual oop new_store_pre_barrier(JavaThread* thread, oop new_obj);
 
   // Answers whether an initializing store to a new object currently
-  // allocated at the given address doesn't need a (deferred) store
+  // allocated at the given address doesn't need a store
   // barrier. Returns "true" if it doesn't need an initializing
   // store barrier; answers "false" if it does.
   virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0;
 
+  // If a compiler is eliding store barriers for TLAB-allocated objects,
+  // we will be informed of a slow-path allocation by a call
+  // to new_store_pre_barrier() above. Such a call precedes the
+  // initialization of the object itself, and no post-store-barriers will
+  // be issued. Some heap types require that the barrier strictly follows
+  // the initializing stores. (This is currently implemented by deferring the
+  // barrier until the next slow-path allocation or gc-related safepoint.)
+  // This interface answers whether a particular heap type needs the card
+  // mark to be thus strictly sequenced after the stores.
+  virtual bool card_mark_must_follow_store() const = 0;
+
   // If the CollectedHeap was asked to defer a store barrier above,
   // this informs it to flush such a deferred store barrier to the
   // remembered set.
--- a/src/share/vm/memory/genCollectedHeap.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -51,6 +51,8 @@
 }
 
 jint GenCollectedHeap::initialize() {
+  CollectedHeap::pre_initialize();
+
   int i;
   _n_gens = gen_policy()->number_of_generations();
 
@@ -129,6 +131,7 @@
 
   _rem_set = collector_policy()->create_rem_set(_reserved, n_covered_regions);
   set_barrier_set(rem_set()->bs());
+
   _gch = this;
 
   for (i = 0; i < _n_gens; i++) {
--- a/src/share/vm/memory/genCollectedHeap.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -260,6 +260,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return UseConcMarkSweepGC;
+  }
+
   // We don't need barriers for stores to objects in the
   // young gen and, a fortiori, for initializing stores to
   // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS}
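
Taken together with the G1 and ParallelScavenge hunks above, the new virtual is answered as follows (a restatement of the diffs, not new code):

    G1CollectedHeap::card_mark_must_follow_store()      -> true
    ParallelScavengeHeap::card_mark_must_follow_store() -> false
    GenCollectedHeap::card_mark_must_follow_store()     -> UseConcMarkSweepGC

so a generational heap defers only when CMS is the old-generation collector; the purely stop-world configurations card-mark eagerly.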
--- a/src/share/vm/opto/graphKit.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/opto/graphKit.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -3259,9 +3259,10 @@
   if (use_ReduceInitialCardMarks()
       && obj == just_allocated_object(control())) {
     // We can skip marks on a freshly-allocated object in Eden.
-    // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp.
-    // That routine informs GC to take appropriate compensating steps
-    // so as to make this card-mark elision safe.
+    // Keep this code in sync with new_store_pre_barrier() in runtime.cpp.
+    // That routine informs GC to take appropriate compensating steps,
+    // upon a slow-path allocation, so as to make this card-mark
+    // elision safe.
     return;
   }
 
--- a/src/share/vm/opto/runtime.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/opto/runtime.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -143,7 +143,7 @@
 // We failed the fast-path allocation. Now we need to do a scavenge or GC
 // and try allocation again.
 
-void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
+void OptoRuntime::new_store_pre_barrier(JavaThread* thread) {
   // After any safepoint, just before going back to compiled code,
   // we inform the GC that we will be doing initializing writes to
   // this object in the future without emitting card-marks, so
@@ -156,7 +156,7 @@
   assert(Universe::heap()->can_elide_tlab_store_barriers(),
          "compiler must check this first");
   // GC may decide to give back a safer copy of new_obj.
-  new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
+  new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj);
   thread->set_vm_result(new_obj);
 }
 
@@ -200,7 +200,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
@@ -239,7 +239,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
--- a/src/share/vm/opto/runtime.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/opto/runtime.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -133,8 +133,9 @@
   // Allocate storage for a objArray or typeArray
   static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
 
-  // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
-  static void maybe_defer_card_mark(JavaThread* thread);
+  // Post-slow-path-allocation, pre-initializing-stores step for
+  // implementing ReduceInitialCardMarks
+  static void new_store_pre_barrier(JavaThread* thread);
 
   // Allocate storage for a multi-dimensional arrays
   // Note: needs to be fixed for arbitrary number of dimensions
--- a/src/share/vm/runtime/globals.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/runtime/globals.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -2015,6 +2015,10 @@
   diagnostic(bool, GCParallelVerificationEnabled, true,                     \
           "Enable parallel memory system verification")                     \
                                                                             \
+  diagnostic(bool, DeferInitialCardMark, false,                             \
+          "When +ReduceInitialCardMarks, explicitly defer any that "        \
+          "may arise from new_store_pre_barrier")                           \
+                                                                            \
   diagnostic(bool, VerifyRememberedSets, false,                             \
           "Verify GC remembered sets")                                      \
                                                                             \
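
Since DeferInitialCardMark is a diagnostic flag, it must be unlocked before it can be set. An illustrative command line (assumed invocation, for testing only per the Summary) that exercises the deferral path on a stop-world collector:

    java -XX:+UnlockDiagnosticVMOptions -XX:+ReduceInitialCardMarks \
         -XX:+DeferInitialCardMark -XX:+UseParallelGC ...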
--- a/src/share/vm/runtime/thread.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/runtime/thread.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -2357,9 +2357,8 @@
 };
 
 void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
-  // Flush deferred store-barriers, if any, associated with
-  // initializing stores done by this JavaThread in the current epoch.
-  Universe::heap()->flush_deferred_store_barrier(this);
+  // Verify that the deferred card marks have been flushed.
+  assert(deferred_card_mark().is_empty(), "Should be empty during GC");
 
   // The ThreadProfiler oops_do is done from FlatProfiler::oops_do
   // since there may be more than one thread using each ThreadProfiler.
--- a/src/share/vm/runtime/vmStructs.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/src/share/vm/runtime/vmStructs.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -309,6 +309,7 @@
   nonstatic_field(CollectedHeap,               _reserved,                 MemRegion)   \
   nonstatic_field(SharedHeap,                  _perm_gen,                 PermGen*)    \
   nonstatic_field(CollectedHeap,               _barrier_set,              BarrierSet*) \
+  nonstatic_field(CollectedHeap,               _defer_initial_card_mark,  bool)        \
   nonstatic_field(CollectedHeap,               _is_gc_active,             bool)        \
   nonstatic_field(CompactibleSpace,            _compaction_top,           HeapWord*)   \
   nonstatic_field(CompactibleSpace,            _first_dead,               HeapWord*)   \