Tue, 25 Jan 2011 17:58:19 -0500
7014261: G1: RSet-related failures
Summary: A race between the concurrent cleanup thread and the VM thread while it is processing the "expanded sparse table list" causes both threads to try to free the same sparse table entry and either causes one of the threads to fail or leaves the entry in an inconsistent state. The solution is to purge all entries on the expanded list that correspond to regions that are being cleaned up.
Reviewed-by: brutisso, johnc
1 /*
2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_SPARSEPRT_HPP
26 #define SHARE_VM_GC_IMPLEMENTATION_G1_SPARSEPRT_HPP
28 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
29 #include "gc_implementation/g1/heapRegion.hpp"
30 #include "memory/allocation.hpp"
31 #include "memory/cardTableModRefBS.hpp"
32 #include "runtime/mutex.hpp"
33 #include "utilities/globalDefinitions.hpp"
35 // Sparse remembered set for a heap region (the "owning" region). Maps
36 // indices of other regions to short sequences of cards in the other region
37 // that might contain pointers into the owner region.
// These tables only expand while they are accessed in parallel --
// deletions may be done in single-threaded code. This allows us to permit
// unsynchronized reads/iterations, as long as expansions caused by
// insertions only enqueue old versions for deletion, but do not delete
// old versions synchronously.
// A single entry of the sparse remembered set: maps one region index
// ("_region_ind") to a small, fixed-capacity array of card indices within
// that region. Entries are allocated as variable-sized blocks (see size()),
// so the card array must remain the last data member.
class SparsePRTEntry: public CHeapObj {
public:
  enum SomePublicConstants {
    NullEntry    = -1, // Sentinel: "no card here" / "no entry".
    UnrollFactor =  4  // cards_num() is kept a multiple of this (loop unrolling).
  };
private:
  RegionIdx_t _region_ind; // Index of the region this entry records cards for.
  int         _next_index; // Link to the next entry index; presumably a bucket
                           // chain / free list in RSHashTable -- confirm in .cpp.
  CardIdx_t   _cards[1];   // First element of the variable-length card array.
  // WARNING: Don't put any data members beyond this line. Card array has, in fact, variable length.
  // It should always be the last data member.
public:
  // Returns the size of the entry, used for entry allocation.
  static size_t size() { return sizeof(SparsePRTEntry) + sizeof(CardIdx_t) * (cards_num() - 1); }
  // Returns the size of the card array.
  static int cards_num() {
    // The number of cards should be a multiple of 4, because that's our current
    // unrolling factor.
    static const int s = MAX2<int>(G1RSetSparseRegionEntries & ~(UnrollFactor - 1), UnrollFactor);
    return s;
  }

  // Set the region_ind to the given value, and delete all cards.
  inline void init(RegionIdx_t region_ind);

  RegionIdx_t r_ind() const { return _region_ind; }
  // An entry is valid iff its region index is non-negative (NullEntry == -1).
  bool valid_entry() const { return r_ind() >= 0; }
  void set_r_ind(RegionIdx_t rind) { _region_ind = rind; }

  int next_index() const { return _next_index; }
  int* next_index_addr() { return &_next_index; }
  void set_next_index(int ni) { _next_index = ni; }

  // Returns "true" iff the entry contains the given card index.
  inline bool contains_card(CardIdx_t card_index) const;

  // Returns the number of non-NULL card entries.
  inline int num_valid_cards() const;

  // Attempts to record the given card index in this entry.
  // NOTE(review): inferred from the enum names -- "added" = card inserted,
  // "found" = card was already present, "overflow" = entry is full and the
  // caller must transfer to a larger-capacity representation; the original
  // comment described an older boolean API. Confirm in sparsePRT.cpp.
  enum AddCardResult {
    overflow,
    found,
    added
  };
  inline AddCardResult add_card(CardIdx_t card_index);

  // Copy the current entry's cards into "cards".
  inline void copy_cards(CardIdx_t* cards) const;
  // Copy the current entry's cards into the "_card" array of "e."
  inline void copy_cards(SparsePRTEntry* e) const;

  inline CardIdx_t card(int i) const { return _cards[i]; }
};
// Hash table mapping region indices to SparsePRTEntry objects. Entries live
// in one contiguous, pre-allocated block ("_entries") and are linked through
// integer indices (bucket chains and a free list) rather than pointers.
class RSHashTable : public CHeapObj {

  friend class RSHashTableIter;

  enum SomePrivateConstants {
    NullEntry = -1 // Terminates bucket chains and the free list.
  };

  size_t _capacity;         // Number of buckets.
  size_t _capacity_mask;    // NOTE(review): mask-style hashing suggests this is
                            // _capacity - 1 and _capacity is a power of two --
                            // confirm in sparsePRT.cpp.
  size_t _occupied_entries; // Number of entries currently in use.
  size_t _occupied_cards;   // Total number of cards recorded across all entries.

  SparsePRTEntry* _entries; // Entry storage; entries are variable-sized, so use entry(i).
  int*            _buckets; // Per-bucket head entry index, or NullEntry.
  int             _free_region; // Presumably the next never-allocated entry slot -- confirm.
  int             _free_list;   // Head of the freed-entry list (NullEntry if empty).

  // Requires that the caller hold a lock preventing parallel modifying
  // operations, and that the table be less than completely full. If
  // an entry for "region_ind" is already in the table, finds it and
  // returns its address; otherwise returns "NULL."
  SparsePRTEntry* entry_for_region_ind(RegionIdx_t region_ind) const;

  // Requires that the caller hold a lock preventing parallel modifying
  // operations, and that the table be less than completely full. If
  // an entry for "region_ind" is already in the table, finds it and
  // returns its address; otherwise allocates, initializes, inserts and
  // returns a new entry for "region_ind".
  SparsePRTEntry* entry_for_region_ind_create(RegionIdx_t region_ind);

  // Returns the index of the next free entry in "_entries".
  int alloc_entry();
  // Declares the entry "fi" to be free. (It must have already been
  // deleted from any bucket lists.)
  void free_entry(int fi);

public:
  RSHashTable(size_t capacity);
  ~RSHashTable();

  // Attempts to ensure that the given card_index in the given region is in
  // the sparse table. If successful (because the card was already
  // present, or because it was successfully added) returns "true".
  // Otherwise, returns "false" to indicate that the addition would
  // overflow the entry for the region. The caller must transfer these
  // entries to a larger-capacity representation.
  bool add_card(RegionIdx_t region_id, CardIdx_t card_index);

  // Copies the cards recorded for "region_id" into "cards"; presumably
  // returns "true" iff an entry for the region exists -- confirm in .cpp.
  bool get_cards(RegionIdx_t region_id, CardIdx_t* cards);

  // Removes the entry for "region_id"; presumably returns "true" iff one
  // was found (and removed) -- confirm in .cpp.
  bool delete_entry(RegionIdx_t region_id);

  // Returns "true" iff the table records the given card in the given region.
  bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const;

  // Inserts the already-initialized entry "e" into the table.
  void add_entry(SparsePRTEntry* e);

  // Returns the entry associated with "region_id" (presumably NULL when
  // absent, like entry_for_region_ind() -- confirm in .cpp).
  SparsePRTEntry* get_entry(RegionIdx_t region_id);

  // Empties the table.
  void clear();

  size_t capacity() const         { return _capacity; }
  size_t capacity_mask() const    { return _capacity_mask; }
  size_t occupied_entries() const { return _occupied_entries; }
  size_t occupied_cards() const   { return _occupied_cards; }
  // Memory footprint of this table, for heap accounting.
  size_t mem_size() const;

  // Returns the i-th entry. Entries have variable length
  // (SparsePRTEntry::size()), so the address is computed in bytes rather
  // than by pointer arithmetic on SparsePRTEntry*.
  SparsePRTEntry* entry(int i) const { return (SparsePRTEntry*)((char*)_entries + SparsePRTEntry::size() * i); }

  void print();
};
// Iterator over all cards recorded in an RSHashTable.
// ValueObj because will be embedded in HRRS iterator.
class RSHashTableIter VALUE_OBJ_CLASS_SPEC {
  int _tbl_ind;       // [-1, 0.._rsht->_capacity)
  int _bl_ind;        // [-1, 0.._rsht->_capacity)
  short _card_ind;    // [0..SparsePRTEntry::cards_num())
  RSHashTable* _rsht; // The table being iterated; not owned.

  // If the bucket list pointed to by _bl_ind contains a card, sets
  // _bl_ind to the index of that entry, and returns the card.
  // Otherwise, returns SparseEntry::NullEntry.
  CardIdx_t find_first_card_in_list();

  // Computes the proper card index for the card whose offset in the
  // current region (as indicated by _bl_ind) is "ci".
  // This is subject to errors when there is iteration concurrent with
  // modification, but these errors should be benign.
  size_t compute_card_ind(CardIdx_t ci);

public:
  RSHashTableIter() :
    _tbl_ind(RSHashTable::NullEntry),
    _bl_ind(RSHashTable::NullEntry),
    // Start at the last card so the first advance moves to a fresh entry.
    _card_ind((SparsePRTEntry::cards_num() - 1)),
    _rsht(NULL) {}

  // (Re)targets the iterator at "rsht" and resets it to the beginning.
  void init(RSHashTable* rsht) {
    _rsht = rsht;
    _tbl_ind = -1; // So that first increment gets to 0.
    _bl_ind = RSHashTable::NullEntry;
    _card_ind = (SparsePRTEntry::cards_num() - 1);
  }

  // Advances to the next card; presumably sets "card_index" and returns
  // "true" when one exists, "false" at the end -- confirm in .cpp.
  bool has_next(size_t& card_index);
};
// Concurrent access to a SparsePRT must be serialized by some external
// mutex.
214 class SparsePRTIter;
215 class SparsePRTCleanupTask;
// Sparse remembered set for one heap region ("_hr"). Keeps two hash
// tables: "_cur" is the snapshot iterated during a pause, "_next" receives
// all modifications; when full, "_next" is expanded and the SparsePRT is
// put on a global "expanded" list for later processing.
class SparsePRT VALUE_OBJ_CLASS_SPEC {
  friend class SparsePRTCleanupTask;

  // Iterations are done on the _cur hash table, since they only need to
  // see entries visible at the start of a collection pause.
  // All other operations are done using the _next hash table.
  RSHashTable* _cur;
  RSHashTable* _next;

  HeapRegion* _hr; // The owning region.

  enum SomeAdditionalPrivateConstants {
    InitialCapacity = 16 // Capacity of a freshly cleared table.
  };

  // Grows the _next table to a larger capacity (see sparsePRT.cpp).
  void expand();

  bool _expanded; // True iff this table has been expanded (see should_be_on_expanded_list()).

  bool expanded() { return _expanded; }
  void set_expanded(bool b) { _expanded = b; }

  SparsePRT* _next_expanded; // Link in the (intrusive) expanded list.

  SparsePRT* next_expanded() { return _next_expanded; }
  void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }

  bool should_be_on_expanded_list();

  // Head of the global list of SparsePRTs that have expanded and need
  // processing; see cleanup_all() and the cleanup-task methods below.
  static SparsePRT* _head_expanded_list;

public:
  SparsePRT(HeapRegion* hr);

  ~SparsePRT();

  size_t occupied() const { return _next->occupied_cards(); }
  size_t mem_size() const;

  // Attempts to ensure that the given card_index in the given region is in
  // the sparse table. If successful (because the card was already
  // present, or because it was successfully added) returns "true".
  // Otherwise, returns "false" to indicate that the addition would
  // overflow the entry for the region. The caller must transfer these
  // entries to a larger-capacity representation.
  bool add_card(RegionIdx_t region_id, CardIdx_t card_index);

  // If the table holds an entry for "region_ind", copies its
  // cards into "cards", which must be an array of length at least
  // "SparsePRTEntry::cards_num()", and returns "true"; otherwise,
  // returns "false".
  bool get_cards(RegionIdx_t region_ind, CardIdx_t* cards);

  // Return the pointer to the entry associated with the given region.
  SparsePRTEntry* get_entry(RegionIdx_t region_ind);

  // If there is an entry for "region_ind", removes it and return "true";
  // otherwise returns "false."
  bool delete_entry(RegionIdx_t region_ind);

  // Clear the table, and reinitialize to initial capacity.
  void clear();

  // Ensure that "_cur" and "_next" point to the same table.
  void cleanup();

  // Clean up all tables on the expanded list. Called single threaded.
  static void cleanup_all();

  RSHashTable* cur() const { return _cur; }

  // Initializes "sprt_iter" to iterate over this table (its _cur snapshot;
  // see SparsePRTIter::init()).
  void init_iterator(SparsePRTIter* sprt_iter);

  static void add_to_expanded_list(SparsePRT* sprt);
  static SparsePRT* get_from_expanded_list();

  // The purpose of these three methods is to help the GC workers
  // during the cleanup pause to recreate the expanded list, purging
  // any tables from it that belong to regions that are freed during
  // cleanup (if we don't purge those tables, there is a race that
  // causes various crashes; see CR 7014261).
  //
  // We chose to recreate the expanded list, instead of purging
  // entries from it by iterating over it, to avoid this serial phase
  // at the end of the cleanup pause.
  //
  // The three methods below work as follows:
  // * reset_for_cleanup_tasks() : Nulls the expanded list head at the
  //   start of the cleanup pause.
  // * do_cleanup_work() : Called by the cleanup workers for every
  //   region that is not free / is being freed by the cleanup
  //   pause. It creates a list of expanded tables whose head / tail
  //   are on the thread-local SparsePRTCleanupTask object.
  // * finish_cleanup_task() : Called by the cleanup workers after
  //   they complete their cleanup task. It adds the local list into
  //   the global expanded list. It assumes that the
  //   ParGCRareEvent_lock is being held to ensure MT-safety.
  static void reset_for_cleanup_tasks();
  void do_cleanup_work(SparsePRTCleanupTask* sprt_cleanup_task);
  static void finish_cleanup_task(SparsePRTCleanupTask* sprt_cleanup_task);

  // Returns "true" iff the _next table (the one that receives
  // modifications) records the given card in the given region.
  bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const {
    return _next->contains_card(region_id, card_index);
  }
};
// Iterator over a SparsePRT: delegates to RSHashTableIter, operating on the
// table's _cur snapshot (the one iterations are defined on).
class SparsePRTIter: public RSHashTableIter {
public:
  // Positions the iterator at the start of "sprt"'s _cur table.
  void init(const SparsePRT* sprt) {
    RSHashTableIter::init(sprt->cur());
  }
  // Forwards directly to RSHashTableIter::has_next().
  bool has_next(size_t& card_index) {
    return RSHashTableIter::has_next(card_index);
  }
};
// This allows each worker during a cleanup pause to create a
// thread-local list of sparse tables that have been expanded and need
// to be processed at the beginning of the next GC pause. These lists
// are concatenated into the single expanded list at the end of the
// cleanup pause.
class SparsePRTCleanupTask VALUE_OBJ_CLASS_SPEC {
private:
  SparsePRT* _head; // First table on the local list (NULL when empty).
  SparsePRT* _tail; // Last table on the local list (NULL when empty).

public:
  SparsePRTCleanupTask() : _head(NULL), _tail(NULL) { }

  // Appends "sprt" to this worker's local list.
  void add(SparsePRT* sprt);

  SparsePRT* head() { return _head; }
  SparsePRT* tail() { return _tail; }
};
350 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SPARSEPRT_HPP