Thu, 20 Sep 2012 09:52:56 -0700
7190666: G1: assert(_unused == 0) failed: Inconsistency in PLAB stats
Summary: Reset the fields in ParGCAllocBuffer that are used for accumulating values for the ResizePLAB sensors in PLABStats to zero after flushing the values to the PLABStats fields. Flush PLABStats values only when retiring the final allocation buffers prior to disposing of a G1ParScanThreadState object, rather than when retiring every allocation buffer.
Reviewed-by: jwilhelm, jmasa, ysr
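A minimal sketch of the accumulate/flush/reset pattern the summary describes, using hypothetical names rather than the actual ParGCAllocBuffer/PLABStats API: the per-buffer accumulators are zeroed immediately after being flushed into the shared stats object, and the flush happens only once, when the final buffer is retired, so the ResizePLAB sensors never see the same values twice.

#include <stddef.h>

// Shared statistics read by the ResizePLAB sensors (sketch only).
struct PLABStatsSketch {
  size_t _allocated, _wasted, _unused;
  PLABStatsSketch() : _allocated(0), _wasted(0), _unused(0) {}
  void add(size_t allocated, size_t wasted, size_t unused) {
    _allocated += allocated;
    _wasted    += wasted;
    _unused    += unused;
  }
};

// Per-thread allocation buffer with its accumulator fields (sketch only).
class AllocBufferSketch {
  size_t _allocated, _wasted, _unused;
 public:
  AllocBufferSketch() : _allocated(0), _wasted(0), _unused(0) {}
  // Called once, when the final buffer is retired before the owning
  // per-thread state is disposed of, rather than on every buffer retirement.
  void flush_stats(PLABStatsSketch* stats) {
    stats->add(_allocated, _wasted, _unused);
    // Reset the accumulators so the same values cannot be flushed twice,
    // which is what the "Inconsistency in PLAB stats" assert guarded against.
    _allocated = _wasted = _unused = 0;
  }
};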
/*
 * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLENUMASPACE_HPP
#define SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLENUMASPACE_HPP

#ifndef SERIALGC
#include "gc_implementation/shared/gcUtil.hpp"
#include "gc_implementation/shared/mutableSpace.hpp"
#endif

/*
 * The NUMA-aware allocator (MutableNUMASpace) is basically a modification
 * of MutableSpace which preserves interfaces but implements different
 * functionality. The space is split into chunks for each locality group
 * (resizing for adaptive size policy is also supported). For each thread
 * allocations are performed in the chunk corresponding to the home locality
 * group of the thread. Whenever any chunk fills up, a young generation
 * collection occurs.
 * The chunks can also be adaptively resized. The idea behind adaptive sizing
 * is to reduce the loss of space in the eden due to fragmentation. The main
 * cause of fragmentation is uneven allocation rates of threads. The
 * allocation rate difference between locality groups may be caused either by
 * application specifics or by uneven LWP distribution by the OS. Moreover,
 * the application can have fewer threads than the number of locality groups.
 * In order to resize the chunks we measure the allocation rate of the
 * application between collections. After that we reshape the chunks to
 * reflect the allocation rate pattern. The AdaptiveWeightedAverage
 * exponentially decaying average is used to smooth the measurements. The
 * NUMASpaceResizeRate parameter is used to control the adaptation speed by
 * restricting the number of bytes that can be moved during the adaptation
 * phase.
 * Chunks may contain pages from the wrong locality group. The page scanner
 * has been introduced to address this problem. Remote pages typically appear
 * due to memory shortage in the target locality group. In addition, Solaris
 * may allocate a large page from a remote locality group even if small local
 * pages are available. The page scanner scans the pages right after the
 * collection and frees remote pages in the hope that subsequent reallocation
 * will be more successful. This approach proved to be useful on systems with
 * high load where multiple processes are competing for memory.
 */
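
// A minimal sketch (not part of this header) of how an exponentially decaying
// average of the kind AdaptiveWeightedAverage provides smooths the per-lgrp
// allocation-rate samples described above. The weight is assumed to be a
// percentage in [0, 100], as NUMAChunkResizeWeight is documented to be; the
// function name is hypothetical and purely illustrative.
inline float numa_alloc_rate_decay_sketch(float prev_avg, float sample, unsigned weight) {
  // Keep (100 - weight)% of the previous average and blend in weight% of the
  // new sample, so old measurements fade away geometrically over time.
  return ((100.0F - weight) * prev_avg + weight * sample) / 100.0F;
}
// For example, with weight == 20, a previous average of 1.0 MB per collection
// and a new sample of 2.0 MB give (80 * 1.0 + 20 * 2.0) / 100 == 1.2 MB.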

class MutableNUMASpace : public MutableSpace {
  friend class VMStructs;

  class LGRPSpace : public CHeapObj<mtGC> {
    int _lgrp_id;
    MutableSpace* _space;
    MemRegion _invalid_region;
    AdaptiveWeightedAverage *_alloc_rate;
    bool _allocation_failed;

    struct SpaceStats {
      size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
      size_t _large_pages, _small_pages;

      SpaceStats() {
        _local_space = 0;
        _remote_space = 0;
        _unbiased_space = 0;
        _uncommited_space = 0;
        _large_pages = 0;
        _small_pages = 0;
      }
    };

    SpaceStats _space_stats;

    char* _last_page_scanned;
    char* last_page_scanned() { return _last_page_scanned; }
    void set_last_page_scanned(char* p) { _last_page_scanned = p; }
   public:
    LGRPSpace(int l, size_t alignment) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
      _space = new MutableSpace(alignment);
      _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
    }
    ~LGRPSpace() {
      delete _space;
      delete _alloc_rate;
    }

    void add_invalid_region(MemRegion r) {
      if (!_invalid_region.is_empty()) {
        _invalid_region.set_start(MIN2(_invalid_region.start(), r.start()));
        _invalid_region.set_end(MAX2(_invalid_region.end(), r.end()));
      } else {
        _invalid_region = r;
      }
    }
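
    // Illustration (not in the original source): after adding [0x1000, 0x2000)
    // and then [0x3000, 0x4000), _invalid_region is [0x1000, 0x4000) -- the
    // smallest single region covering both inputs, including the gap between
    // them.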

    static bool equals(void* lgrp_id_value, LGRPSpace* p) {
      return *(int*)lgrp_id_value == p->lgrp_id();
    }

    // Report a failed allocation.
    void set_allocation_failed() { _allocation_failed = true; }

    void sample() {
      // If there was a failed allocation make allocation rate equal
      // to the size of the whole chunk. This ensures the progress of
      // the adaptation process.
      size_t alloc_rate_sample;
      if (_allocation_failed) {
        alloc_rate_sample = space()->capacity_in_bytes();
        _allocation_failed = false;
      } else {
        alloc_rate_sample = space()->used_in_bytes();
      }
      alloc_rate()->sample(alloc_rate_sample);
    }

    MemRegion invalid_region() const { return _invalid_region; }
    void set_invalid_region(MemRegion r) { _invalid_region = r; }
    int lgrp_id() const { return _lgrp_id; }
    MutableSpace* space() const { return _space; }
    AdaptiveWeightedAverage* alloc_rate() const { return _alloc_rate; }
    void clear_alloc_rate() { _alloc_rate->clear(); }
    SpaceStats* space_stats() { return &_space_stats; }
    void clear_space_stats() { _space_stats = SpaceStats(); }

    void accumulate_statistics(size_t page_size);
    void scan_pages(size_t page_size, size_t page_count);
  };

  GrowableArray<LGRPSpace*>* _lgrp_spaces;
  size_t _page_size;
  unsigned _adaptation_cycles, _samples_count;

  void set_page_size(size_t psz) { _page_size = psz; }
  size_t page_size() const { return _page_size; }

  unsigned adaptation_cycles() { return _adaptation_cycles; }
  void set_adaptation_cycles(int v) { _adaptation_cycles = v; }

  unsigned samples_count() { return _samples_count; }
  void increment_samples_count() { ++_samples_count; }

  size_t _base_space_size;
  void set_base_space_size(size_t v) { _base_space_size = v; }
  size_t base_space_size() const { return _base_space_size; }

  // Check if the NUMA topology has changed. Add and remove spaces if needed.
  // The update can be forced by setting the force parameter equal to true.
  bool update_layout(bool force);
  // Bias region towards the lgrp.
  void bias_region(MemRegion mr, int lgrp_id);
  // Free pages in a given region.
  void free_region(MemRegion mr);
  // Get current chunk size.
  size_t current_chunk_size(int i);
  // Get default chunk size (equally divide the space).
  size_t default_chunk_size();
  // Adapt the chunk size to follow the allocation rate.
  size_t adaptive_chunk_size(int i, size_t limit);
  // Scan and free invalid pages.
  void scan_pages(size_t page_count);
  // Return the bottom_region and the top_region. Align them to page_size() boundary.
  // |------------------new_region---------------------------------|
  // |----bottom_region--|---intersection---|------top_region------|
  void select_tails(MemRegion new_region, MemRegion intersection,
                    MemRegion* bottom_region, MemRegion *top_region);
  // Try to merge the invalid region with the bottom or top region by decreasing
  // the intersection area. Return a non-empty invalid_region, aligned to the
  // page_size() boundary, if it lies inside the intersection.
  // |------------------new_region---------------------------------|
  // |----------------|-------invalid---|--------------------------|
  // |----bottom_region--|---intersection---|------top_region------|
  void merge_regions(MemRegion new_region, MemRegion* intersection,
                     MemRegion *invalid_region);

 public:
  GrowableArray<LGRPSpace*>* lgrp_spaces() const { return _lgrp_spaces; }
  MutableNUMASpace(size_t alignment);
  virtual ~MutableNUMASpace();
  // Space initialization.
  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space, bool setup_pages = SetupPages);
  // Update the space layout if necessary. Does all the adaptive resizing work.
  virtual void update();
  // Update allocation rate averages.
  virtual void accumulate_statistics();

  virtual void clear(bool mangle_space);
  virtual void mangle_unused_area() PRODUCT_RETURN;
  virtual void mangle_unused_area_complete() PRODUCT_RETURN;
  virtual void mangle_region(MemRegion mr) PRODUCT_RETURN;
  virtual void check_mangled_unused_area(HeapWord* limit) PRODUCT_RETURN;
  virtual void check_mangled_unused_area_complete() PRODUCT_RETURN;
  virtual void set_top_for_allocations(HeapWord* v) PRODUCT_RETURN;
  virtual void set_top_for_allocations() PRODUCT_RETURN;

  virtual void ensure_parsability();
  virtual size_t used_in_words() const;
  virtual size_t free_in_words() const;

  using MutableSpace::capacity_in_words;
  virtual size_t capacity_in_words(Thread* thr) const;
  virtual size_t tlab_capacity(Thread* thr) const;
  virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;

  // Allocation (return NULL if full).
  virtual HeapWord* allocate(size_t word_size);
  virtual HeapWord* cas_allocate(size_t word_size);

  // Debugging
  virtual void print_on(outputStream* st) const;
  virtual void print_short_on(outputStream* st) const;
  virtual void verify();

  virtual void set_top(HeapWord* value);
};

#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_MUTABLENUMASPACE_HPP