Wed, 07 Apr 2010 12:39:27 -0700
6940726: Use BIS instruction for allocation prefetch on Sparc
Summary: Use BIS instruction for allocation prefetch on Sparc
Reviewed-by: twisti
1 /*
2 * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
// Forward declaration: ThreadLocalAllocBuffer only holds and returns a
// GlobalTLABStats* in this header, so the complete type is not required yet.
25 class GlobalTLABStats;
27 // ThreadLocalAllocBuffer: a descriptor for thread-local storage used by
28 // the threads for allocation.
29 // It is thread-private at any time, but may be multiplexed over
30 // time across multiple threads. The park()/unpark() pair is
31 // used to make it available for such multiplexing.
32 class ThreadLocalAllocBuffer: public CHeapObj {
33 friend class VMStructs;
34 private:
// Buffer bounds and sizing. The *_offset() accessors near the end of the
// class expose the byte offsets of these fields, so reordering the
// declarations changes the values those accessors return.
35 HeapWord* _start; // address of TLAB
36 HeapWord* _top; // address after last allocation
37 HeapWord* _pf_top; // allocation prefetch watermark
38 HeapWord* _end; // allocation end (excluding alignment_reserve)
39 size_t _desired_size; // desired size (including alignment_reserve)
40 size_t _refill_waste_limit; // hold onto tlab if free() is larger than this
// Static, i.e. one value shared by every TLAB; its definition is not visible here.
42 static unsigned _target_refills; // expected number of refills between GCs
// Per-TLAB statistics counters, read through the private accessors in the
// "statistics" section below. The fields are unsigned.
44 unsigned _number_of_refills;
45 unsigned _fast_refill_waste;
46 unsigned _slow_refill_waste;
47 unsigned _gc_waste;
48 unsigned _slow_allocations;
50 AdaptiveWeightedAverage _allocation_fraction; // fraction of eden allocated in tlabs
// NOTE(review): presumably accumulate_statistics() reports the counters above
// to the GlobalTLABStats instance and initialize_statistics() resets them;
// the bodies are not in this header -- confirm in the implementation file.
52 void accumulate_statistics();
53 void initialize_statistics();
// Trivial private setters for the bounds and sizing fields.
55 void set_start(HeapWord* start) { _start = start; }
56 void set_end(HeapWord* end) { _end = end; }
57 void set_top(HeapWord* top) { _top = top; }
58 void set_pf_top(HeapWord* pf_top) { _pf_top = pf_top; }
59 void set_desired_size(size_t desired_size) { _desired_size = desired_size; }
60 void set_refill_waste_limit(size_t waste) { _refill_waste_limit = waste; }
// Starting value for the refill waste limit: desired_size() divided by
// TLABRefillWasteFraction.
62 size_t initial_refill_waste_limit() { return desired_size() / TLABRefillWasteFraction; }
// Returns the static _target_refills; note the unsigned field is converted to int.
64 static int target_refills() { return _target_refills; }
65 size_t initial_desired_size();
// Distance from top() to hard_end(), so unlike free() it includes the
// alignment reserve. Yields 0 when end() is NULL.
67 size_t remaining() const { return end() == NULL ? 0 : pointer_delta(hard_end(), top()); }
69 // Make parsable and release it.
70 void reset();
72 // Resize based on amount of allocation, etc.
73 void resize();
// Asserts the ordering start() <= top() <= end().
75 void invariants() const { assert(top() >= start() && top() <= end(), "invalid tlab"); }
// NOTE(review): presumably installs the given bounds via the setters above;
// body not visible here -- confirm.
77 void initialize(HeapWord* start, HeapWord* top, HeapWord* end);
79 void print_stats(const char* tag);
// NOTE(review): presumably returns the thread that owns this TLAB -- confirm
// against the definition.
81 Thread* myThread();
83 // statistics
// Read accessors for the counters declared above. They return int although
// the underlying fields are unsigned.
85 int number_of_refills() const { return _number_of_refills; }
86 int fast_refill_waste() const { return _fast_refill_waste; }
87 int slow_refill_waste() const { return _slow_refill_waste; }
88 int gc_waste() const { return _gc_waste; }
89 int slow_allocations() const { return _slow_allocations; }
// Single statistics collector shared by all TLABs (static pointer).
91 static GlobalTLABStats* _global_stats;
92 static GlobalTLABStats* global_stats() { return _global_stats; }
94 public:
// Only _allocation_fraction is initialized here (seeded with
// TLABAllocationWeight); no other member is given a value by the constructor.
95 ThreadLocalAllocBuffer() : _allocation_fraction(TLABAllocationWeight) {
96 // do nothing. tlabs must be inited by initialize() calls
97 }
// Smallest TLAB size: MinTLABSize / HeapWordSize passed through
// align_object_size().
// NOTE(review): assumes MinTLABSize is in bytes and the result in HeapWords -- confirm.
99 static const size_t min_size() { return align_object_size(MinTLABSize / HeapWordSize); }
100 static const size_t max_size();
// Plain read accessors for the bounds and sizing fields.
102 HeapWord* start() const { return _start; }
103 HeapWord* end() const { return _end; }
// end() extended by alignment_reserve().
104 HeapWord* hard_end() const { return _end + alignment_reserve(); }
105 HeapWord* top() const { return _top; }
106 HeapWord* pf_top() const { return _pf_top; }
107 size_t desired_size() const { return _desired_size; }
// Distance from top() to end(); the alignment reserve is not counted.
108 size_t free() const { return pointer_delta(end(), top()); }
109 // Don't discard tlab if remaining space is larger than this.
110 size_t refill_waste_limit() const { return _refill_waste_limit; }
112 // Allocate size HeapWords. The memory is NOT initialized to zero.
113 inline HeapWord* allocate(size_t size);
115 // Reserve space at the end of TLAB
// The reserve starts as the header size of an int type array. When
// AllocatePrefetchStyle == 3 it is raised, if necessary, to cover the
// prefetch window computed below.
// NOTE(review): presumably the int-array header is the room a filler object
// needs at the end of the buffer -- confirm with make_parsable().
116 static size_t end_reserve() {
117 int reserve_size = typeArrayOopDesc::header_size(T_INT);
118 if (AllocatePrefetchStyle == 3) {
119 // BIS is used to prefetch - we need a space for it.
120 // +1 for rounding up to next cache line +1 to be safe
121 int lines = AllocatePrefetchLines + 2;
122 int step_size = AllocatePrefetchStepSize;
123 int distance = AllocatePrefetchDistance;
// Window = distance + step_size * lines, divided by HeapWordSize.
// NOTE(review): assumes the distance/step flags are byte counts, making the
// quotient a HeapWord count -- TODO confirm against the flag definitions.
124 int prefetch_end = (distance + step_size*lines)/(int)HeapWordSize;
125 reserve_size = MAX2(reserve_size, prefetch_end);
126 }
127 return reserve_size;
128 }
// end_reserve() passed through align_object_size(); the _in_bytes variant
// multiplies that result by HeapWordSize.
129 static size_t alignment_reserve() { return align_object_size(end_reserve()); }
130 static size_t alignment_reserve_in_bytes() { return alignment_reserve() * HeapWordSize; }
132 // Return tlab size or remaining space in eden such that the
133 // space is large enough to hold obj_size and necessary fill space.
134 // Otherwise return 0;
135 inline size_t compute_size(size_t obj_size);
137 // Record slow allocation
138 inline void record_slow_allocation(size_t obj_size);
140 // Initialization at startup
141 static void startup_initialization();
143 // Make an in-use tlab parsable, optionally also retiring it.
144 void make_parsable(bool retire);
146 // Retire in-use tlab before allocation of a new tlab
147 void clear_before_allocation();
149 // Accumulate statistics across all tlabs before gc
150 static void accumulate_statistics_before_gc();
152 // Resize tlabs for all threads
153 static void resize_all_tlabs();
// NOTE(review): fill() presumably installs a freshly obtained buffer of
// new_size and the no-argument initialize() sets up an empty TLAB; neither
// body is visible here -- confirm.
155 void fill(HeapWord* start, HeapWord* top, size_t new_size);
156 void initialize();
// Returns the TLABWasteIncrement value.
158 static size_t refill_waste_limit_increment() { return TLABWasteIncrement; }
160 // Code generation support
// Byte offsets of individual fields within ThreadLocalAllocBuffer, obtained
// with byte_offset_of. Note that size_offset() refers to _desired_size.
161 static ByteSize start_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _start); }
162 static ByteSize end_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _end ); }
163 static ByteSize top_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _top ); }
164 static ByteSize pf_top_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _pf_top ); }
165 static ByteSize size_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _desired_size ); }
166 static ByteSize refill_waste_limit_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _refill_waste_limit ); }
168 static ByteSize number_of_refills_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _number_of_refills ); }
169 static ByteSize fast_refill_waste_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _fast_refill_waste ); }
170 static ByteSize slow_allocations_offset() { return byte_offset_of(ThreadLocalAllocBuffer, _slow_allocations ); }
172 void verify();
173 };
// Collects TLAB statistics as running totals and maxima in private fields and
// holds the PerfVariable handles that publish() writes them to.
// NOTE(review): the update_*() methods are presumably driven by
// ThreadLocalAllocBuffer's statistics accumulation -- confirm against callers.
175 class GlobalTLABStats: public CHeapObj {
176 private:
178 // Accumulate perfdata in private variables because
179 // PerfData should be write-only for security reasons
180 // (see perfData.hpp)
181 unsigned _allocating_threads;
182 unsigned _total_refills;
183 unsigned _max_refills;
184 size_t _total_allocation;
185 size_t _total_gc_waste;
186 size_t _max_gc_waste;
187 size_t _total_slow_refill_waste;
188 size_t _max_slow_refill_waste;
189 size_t _total_fast_refill_waste;
190 size_t _max_fast_refill_waste;
191 unsigned _total_slow_allocations;
192 unsigned _max_slow_allocations;
// One PerfVariable handle per accumulated value above.
194 PerfVariable* _perf_allocating_threads;
195 PerfVariable* _perf_total_refills;
196 PerfVariable* _perf_max_refills;
197 PerfVariable* _perf_allocation;
198 PerfVariable* _perf_gc_waste;
199 PerfVariable* _perf_max_gc_waste;
200 PerfVariable* _perf_slow_refill_waste;
201 PerfVariable* _perf_max_slow_refill_waste;
202 PerfVariable* _perf_fast_refill_waste;
203 PerfVariable* _perf_max_fast_refill_waste;
204 PerfVariable* _perf_slow_allocations;
205 PerfVariable* _perf_max_slow_allocations;
// Source of the value returned by allocating_threads_avg().
207 AdaptiveWeightedAverage _allocating_threads_avg;
209 public:
210 GlobalTLABStats();
212 // Initialize all counters
213 void initialize();
215 // Write all perf counters to the perf_counters
216 void publish();
218 void print();
220 // Accessors
// Average number of allocating threads: adding 0.5 before the unsigned cast
// rounds to the nearest integer, and MAX2 with 1U keeps the result at 1 or more.
221 unsigned allocating_threads_avg() {
222 return MAX2((unsigned)(_allocating_threads_avg.average() + 0.5), 1U);
223 }
// Running total accumulated by update_allocation().
225 size_t allocation() {
226 return _total_allocation;
227 }
229 // Update methods
// Counts one more allocating thread.
231 void update_allocating_threads() {
232 _allocating_threads++;
233 }
// Each of the following adds value to the matching running total; all except
// update_allocation() also raise the matching maximum when value exceeds it.
234 void update_number_of_refills(unsigned value) {
235 _total_refills += value;
236 _max_refills = MAX2(_max_refills, value);
237 }
238 void update_allocation(size_t value) {
239 _total_allocation += value;
240 }
241 void update_gc_waste(size_t value) {
242 _total_gc_waste += value;
243 _max_gc_waste = MAX2(_max_gc_waste, value);
244 }
245 void update_fast_refill_waste(size_t value) {
246 _total_fast_refill_waste += value;
247 _max_fast_refill_waste = MAX2(_max_fast_refill_waste, value);
248 }
249 void update_slow_refill_waste(size_t value) {
250 _total_slow_refill_waste += value;
251 _max_slow_refill_waste = MAX2(_max_slow_refill_waste, value);
252 }
253 void update_slow_allocations(unsigned value) {
254 _total_slow_allocations += value;
255 _max_slow_allocations = MAX2(_max_slow_allocations, value);
256 }
257 };