Mon, 03 Jan 2011 14:09:11 -0500
6302804: Hotspot VM dies ungraceful death when C heap is exhausted in various places.
Summary: enhance the error reporting mechanism to help user to fix the problem rather than making it look like a VM error.
Reviewed-by: kvn, kamg
1 /*
2 * Copyright (c) 2005, 2010 Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #ifndef SERIALGC
27 #include "utilities/yieldingWorkgroup.hpp"
28 #endif
30 // Forward declaration of classes declared here.
32 class GangWorker;
33 class WorkData;
35 YieldingFlexibleWorkGang::YieldingFlexibleWorkGang(
36 const char* name, int workers, bool are_GC_task_threads) :
37 FlexibleWorkGang(name, workers, are_GC_task_threads, false),
38 _yielded_workers(0) {}
40 GangWorker* YieldingFlexibleWorkGang::allocate_worker(int which) {
41 YieldingFlexibleGangWorker* new_member =
42 new YieldingFlexibleGangWorker(this, which);
43 return (YieldingFlexibleGangWorker*) new_member;
44 }
46 // Run a task; returns when the task is done, or the workers yield,
47 // or the task is aborted, or the work gang is terminated via stop().
48 // A task that has been yielded can be continued via this interface
49 // by using the same task repeatedly as the argument to the call.
50 // It is expected that the YieldingFlexibleGangTask carries the appropriate
51 // continuation information used by workers to continue the task
52 // from its last yield point. Thus, a completed task will return
53 // immediately with no actual work having been done by the workers.
54 /////////////////////
55 // Implementatiuon notes: remove before checking XXX
56 /*
57 Each gang is working on a task at a certain time.
58 Some subset of workers may have yielded and some may
59 have finished their quota of work. Until this task has
60 been completed, the workers are bound to that task.
61 Once the task has been completed, the gang unbounds
62 itself from the task.
64 The yielding work gang thus exports two invokation
65 interfaces: run_task() and continue_task(). The
66 first is used to initiate a new task and bind it
67 to the workers; the second is used to continue an
68 already bound task that has yielded. Upon completion
69 the binding is released and a new binding may be
70 created.
72 The shape of a yielding work gang is as follows:
74 Overseer invokes run_task(*task).
75 Lock gang monitor
76 Check that there is no existing binding for the gang
77 If so, abort with an error
78 Else, create a new binding of this gang to the given task
79 Set number of active workers (as asked)
80 Notify workers that work is ready to be done
81 [the requisite # workers would then start up
82 and do the task]
83 Wait on the monitor until either
84 all work is completed or the task has yielded
85 -- this is normally done through
86 yielded + completed == active
87 [completed workers are rest to idle state by overseer?]
88 return appropriate status to caller
90 Overseer invokes continue_task(*task),
91 Lock gang monitor
92 Check that task is the same as current binding
93 If not, abort with an error
94 Else, set the number of active workers as requested?
95 Notify workers that they can continue from yield points
96 New workers can also start up as required
97 while satisfying the constraint that
98 active + yielded does not exceed required number
99 Wait (as above).
101 NOTE: In the above, for simplicity in a first iteration
102 our gangs will be of fixed population and will not
103 therefore be flexible work gangs, just yielding work
104 gangs. Once this works well, we will in a second
105 iteration.refinement introduce flexibility into
106 the work gang.
108 NOTE: we can always create a new gang per each iteration
109 in order to get the flexibility, but we will for now
110 desist that simplified route.
112 */
113 /////////////////////
114 void YieldingFlexibleWorkGang::start_task(YieldingFlexibleGangTask* new_task) {
115 MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
116 assert(task() == NULL, "Gang currently tied to a task");
117 assert(new_task != NULL, "Null task");
118 // Bind task to gang
119 _task = new_task;
120 new_task->set_gang(this); // Establish 2-way binding to support yielding
121 _sequence_number++;
123 int requested_size = new_task->requested_size();
124 assert(requested_size >= 0, "Should be non-negative");
125 if (requested_size != 0) {
126 _active_workers = MIN2(requested_size, total_workers());
127 } else {
128 _active_workers = total_workers();
129 }
130 new_task->set_actual_size(_active_workers);
131 new_task->set_for_termination(_active_workers);
133 assert(_started_workers == 0, "Tabula rasa non");
134 assert(_finished_workers == 0, "Tabula rasa non");
135 assert(_yielded_workers == 0, "Tabula rasa non");
136 yielding_task()->set_status(ACTIVE);
138 // Wake up all the workers, the first few will get to work,
139 // and the rest will go back to sleep
140 monitor()->notify_all();
141 wait_for_gang();
142 }
144 void YieldingFlexibleWorkGang::wait_for_gang() {
146 assert(monitor()->owned_by_self(), "Data race");
147 // Wait for task to complete or yield
148 for (Status status = yielding_task()->status();
149 status != COMPLETED && status != YIELDED && status != ABORTED;
150 status = yielding_task()->status()) {
151 assert(started_workers() <= total_workers(), "invariant");
152 assert(finished_workers() <= total_workers(), "invariant");
153 assert(yielded_workers() <= total_workers(), "invariant");
154 monitor()->wait(Mutex::_no_safepoint_check_flag);
155 }
156 switch (yielding_task()->status()) {
157 case COMPLETED:
158 case ABORTED: {
159 assert(finished_workers() == total_workers(), "Inconsistent status");
160 assert(yielded_workers() == 0, "Invariant");
161 reset(); // for next task; gang<->task binding released
162 break;
163 }
164 case YIELDED: {
165 assert(yielded_workers() > 0, "Invariant");
166 assert(yielded_workers() + finished_workers() == total_workers(),
167 "Inconsistent counts");
168 break;
169 }
170 case ACTIVE:
171 case INACTIVE:
172 case COMPLETING:
173 case YIELDING:
174 case ABORTING:
175 default:
176 ShouldNotReachHere();
177 }
178 }
180 void YieldingFlexibleWorkGang::continue_task(
181 YieldingFlexibleGangTask* gang_task) {
183 MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
184 assert(task() != NULL && task() == gang_task, "Incorrect usage");
185 // assert(_active_workers == total_workers(), "For now");
186 assert(_started_workers == _active_workers, "Precondition");
187 assert(_yielded_workers > 0 && yielding_task()->status() == YIELDED,
188 "Else why are we calling continue_task()");
189 // Restart the yielded gang workers
190 yielding_task()->set_status(ACTIVE);
191 monitor()->notify_all();
192 wait_for_gang();
193 }
195 void YieldingFlexibleWorkGang::reset() {
196 _started_workers = 0;
197 _finished_workers = 0;
198 yielding_task()->set_gang(NULL);
199 _task = NULL; // unbind gang from task
200 }
202 void YieldingFlexibleWorkGang::yield() {
203 assert(task() != NULL, "Inconsistency; should have task binding");
204 MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
205 assert(yielded_workers() < total_workers(), "Consistency check");
206 if (yielding_task()->status() == ABORTING) {
207 // Do not yield; we need to abort as soon as possible
208 // XXX NOTE: This can cause a performance pathology in the
209 // current implementation in Mustang, as of today, and
210 // pre-Mustang in that as soon as an overflow occurs,
211 // yields will not be honoured. The right way to proceed
212 // of course is to fix bug # TBF, so that abort's cause
213 // us to return at each potential yield point.
214 return;
215 }
216 if (++_yielded_workers + finished_workers() == total_workers()) {
217 yielding_task()->set_status(YIELDED);
218 monitor()->notify_all();
219 } else {
220 yielding_task()->set_status(YIELDING);
221 }
223 while (true) {
224 switch (yielding_task()->status()) {
225 case YIELDING:
226 case YIELDED: {
227 monitor()->wait(Mutex::_no_safepoint_check_flag);
228 break; // from switch
229 }
230 case ACTIVE:
231 case ABORTING:
232 case COMPLETING: {
233 assert(_yielded_workers > 0, "Else why am i here?");
234 _yielded_workers--;
235 return;
236 }
237 case INACTIVE:
238 case ABORTED:
239 case COMPLETED:
240 default: {
241 ShouldNotReachHere();
242 }
243 }
244 }
245 // Only return is from inside switch statement above
246 ShouldNotReachHere();
247 }
249 void YieldingFlexibleWorkGang::abort() {
250 assert(task() != NULL, "Inconsistency; should have task binding");
251 MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
252 assert(yielded_workers() < active_workers(), "Consistency check");
253 #ifndef PRODUCT
254 switch (yielding_task()->status()) {
255 // allowed states
256 case ACTIVE:
257 case ABORTING:
258 case COMPLETING:
259 case YIELDING:
260 break;
261 // not allowed states
262 case INACTIVE:
263 case ABORTED:
264 case COMPLETED:
265 case YIELDED:
266 default:
267 ShouldNotReachHere();
268 }
269 #endif // !PRODUCT
270 Status prev_status = yielding_task()->status();
271 yielding_task()->set_status(ABORTING);
272 if (prev_status == YIELDING) {
273 assert(yielded_workers() > 0, "Inconsistency");
274 // At least one thread has yielded, wake it up
275 // so it can go back to waiting stations ASAP.
276 monitor()->notify_all();
277 }
278 }
280 ///////////////////////////////
281 // YieldingFlexibleGangTask
282 ///////////////////////////////
283 void YieldingFlexibleGangTask::yield() {
284 assert(gang() != NULL, "No gang to signal");
285 gang()->yield();
286 }
288 void YieldingFlexibleGangTask::abort() {
289 assert(gang() != NULL, "No gang to signal");
290 gang()->abort();
291 }
293 ///////////////////////////////
294 // YieldingFlexibleGangWorker
295 ///////////////////////////////
296 void YieldingFlexibleGangWorker::loop() {
297 int previous_sequence_number = 0;
298 Monitor* gang_monitor = gang()->monitor();
299 MutexLockerEx ml(gang_monitor, Mutex::_no_safepoint_check_flag);
300 WorkData data;
301 int id;
302 while (true) {
303 // Check if there is work to do or if we have been asked
304 // to terminate
305 gang()->internal_worker_poll(&data);
306 if (data.terminate()) {
307 // We have been asked to terminate.
308 assert(gang()->task() == NULL, "No task binding");
309 // set_status(TERMINATED);
310 return;
311 } else if (data.task() != NULL &&
312 data.sequence_number() != previous_sequence_number) {
313 // There is work to be done.
314 // First check if we need to become active or if there
315 // are already the requisite number of workers
316 if (gang()->started_workers() == yf_gang()->active_workers()) {
317 // There are already enough workers, we do not need to
318 // to run; fall through and wait on monitor.
319 } else {
320 // We need to pitch in and do the work.
321 assert(gang()->started_workers() < yf_gang()->active_workers(),
322 "Unexpected state");
323 id = gang()->started_workers();
324 gang()->internal_note_start();
325 // Now, release the gang mutex and do the work.
326 {
327 MutexUnlockerEx mul(gang_monitor, Mutex::_no_safepoint_check_flag);
328 data.task()->work(id); // This might include yielding
329 }
330 // Reacquire monitor and note completion of this worker
331 gang()->internal_note_finish();
332 // Update status of task based on whether all workers have
333 // finished or some have yielded
334 assert(data.task() == gang()->task(), "Confused task binding");
335 if (gang()->finished_workers() == yf_gang()->active_workers()) {
336 switch (data.yf_task()->status()) {
337 case ABORTING: {
338 data.yf_task()->set_status(ABORTED);
339 break;
340 }
341 case ACTIVE:
342 case COMPLETING: {
343 data.yf_task()->set_status(COMPLETED);
344 break;
345 }
346 default:
347 ShouldNotReachHere();
348 }
349 gang_monitor->notify_all(); // Notify overseer
350 } else { // at least one worker is still working or yielded
351 assert(gang()->finished_workers() < yf_gang()->active_workers(),
352 "Counts inconsistent");
353 switch (data.yf_task()->status()) {
354 case ACTIVE: {
355 // first, but not only thread to complete
356 data.yf_task()->set_status(COMPLETING);
357 break;
358 }
359 case YIELDING: {
360 if (gang()->finished_workers() + yf_gang()->yielded_workers()
361 == yf_gang()->active_workers()) {
362 data.yf_task()->set_status(YIELDED);
363 gang_monitor->notify_all(); // notify overseer
364 }
365 break;
366 }
367 case ABORTING:
368 case COMPLETING: {
369 break; // nothing to do
370 }
371 default: // everything else: INACTIVE, YIELDED, ABORTED, COMPLETED
372 ShouldNotReachHere();
373 }
374 }
375 }
376 }
377 // Remember the sequence number
378 previous_sequence_number = data.sequence_number();
379 // Wait for more work
380 gang_monitor->wait(Mutex::_no_safepoint_check_flag);
381 }
382 }