src/share/vm/utilities/yieldingWorkgroup.cpp

Mon, 03 Jan 2011 14:09:11 -0500

author
coleenp
date
Mon, 03 Jan 2011 14:09:11 -0500
changeset 2418
36c186bcc085
parent 2314
f95d63e2154a
child 3058
3be7439273c5
permissions
-rw-r--r--

6302804: Hotspot VM dies ungraceful death when C heap is exhausted in various places.
Summary: enhance the error reporting mechanism to help user to fix the problem rather than making it look like a VM error.
Reviewed-by: kvn, kamg

     1 /*
     2  * Copyright (c) 2005, 2010 Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #ifndef SERIALGC
    27 #include "utilities/yieldingWorkgroup.hpp"
    28 #endif
    30 // Forward declaration of classes declared here.
    32 class GangWorker;
    33 class WorkData;
    35 YieldingFlexibleWorkGang::YieldingFlexibleWorkGang(
    36   const char* name, int workers, bool are_GC_task_threads) :
    37   FlexibleWorkGang(name, workers, are_GC_task_threads, false),
    38     _yielded_workers(0) {}
    40 GangWorker* YieldingFlexibleWorkGang::allocate_worker(int which) {
    41   YieldingFlexibleGangWorker* new_member =
    42       new YieldingFlexibleGangWorker(this, which);
    43   return (YieldingFlexibleGangWorker*) new_member;
    44 }
    46 // Run a task; returns when the task is done, or the workers yield,
    47 // or the task is aborted, or the work gang is terminated via stop().
    48 // A task that has been yielded can be continued via this interface
    49 // by using the same task repeatedly as the argument to the call.
    50 // It is expected that the YieldingFlexibleGangTask carries the appropriate
    51 // continuation information used by workers to continue the task
    52 // from its last yield point. Thus, a completed task will return
    53 // immediately with no actual work having been done by the workers.
    54 /////////////////////
    55 // Implementation notes: remove before checking in XXX
    56 /*
    57 Each gang is working on a task at a certain time.
    58 Some subset of workers may have yielded and some may
    59 have finished their quota of work. Until this task has
    60 been completed, the workers are bound to that task.
    61 Once the task has been completed, the gang unbinds
    62 itself from the task.
    64 The yielding work gang thus exports two invocation
    65 interfaces: run_task() and continue_task(). The
    66 first is used to initiate a new task and bind it
    67 to the workers; the second is used to continue an
    68 already bound task that has yielded. Upon completion
    69 the binding is released and a new binding may be
    70 created.
    72 The shape of a yielding work gang is as follows:
    74 Overseer invokes run_task(*task).
    75    Lock gang monitor
    76    Check that there is no existing binding for the gang
    77    If so, abort with an error
    78    Else, create a new binding of this gang to the given task
    79    Set number of active workers (as asked)
    80    Notify workers that work is ready to be done
    81      [the requisite # workers would then start up
    82       and do the task]
    83    Wait on the monitor until either
    84      all work is completed or the task has yielded
    85      -- this is normally done through
    86         yielded + completed == active
    87         [completed workers are reset to idle state by overseer?]
    88    return appropriate status to caller
    90 Overseer invokes continue_task(*task),
    91    Lock gang monitor
    92    Check that task is the same as current binding
    93    If not, abort with an error
    94    Else, set the number of active workers as requested?
    95    Notify workers that they can continue from yield points
    96     New workers can also start up as required
    97       while satisfying the constraint that
    98          active + yielded does not exceed required number
    99    Wait (as above).
   101 NOTE: In the above, for simplicity in a first iteration
   102   our gangs will be of fixed population and will not
   103   therefore be flexible work gangs, just yielding work
   104   gangs. Once this works well, we will in a second
   105   iteration/refinement introduce flexibility into
   106   the work gang.
   108 NOTE: we can always create a new gang per each iteration
   109   in order to get the flexibility, but we will for now
   110   desist from that simplified route.
   112  */
   113 /////////////////////
   114 void YieldingFlexibleWorkGang::start_task(YieldingFlexibleGangTask* new_task) {
   115   MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
   116   assert(task() == NULL, "Gang currently tied to a task");
   117   assert(new_task != NULL, "Null task");
   118   // Bind task to gang
   119   _task = new_task;
   120   new_task->set_gang(this);  // Establish 2-way binding to support yielding
   121   _sequence_number++;
   123   int requested_size = new_task->requested_size();
   124   assert(requested_size >= 0, "Should be non-negative");
   125   if (requested_size != 0) {
   126     _active_workers = MIN2(requested_size, total_workers());
   127   } else {
   128     _active_workers = total_workers();
   129   }
   130   new_task->set_actual_size(_active_workers);
   131   new_task->set_for_termination(_active_workers);
   133   assert(_started_workers == 0, "Tabula rasa non");
   134   assert(_finished_workers == 0, "Tabula rasa non");
   135   assert(_yielded_workers == 0, "Tabula rasa non");
   136   yielding_task()->set_status(ACTIVE);
   138   // Wake up all the workers, the first few will get to work,
   139   // and the rest will go back to sleep
   140   monitor()->notify_all();
   141   wait_for_gang();
   142 }
   144 void YieldingFlexibleWorkGang::wait_for_gang() {
   146   assert(monitor()->owned_by_self(), "Data race");
   147   // Wait for task to complete or yield
   148   for (Status status = yielding_task()->status();
   149        status != COMPLETED && status != YIELDED && status != ABORTED;
   150        status = yielding_task()->status()) {
   151     assert(started_workers() <= total_workers(), "invariant");
   152     assert(finished_workers() <= total_workers(), "invariant");
   153     assert(yielded_workers() <= total_workers(), "invariant");
   154     monitor()->wait(Mutex::_no_safepoint_check_flag);
   155   }
   156   switch (yielding_task()->status()) {
   157     case COMPLETED:
   158     case ABORTED: {
   159       assert(finished_workers() == total_workers(), "Inconsistent status");
   160       assert(yielded_workers() == 0, "Invariant");
   161       reset();   // for next task; gang<->task binding released
   162       break;
   163     }
   164     case YIELDED: {
   165       assert(yielded_workers() > 0, "Invariant");
   166       assert(yielded_workers() + finished_workers() == total_workers(),
   167              "Inconsistent counts");
   168       break;
   169     }
   170     case ACTIVE:
   171     case INACTIVE:
   172     case COMPLETING:
   173     case YIELDING:
   174     case ABORTING:
   175     default:
   176       ShouldNotReachHere();
   177   }
   178 }
   180 void YieldingFlexibleWorkGang::continue_task(
   181   YieldingFlexibleGangTask* gang_task) {
   183   MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
   184   assert(task() != NULL && task() == gang_task, "Incorrect usage");
   185   // assert(_active_workers == total_workers(), "For now");
   186   assert(_started_workers == _active_workers, "Precondition");
   187   assert(_yielded_workers > 0 && yielding_task()->status() == YIELDED,
   188          "Else why are we calling continue_task()");
   189   // Restart the yielded gang workers
   190   yielding_task()->set_status(ACTIVE);
   191   monitor()->notify_all();
   192   wait_for_gang();
   193 }
   195 void YieldingFlexibleWorkGang::reset() {
   196   _started_workers  = 0;
   197   _finished_workers = 0;
   198   yielding_task()->set_gang(NULL);
   199   _task = NULL;    // unbind gang from task
   200 }
   202 void YieldingFlexibleWorkGang::yield() {
   203   assert(task() != NULL, "Inconsistency; should have task binding");
   204   MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
   205   assert(yielded_workers() < total_workers(), "Consistency check");
   206   if (yielding_task()->status() == ABORTING) {
   207     // Do not yield; we need to abort as soon as possible
   208     // XXX NOTE: This can cause a performance pathology in the
   209     // current implementation in Mustang, as of today, and
   210     // pre-Mustang in that as soon as an overflow occurs,
   211     // yields will not be honoured. The right way to proceed
   212     // of course is to fix bug # TBF, so that abort's cause
   213     // us to return at each potential yield point.
   214     return;
   215   }
   216   if (++_yielded_workers + finished_workers() == total_workers()) {
   217     yielding_task()->set_status(YIELDED);
   218     monitor()->notify_all();
   219   } else {
   220     yielding_task()->set_status(YIELDING);
   221   }
   223   while (true) {
   224     switch (yielding_task()->status()) {
   225       case YIELDING:
   226       case YIELDED: {
   227         monitor()->wait(Mutex::_no_safepoint_check_flag);
   228         break;  // from switch
   229       }
   230       case ACTIVE:
   231       case ABORTING:
   232       case COMPLETING: {
   233         assert(_yielded_workers > 0, "Else why am i here?");
   234         _yielded_workers--;
   235         return;
   236       }
   237       case INACTIVE:
   238       case ABORTED:
   239       case COMPLETED:
   240       default: {
   241         ShouldNotReachHere();
   242       }
   243     }
   244   }
   245   // Only return is from inside switch statement above
   246   ShouldNotReachHere();
   247 }
   249 void YieldingFlexibleWorkGang::abort() {
   250   assert(task() != NULL, "Inconsistency; should have task binding");
   251   MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
   252   assert(yielded_workers() < active_workers(), "Consistency check");
   253   #ifndef PRODUCT
   254     switch (yielding_task()->status()) {
   255       // allowed states
   256       case ACTIVE:
   257       case ABORTING:
   258       case COMPLETING:
   259       case YIELDING:
   260         break;
   261       // not allowed states
   262       case INACTIVE:
   263       case ABORTED:
   264       case COMPLETED:
   265       case YIELDED:
   266       default:
   267         ShouldNotReachHere();
   268     }
   269   #endif // !PRODUCT
   270   Status prev_status = yielding_task()->status();
   271   yielding_task()->set_status(ABORTING);
   272   if (prev_status == YIELDING) {
   273     assert(yielded_workers() > 0, "Inconsistency");
   274     // At least one thread has yielded, wake it up
   275     // so it can go back to waiting stations ASAP.
   276     monitor()->notify_all();
   277   }
   278 }
   280 ///////////////////////////////
   281 // YieldingFlexibleGangTask
   282 ///////////////////////////////
   283 void YieldingFlexibleGangTask::yield() {
   284   assert(gang() != NULL, "No gang to signal");
   285   gang()->yield();
   286 }
   288 void YieldingFlexibleGangTask::abort() {
   289   assert(gang() != NULL, "No gang to signal");
   290   gang()->abort();
   291 }
   293 ///////////////////////////////
   294 // YieldingFlexibleGangWorker
   295 ///////////////////////////////
   296 void YieldingFlexibleGangWorker::loop() {
   297   int previous_sequence_number = 0;
   298   Monitor* gang_monitor = gang()->monitor();
   299   MutexLockerEx ml(gang_monitor, Mutex::_no_safepoint_check_flag);
   300   WorkData data;
   301   int id;
   302   while (true) {
   303     // Check if there is work to do or if we have been asked
   304     // to terminate
   305     gang()->internal_worker_poll(&data);
   306     if (data.terminate()) {
   307       // We have been asked to terminate.
   308       assert(gang()->task() == NULL, "No task binding");
   309       // set_status(TERMINATED);
   310       return;
   311     } else if (data.task() != NULL &&
   312                data.sequence_number() != previous_sequence_number) {
   313       // There is work to be done.
   314       // First check if we need to become active or if there
   315       // are already the requisite number of workers
   316       if (gang()->started_workers() == yf_gang()->active_workers()) {
   317         // There are already enough workers, we do not need to
   318         // to run; fall through and wait on monitor.
   319       } else {
   320         // We need to pitch in and do the work.
   321         assert(gang()->started_workers() < yf_gang()->active_workers(),
   322                "Unexpected state");
   323         id = gang()->started_workers();
   324         gang()->internal_note_start();
   325         // Now, release the gang mutex and do the work.
   326         {
   327           MutexUnlockerEx mul(gang_monitor, Mutex::_no_safepoint_check_flag);
   328           data.task()->work(id);   // This might include yielding
   329         }
   330         // Reacquire monitor and note completion of this worker
   331         gang()->internal_note_finish();
   332         // Update status of task based on whether all workers have
   333         // finished or some have yielded
   334         assert(data.task() == gang()->task(), "Confused task binding");
   335         if (gang()->finished_workers() == yf_gang()->active_workers()) {
   336           switch (data.yf_task()->status()) {
   337             case ABORTING: {
   338               data.yf_task()->set_status(ABORTED);
   339               break;
   340             }
   341             case ACTIVE:
   342             case COMPLETING: {
   343               data.yf_task()->set_status(COMPLETED);
   344               break;
   345             }
   346             default:
   347               ShouldNotReachHere();
   348           }
   349           gang_monitor->notify_all();  // Notify overseer
   350         } else { // at least one worker is still working or yielded
   351           assert(gang()->finished_workers() < yf_gang()->active_workers(),
   352                  "Counts inconsistent");
   353           switch (data.yf_task()->status()) {
   354             case ACTIVE: {
   355               // first, but not only thread to complete
   356               data.yf_task()->set_status(COMPLETING);
   357               break;
   358             }
   359             case YIELDING: {
   360               if (gang()->finished_workers() + yf_gang()->yielded_workers()
   361                   == yf_gang()->active_workers()) {
   362                 data.yf_task()->set_status(YIELDED);
   363                 gang_monitor->notify_all();  // notify overseer
   364               }
   365               break;
   366             }
   367             case ABORTING:
   368             case COMPLETING: {
   369               break; // nothing to do
   370             }
   371             default: // everything else: INACTIVE, YIELDED, ABORTED, COMPLETED
   372               ShouldNotReachHere();
   373           }
   374         }
   375       }
   376     }
   377     // Remember the sequence number
   378     previous_sequence_number = data.sequence_number();
   379     // Wait for more work
   380     gang_monitor->wait(Mutex::_no_safepoint_check_flag);
   381   }
   382 }

mercurial