src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp

Mon, 12 Mar 2012 14:59:00 -0700

author
johnc
date
Mon, 12 Mar 2012 14:59:00 -0700
changeset 3666
64bf7c8270cb
parent 3464
eff609af17d7
child 3710
5c86f8211d1e
permissions
-rw-r--r--

7147724: G1: hang in SurrogateLockerThread::manipulatePLL
Summary: Attempting to initiate a marking cycle when allocating a humongous object can, if a marking cycle is successfully initiated by another thread, result in the allocating thread spinning until the marking cycle is complete. Eliminate a deadlock between the main ConcurrentMarkThread, the SurrogateLocker thread, the VM thread, and a mutator thread waiting on the SecondaryFreeList_lock (while free regions are going to become available) by not manipulating the pending list lock during the prologue and epilogue of the cleanup pause.
Reviewed-by: brutisso, jcoomes, tonyp

     1 /*
     2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
    27 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
    28 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
    29 #include "gc_implementation/g1/g1MMUTracker.hpp"
    30 #include "gc_implementation/g1/vm_operations_g1.hpp"
    31 #include "memory/resourceArea.hpp"
    32 #include "runtime/vmThread.hpp"
    34 // ======= Concurrent Mark Thread ========
    36 // The CM thread is created when the G1 garbage collector is used
    38 SurrogateLockerThread*
    39      ConcurrentMarkThread::_slt = NULL;
    41 ConcurrentMarkThread::ConcurrentMarkThread(ConcurrentMark* cm) :
    42   ConcurrentGCThread(),
    43   _cm(cm),
    44   _started(false),
    45   _in_progress(false),
    46   _vtime_accum(0.0),
    47   _vtime_mark_accum(0.0) {
    48   create_and_start();
    49 }
    51 class CMCheckpointRootsFinalClosure: public VoidClosure {
    53   ConcurrentMark* _cm;
    54 public:
    56   CMCheckpointRootsFinalClosure(ConcurrentMark* cm) :
    57     _cm(cm) {}
    59   void do_void(){
    60     _cm->checkpointRootsFinal(false); // !clear_all_soft_refs
    61   }
    62 };
    64 class CMCleanUp: public VoidClosure {
    65   ConcurrentMark* _cm;
    66 public:
    68   CMCleanUp(ConcurrentMark* cm) :
    69     _cm(cm) {}
    71   void do_void(){
    72     _cm->cleanup();
    73   }
    74 };
    78 void ConcurrentMarkThread::run() {
         // Entry point of the concurrent mark thread. After initialization it
         // loops until _should_terminate is set. Each iteration blocks in
         // sleepBeforeNextCycle() and then drives one marking cycle:
         //   1. root region scan (scanRootRegions),
         //   2. concurrent marking (markFromRoots) followed by a "GC remark"
         //      VM operation, repeated while restart_for_overflow() is true,
         //   3. a "GC cleanup" VM operation (or, if marking was aborted,
         //      set_marking_complete() while joined to the STS),
         //   4. completeCleanup() if cleanup left free regions coming,
         //   5. record_concurrent_mark_cleanup_completed() (unless aborted),
         //   6. clearNextBitmap(),
         //   7. increment_full_collections_completed().
         // Once the loop exits, terminate() is called.
    79   initialize_in_thread();
    80   _vtime_start = os::elapsedVTime();
    81   wait_for_universe_init();
    83   G1CollectedHeap* g1h = G1CollectedHeap::heap();
    84   G1CollectorPolicy* g1_policy = g1h->g1_policy();
    85   G1MMUTracker *mmu_tracker = g1_policy->mmu_tracker();
    86   Thread *current_thread = Thread::current();
    88   while (!_should_terminate) {
    89     // wait until started is set.
    90     sleepBeforeNextCycle();
           // Inner scope: the ResourceMark and HandleMark declared below are
           // destroyed at its closing brace, i.e. before the full collection
           // count is updated at the bottom of the loop.
    91     {
    92       ResourceMark rm;
    93       HandleMark   hm;
    94       double cycle_start = os::elapsedVTime();
    95       char verbose_str[128];
    97       // We have to ensure that we finish scanning the root regions
    98       // before the next GC takes place. To ensure this we have to
    99       // make sure that we do not join the STS until the root regions
   100       // have been scanned. If we did then it's possible that a
   101       // subsequent GC could block us from joining the STS and proceed
   102       // without the root regions having been scanned, which would be a
   103       // correctness issue.
   105       double scan_start = os::elapsedTime();
   106       if (!cm()->has_aborted()) {
   107         if (PrintGC) {
   108           gclog_or_tty->date_stamp(PrintGCDateStamps);
   109           gclog_or_tty->stamp(PrintGCTimeStamps);
   110           gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
   111         }
   113         _cm->scanRootRegions();
   115         double scan_end = os::elapsedTime();
   116         if (PrintGC) {
   117           gclog_or_tty->date_stamp(PrintGCDateStamps);
   118           gclog_or_tty->stamp(PrintGCTimeStamps);
   119           gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf]",
   120                                  scan_end - scan_start);
   121         }
   122       }
   124       double mark_start_sec = os::elapsedTime();
   125       if (PrintGC) {
   126         gclog_or_tty->date_stamp(PrintGCDateStamps);
   127         gclog_or_tty->stamp(PrintGCTimeStamps);
   128         gclog_or_tty->print_cr("[GC concurrent-mark-start]");
   129       }
             // Mark, then remark. The pair is repeated for as long as
             // restart_for_overflow() is true after the remark. iter counts the
             // attempts and is only used in the trace message below.
   131       int iter = 0;
   132       do {
   133         iter++;
   134         if (!cm()->has_aborted()) {
   135           _cm->markFromRoots();
   136         }
   138         double mark_end_time = os::elapsedVTime();
   139         double mark_end_sec = os::elapsedTime();
               // NOTE(review): cycle_start is taken once, before this loop, so
               // when marking restarts the time of the earlier iterations is
               // added again here - confirm that this is intended.
   140         _vtime_mark_accum += (mark_end_time - cycle_start);
   141         if (!cm()->has_aborted()) {
   142           if (g1_policy->adaptive_young_list_length()) {
                   // Before requesting the remark pause, sleep for the time
                   // that mmu_tracker->when_ms() returns for the predicted
                   // remark duration.
   143             double now = os::elapsedTime();
   144             double remark_prediction_ms = g1_policy->predict_remark_time_ms();
   145             jlong sleep_time_ms = mmu_tracker->when_ms(now, remark_prediction_ms);
   146             os::sleep(current_thread, sleep_time_ms, false);
   147           }
   149           if (PrintGC) {
   150             gclog_or_tty->date_stamp(PrintGCDateStamps);
   151             gclog_or_tty->stamp(PrintGCTimeStamps);
   152             gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]",
   153                                       mark_end_sec - mark_start_sec);
   154           }
                 // Hand the remark (checkpointRootsFinal) to the VM thread as a
                 // VM operation labelled "GC remark".
   156           CMCheckpointRootsFinalClosure final_cl(_cm);
   157           sprintf(verbose_str, "GC remark");
   158           VM_CGC_Operation op(&final_cl, verbose_str, true /* needs_pll */);
   159           VMThread::execute(&op);
   160         }
   161         if (cm()->restart_for_overflow() &&
   162             G1TraceMarkStackOverflow) {
   163           gclog_or_tty->print_cr("Restarting conc marking because of MS overflow "
   164                                  "in remark (restart #%d).", iter);
   165         }
   167         if (cm()->restart_for_overflow()) {
   168           if (PrintGC) {
   169             gclog_or_tty->date_stamp(PrintGCDateStamps);
   170             gclog_or_tty->stamp(PrintGCTimeStamps);
   171             gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]");
   172           }
   173         }
   174       } while (cm()->restart_for_overflow());
   176       double end_time = os::elapsedVTime();
   177       // Update the total virtual time before doing this, since it will try
   178       // to measure it to get the vtime for this marking.  We purposely
   179       // neglect the presumably-short "completeCleanup" phase here.
   180       _vtime_accum = (end_time - _vtime_start);
   182       if (!cm()->has_aborted()) {
   183         if (g1_policy->adaptive_young_list_length()) {
                 // Same pacing as for the remark pause, using the predicted
                 // cleanup duration.
   184           double now = os::elapsedTime();
   185           double cleanup_prediction_ms = g1_policy->predict_cleanup_time_ms();
   186           jlong sleep_time_ms = mmu_tracker->when_ms(now, cleanup_prediction_ms);
   187           os::sleep(current_thread, sleep_time_ms, false);
   188         }
               // Hand the cleanup to the VM thread as a VM operation labelled
               // "GC cleanup". Unlike the remark operation it is created with
               // needs_pll == false: the fix for 7147724 stopped manipulating
               // the pending list lock around the cleanup pause to eliminate a
               // deadlock.
   190         CMCleanUp cl_cl(_cm);
   191         sprintf(verbose_str, "GC cleanup");
   192         VM_CGC_Operation op(&cl_cl, verbose_str, false /* needs_pll */);
   193         VMThread::execute(&op);
   194       } else {
   195         // We don't want to update the marking status if a GC pause
   196         // is already underway.
   197         _sts.join();
   198         g1h->set_marking_complete();
   199         _sts.leave();
   200       }
   202       // Check if cleanup set the free_regions_coming flag. If it
   203       // hasn't, we can just skip the next step.
   204       if (g1h->free_regions_coming()) {
   205         // The following will finish freeing up any regions that we
   206         // found to be empty during cleanup. We'll do this part
   207         // without joining the suspendible set. If an evacuation pause
   208         // takes place, then we would carry on freeing regions in
   209         // case they are needed by the pause. If a Full GC takes
   210         // place, it would wait for us to process the regions
   211         // reclaimed by cleanup.
   213         double cleanup_start_sec = os::elapsedTime();
   214         if (PrintGC) {
   215           gclog_or_tty->date_stamp(PrintGCDateStamps);
   216           gclog_or_tty->stamp(PrintGCTimeStamps);
   217           gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
   218         }
   220         // Now do the concurrent cleanup operation.
   221         _cm->completeCleanup();
   223         // Notify anyone who's waiting that there are no more free
   224         // regions coming. We have to do this before we join the STS
   225         // (in fact, we should not attempt to join the STS in the
   226         // interval between finishing the cleanup pause and clearing
   227         // the free_regions_coming flag) otherwise we might deadlock:
   228         // a GC worker could be blocked waiting for the notification
   229         // whereas this thread will be blocked for the pause to finish
   230         // while it's trying to join the STS, which is conditional on
   231         // the GC workers finishing.
   232         g1h->reset_free_regions_coming();
   234         double cleanup_end_sec = os::elapsedTime();
   235         if (PrintGC) {
   236           gclog_or_tty->date_stamp(PrintGCDateStamps);
   237           gclog_or_tty->stamp(PrintGCTimeStamps);
   238           gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]",
   239                                  cleanup_end_sec - cleanup_start_sec);
   240         }
   241       }
   242       guarantee(cm()->cleanup_list_is_empty(),
   243                 "at this point there should be no regions on the cleanup list");
   245       // There is a tricky race before recording that the concurrent
   246       // cleanup has completed and a potential Full GC starting around
   247       // the same time. We want to make sure that the Full GC calls
   248       // abort() on concurrent mark after
   249       // record_concurrent_mark_cleanup_completed(), since abort() is
   250       // the method that will reset the concurrent mark state. If we
   251       // end up calling record_concurrent_mark_cleanup_completed()
   252       // after abort() then we might incorrectly undo some of the work
   253       // abort() did. Checking the has_aborted() flag after joining
   254       // the STS allows the correct ordering of the two methods. There
   255       // are two scenarios:
   256       //
   257       // a) If we reach here before the Full GC, the fact that we have
   258       // joined the STS means that the Full GC cannot start until we
   259       // leave the STS, so record_concurrent_mark_cleanup_completed()
   260       // will complete before abort() is called.
   261       //
   262       // b) If we reach here during the Full GC, we'll be held up from
   263       // joining the STS until the Full GC is done, which means that
   264       // abort() will have completed and has_aborted() will return
   265       // true to prevent us from calling
   266       // record_concurrent_mark_cleanup_completed() (and, in fact, it's
   267       // not needed any more as the concurrent mark state has been
   268       // already reset).
   269       _sts.join();
   270       if (!cm()->has_aborted()) {
   271         g1_policy->record_concurrent_mark_cleanup_completed();
   272       }
   273       _sts.leave();
   275       if (cm()->has_aborted()) {
   276         if (PrintGC) {
   277           gclog_or_tty->date_stamp(PrintGCDateStamps);
   278           gclog_or_tty->stamp(PrintGCTimeStamps);
   279           gclog_or_tty->print_cr("[GC concurrent-mark-abort]");
   280         }
   281       }
   283       // We now want to allow clearing of the marking bitmap to be
   284       // suspended by a collection pause.
   285       _sts.join();
   286       _cm->clearNextBitmap();
   287       _sts.leave();
   288     }
   290     // Update the number of full collections that have been
   291     // completed. This will also notify the FullGCCount_lock in case a
   292     // Java thread is waiting for a full GC to happen (e.g., it
   293     // called System.gc() with +ExplicitGCInvokesConcurrent).
   294     _sts.join();
   295     g1h->increment_full_collections_completed(true /* concurrent */);
   296     _sts.leave();
   297   }
   298   assert(_should_terminate, "just checking");
   300   terminate();
   301 }
   304 void ConcurrentMarkThread::yield() {
   305   _sts.yield("Concurrent Mark");
   306 }
   308 void ConcurrentMarkThread::stop() {
   309   // it is ok to take late safepoints here, if needed
   310   MutexLockerEx mu(Terminator_lock);
   311   _should_terminate = true;
   312   while (!_has_terminated) {
   313     Terminator_lock->wait();
   314   }
   315 }
   317 void ConcurrentMarkThread::print() const {
   318   print_on(tty);
   319 }
   321 void ConcurrentMarkThread::print_on(outputStream* st) const {
   322   st->print("\"G1 Main Concurrent Mark GC Thread\" ");
   323   Thread::print_on(st);
   324   st->cr();
   325 }
   327 void ConcurrentMarkThread::sleepBeforeNextCycle() {
   328   // We join here because we don't want to do the "shouldConcurrentMark()"
   329   // below while the world is otherwise stopped.
   330   assert(!in_progress(), "should have been cleared");
   332   MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
   333   while (!started()) {
   334     CGC_lock->wait(Mutex::_no_safepoint_check_flag);
   335   }
   336   set_in_progress();
   337   clear_started();
   338 }
   340 // Note: As is the case with CMS - this method, although exported
   341 // by the ConcurrentMarkThread, which is a non-JavaThread, can only
   342 // be called by a JavaThread. Currently this is done at vm creation
   343 // time (post-vm-init) by the main/Primordial (Java)Thread.
   344 // XXX Consider changing this in the future to allow the CM thread
   345 // itself to create this thread?
   346 void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) {
   347   assert(UseG1GC, "SLT thread needed only for concurrent GC");
   348   assert(THREAD->is_Java_thread(), "must be a Java thread");
   349   assert(_slt == NULL, "SLT already created");
   350   _slt = SurrogateLockerThread::make(THREAD);
   351 }

mercurial