changeset:   971:5b39c489c39d
parent:      968:dc3ad84615cf
parent:      970:4e400c36026f
child:       972:3f844a28c5f4
child:       976:23673011938d
author:      ysr
date:        Thu, 29 Jan 2009 21:25:42 -0800
summary:     Merge

     1.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Jan 26 12:07:54 2009 -0800
     1.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Jan 29 21:25:42 2009 -0800
     1.3 @@ -8508,7 +8508,7 @@
     1.4    size_t i = num;
     1.5    oop  cur = _overflow_list;
     1.6    const markOop proto = markOopDesc::prototype();
     1.7 -  NOT_PRODUCT(size_t n = 0;)
     1.8 +  NOT_PRODUCT(ssize_t n = 0;)
     1.9    for (oop next; i > 0 && cur != NULL; cur = next, i--) {
    1.10      next = oop(cur->mark());
    1.11      cur->set_mark(proto);   // until proven otherwise
    1.12 @@ -8525,45 +8525,131 @@
    1.13    return !stack->isEmpty();
    1.14  }
    1.15  
    1.16 -// Multi-threaded; use CAS to break off a prefix
    1.17 +#define BUSY  (oop(0x1aff1aff))
    1.18 +// (MT-safe) Get a prefix of at most "num" from the list.
    1.19 +// The overflow list is chained through the mark word of
    1.20 +// each object in the list. We fetch the entire list,
    1.21 +// break off a prefix of the right size and return the
    1.22 +// remainder. If other threads try to take objects from
    1.23 +// the overflow list at that time, they will wait for
    1.24 +// some time to see if data becomes available. If (and
    1.25 +// only if) another thread places one or more object(s)
    1.26 +// on the global list before we have returned the suffix
    1.27 +// to the global list, we will walk down our local list
    1.28 +// to find its end and append the global list to
    1.29 +// our suffix before returning it. This suffix walk can
    1.30 +// prove to be expensive (quadratic in the amount of traffic)
    1.31 +// when there are many objects in the overflow list and
    1.32 +// there is much producer-consumer contention on the list.
    1.33 +// *NOTE*: The overflow list manipulation code here and
    1.34 +// in ParNewGeneration:: are very similar in shape,
    1.35 +// except that in the ParNew case we use the old (from/eden)
    1.36 +// copy of the object to thread the list via its klass word.
    1.37 +// Because of the common code, if you make any changes in
    1.38 +// the code below, please check the ParNew version to see if
    1.39 +// similar changes might be needed.
    1.40 +// CR 6797058 has been filed to consolidate the common code.
    1.41  bool CMSCollector::par_take_from_overflow_list(size_t num,
    1.42                                                 OopTaskQueue* work_q) {
    1.43 -  assert(work_q->size() == 0, "That's the current policy");
    1.44 +  assert(work_q->size() == 0, "First empty local work queue");
    1.45    assert(num < work_q->max_elems(), "Can't bite more than we can chew");
    1.46    if (_overflow_list == NULL) {
    1.47      return false;
    1.48    }
    1.49    // Grab the entire list; we'll put back a suffix
    1.50 -  oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
    1.51 -  if (prefix == NULL) {  // someone grabbed it before we did ...
    1.52 -    // ... we could spin for a short while, but for now we don't
    1.53 -    return false;
    1.54 -  }
    1.55 +  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
    1.56 +  Thread* tid = Thread::current();
    1.57 +  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
    1.58 +  size_t sleep_time_millis = MAX2((size_t)1, num/100);
    1.59 +  // If the list is busy, we spin for a short while,
    1.60 +  // sleeping between attempts to get the list.
    1.61 +  for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
    1.62 +    os::sleep(tid, sleep_time_millis, false);
    1.63 +    if (_overflow_list == NULL) {
    1.64 +      // Nothing left to take
    1.65 +      return false;
    1.66 +    } else if (_overflow_list != BUSY) {
    1.67 +      // Try and grab the prefix
    1.68 +      prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
    1.69 +    }
    1.70 +  }
    1.71 +  // If the list was found to be empty, or we spun long
    1.72 +  // enough, we give up and return empty-handed. If we leave
    1.73 +  // the list in the BUSY state below, it must be the case that
    1.74 +  // some other thread holds the overflow list and will set it
    1.75 +  // to a non-BUSY state in the future.
    1.76 +  if (prefix == NULL || prefix == BUSY) {
    1.77 +     // Nothing to take or waited long enough
    1.78 +     if (prefix == NULL) {
    1.79 +       // Write back the NULL in case we overwrote it with BUSY above
    1.80 +       // and it is still the same value.
    1.81 +       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
    1.82 +     }
    1.83 +     return false;
    1.84 +  }
    1.85 +  assert(prefix != NULL && prefix != BUSY, "Error");
    1.86    size_t i = num;
    1.87    oop cur = prefix;
    1.88 +  // Walk down the first "num" objects, unless we reach the end.
    1.89    for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
    1.90 -  if (cur->mark() != NULL) {
    1.91 +  if (cur->mark() == NULL) {
    1.92 +    // We have "num" or fewer elements in the list, so there
    1.93 +    // is nothing to return to the global list.
    1.94 +    // Write back the NULL in lieu of the BUSY we wrote
    1.95 +    // above, if it is still the same value.
    1.96 +    if (_overflow_list == BUSY) {
    1.97 +      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
    1.98 +    }
    1.99 +  } else {
    1.100 +    // Chop off the suffix and return it to the global list.
   1.101 +    assert(cur->mark() != BUSY, "Error");
   1.102      oop suffix_head = cur->mark(); // suffix will be put back on global list
   1.103      cur->set_mark(NULL);           // break off suffix
   1.104 -    // Find tail of suffix so we can prepend suffix to global list
   1.105 -    for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
   1.106 -    oop suffix_tail = cur;
   1.107 -    assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
   1.108 -           "Tautology");
   1.109 +    // It's possible that the list is still in the empty(busy) state
   1.110 +    // we left it in a short while ago; in that case we may be
   1.111 +    // able to place back the suffix without incurring the cost
   1.112 +    // of a walk down the list.
   1.113      oop observed_overflow_list = _overflow_list;
   1.114 -    do {
   1.115 -      cur = observed_overflow_list;
   1.116 -      suffix_tail->set_mark(markOop(cur));
   1.117 +    oop cur_overflow_list = observed_overflow_list;
   1.118 +    bool attached = false;
   1.119 +    while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
   1.120        observed_overflow_list =
   1.121 -        (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur);
   1.122 -    } while (cur != observed_overflow_list);
   1.123 +        (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
   1.124 +      if (cur_overflow_list == observed_overflow_list) {
   1.125 +        attached = true;
   1.126 +        break;
   1.127 +      } else cur_overflow_list = observed_overflow_list;
   1.128 +    }
   1.129 +    if (!attached) {
   1.130 +      // Too bad, someone else sneaked in (at least) an element; we'll need
   1.131 +      // to do a splice. Find tail of suffix so we can prepend suffix to global
   1.132 +      // list.
   1.133 +      for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
   1.134 +      oop suffix_tail = cur;
   1.135 +      assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
   1.136 +             "Tautology");
   1.137 +      observed_overflow_list = _overflow_list;
   1.138 +      do {
   1.139 +        cur_overflow_list = observed_overflow_list;
   1.140 +        if (cur_overflow_list != BUSY) {
   1.141 +          // Do the splice ...
   1.142 +          suffix_tail->set_mark(markOop(cur_overflow_list));
   1.143 +        } else { // cur_overflow_list == BUSY
   1.144 +          suffix_tail->set_mark(NULL);
   1.145 +        }
   1.146 +        // ... and try to place spliced list back on overflow_list ...
   1.147 +        observed_overflow_list =
   1.148 +          (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
   1.149 +      } while (cur_overflow_list != observed_overflow_list);
   1.150 +      // ... until we have succeeded in doing so.
   1.151 +    }
   1.152    }
   1.153  
   1.154    // Push the prefix elements on work_q
   1.155    assert(prefix != NULL, "control point invariant");
   1.156    const markOop proto = markOopDesc::prototype();
   1.157    oop next;
   1.158 -  NOT_PRODUCT(size_t n = 0;)
   1.159 +  NOT_PRODUCT(ssize_t n = 0;)
   1.160    for (cur = prefix; cur != NULL; cur = next) {
   1.161      next = oop(cur->mark());
   1.162      cur->set_mark(proto);   // until proven otherwise
   1.163 @@ -8597,11 +8683,16 @@
   1.164    oop cur_overflow_list;
   1.165    do {
   1.166      cur_overflow_list = observed_overflow_list;
   1.167 -    p->set_mark(markOop(cur_overflow_list));
   1.168 +    if (cur_overflow_list != BUSY) {
   1.169 +      p->set_mark(markOop(cur_overflow_list));
   1.170 +    } else {
   1.171 +      p->set_mark(NULL);
   1.172 +    }
   1.173      observed_overflow_list =
   1.174        (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
   1.175    } while (cur_overflow_list != observed_overflow_list);
   1.176  }
   1.177 +#undef BUSY
   1.178  
   1.179  // Single threaded
   1.180  // General Note on GrowableArray: pushes may silently fail
   1.181 @@ -8610,7 +8701,7 @@
   1.182  // a lot of code in the JVM. The prudent thing for GrowableArray
   1.183  // to do (for now) is to exit with an error. However, that may
   1.184  // be too draconian in some cases because the caller may be
   1.185 -// able to recover without much harm. For suych cases, we
   1.186 +// able to recover without much harm. For such cases, we
   1.187  // should probably introduce a "soft_push" method which returns
   1.188  // an indication of success or failure with the assumption that
   1.189  // the caller may be able to recover from a failure; code in
   1.190 @@ -8618,8 +8709,6 @@
   1.191  // failures where possible, thus, incrementally hardening the VM
   1.192  // in such low resource situations.
   1.193  void CMSCollector::preserve_mark_work(oop p, markOop m) {
   1.194 -  int PreserveMarkStackSize = 128;
   1.195 -
   1.196    if (_preserved_oop_stack == NULL) {
   1.197      assert(_preserved_mark_stack == NULL,
   1.198             "bijection with preserved_oop_stack");
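The comment block added above CMSCollector::par_take_from_overflow_list() describes a claim-and-splice protocol built around the BUSY sentinel. The stand-alone sketch below models that protocol with C++11 atomics purely for illustration: the names (Node, OverflowList, push, take_prefix) are invented here, HotSpot itself threads the list through mark/klass words and uses Atomic::xchg_ptr/cmpxchg_ptr, and the sketch omits the spin-and-sleep wait as well as the fast path that skips the suffix walk while the list is still empty or BUSY.

    #include <atomic>
    #include <cstddef>

    struct Node { Node* next = nullptr; };

    // Sentinel meaning "list temporarily claimed by a consumer".
    static Node* const BUSY = reinterpret_cast<Node*>(0x1aff1aff);

    struct OverflowList {
      std::atomic<Node*> head{nullptr};

      // Producer side: treat BUSY like an empty list, as push_on_overflow_list does.
      void push(Node* n) {
        Node* observed = head.load();
        for (;;) {
          n->next = (observed == BUSY) ? nullptr : observed;
          if (head.compare_exchange_strong(observed, n)) break;
          // observed now holds the freshly read head; retry
        }
      }

      // Consumer side: claim the whole list, keep a prefix of at most num (>= 1)
      // nodes, and publish the remainder back, splicing in anything pushed meanwhile.
      Node* take_prefix(std::size_t num) {
        Node* prefix = head.exchange(BUSY);
        if (prefix == nullptr || prefix == BUSY) {
          if (prefix == nullptr) {
            Node* expected = BUSY;                     // undo our BUSY if still there
            head.compare_exchange_strong(expected, nullptr);
          }
          return nullptr;                              // nothing taken (no wait here)
        }
        Node* cur = prefix;
        for (std::size_t i = 1; i < num && cur->next != nullptr; ++i) cur = cur->next;
        Node* suffix = cur->next;
        cur->next = nullptr;                           // break off the prefix
        if (suffix == nullptr) {
          Node* expected = BUSY;                       // list exhausted: restore NULL
          head.compare_exchange_strong(expected, nullptr);
          return prefix;
        }
        Node* tail = suffix;                           // find suffix tail for the splice
        while (tail->next != nullptr) tail = tail->next;
        Node* observed = head.load();
        for (;;) {
          tail->next = (observed == BUSY) ? nullptr : observed;
          if (head.compare_exchange_strong(observed, suffix)) break;
        }
        return prefix;
      }
    };

The key invariant, as in the patch, is that a producer observing BUSY treats the list as empty, so the consumer that installed BUSY must eventually replace it with either NULL or the returned suffix.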
     2.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Mon Jan 26 12:07:54 2009 -0800
     2.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Thu Jan 29 21:25:42 2009 -0800
     2.3 @@ -595,7 +595,7 @@
     2.4    size_t        _ser_kac_preclean_ovflw;
     2.5    size_t        _ser_kac_ovflw;
     2.6    size_t        _par_kac_ovflw;
     2.7 -  NOT_PRODUCT(size_t _num_par_pushes;)
     2.8 +  NOT_PRODUCT(ssize_t _num_par_pushes;)
     2.9  
    2.10    // ("Weak") Reference processing support
    2.11    ReferenceProcessor*            _ref_processor;
     3.1 --- a/src/share/vm/gc_implementation/includeDB_gc_parNew	Mon Jan 26 12:07:54 2009 -0800
     3.2 +++ b/src/share/vm/gc_implementation/includeDB_gc_parNew	Thu Jan 29 21:25:42 2009 -0800
     3.3 @@ -79,6 +79,7 @@
     3.4  parNewGeneration.cpp                    sharedHeap.hpp
     3.5  parNewGeneration.cpp                    space.hpp
     3.6  parNewGeneration.cpp                    spaceDecorator.hpp
     3.7 +parNewGeneration.cpp                    thread.hpp
     3.8  parNewGeneration.cpp                    workgroup.hpp
     3.9  
    3.10  parNewGeneration.hpp                    defNewGeneration.hpp
     4.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Jan 26 12:07:54 2009 -0800
     4.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Jan 29 21:25:42 2009 -0800
     4.3 @@ -404,6 +404,8 @@
     4.4      if (terminator()->offer_termination()) break;
     4.5      par_scan_state()->end_term_time();
     4.6    }
     4.7 +  assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0,
     4.8 +         "Broken overflow list?");
     4.9    // Finish the last termination pause.
    4.10    par_scan_state()->end_term_time();
    4.11  }
    4.12 @@ -456,6 +458,8 @@
    4.13    _is_alive_closure(this),
    4.14    _plab_stats(YoungPLABSize, PLABWeight)
    4.15  {
    4.16 +  NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;)
    4.17 +  NOT_PRODUCT(_num_par_pushes = 0;)
    4.18    _task_queues = new ObjToScanQueueSet(ParallelGCThreads);
    4.19    guarantee(_task_queues != NULL, "task_queues allocation failure.");
    4.20  
    4.21 @@ -993,12 +997,19 @@
    4.22               "push forwarded object");
    4.23      }
    4.24      // Push it on one of the queues of to-be-scanned objects.
    4.25 -    if (!par_scan_state->work_queue()->push(obj_to_push)) {
    4.26 +    bool simulate_overflow = false;
    4.27 +    NOT_PRODUCT(
    4.28 +      if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
    4.29 +        // simulate a stack overflow
    4.30 +        simulate_overflow = true;
    4.31 +      }
    4.32 +    )
    4.33 +    if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
    4.34        // Add stats for overflow pushes.
    4.35        if (Verbose && PrintGCDetails) {
    4.36          gclog_or_tty->print("queue overflow!\n");
    4.37        }
    4.38 -      push_on_overflow_list(old);
    4.39 +      push_on_overflow_list(old, par_scan_state);
    4.40        par_scan_state->note_overflow_push();
    4.41      }
    4.42      par_scan_state->note_push();
    4.43 @@ -1110,9 +1121,16 @@
    4.44               "push forwarded object");
    4.45      }
    4.46      // Push it on one of the queues of to-be-scanned objects.
    4.47 -    if (!par_scan_state->work_queue()->push(obj_to_push)) {
    4.48 +    bool simulate_overflow = false;
    4.49 +    NOT_PRODUCT(
    4.50 +      if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
    4.51 +        // simulate a stack overflow
    4.52 +        simulate_overflow = true;
    4.53 +      }
    4.54 +    )
    4.55 +    if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
    4.56        // Add stats for overflow pushes.
    4.57 -      push_on_overflow_list(old);
    4.58 +      push_on_overflow_list(old, par_scan_state);
    4.59        par_scan_state->note_overflow_push();
    4.60      }
    4.61      par_scan_state->note_push();
    4.62 @@ -1135,89 +1153,190 @@
    4.63    return forward_ptr;
    4.64  }
    4.65  
    4.66 -void ParNewGeneration::push_on_overflow_list(oop from_space_obj) {
    4.67 -  oop cur_overflow_list = _overflow_list;
    4.68 +#ifndef PRODUCT
    4.69 +// It's OK to call this multi-threaded;  the worst thing
    4.70 +// that can happen is that we'll get a bunch of closely
     4.71 +// spaced simulated overflows, but that's OK, in fact
    4.72 +// probably good as it would exercise the overflow code
    4.73 +// under contention.
    4.74 +bool ParNewGeneration::should_simulate_overflow() {
    4.75 +  if (_overflow_counter-- <= 0) { // just being defensive
    4.76 +    _overflow_counter = ParGCWorkQueueOverflowInterval;
    4.77 +    return true;
    4.78 +  } else {
    4.79 +    return false;
    4.80 +  }
    4.81 +}
    4.82 +#endif
    4.83 +
    4.84 +#define BUSY (oop(0x1aff1aff))
    4.85 +void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
    4.86    // if the object has been forwarded to itself, then we cannot
    4.87    // use the klass pointer for the linked list.  Instead we have
    4.88    // to allocate an oopDesc in the C-Heap and use that for the linked list.
    4.89 +  // XXX This is horribly inefficient when a promotion failure occurs
    4.90 +  // and should be fixed. XXX FIX ME !!!
    4.91 +#ifndef PRODUCT
    4.92 +  Atomic::inc_ptr(&_num_par_pushes);
    4.93 +  assert(_num_par_pushes > 0, "Tautology");
    4.94 +#endif
    4.95    if (from_space_obj->forwardee() == from_space_obj) {
    4.96      oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
    4.97      listhead->forward_to(from_space_obj);
    4.98      from_space_obj = listhead;
    4.99    }
   4.100 -  while (true) {
   4.101 -    from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
   4.102 -    oop observed_overflow_list =
   4.103 +  oop observed_overflow_list = _overflow_list;
   4.104 +  oop cur_overflow_list;
   4.105 +  do {
   4.106 +    cur_overflow_list = observed_overflow_list;
   4.107 +    if (cur_overflow_list != BUSY) {
   4.108 +      from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
   4.109 +    } else {
   4.110 +      from_space_obj->set_klass_to_list_ptr(NULL);
   4.111 +    }
   4.112 +    observed_overflow_list =
   4.113        (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
   4.114 -    if (observed_overflow_list == cur_overflow_list) break;
   4.115 -    // Otherwise...
   4.116 -    cur_overflow_list = observed_overflow_list;
   4.117 -  }
   4.118 +  } while (cur_overflow_list != observed_overflow_list);
   4.119  }
   4.120  
   4.121 +// *NOTE*: The overflow list manipulation code here and
   4.122 +// in CMSCollector:: are very similar in shape,
   4.123 +// except that in the CMS case we thread the objects
   4.124 +// directly into the list via their mark word, and do
   4.125 +// not need to deal with special cases below related
   4.126 +// to chunking of object arrays and promotion failure
   4.127 +// handling.
   4.128 +// CR 6797058 has been filed to attempt consolidation of
   4.129 +// the common code.
   4.130 +// Because of the common code, if you make any changes in
   4.131 +// the code below, please check the CMS version to see if
   4.132 +// similar changes might be needed.
   4.133 +// See CMSCollector::par_take_from_overflow_list() for
   4.134 +// more extensive documentation comments.
   4.135  bool
   4.136  ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
   4.137    ObjToScanQueue* work_q = par_scan_state->work_queue();
   4.138 +  assert(work_q->size() == 0, "Should first empty local work queue");
   4.139    // How many to take?
   4.140 -  int objsFromOverflow = MIN2(work_q->max_elems()/4,
   4.141 -                              (juint)ParGCDesiredObjsFromOverflowList);
   4.142 +  size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4,
   4.143 +                                 (size_t)ParGCDesiredObjsFromOverflowList);
   4.144  
   4.145    if (_overflow_list == NULL) return false;
   4.146  
   4.147    // Otherwise, there was something there; try claiming the list.
   4.148 -  oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
   4.149 -
   4.150 -  if (prefix == NULL) {
   4.151 -    return false;
   4.152 +  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
   4.153 +  // Trim off a prefix of at most objsFromOverflow items
   4.154 +  Thread* tid = Thread::current();
   4.155 +  size_t spin_count = (size_t)ParallelGCThreads;
   4.156 +  size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100);
   4.157 +  for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) {
   4.158 +    // someone grabbed it before we did ...
   4.159 +    // ... we spin for a short while...
   4.160 +    os::sleep(tid, sleep_time_millis, false);
   4.161 +    if (_overflow_list == NULL) {
   4.162 +      // nothing left to take
   4.163 +      return false;
   4.164 +    } else if (_overflow_list != BUSY) {
   4.165 +     // try and grab the prefix
   4.166 +     prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
   4.167 +    }
   4.168    }
   4.169 -  // Trim off a prefix of at most objsFromOverflow items
   4.170 -  int i = 1;
   4.171 +  if (prefix == NULL || prefix == BUSY) {
   4.172 +     // Nothing to take or waited long enough
   4.173 +     if (prefix == NULL) {
   4.174 +       // Write back the NULL in case we overwrote it with BUSY above
   4.175 +       // and it is still the same value.
   4.176 +       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
   4.177 +     }
   4.178 +     return false;
   4.179 +  }
   4.180 +  assert(prefix != NULL && prefix != BUSY, "Error");
   4.181 +  size_t i = 1;
   4.182    oop cur = prefix;
   4.183    while (i < objsFromOverflow && cur->klass_or_null() != NULL) {
   4.184      i++; cur = oop(cur->klass());
   4.185    }
   4.186  
   4.187    // Reattach remaining (suffix) to overflow list
   4.188 -  if (cur->klass_or_null() != NULL) {
   4.189 -    oop suffix = oop(cur->klass());
   4.190 -    cur->set_klass_to_list_ptr(NULL);
   4.191 -
   4.192 -    // Find last item of suffix list
   4.193 -    oop last = suffix;
   4.194 -    while (last->klass_or_null() != NULL) {
   4.195 -      last = oop(last->klass());
   4.196 +  if (cur->klass_or_null() == NULL) {
   4.197 +    // Write back the NULL in lieu of the BUSY we wrote
    4.198 +    // above, if it is still the same value.
   4.199 +    if (_overflow_list == BUSY) {
   4.200 +      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
   4.201      }
   4.202 -    // Atomically prepend suffix to current overflow list
   4.203 -    oop cur_overflow_list = _overflow_list;
   4.204 -    while (true) {
   4.205 -      last->set_klass_to_list_ptr(cur_overflow_list);
   4.206 -      oop observed_overflow_list =
   4.207 -        (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
   4.208 -      if (observed_overflow_list == cur_overflow_list) break;
   4.209 -      // Otherwise...
   4.210 -      cur_overflow_list = observed_overflow_list;
   4.211 +  } else {
   4.212 +    assert(cur->klass_or_null() != BUSY, "Error");
   4.213 +    oop suffix = oop(cur->klass());       // suffix will be put back on global list
   4.214 +    cur->set_klass_to_list_ptr(NULL);     // break off suffix
   4.215 +    // It's possible that the list is still in the empty(busy) state
   4.216 +    // we left it in a short while ago; in that case we may be
   4.217 +    // able to place back the suffix.
   4.218 +    oop observed_overflow_list = _overflow_list;
   4.219 +    oop cur_overflow_list = observed_overflow_list;
   4.220 +    bool attached = false;
   4.221 +    while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
   4.222 +      observed_overflow_list =
   4.223 +        (oop) Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
   4.224 +      if (cur_overflow_list == observed_overflow_list) {
   4.225 +        attached = true;
   4.226 +        break;
   4.227 +      } else cur_overflow_list = observed_overflow_list;
   4.228 +    }
   4.229 +    if (!attached) {
   4.230 +      // Too bad, someone else got in in between; we'll need to do a splice.
   4.231 +      // Find the last item of suffix list
   4.232 +      oop last = suffix;
   4.233 +      while (last->klass_or_null() != NULL) {
   4.234 +        last = oop(last->klass());
   4.235 +      }
   4.236 +      // Atomically prepend suffix to current overflow list
   4.237 +      observed_overflow_list = _overflow_list;
   4.238 +      do {
   4.239 +        cur_overflow_list = observed_overflow_list;
   4.240 +        if (cur_overflow_list != BUSY) {
   4.241 +          // Do the splice ...
   4.242 +          last->set_klass_to_list_ptr(cur_overflow_list);
   4.243 +        } else { // cur_overflow_list == BUSY
   4.244 +          last->set_klass_to_list_ptr(NULL);
   4.245 +        }
   4.246 +        observed_overflow_list =
   4.247 +          (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
   4.248 +      } while (cur_overflow_list != observed_overflow_list);
   4.249      }
   4.250    }
   4.251  
   4.252    // Push objects on prefix list onto this thread's work queue
   4.253 -  assert(cur != NULL, "program logic");
   4.254 +  assert(prefix != NULL && prefix != BUSY, "program logic");
   4.255    cur = prefix;
   4.256 -  int n = 0;
   4.257 +  ssize_t n = 0;
   4.258    while (cur != NULL) {
   4.259      oop obj_to_push = cur->forwardee();
   4.260      oop next        = oop(cur->klass_or_null());
   4.261      cur->set_klass(obj_to_push->klass());
   4.262 -    if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
   4.263 +    // This may be an array object that is self-forwarded. In that case, the list pointer
   4.264 +    // space, cur, is not in the Java heap, but rather in the C-heap and should be freed.
   4.265 +    if (!is_in_reserved(cur)) {
   4.266 +      // This can become a scaling bottleneck when there is work queue overflow coincident
   4.267 +      // with promotion failure.
   4.268 +      oopDesc* f = cur;
   4.269 +      FREE_C_HEAP_ARRAY(oopDesc, f);
   4.270 +    } else if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
   4.271 +      assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
   4.272        obj_to_push = cur;
   4.273 -      assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
   4.274      }
   4.275 -    work_q->push(obj_to_push);
   4.276 +    bool ok = work_q->push(obj_to_push);
   4.277 +    assert(ok, "Should have succeeded");
   4.278      cur = next;
   4.279      n++;
   4.280    }
   4.281    par_scan_state->note_overflow_refill(n);
   4.282 +#ifndef PRODUCT
   4.283 +  assert(_num_par_pushes >= n, "Too many pops?");
   4.284 +  Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
   4.285 +#endif
   4.286    return true;
   4.287  }
   4.288 +#undef BUSY
   4.289  
   4.290  void ParNewGeneration::ref_processor_init()
   4.291  {
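The should_simulate_overflow() hook above relies on a simple decrement-and-reset counter to force the overflow path roughly once every ParGCWorkQueueOverflowInterval pushes in non-product builds. A minimal sketch of that pattern, with invented names (OverflowInjector, should_fire):

    // Fires roughly once every 'interval' calls. As the comment above
    // should_simulate_overflow() notes, racy decrements from several threads
    // are harmless for a test hook: they only shift when the next simulated
    // overflow happens.
    struct OverflowInjector {
      long counter;
      const long interval;
      explicit OverflowInjector(long every) : counter(every), interval(every) {}
      bool should_fire() {
        if (counter-- <= 0) {        // same shape as the hook in the patch
          counter = interval;
          return true;
        }
        return false;
      }
    };

    // Hypothetical call site, mirroring the push sites in the patch:
    //   if (injector.should_fire() || !work_q->push(obj_to_push)) {
    //     push_on_overflow_list(old, par_scan_state);
    //   }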
     5.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Mon Jan 26 12:07:54 2009 -0800
     5.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Thu Jan 29 21:25:42 2009 -0800
     5.3 @@ -278,6 +278,7 @@
     5.4    friend class ParNewRefProcTask;
     5.5    friend class ParNewRefProcTaskExecutor;
     5.6    friend class ParScanThreadStateSet;
     5.7 +  friend class ParEvacuateFollowersClosure;
     5.8  
     5.9   private:
    5.10    // XXX use a global constant instead of 64!
    5.11 @@ -296,6 +297,7 @@
    5.12    // klass-pointers (klass information already copied to the forwarded
    5.13    // image.)  Manipulated with CAS.
    5.14    oop _overflow_list;
    5.15 +  NOT_PRODUCT(ssize_t _num_par_pushes;)
    5.16  
    5.17    // If true, older generation does not support promotion undo, so avoid.
    5.18    static bool _avoid_promotion_undo;
    5.19 @@ -372,8 +374,12 @@
    5.20    oop copy_to_survivor_space_with_undo(ParScanThreadState* par_scan_state,
    5.21                               oop obj, size_t obj_sz, markOop m);
    5.22  
    5.23 +  // in support of testing overflow code
    5.24 +  NOT_PRODUCT(int _overflow_counter;)
    5.25 +  NOT_PRODUCT(bool should_simulate_overflow();)
    5.26 +
    5.27    // Push the given (from-space) object on the global overflow list.
    5.28 -  void push_on_overflow_list(oop from_space_obj);
    5.29 +  void push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state);
    5.30  
    5.31    // If the global overflow list is non-empty, move some tasks from it
    5.32    // onto "work_q" (which must be empty).  No more than 1/4 of the
     6.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Mon Jan 26 12:07:54 2009 -0800
     6.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Thu Jan 29 21:25:42 2009 -0800
     6.3 @@ -116,7 +116,7 @@
     6.4    // ObjectSpace stuff
     6.5    //
     6.6  
     6.7 -  _object_space = new MutableSpace();
     6.8 +  _object_space = new MutableSpace(virtual_space()->alignment());
     6.9  
    6.10    if (_object_space == NULL)
    6.11      vm_exit_during_initialization("Could not allocate an old gen space");
    6.12 @@ -385,10 +385,10 @@
    6.13    start_array()->set_covered_region(new_memregion);
    6.14    Universe::heap()->barrier_set()->resize_covered_region(new_memregion);
    6.15  
    6.16 -  HeapWord* const virtual_space_high = (HeapWord*) virtual_space()->high();
    6.17 -
    6.18    // ALWAYS do this last!!
    6.19 -  object_space()->set_end(virtual_space_high);
    6.20 +  object_space()->initialize(new_memregion,
    6.21 +                             SpaceDecorator::DontClear,
    6.22 +                             SpaceDecorator::DontMangle);
    6.23  
    6.24    assert(new_word_size == heap_word_size(object_space()->capacity_in_bytes()),
    6.25      "Sanity");
     7.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp	Mon Jan 26 12:07:54 2009 -0800
     7.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp	Thu Jan 29 21:25:42 2009 -0800
     7.3 @@ -78,7 +78,7 @@
     7.4    _special = false;
     7.5  }
     7.6  
     7.7 -bool PSVirtualSpace::expand_by(size_t bytes, bool pre_touch) {
     7.8 +bool PSVirtualSpace::expand_by(size_t bytes) {
     7.9    assert(is_aligned(bytes), "arg not aligned");
    7.10    DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this));
    7.11  
    7.12 @@ -92,15 +92,6 @@
    7.13      _committed_high_addr += bytes;
    7.14    }
    7.15  
    7.16 -  if (pre_touch || AlwaysPreTouch) {
    7.17 -    for (char* curr = base_addr;
    7.18 -         curr < _committed_high_addr;
    7.19 -         curr += os::vm_page_size()) {
    7.20 -      char tmp = *curr;
    7.21 -      *curr = 0;
    7.22 -    }
    7.23 -  }
    7.24 -
    7.25    return result;
    7.26  }
    7.27  
    7.28 @@ -255,7 +246,7 @@
    7.29    DEBUG_ONLY(verify());
    7.30  }
    7.31  
    7.32 -bool PSVirtualSpaceHighToLow::expand_by(size_t bytes, bool pre_touch) {
    7.33 +bool PSVirtualSpaceHighToLow::expand_by(size_t bytes) {
    7.34    assert(is_aligned(bytes), "arg not aligned");
    7.35    DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this));
    7.36  
    7.37 @@ -269,15 +260,6 @@
    7.38      _committed_low_addr -= bytes;
    7.39    }
    7.40  
    7.41 -  if (pre_touch || AlwaysPreTouch) {
    7.42 -    for (char* curr = base_addr;
    7.43 -         curr < _committed_high_addr;
    7.44 -         curr += os::vm_page_size()) {
    7.45 -      char tmp = *curr;
    7.46 -      *curr = 0;
    7.47 -    }
    7.48 -  }
    7.49 -
    7.50    return result;
    7.51  }
    7.52  
     8.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp	Mon Jan 26 12:07:54 2009 -0800
     8.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp	Thu Jan 29 21:25:42 2009 -0800
     8.3 @@ -80,7 +80,7 @@
     8.4    inline  void   set_reserved(char* low_addr, char* high_addr, bool special);
     8.5    inline  void   set_reserved(ReservedSpace rs);
     8.6    inline  void   set_committed(char* low_addr, char* high_addr);
     8.7 -  virtual bool   expand_by(size_t bytes, bool pre_touch = false);
     8.8 +  virtual bool   expand_by(size_t bytes);
     8.9    virtual bool   shrink_by(size_t bytes);
    8.10    virtual size_t expand_into(PSVirtualSpace* space, size_t bytes);
    8.11    void           release();
    8.12 @@ -127,7 +127,7 @@
    8.13    PSVirtualSpaceHighToLow(ReservedSpace rs, size_t alignment);
    8.14    PSVirtualSpaceHighToLow(ReservedSpace rs);
    8.15  
    8.16 -  virtual bool   expand_by(size_t bytes, bool pre_touch = false);
    8.17 +  virtual bool   expand_by(size_t bytes);
    8.18    virtual bool   shrink_by(size_t bytes);
    8.19    virtual size_t expand_into(PSVirtualSpace* space, size_t bytes);
    8.20  
     9.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Mon Jan 26 12:07:54 2009 -0800
     9.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Thu Jan 29 21:25:42 2009 -0800
     9.3 @@ -64,12 +64,12 @@
     9.4    }
     9.5  
     9.6    if (UseNUMA) {
     9.7 -    _eden_space = new MutableNUMASpace();
     9.8 +    _eden_space = new MutableNUMASpace(virtual_space()->alignment());
     9.9    } else {
    9.10 -    _eden_space = new MutableSpace();
    9.11 +    _eden_space = new MutableSpace(virtual_space()->alignment());
    9.12    }
    9.13 -  _from_space = new MutableSpace();
    9.14 -  _to_space   = new MutableSpace();
    9.15 +  _from_space = new MutableSpace(virtual_space()->alignment());
    9.16 +  _to_space   = new MutableSpace(virtual_space()->alignment());
    9.17  
    9.18    if (_eden_space == NULL || _from_space == NULL || _to_space == NULL) {
    9.19      vm_exit_during_initialization("Could not allocate a young gen space");
    10.1 --- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Mon Jan 26 12:07:54 2009 -0800
    10.2 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Thu Jan 29 21:25:42 2009 -0800
    10.3 @@ -27,7 +27,7 @@
    10.4  # include "incls/_mutableNUMASpace.cpp.incl"
    10.5  
    10.6  
    10.7 -MutableNUMASpace::MutableNUMASpace() {
    10.8 +MutableNUMASpace::MutableNUMASpace(size_t alignment) : MutableSpace(alignment) {
    10.9    _lgrp_spaces = new (ResourceObj::C_HEAP) GrowableArray<LGRPSpace*>(0, true);
   10.10    _page_size = os::vm_page_size();
   10.11    _adaptation_cycles = 0;
   10.12 @@ -221,7 +221,7 @@
   10.13          }
   10.14        }
   10.15        if (!found) {
   10.16 -        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i]));
   10.17 +        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i], alignment()));
   10.18        }
   10.19      }
   10.20  
   10.21 @@ -443,10 +443,10 @@
   10.22    // Is there bottom?
   10.23    if (new_region.start() < intersection.start()) { // Yes
   10.24      // Try to coalesce small pages into a large one.
   10.25 -    if (UseLargePages && page_size() >= os::large_page_size()) {
   10.26 -      HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), os::large_page_size());
   10.27 +    if (UseLargePages && page_size() >= alignment()) {
   10.28 +      HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), alignment());
   10.29        if (new_region.contains(p)
   10.30 -          && pointer_delta(p, new_region.start(), sizeof(char)) >= os::large_page_size()) {
   10.31 +          && pointer_delta(p, new_region.start(), sizeof(char)) >= alignment()) {
   10.32          if (intersection.contains(p)) {
   10.33            intersection = MemRegion(p, intersection.end());
   10.34          } else {
   10.35 @@ -462,10 +462,10 @@
   10.36    // Is there top?
   10.37    if (intersection.end() < new_region.end()) { // Yes
   10.38      // Try to coalesce small pages into a large one.
   10.39 -    if (UseLargePages && page_size() >= os::large_page_size()) {
   10.40 -      HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), os::large_page_size());
   10.41 +    if (UseLargePages && page_size() >= alignment()) {
   10.42 +      HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), alignment());
   10.43        if (new_region.contains(p)
   10.44 -          && pointer_delta(new_region.end(), p, sizeof(char)) >= os::large_page_size()) {
   10.45 +          && pointer_delta(new_region.end(), p, sizeof(char)) >= alignment()) {
   10.46          if (intersection.contains(p)) {
   10.47            intersection = MemRegion(intersection.start(), p);
   10.48          } else {
   10.49 @@ -504,12 +504,12 @@
   10.50              // That's the only case we have to make an additional bias_region() call.
   10.51              HeapWord* start = invalid_region->start();
   10.52              HeapWord* end = invalid_region->end();
   10.53 -            if (UseLargePages && page_size() >= os::large_page_size()) {
   10.54 -              HeapWord *p = (HeapWord*)round_down((intptr_t) start, os::large_page_size());
   10.55 +            if (UseLargePages && page_size() >= alignment()) {
   10.56 +              HeapWord *p = (HeapWord*)round_down((intptr_t) start, alignment());
   10.57                if (new_region.contains(p)) {
   10.58                  start = p;
   10.59                }
   10.60 -              p = (HeapWord*)round_to((intptr_t) end, os::large_page_size());
   10.61 +              p = (HeapWord*)round_to((intptr_t) end, alignment());
   10.62                if (new_region.contains(end)) {
   10.63                  end = p;
   10.64                }
   10.65 @@ -526,7 +526,8 @@
   10.66  
   10.67  void MutableNUMASpace::initialize(MemRegion mr,
   10.68                                    bool clear_space,
   10.69 -                                  bool mangle_space) {
   10.70 +                                  bool mangle_space,
   10.71 +                                  bool setup_pages) {
    10.72    assert(clear_space, "Reallocation will destroy data!");
   10.73    assert(lgrp_spaces()->length() > 0, "There should be at least one space");
   10.74  
   10.75 @@ -538,7 +539,7 @@
   10.76  
   10.77    // Compute chunk sizes
   10.78    size_t prev_page_size = page_size();
   10.79 -  set_page_size(UseLargePages ? os::large_page_size() : os::vm_page_size());
   10.80 +  set_page_size(UseLargePages ? alignment() : os::vm_page_size());
   10.81    HeapWord* rounded_bottom = (HeapWord*)round_to((intptr_t) bottom(), page_size());
   10.82    HeapWord* rounded_end = (HeapWord*)round_down((intptr_t) end(), page_size());
   10.83    size_t base_space_size_pages = pointer_delta(rounded_end, rounded_bottom, sizeof(char)) / page_size();
   10.84 @@ -666,7 +667,7 @@
   10.85      }
   10.86  
   10.87      // Clear space (set top = bottom) but never mangle.
   10.88 -    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle);
   10.89 +    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle, MutableSpace::DontSetupPages);
   10.90  
   10.91      set_adaptation_cycles(samples_count());
   10.92    }
    11.1 --- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Mon Jan 26 12:07:54 2009 -0800
    11.2 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Thu Jan 29 21:25:42 2009 -0800
    11.3 @@ -82,8 +82,8 @@
    11.4      char* last_page_scanned()            { return _last_page_scanned; }
    11.5      void set_last_page_scanned(char* p)  { _last_page_scanned = p;    }
    11.6     public:
    11.7 -    LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
    11.8 -      _space = new MutableSpace();
    11.9 +    LGRPSpace(int l, size_t alignment) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
   11.10 +      _space = new MutableSpace(alignment);
   11.11        _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
   11.12      }
   11.13      ~LGRPSpace() {
   11.14 @@ -183,10 +183,10 @@
   11.15  
   11.16   public:
   11.17    GrowableArray<LGRPSpace*>* lgrp_spaces() const     { return _lgrp_spaces;       }
   11.18 -  MutableNUMASpace();
   11.19 +  MutableNUMASpace(size_t alignment);
   11.20    virtual ~MutableNUMASpace();
   11.21    // Space initialization.
   11.22 -  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
   11.23 +  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space, bool setup_pages = SetupPages);
   11.24    // Update space layout if necessary. Do all adaptive resizing job.
   11.25    virtual void update();
   11.26    // Update allocation rate averages.
    12.1 --- a/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Mon Jan 26 12:07:54 2009 -0800
    12.2 +++ b/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Thu Jan 29 21:25:42 2009 -0800
    12.3 @@ -25,7 +25,10 @@
    12.4  # include "incls/_precompiled.incl"
    12.5  # include "incls/_mutableSpace.cpp.incl"
    12.6  
    12.7 -MutableSpace::MutableSpace(): ImmutableSpace(), _top(NULL) {
    12.8 +MutableSpace::MutableSpace(size_t alignment): ImmutableSpace(), _top(NULL), _alignment(alignment) {
    12.9 +  assert(MutableSpace::alignment() >= 0 &&
   12.10 +         MutableSpace::alignment() % os::vm_page_size() == 0,
   12.11 +         "Space should be aligned");
   12.12    _mangler = new MutableSpaceMangler(this);
   12.13  }
   12.14  
   12.15 @@ -33,16 +36,88 @@
   12.16    delete _mangler;
   12.17  }
   12.18  
   12.19 +void MutableSpace::numa_setup_pages(MemRegion mr, bool clear_space) {
   12.20 +  if (!mr.is_empty()) {
   12.21 +    size_t page_size = UseLargePages ? alignment() : os::vm_page_size();
   12.22 +    HeapWord *start = (HeapWord*)round_to((intptr_t) mr.start(), page_size);
   12.23 +    HeapWord *end =  (HeapWord*)round_down((intptr_t) mr.end(), page_size);
   12.24 +    if (end > start) {
   12.25 +      size_t size = pointer_delta(end, start, sizeof(char));
   12.26 +      if (clear_space) {
   12.27 +        // Prefer page reallocation to migration.
   12.28 +        os::free_memory((char*)start, size);
   12.29 +      }
   12.30 +      os::numa_make_global((char*)start, size);
   12.31 +    }
   12.32 +  }
   12.33 +}
   12.34 +
   12.35 +void MutableSpace::pretouch_pages(MemRegion mr) {
   12.36 +  for (volatile char *p = (char*)mr.start(); p < (char*)mr.end(); p += os::vm_page_size()) {
   12.37 +    char t = *p; *p = t;
   12.38 +  }
   12.39 +}
   12.40 +
   12.41  void MutableSpace::initialize(MemRegion mr,
   12.42                                bool clear_space,
   12.43 -                              bool mangle_space) {
   12.44 -  HeapWord* bottom = mr.start();
   12.45 -  HeapWord* end    = mr.end();
   12.46 +                              bool mangle_space,
   12.47 +                              bool setup_pages) {
   12.48  
   12.49 -  assert(Universe::on_page_boundary(bottom) && Universe::on_page_boundary(end),
   12.50 +  assert(Universe::on_page_boundary(mr.start()) && Universe::on_page_boundary(mr.end()),
   12.51           "invalid space boundaries");
   12.52 -  set_bottom(bottom);
   12.53 -  set_end(end);
   12.54 +
   12.55 +  if (setup_pages && (UseNUMA || AlwaysPreTouch)) {
   12.56 +    // The space may move left and right or expand/shrink.
   12.57 +    // We'd like to enforce the desired page placement.
   12.58 +    MemRegion head, tail;
   12.59 +    if (last_setup_region().is_empty()) {
   12.60 +      // If it's the first initialization don't limit the amount of work.
   12.61 +      head = mr;
   12.62 +      tail = MemRegion(mr.end(), mr.end());
   12.63 +    } else {
   12.64 +      // Is there an intersection with the address space?
   12.65 +      MemRegion intersection = last_setup_region().intersection(mr);
   12.66 +      if (intersection.is_empty()) {
   12.67 +        intersection = MemRegion(mr.end(), mr.end());
   12.68 +      }
   12.69 +      // All the sizes below are in words.
   12.70 +      size_t head_size = 0, tail_size = 0;
   12.71 +      if (mr.start() <= intersection.start()) {
   12.72 +        head_size = pointer_delta(intersection.start(), mr.start());
   12.73 +      }
   12.74 +      if(intersection.end() <= mr.end()) {
   12.75 +        tail_size = pointer_delta(mr.end(), intersection.end());
   12.76 +      }
   12.77 +      // Limit the amount of page manipulation if necessary.
   12.78 +      if (NUMASpaceResizeRate > 0 && !AlwaysPreTouch) {
   12.79 +        const size_t change_size = head_size + tail_size;
   12.80 +        const float setup_rate_words = NUMASpaceResizeRate >> LogBytesPerWord;
   12.81 +        head_size = MIN2((size_t)(setup_rate_words * head_size / change_size),
   12.82 +                         head_size);
   12.83 +        tail_size = MIN2((size_t)(setup_rate_words * tail_size / change_size),
   12.84 +                         tail_size);
   12.85 +      }
   12.86 +      head = MemRegion(intersection.start() - head_size, intersection.start());
   12.87 +      tail = MemRegion(intersection.end(), intersection.end() + tail_size);
   12.88 +    }
   12.89 +    assert(mr.contains(head) && mr.contains(tail), "Sanity");
   12.90 +
   12.91 +    if (UseNUMA) {
   12.92 +      numa_setup_pages(head, clear_space);
   12.93 +      numa_setup_pages(tail, clear_space);
   12.94 +    }
   12.95 +
   12.96 +    if (AlwaysPreTouch) {
   12.97 +      pretouch_pages(head);
   12.98 +      pretouch_pages(tail);
   12.99 +    }
  12.100 +
  12.101 +    // Remember where we stopped so that we can continue later.
  12.102 +    set_last_setup_region(MemRegion(head.start(), tail.end()));
  12.103 +  }
  12.104 +
  12.105 +  set_bottom(mr.start());
  12.106 +  set_end(mr.end());
  12.107  
  12.108    if (clear_space) {
  12.109      clear(mangle_space);
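MutableSpace::initialize() above bounds the page-setup work on a resize: only the head and tail pieces of the new region that fall outside the previously set-up region are touched, and their combined size can be capped via NUMASpaceResizeRate. The following stand-alone sketch, with invented names (Region, new_pieces) and word-granularity addresses, models just that size computation; it assumes page-aligned inputs and omits the first-initialization case in which the whole region is processed.

    #include <algorithm>
    #include <cstddef>

    struct Region { std::size_t start, end; };   // half-open [start, end) in words

    // Computes the head and tail pieces of 'cur' that lie outside 'prev',
    // optionally capping their combined size at 'cap_words' (0 = no cap),
    // split proportionally as in the patch.
    static void new_pieces(Region prev, Region cur, std::size_t cap_words,
                           Region& head, Region& tail) {
      std::size_t is = std::max(prev.start, cur.start);
      std::size_t ie = std::min(prev.end, cur.end);
      if (is > ie) { is = ie = cur.end; }          // empty intersection
      std::size_t head_size = is - cur.start;
      std::size_t tail_size = cur.end - ie;
      std::size_t change = head_size + tail_size;
      if (cap_words > 0 && change > 0) {
        head_size = std::min(cap_words * head_size / change, head_size);
        tail_size = std::min(cap_words * tail_size / change, tail_size);
      }
      head = Region{is - head_size, is};
      tail = Region{ie, ie + tail_size};
    }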
    13.1 --- a/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Mon Jan 26 12:07:54 2009 -0800
    13.2 +++ b/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Thu Jan 29 21:25:42 2009 -0800
    13.3 @@ -25,7 +25,10 @@
    13.4  // A MutableSpace is a subtype of ImmutableSpace that supports the
    13.5  // concept of allocation. This includes the concepts that a space may
     13.6  // be only partially full, and the query methods that go with such
    13.7 -// an assumption.
    13.8 +// an assumption. MutableSpace is also responsible for minimizing the
    13.9 +// page allocation time by having the memory pretouched (with
    13.10 +// AlwaysPreTouch) and for optimizing page placement on NUMA systems
    13.11 +// by making the underlying region interleaved (with UseNUMA).
   13.12  //
   13.13  // Invariant: (ImmutableSpace +) bottom() <= top() <= end()
   13.14  // top() is inclusive and end() is exclusive.
   13.15 @@ -37,15 +40,23 @@
   13.16  
   13.17    // Helper for mangling unused space in debug builds
   13.18    MutableSpaceMangler* _mangler;
   13.19 -
    13.20 +  // The last region whose pages have been set up to be interleaved.
   13.21 +  MemRegion _last_setup_region;
   13.22 +  size_t _alignment;
   13.23   protected:
   13.24    HeapWord* _top;
   13.25  
   13.26    MutableSpaceMangler* mangler() { return _mangler; }
   13.27  
   13.28 +  void numa_setup_pages(MemRegion mr, bool clear_space);
   13.29 +  void pretouch_pages(MemRegion mr);
   13.30 +
   13.31 +  void set_last_setup_region(MemRegion mr) { _last_setup_region = mr;   }
   13.32 +  MemRegion last_setup_region() const      { return _last_setup_region; }
   13.33 +
   13.34   public:
   13.35    virtual ~MutableSpace();
   13.36 -  MutableSpace();
   13.37 +  MutableSpace(size_t page_size);
   13.38  
   13.39    // Accessors
   13.40    HeapWord* top() const                    { return _top;    }
   13.41 @@ -57,13 +68,20 @@
   13.42    virtual void set_bottom(HeapWord* value) { _bottom = value; }
   13.43    virtual void set_end(HeapWord* value)    { _end = value; }
   13.44  
   13.45 +  size_t alignment()                       { return _alignment; }
   13.46 +
   13.47    // Returns a subregion containing all objects in this space.
   13.48    MemRegion used_region() { return MemRegion(bottom(), top()); }
   13.49  
   13.50 +  static const bool SetupPages = true;
   13.51 +  static const bool DontSetupPages = false;
   13.52 +
   13.53    // Initialization
   13.54    virtual void initialize(MemRegion mr,
   13.55                            bool clear_space,
   13.56 -                          bool mangle_space);
   13.57 +                          bool mangle_space,
   13.58 +                          bool setup_pages = SetupPages);
   13.59 +
   13.60    virtual void clear(bool mangle_space);
   13.61    // Does the usual initialization but optionally resets top to bottom.
   13.62  #if 0  // MANGLE_SPACE
    14.1 --- a/src/share/vm/memory/referenceProcessor.cpp	Mon Jan 26 12:07:54 2009 -0800
    14.2 +++ b/src/share/vm/memory/referenceProcessor.cpp	Thu Jan 29 21:25:42 2009 -0800
    14.3 @@ -721,12 +721,6 @@
    14.4                               iter.obj(), iter.obj()->blueprint()->internal_name());
    14.5      }
    14.6      assert(iter.obj()->is_oop(UseConcMarkSweepGC), "Adding a bad reference");
    14.7 -    // If discovery is concurrent, we may have objects with null referents,
    14.8 -    // being those that were concurrently cleared after they were discovered
    14.9 -    // (and not subsequently precleaned).
   14.10 -    assert(   (discovery_is_atomic() && iter.referent()->is_oop())
   14.11 -           || (!discovery_is_atomic() && iter.referent()->is_oop_or_null(UseConcMarkSweepGC)),
   14.12 -           "Adding a bad referent");
   14.13      iter.next();
   14.14    }
   14.15    // Remember to keep sentinel pointer around
    15.1 --- a/src/share/vm/runtime/globals.hpp	Mon Jan 26 12:07:54 2009 -0800
    15.2 +++ b/src/share/vm/runtime/globals.hpp	Thu Jan 29 21:25:42 2009 -0800
    15.3 @@ -1307,7 +1307,14 @@
    15.4    product(intx, ParGCArrayScanChunk, 50,                                    \
    15.5            "Scan a subset and push remainder, if array is bigger than this") \
    15.6                                                                              \
    15.7 -  product(intx, ParGCDesiredObjsFromOverflowList, 20,                       \
    15.8 +  notproduct(bool, ParGCWorkQueueOverflowALot, false,                       \
    15.9 +          "Whether we should simulate work queue overflow in ParNew")       \
   15.10 +                                                                            \
   15.11 +  notproduct(uintx, ParGCWorkQueueOverflowInterval, 1000,                   \
   15.12 +          "An `interval' counter that determines how frequently"            \
   15.13 +          " we simulate overflow; a smaller number increases frequency")    \
   15.14 +                                                                            \
   15.15 +  product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
   15.16            "The desired number of objects to claim from the overflow list")  \
   15.17                                                                              \
   15.18    product(uintx, CMSParPromoteBlocksToClaim, 50,                            \
   15.19 @@ -1429,8 +1436,8 @@
   15.20            "Whether we should simulate frequent marking stack / work queue"  \
   15.21            " overflow")                                                      \
   15.22                                                                              \
   15.23 -  notproduct(intx, CMSMarkStackOverflowInterval, 1000,                      \
   15.24 -          "A per-thread `interval' counter that determines how frequently"  \
   15.25 +  notproduct(uintx, CMSMarkStackOverflowInterval, 1000,                     \
   15.26 +          "An `interval' counter that determines how frequently"            \
   15.27            " we simulate overflow; a smaller number increases frequency")    \
   15.28                                                                              \
   15.29    product(uintx, CMSMaxAbortablePrecleanLoops, 0,                           \
   15.30 @@ -1648,7 +1655,7 @@
   15.31    develop(uintx, WorkStealingYieldsBeforeSleep, 1000,                       \
   15.32            "Number of yields before a sleep is done during workstealing")    \
   15.33                                                                              \
   15.34 -  product(uintx, PreserveMarkStackSize, 40,                                 \
   15.35 +  product(uintx, PreserveMarkStackSize, 1024,                               \
   15.36             "Size for stack used in promotion failure handling")             \
   15.37                                                                              \
   15.38    product_pd(bool, UseTLAB, "Use thread-local object allocation")           \
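In a non-product (debug) build, the two notproduct flags added here pair with the should_simulate_overflow() hook in parNewGeneration.cpp: running with -XX:+ParGCWorkQueueOverflowALot and a small -XX:ParGCWorkQueueOverflowInterval makes ParNew exercise the push_on_overflow_list()/take_from_overflow_list() paths frequently, mirroring the existing CMSMarkStackOverflowInterval counter for CMS. Product builds do not define notproduct flags, so this is purely a testing aid.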
