6787254: Work queue capacity can be increased substantially on some platforms

Fri, 30 Jan 2009 14:17:52 -0800

author
ysr
date
Fri, 30 Jan 2009 14:17:52 -0800
changeset 976
23673011938d
parent 971
5b39c489c39d
child 977
9a25e0c45327

6787254: Work queue capacity can be increased substantially on some platforms
Summary: Increased the default and maximum size of the CMS marking stack and the size of the parallel workers' work queues in 64-bit mode. The latter was accomplished by an increase in the width of the Taskqueue's Age struct and its Tag field in 64-bit mode.
Reviewed-by: jmasa, tonyp

src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
src/share/vm/utilities/taskqueue.cpp file | annotate | diff | comparison | revisions
src/share/vm/utilities/taskqueue.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/runtime/globals.hpp	Thu Jan 29 21:25:42 2009 -0800
     1.2 +++ b/src/share/vm/runtime/globals.hpp	Fri Jan 30 14:17:52 2009 -0800
     1.3 @@ -1426,10 +1426,10 @@
     1.4    develop(bool, CMSOverflowEarlyRestoration, false,                         \
     1.5            "Whether preserved marks should be restored early")               \
     1.6                                                                              \
     1.7 -  product(uintx, CMSMarkStackSize, 32*K,                                    \
     1.8 +  product(uintx, CMSMarkStackSize, NOT_LP64(32*K) LP64_ONLY(4*M),           \
     1.9            "Size of CMS marking stack")                                      \
    1.10                                                                              \
    1.11 -  product(uintx, CMSMarkStackSizeMax, 4*M,                                  \
    1.12 +  product(uintx, CMSMarkStackSizeMax, NOT_LP64(4*M) LP64_ONLY(512*M),       \
    1.13            "Max size of CMS marking stack")                                  \
    1.14                                                                              \
    1.15    notproduct(bool, CMSMarkStackOverflowALot, false,                         \
     2.1 --- a/src/share/vm/utilities/taskqueue.cpp	Thu Jan 29 21:25:42 2009 -0800
     2.2 +++ b/src/share/vm/utilities/taskqueue.cpp	Fri Jan 30 14:17:52 2009 -0800
     2.3 @@ -69,7 +69,7 @@
     2.4  ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
     2.5    Atomic::inc(&_offered_termination);
     2.6  
     2.7 -  juint yield_count = 0;
     2.8 +  uint yield_count = 0;
     2.9    while (true) {
    2.10      if (_offered_termination == _n_threads) {
    2.11        //inner_termination_loop();
     3.1 --- a/src/share/vm/utilities/taskqueue.hpp	Thu Jan 29 21:25:42 2009 -0800
     3.2 +++ b/src/share/vm/utilities/taskqueue.hpp	Fri Jan 30 14:17:52 2009 -0800
     3.3 @@ -22,67 +22,76 @@
     3.4   *
     3.5   */
     3.6  
     3.7 +#ifdef LP64
     3.8 +typedef juint TAG_TYPE;
     3.9 +// for a taskqueue size of 4M
    3.10 +#define LOG_TASKQ_SIZE 22
    3.11 +#else
    3.12 +typedef jushort TAG_TYPE;
    3.13 +// for a taskqueue size of 16K
    3.14 +#define LOG_TASKQ_SIZE 14
    3.15 +#endif
    3.16 +
    3.17  class TaskQueueSuper: public CHeapObj {
    3.18  protected:
    3.19    // The first free element after the last one pushed (mod _n).
    3.20 -  // (For now we'll assume only 32-bit CAS).
    3.21 -  volatile juint _bottom;
    3.22 +  volatile uint _bottom;
    3.23  
    3.24    // log2 of the size of the queue.
    3.25    enum SomeProtectedConstants {
    3.26 -    Log_n = 14
    3.27 +    Log_n = LOG_TASKQ_SIZE
    3.28    };
    3.29 +#undef LOG_TASKQ_SIZE
    3.30  
    3.31    // Size of the queue.
    3.32 -  juint n() { return (1 << Log_n); }
    3.33 +  uint n() { return (1 << Log_n); }
    3.34    // For computing "x mod n" efficiently.
    3.35 -  juint n_mod_mask() { return n() - 1; }
    3.36 +  uint n_mod_mask() { return n() - 1; }
    3.37  
    3.38    struct Age {
    3.39 -    jushort _top;
    3.40 -    jushort _tag;
    3.41 +    TAG_TYPE _top;
    3.42 +    TAG_TYPE _tag;
    3.43  
    3.44 -    jushort tag() const { return _tag; }
    3.45 -    jushort top() const { return _top; }
    3.46 +    TAG_TYPE tag() const { return _tag; }
    3.47 +    TAG_TYPE top() const { return _top; }
    3.48  
    3.49      Age() { _tag = 0; _top = 0; }
    3.50  
    3.51      friend bool operator ==(const Age& a1, const Age& a2) {
    3.52        return a1.tag() == a2.tag() && a1.top() == a2.top();
    3.53      }
    3.54 -
    3.55    };
    3.56    Age _age;
    3.57    // These make sure we do single atomic reads and writes.
    3.58    Age get_age() {
    3.59 -    jint res = *(volatile jint*)(&_age);
    3.60 +    uint res = *(volatile uint*)(&_age);
    3.61      return *(Age*)(&res);
    3.62    }
    3.63    void set_age(Age a) {
    3.64 -    *(volatile jint*)(&_age) = *(int*)(&a);
    3.65 +    *(volatile uint*)(&_age) = *(uint*)(&a);
    3.66    }
    3.67  
    3.68 -  jushort get_top() {
    3.69 +  TAG_TYPE get_top() {
    3.70      return get_age().top();
    3.71    }
    3.72  
    3.73    // These both operate mod _n.
    3.74 -  juint increment_index(juint ind) {
    3.75 +  uint increment_index(uint ind) {
    3.76      return (ind + 1) & n_mod_mask();
    3.77    }
    3.78 -  juint decrement_index(juint ind) {
    3.79 +  uint decrement_index(uint ind) {
    3.80      return (ind - 1) & n_mod_mask();
    3.81    }
    3.82  
    3.83    // Returns a number in the range [0.._n).  If the result is "n-1", it
    3.84    // should be interpreted as 0.
    3.85 -  juint dirty_size(juint bot, juint top) {
    3.86 -    return ((jint)bot - (jint)top) & n_mod_mask();
    3.87 +  uint dirty_size(uint bot, uint top) {
    3.88 +    return ((int)bot - (int)top) & n_mod_mask();
    3.89    }
    3.90  
    3.91    // Returns the size corresponding to the given "bot" and "top".
    3.92 -  juint size(juint bot, juint top) {
    3.93 -    juint sz = dirty_size(bot, top);
    3.94 +  uint size(uint bot, uint top) {
    3.95 +    uint sz = dirty_size(bot, top);
    3.96      // Has the queue "wrapped", so that bottom is less than top?
    3.97      // There's a complicated special case here.  A pair of threads could
    3.98      // perform pop_local and pop_global operations concurrently, starting
    3.99 @@ -94,7 +103,7 @@
   3.100      // owner performs pop_local's, and several concurrent threads
   3.101      // attempting to perform the pop_global will all perform the same CAS,
   3.102      // and only one can succeed.  Any stealing thread that reads after
   3.103 -    // either the increment or decrement will seen an empty queue, and will
   3.104 +    // either the increment or decrement will see an empty queue, and will
   3.105      // not join the competitors.  The "sz == -1 || sz == _n-1" state will
   3.106      // not be modified  by concurrent queues, so the owner thread can reset
   3.107      // the state to _bottom == top so subsequent pushes will be performed
   3.108 @@ -112,11 +121,11 @@
   3.109    // Return an estimate of the number of elements in the queue.
   3.110    // The "careful" version admits the possibility of pop_local/pop_global
   3.111    // races.
   3.112 -  juint size() {
   3.113 +  uint size() {
   3.114      return size(_bottom, get_top());
   3.115    }
   3.116  
   3.117 -  juint dirty_size() {
   3.118 +  uint dirty_size() {
   3.119      return dirty_size(_bottom, get_top());
   3.120    }
   3.121  
   3.122 @@ -127,15 +136,15 @@
   3.123  
   3.124    // Maximum number of elements allowed in the queue.  This is two less
   3.125    // than the actual queue size, for somewhat complicated reasons.
   3.126 -  juint max_elems() { return n() - 2; }
   3.127 +  uint max_elems() { return n() - 2; }
   3.128  
   3.129  };
   3.130  
   3.131  template<class E> class GenericTaskQueue: public TaskQueueSuper {
   3.132  private:
   3.133    // Slow paths for push, pop_local.  (pop_global has no fast path.)
   3.134 -  bool push_slow(E t, juint dirty_n_elems);
   3.135 -  bool pop_local_slow(juint localBot, Age oldAge);
   3.136 +  bool push_slow(E t, uint dirty_n_elems);
   3.137 +  bool pop_local_slow(uint localBot, Age oldAge);
   3.138  
   3.139  public:
   3.140    // Initializes the queue to empty.
   3.141 @@ -170,7 +179,7 @@
   3.142  
   3.143  template<class E>
   3.144  GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
   3.145 -  assert(sizeof(Age) == sizeof(jint), "Depends on this.");
   3.146 +  assert(sizeof(Age) == sizeof(int), "Depends on this.");
   3.147  }
   3.148  
   3.149  template<class E>
   3.150 @@ -182,9 +191,9 @@
   3.151  template<class E>
   3.152  void GenericTaskQueue<E>::oops_do(OopClosure* f) {
   3.153    // tty->print_cr("START OopTaskQueue::oops_do");
   3.154 -  int iters = size();
   3.155 -  juint index = _bottom;
   3.156 -  for (int i = 0; i < iters; ++i) {
   3.157 +  uint iters = size();
   3.158 +  uint index = _bottom;
   3.159 +  for (uint i = 0; i < iters; ++i) {
   3.160      index = decrement_index(index);
   3.161      // tty->print_cr("  doing entry %d," INTPTR_T " -> " INTPTR_T,
   3.162      //            index, &_elems[index], _elems[index]);
   3.163 @@ -198,10 +207,10 @@
   3.164  
   3.165  
   3.166  template<class E>
   3.167 -bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) {
   3.168 +bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
   3.169    if (dirty_n_elems == n() - 1) {
   3.170      // Actually means 0, so do the push.
   3.171 -    juint localBot = _bottom;
   3.172 +    uint localBot = _bottom;
   3.173      _elems[localBot] = t;
   3.174      _bottom = increment_index(localBot);
   3.175      return true;
   3.176 @@ -211,7 +220,7 @@
   3.177  
   3.178  template<class E>
   3.179  bool GenericTaskQueue<E>::
   3.180 -pop_local_slow(juint localBot, Age oldAge) {
   3.181 +pop_local_slow(uint localBot, Age oldAge) {
   3.182    // This queue was observed to contain exactly one element; either this
   3.183    // thread will claim it, or a competing "pop_global".  In either case,
   3.184    // the queue will be logically empty afterwards.  Create a new Age value
   3.185 @@ -230,9 +239,8 @@
   3.186      Age tempAge;
   3.187      // No competing pop_global has yet incremented "top"; we'll try to
   3.188      // install new_age, thus claiming the element.
   3.189 -    assert(sizeof(Age) == sizeof(jint) && sizeof(jint) == sizeof(juint),
   3.190 -           "Assumption about CAS unit.");
   3.191 -    *(jint*)&tempAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge);
   3.192 +    assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit.");
   3.193 +    *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
   3.194      if (tempAge == oldAge) {
   3.195        // We win.
   3.196        assert(dirty_size(localBot, get_top()) != n() - 1,
   3.197 @@ -253,8 +261,8 @@
   3.198  bool GenericTaskQueue<E>::pop_global(E& t) {
   3.199    Age newAge;
   3.200    Age oldAge = get_age();
   3.201 -  juint localBot = _bottom;
   3.202 -  juint n_elems = size(localBot, oldAge.top());
   3.203 +  uint localBot = _bottom;
   3.204 +  uint n_elems = size(localBot, oldAge.top());
   3.205    if (n_elems == 0) {
   3.206      return false;
   3.207    }
   3.208 @@ -263,7 +271,7 @@
   3.209    newAge._top = increment_index(newAge.top());
   3.210    if ( newAge._top == 0 ) newAge._tag++;
   3.211    Age resAge;
   3.212 -  *(jint*)&resAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge);
   3.213 +  *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
   3.214    // Note that using "_bottom" here might fail, since a pop_local might
   3.215    // have decremented it.
   3.216    assert(dirty_size(localBot, newAge._top) != n() - 1,
   3.217 @@ -287,7 +295,7 @@
   3.218  
   3.219  template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper {
   3.220  private:
   3.221 -  int _n;
   3.222 +  uint _n;
   3.223    GenericTaskQueue<E>** _queues;
   3.224  
   3.225  public:
   3.226 @@ -300,51 +308,51 @@
   3.227      }
   3.228    }
   3.229  
   3.230 -  bool steal_1_random(int queue_num, int* seed, E& t);
   3.231 -  bool steal_best_of_2(int queue_num, int* seed, E& t);
   3.232 -  bool steal_best_of_all(int queue_num, int* seed, E& t);
   3.233 +  bool steal_1_random(uint queue_num, int* seed, E& t);
   3.234 +  bool steal_best_of_2(uint queue_num, int* seed, E& t);
   3.235 +  bool steal_best_of_all(uint queue_num, int* seed, E& t);
   3.236  
   3.237 -  void register_queue(int i, GenericTaskQueue<E>* q);
   3.238 +  void register_queue(uint i, GenericTaskQueue<E>* q);
   3.239  
   3.240 -  GenericTaskQueue<E>* queue(int n);
   3.241 +  GenericTaskQueue<E>* queue(uint n);
   3.242  
   3.243    // The thread with queue number "queue_num" (and whose random number seed
   3.244    // is at "seed") is trying to steal a task from some other queue.  (It
   3.245    // may try several queues, according to some configuration parameter.)
   3.246    // If some steal succeeds, returns "true" and sets "t" the stolen task,
   3.247    // otherwise returns false.
   3.248 -  bool steal(int queue_num, int* seed, E& t);
   3.249 +  bool steal(uint queue_num, int* seed, E& t);
   3.250  
   3.251    bool peek();
   3.252  };
   3.253  
   3.254  template<class E>
   3.255 -void GenericTaskQueueSet<E>::register_queue(int i, GenericTaskQueue<E>* q) {
   3.256 -  assert(0 <= i && i < _n, "index out of range.");
   3.257 +void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) {
   3.258 +  assert(i < _n, "index out of range.");
   3.259    _queues[i] = q;
   3.260  }
   3.261  
   3.262  template<class E>
   3.263 -GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(int i) {
   3.264 +GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) {
   3.265    return _queues[i];
   3.266  }
   3.267  
   3.268  template<class E>
   3.269 -bool GenericTaskQueueSet<E>::steal(int queue_num, int* seed, E& t) {
   3.270 -  for (int i = 0; i < 2 * _n; i++)
   3.271 +bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) {
   3.272 +  for (uint i = 0; i < 2 * _n; i++)
   3.273      if (steal_best_of_2(queue_num, seed, t))
   3.274        return true;
   3.275    return false;
   3.276  }
   3.277  
   3.278  template<class E>
   3.279 -bool GenericTaskQueueSet<E>::steal_best_of_all(int queue_num, int* seed, E& t) {
   3.280 +bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) {
   3.281    if (_n > 2) {
   3.282      int best_k;
   3.283 -    jint best_sz = 0;
   3.284 -    for (int k = 0; k < _n; k++) {
   3.285 +    uint best_sz = 0;
   3.286 +    for (uint k = 0; k < _n; k++) {
   3.287        if (k == queue_num) continue;
   3.288 -      jint sz = _queues[k]->size();
   3.289 +      uint sz = _queues[k]->size();
   3.290        if (sz > best_sz) {
   3.291          best_sz = sz;
   3.292          best_k = k;
   3.293 @@ -362,9 +370,9 @@
   3.294  }
   3.295  
   3.296  template<class E>
   3.297 -bool GenericTaskQueueSet<E>::steal_1_random(int queue_num, int* seed, E& t) {
   3.298 +bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) {
   3.299    if (_n > 2) {
   3.300 -    int k = queue_num;
   3.301 +    uint k = queue_num;
   3.302      while (k == queue_num) k = randomParkAndMiller(seed) % _n;
   3.303      return _queues[2]->pop_global(t);
   3.304    } else if (_n == 2) {
   3.305 @@ -378,20 +386,20 @@
   3.306  }
   3.307  
   3.308  template<class E>
   3.309 -bool GenericTaskQueueSet<E>::steal_best_of_2(int queue_num, int* seed, E& t) {
   3.310 +bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) {
   3.311    if (_n > 2) {
   3.312 -    int k1 = queue_num;
   3.313 +    uint k1 = queue_num;
   3.314      while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n;
   3.315 -    int k2 = queue_num;
   3.316 +    uint k2 = queue_num;
   3.317      while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n;
   3.318      // Sample both and try the larger.
   3.319 -    juint sz1 = _queues[k1]->size();
   3.320 -    juint sz2 = _queues[k2]->size();
   3.321 +    uint sz1 = _queues[k1]->size();
   3.322 +    uint sz2 = _queues[k2]->size();
   3.323      if (sz2 > sz1) return _queues[k2]->pop_global(t);
   3.324      else return _queues[k1]->pop_global(t);
   3.325    } else if (_n == 2) {
   3.326      // Just try the other one.
   3.327 -    int k = (queue_num + 1) % 2;
   3.328 +    uint k = (queue_num + 1) % 2;
   3.329      return _queues[k]->pop_global(t);
   3.330    } else {
   3.331      assert(_n == 1, "can't be zero.");
   3.332 @@ -402,7 +410,7 @@
   3.333  template<class E>
   3.334  bool GenericTaskQueueSet<E>::peek() {
   3.335    // Try all the queues.
   3.336 -  for (int j = 0; j < _n; j++) {
   3.337 +  for (uint j = 0; j < _n; j++) {
   3.338      if (_queues[j]->peek())
   3.339        return true;
   3.340    }
   3.341 @@ -422,7 +430,7 @@
   3.342  private:
   3.343    int _n_threads;
   3.344    TaskQueueSetSuper* _queue_set;
   3.345 -  jint _offered_termination;
   3.346 +  int _offered_termination;
   3.347  
   3.348    bool peek_in_queue_set();
   3.349  protected:
   3.350 @@ -460,7 +468,7 @@
   3.351  
   3.352  template<class E> inline bool GenericTaskQueue<E>::push(E t) {
   3.353  #if SIMPLE_STACK
   3.354 -  juint localBot = _bottom;
   3.355 +  uint localBot = _bottom;
   3.356    if (_bottom < max_elems()) {
   3.357      _elems[localBot] = t;
   3.358      _bottom = localBot + 1;
   3.359 @@ -469,10 +477,10 @@
   3.360      return false;
   3.361    }
   3.362  #else
   3.363 -  juint localBot = _bottom;
   3.364 +  uint localBot = _bottom;
   3.365    assert((localBot >= 0) && (localBot < n()), "_bottom out of range.");
   3.366 -  jushort top = get_top();
   3.367 -  juint dirty_n_elems = dirty_size(localBot, top);
   3.368 +  TAG_TYPE top = get_top();
   3.369 +  uint dirty_n_elems = dirty_size(localBot, top);
   3.370    assert((dirty_n_elems >= 0) && (dirty_n_elems < n()),
   3.371           "n_elems out of range.");
   3.372    if (dirty_n_elems < max_elems()) {
   3.373 @@ -487,19 +495,19 @@
   3.374  
   3.375  template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
   3.376  #if SIMPLE_STACK
   3.377 -  juint localBot = _bottom;
   3.378 +  uint localBot = _bottom;
   3.379    assert(localBot > 0, "precondition.");
   3.380    localBot--;
   3.381    t = _elems[localBot];
   3.382    _bottom = localBot;
   3.383    return true;
   3.384  #else
   3.385 -  juint localBot = _bottom;
   3.386 +  uint localBot = _bottom;
   3.387    // This value cannot be n-1.  That can only occur as a result of
   3.388    // the assignment to bottom in this method.  If it does, this method
   3.389    // resets the size( to 0 before the next call (which is sequential,
   3.390    // since this is pop_local.)
   3.391 -  juint dirty_n_elems = dirty_size(localBot, get_top());
   3.392 +  uint dirty_n_elems = dirty_size(localBot, get_top());
   3.393    assert(dirty_n_elems != n() - 1, "Shouldn't be possible...");
   3.394    if (dirty_n_elems == 0) return false;
   3.395    localBot = decrement_index(localBot);
   3.396 @@ -512,7 +520,7 @@
   3.397    // If there's still at least one element in the queue, based on the
   3.398    // "_bottom" and "age" we've read, then there can be no interference with
   3.399    // a "pop_global" operation, and we're done.
   3.400 -  juint tp = get_top();
   3.401 +  TAG_TYPE tp = get_top();    // XXX
   3.402    if (size(localBot, tp) > 0) {
   3.403      assert(dirty_size(localBot, tp) != n() - 1,
   3.404             "Shouldn't be possible...");
   3.405 @@ -581,7 +589,7 @@
   3.406    bool is_empty();
   3.407    bool stealable_is_empty();
   3.408    bool overflow_is_empty();
   3.409 -  juint stealable_size() { return _region_queue.size(); }
   3.410 +  uint stealable_size() { return _region_queue.size(); }
   3.411    RegionTaskQueue* task_queue() { return &_region_queue; }
   3.412  };
   3.413  

mercurial