src/share/vm/gc_implementation/parNew/parNewGeneration.cpp

changeset 2065
94251661de76
parent 2020
a93a9eda13f7
child 2067
2d6b74c9a797
     1.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Aug 09 05:41:05 2010 -0700
     1.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Aug 09 18:03:50 2010 -0700
     1.3 @@ -51,9 +51,14 @@
     1.4    _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
     1.5    _keep_alive_closure(&_scan_weak_ref_closure),
     1.6    _promotion_failure_size(0),
     1.7 -  _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0),
     1.8    _strong_roots_time(0.0), _term_time(0.0)
     1.9  {
    1.10 +  #if TASKQUEUE_STATS
    1.11 +  _term_attempts = 0;
    1.12 +  _overflow_refills = 0;
    1.13 +  _overflow_refill_objs = 0;
    1.14 +  #endif // TASKQUEUE_STATS
    1.15 +
    1.16    _survivor_chunk_array =
    1.17      (ChunkArray*) old_gen()->get_data_recorder(thread_num());
    1.18    _hash_seed = 17;  // Might want to take time-based random value.
    1.19 @@ -100,7 +105,6 @@
    1.20      // Push remainder.
    1.21      bool ok = work_queue()->push(old);
    1.22      assert(ok, "just popped, push must be okay");
    1.23 -    note_push();
    1.24    } else {
    1.25      // Restore length so that it can be used if there
    1.26      // is a promotion failure and forwarding pointers
    1.27 @@ -126,7 +130,6 @@
    1.28      while (queue->size() > (juint)max_size) {
    1.29        oop obj_to_scan;
    1.30        if (queue->pop_local(obj_to_scan)) {
    1.31 -        note_pop();
    1.32          if ((HeapWord *)obj_to_scan < young_old_boundary()) {
    1.33            if (obj_to_scan->is_objArray() &&
    1.34                obj_to_scan->is_forwarded() &&
    1.35 @@ -271,20 +274,28 @@
    1.36                          GrowableArray<oop>**    overflow_stacks_,
    1.37                          size_t                  desired_plab_sz,
    1.38                          ParallelTaskTerminator& term);
    1.39 +
    1.40 +  ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); }
    1.41 +
    1.42    inline ParScanThreadState& thread_state(int i);
    1.43 -  int pushes() { return _pushes; }
    1.44 -  int pops()   { return _pops; }
    1.45 -  int steals() { return _steals; }
    1.46 +
    1.47    void reset(bool promotion_failed);
    1.48    void flush();
    1.49 +
    1.50 +  #if TASKQUEUE_STATS
    1.51 +  static void
    1.52 +    print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
    1.53 +  void print_termination_stats(outputStream* const st = gclog_or_tty);
    1.54 +  static void
    1.55 +    print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty);
    1.56 +  void print_taskqueue_stats(outputStream* const st = gclog_or_tty);
    1.57 +  void reset_stats();
    1.58 +  #endif // TASKQUEUE_STATS
    1.59 +
    1.60  private:
    1.61    ParallelTaskTerminator& _term;
    1.62    ParNewGeneration&       _gen;
    1.63    Generation&             _next_gen;
    1.64 -  // staticstics
    1.65 -  int _pushes;
    1.66 -  int _pops;
    1.67 -  int _steals;
    1.68  };
    1.69  
    1.70  
    1.71 @@ -294,8 +305,7 @@
    1.72    GrowableArray<oop>** overflow_stack_set_,
    1.73    size_t desired_plab_sz, ParallelTaskTerminator& term)
    1.74    : ResourceArray(sizeof(ParScanThreadState), num_threads),
    1.75 -    _gen(gen), _next_gen(old_gen), _term(term),
    1.76 -    _pushes(0), _pops(0), _steals(0)
    1.77 +    _gen(gen), _next_gen(old_gen), _term(term)
    1.78  {
    1.79    assert(num_threads > 0, "sanity check!");
    1.80    // Initialize states.
    1.81 @@ -323,6 +333,82 @@
    1.82    }
    1.83  }
    1.84  
    1.85 +#if TASKQUEUE_STATS
    1.86 +void
    1.87 +ParScanThreadState::reset_stats()
    1.88 +{
    1.89 +  taskqueue_stats().reset();
    1.90 +  _term_attempts = 0;
    1.91 +  _overflow_refills = 0;
    1.92 +  _overflow_refill_objs = 0;
    1.93 +}
    1.94 +
    1.95 +void ParScanThreadStateSet::reset_stats()
    1.96 +{
    1.97 +  for (int i = 0; i < length(); ++i) {
    1.98 +    thread_state(i).reset_stats();
    1.99 +  }
   1.100 +}
   1.101 +
   1.102 +void
   1.103 +ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st)
   1.104 +{
   1.105 +  st->print_raw_cr("GC Termination Stats");
   1.106 +  st->print_raw_cr("     elapsed  --strong roots-- "
   1.107 +                   "-------termination-------");
   1.108 +  st->print_raw_cr("thr     ms        ms       %   "
   1.109 +                   "    ms       %   attempts");
   1.110 +  st->print_raw_cr("--- --------- --------- ------ "
   1.111 +                   "--------- ------ --------");
   1.112 +}
   1.113 +
   1.114 +void ParScanThreadStateSet::print_termination_stats(outputStream* const st)
   1.115 +{
   1.116 +  print_termination_stats_hdr(st);
   1.117 +
   1.118 +  for (int i = 0; i < length(); ++i) {
   1.119 +    const ParScanThreadState & pss = thread_state(i);
   1.120 +    const double elapsed_ms = pss.elapsed_time() * 1000.0;
   1.121 +    const double s_roots_ms = pss.strong_roots_time() * 1000.0;
   1.122 +    const double term_ms = pss.term_time() * 1000.0;
   1.123 +    st->print_cr("%3d %9.2f %9.2f %6.2f "
   1.124 +                 "%9.2f %6.2f " SIZE_FORMAT_W(8),
   1.125 +                 i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
   1.126 +                 term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts());
   1.127 +  }
   1.128 +}
   1.129 +
   1.130 +// Print stats related to work queue activity.
   1.131 +void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st)
   1.132 +{
   1.133 +  st->print_raw_cr("GC Task Stats");
   1.134 +  st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr();
   1.135 +  st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr();
   1.136 +}
   1.137 +
   1.138 +void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st)
   1.139 +{
   1.140 +  print_taskqueue_stats_hdr(st);
   1.141 +
   1.142 +  TaskQueueStats totals;
   1.143 +  for (int i = 0; i < length(); ++i) {
   1.144 +    const ParScanThreadState & pss = thread_state(i);
   1.145 +    const TaskQueueStats & stats = pss.taskqueue_stats();
   1.146 +    st->print("%3d ", i); stats.print(st); st->cr();
   1.147 +    totals += stats;
   1.148 +
   1.149 +    if (pss.overflow_refills() > 0) {
   1.150 +      st->print_cr("    " SIZE_FORMAT_W(10) " overflow refills    "
   1.151 +                   SIZE_FORMAT_W(10) " overflow objects",
   1.152 +                   pss.overflow_refills(), pss.overflow_refill_objs());
   1.153 +    }
   1.154 +  }
   1.155 +  st->print("tot "); totals.print(st); st->cr();
   1.156 +
   1.157 +  DEBUG_ONLY(totals.verify());
   1.158 +}
   1.159 +#endif // TASKQUEUE_STATS
   1.160 +
   1.161  void ParScanThreadStateSet::flush()
   1.162  {
   1.163    // Work in this loop should be kept as lightweight as
   1.164 @@ -346,42 +432,8 @@
   1.165      // Inform old gen that we're done.
   1.166      _next_gen.par_promote_alloc_done(i);
   1.167      _next_gen.par_oop_since_save_marks_iterate_done(i);
   1.168 +  }
   1.169  
   1.170 -    // Flush stats related to work queue activity (push/pop/steal)
   1.171 -    // This could conceivably become a bottleneck; if so, we'll put the
   1.172 -    // stat's gathering under the flag.
   1.173 -    if (PAR_STATS_ENABLED) {
   1.174 -      _pushes += par_scan_state.pushes();
   1.175 -      _pops   += par_scan_state.pops();
   1.176 -      _steals += par_scan_state.steals();
   1.177 -      if (ParallelGCVerbose) {
   1.178 -        gclog_or_tty->print("Thread %d complete:\n"
   1.179 -                            "  Pushes: %7d    Pops: %7d    Steals %7d (in %d attempts)\n",
   1.180 -                            i, par_scan_state.pushes(), par_scan_state.pops(),
   1.181 -                            par_scan_state.steals(), par_scan_state.steal_attempts());
   1.182 -        if (par_scan_state.overflow_pushes() > 0 ||
   1.183 -            par_scan_state.overflow_refills() > 0) {
   1.184 -          gclog_or_tty->print("  Overflow pushes: %7d    "
   1.185 -                              "Overflow refills: %7d for %d objs.\n",
   1.186 -                              par_scan_state.overflow_pushes(),
   1.187 -                              par_scan_state.overflow_refills(),
   1.188 -                              par_scan_state.overflow_refill_objs());
   1.189 -        }
   1.190 -
   1.191 -        double elapsed = par_scan_state.elapsed();
   1.192 -        double strong_roots = par_scan_state.strong_roots_time();
   1.193 -        double term = par_scan_state.term_time();
   1.194 -        gclog_or_tty->print(
   1.195 -                            "  Elapsed: %7.2f ms.\n"
   1.196 -                            "    Strong roots: %7.2f ms (%6.2f%%)\n"
   1.197 -                            "    Termination:  %7.2f ms (%6.2f%%) (in %d entries)\n",
   1.198 -                           elapsed * 1000.0,
   1.199 -                           strong_roots * 1000.0, (strong_roots*100.0/elapsed),
   1.200 -                           term * 1000.0, (term*100.0/elapsed),
   1.201 -                           par_scan_state.term_attempts());
   1.202 -      }
   1.203 -    }
   1.204 -  }
   1.205    if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
   1.206      // We need to call this even when ResizeOldPLAB is disabled
   1.207      // so as to avoid breaking some asserts. While we may be able
   1.208 @@ -456,15 +508,12 @@
   1.209      // We have no local work, attempt to steal from other threads.
   1.210  
   1.211      // attempt to steal work from promoted.
   1.212 -    par_scan_state()->note_steal_attempt();
   1.213      if (task_queues()->steal(par_scan_state()->thread_num(),
   1.214                               par_scan_state()->hash_seed(),
   1.215                               obj_to_scan)) {
   1.216 -      par_scan_state()->note_steal();
   1.217        bool res = work_q->push(obj_to_scan);
   1.218        assert(res, "Empty queue should have room for a push.");
   1.219  
   1.220 -      par_scan_state()->note_push();
   1.221        //   if successful, goto Start.
   1.222        continue;
   1.223  
   1.224 @@ -842,17 +891,6 @@
   1.225    }
   1.226    thread_state_set.reset(promotion_failed());
   1.227  
   1.228 -  if (PAR_STATS_ENABLED && ParallelGCVerbose) {
   1.229 -    gclog_or_tty->print("Thread totals:\n"
   1.230 -               "  Pushes: %7d    Pops: %7d    Steals %7d (sum = %7d).\n",
   1.231 -               thread_state_set.pushes(), thread_state_set.pops(),
   1.232 -               thread_state_set.steals(),
   1.233 -               thread_state_set.pops()+thread_state_set.steals());
   1.234 -  }
   1.235 -  assert(thread_state_set.pushes() == thread_state_set.pops()
   1.236 -                                    + thread_state_set.steals(),
   1.237 -         "Or else the queues are leaky.");
   1.238 -
   1.239    // Process (weak) reference objects found during scavenge.
   1.240    ReferenceProcessor* rp = ref_processor();
   1.241    IsAliveClosure is_alive(this);
   1.242 @@ -932,6 +970,9 @@
   1.243      gch->print_heap_change(gch_prev_used);
   1.244    }
   1.245  
   1.246 +  TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats());
   1.247 +  TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats());
   1.248 +
   1.249    if (UseAdaptiveSizePolicy) {
   1.250      size_policy->minor_collection_end(gch->gc_cause());
   1.251      size_policy->avg_survived()->sample(from()->used());
   1.252 @@ -1104,9 +1145,8 @@
   1.253          gclog_or_tty->print("queue overflow!\n");
   1.254        }
   1.255        push_on_overflow_list(old, par_scan_state);
   1.256 -      par_scan_state->note_overflow_push();
   1.257 +      TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
   1.258      }
   1.259 -    par_scan_state->note_push();
   1.260  
   1.261      return new_obj;
   1.262    }
   1.263 @@ -1227,9 +1267,8 @@
   1.264      if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
   1.265        // Add stats for overflow pushes.
   1.266        push_on_overflow_list(old, par_scan_state);
   1.267 -      par_scan_state->note_overflow_push();
   1.268 +      TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
   1.269      }
   1.270 -    par_scan_state->note_push();
   1.271  
   1.272      return new_obj;
   1.273    }
   1.274 @@ -1466,7 +1505,7 @@
   1.275      cur = next;
   1.276      n++;
   1.277    }
   1.278 -  par_scan_state->note_overflow_refill(n);
   1.279 +  TASKQUEUE_STATS_ONLY(par_scan_state->note_overflow_refill(n));
   1.280  #ifndef PRODUCT
   1.281    assert(_num_par_pushes >= n, "Too many pops?");
   1.282    Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);

mercurial