6725714: par compact - add a table to speed up bitmap searches

Thu, 30 May 2013 13:04:51 -0700

author
jcoomes
date
Thu, 30 May 2013 13:04:51 -0700
changeset 5201
5534bd30c151
parent 5196
8dbc025ff709
child 5202
47bdfb3d010f

6725714: par compact - add a table to speed up bitmap searches
Reviewed-by: jmasa, tschatzl

src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Mon May 27 12:58:42 2013 +0200
     1.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Thu May 30 13:04:51 2013 -0700
     1.3 @@ -116,7 +116,7 @@
     1.4  
     1.5    // The alignment used for eden and survivors within the young gen
     1.6    // and for boundary between young gen and old gen.
     1.7 -  size_t intra_heap_alignment() const { return 64 * K; }
     1.8 +  size_t intra_heap_alignment() const { return 64 * K * HeapWordSize; }
     1.9  
    1.10    size_t capacity() const;
    1.11    size_t used() const;
     2.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon May 27 12:58:42 2013 +0200
     2.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu May 30 13:04:51 2013 -0700
     2.3 @@ -59,13 +59,25 @@
     2.4  #include <math.h>
     2.5  
     2.6  // All sizes are in HeapWords.
     2.7 -const size_t ParallelCompactData::Log2RegionSize  = 9; // 512 words
     2.8 +const size_t ParallelCompactData::Log2RegionSize  = 16; // 64K words
     2.9  const size_t ParallelCompactData::RegionSize      = (size_t)1 << Log2RegionSize;
    2.10  const size_t ParallelCompactData::RegionSizeBytes =
    2.11    RegionSize << LogHeapWordSize;
    2.12  const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
    2.13  const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
    2.14 -const size_t ParallelCompactData::RegionAddrMask  = ~RegionAddrOffsetMask;
    2.15 +const size_t ParallelCompactData::RegionAddrMask       = ~RegionAddrOffsetMask;
    2.16 +
    2.17 +const size_t ParallelCompactData::Log2BlockSize   = 7; // 128 words
    2.18 +const size_t ParallelCompactData::BlockSize       = (size_t)1 << Log2BlockSize;
    2.19 +const size_t ParallelCompactData::BlockSizeBytes  =
    2.20 +  BlockSize << LogHeapWordSize;
    2.21 +const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1;
    2.22 +const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1;
    2.23 +const size_t ParallelCompactData::BlockAddrMask       = ~BlockAddrOffsetMask;
    2.24 +
    2.25 +const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize;
    2.26 +const size_t ParallelCompactData::Log2BlocksPerRegion =
    2.27 +  Log2RegionSize - Log2BlockSize;
    2.28  
    2.29  const ParallelCompactData::RegionData::region_sz_t
    2.30  ParallelCompactData::RegionData::dc_shift = 27;
    2.31 @@ -359,6 +371,10 @@
    2.32    _reserved_byte_size = 0;
    2.33    _region_data = 0;
    2.34    _region_count = 0;
    2.35 +
    2.36 +  _block_vspace = 0;
    2.37 +  _block_data = 0;
    2.38 +  _block_count = 0;
    2.39  }
    2.40  
    2.41  bool ParallelCompactData::initialize(MemRegion covered_region)
    2.42 @@ -372,8 +388,7 @@
    2.43    assert((region_size & RegionSizeOffsetMask) == 0,
    2.44           "region size not a multiple of RegionSize");
    2.45  
    2.46 -  bool result = initialize_region_data(region_size);
    2.47 -
    2.48 +  bool result = initialize_region_data(region_size) && initialize_block_data();
    2.49    return result;
    2.50  }
    2.51  
    2.52 @@ -418,17 +433,36 @@
    2.53    return false;
    2.54  }
    2.55  
    2.56 +bool ParallelCompactData::initialize_block_data()
    2.57 +{
    2.58 +  assert(_region_count != 0, "region data must be initialized first");
    2.59 +  const size_t count = _region_count << Log2BlocksPerRegion;
    2.60 +  _block_vspace = create_vspace(count, sizeof(BlockData));
    2.61 +  if (_block_vspace != 0) {
    2.62 +    _block_data = (BlockData*)_block_vspace->reserved_low_addr();
    2.63 +    _block_count = count;
    2.64 +    return true;
    2.65 +  }
    2.66 +  return false;
    2.67 +}
    2.68 +
    2.69  void ParallelCompactData::clear()
    2.70  {
    2.71    memset(_region_data, 0, _region_vspace->committed_size());
    2.72 +  memset(_block_data, 0, _block_vspace->committed_size());
    2.73  }
    2.74  
    2.75  void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
    2.76    assert(beg_region <= _region_count, "beg_region out of range");
    2.77    assert(end_region <= _region_count, "end_region out of range");
    2.78 +  assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize");
    2.79  
    2.80    const size_t region_cnt = end_region - beg_region;
    2.81    memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
    2.82 +
    2.83 +  const size_t beg_block = beg_region * BlocksPerRegion;
    2.84 +  const size_t block_cnt = region_cnt * BlocksPerRegion;
    2.85 +  memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
    2.86  }
    2.87  
    2.88  HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
    2.89 @@ -707,49 +741,48 @@
    2.90  
    2.91  HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
    2.92    assert(addr != NULL, "Should detect NULL oop earlier");
    2.93 -  assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
    2.94 -#ifdef ASSERT
    2.95 -  if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
    2.96 -    gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
    2.97 -  }
    2.98 -#endif
    2.99 -  assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
   2.100 +  assert(PSParallelCompact::gc_heap()->is_in(addr), "not in heap");
   2.101 +  assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked");
   2.102  
   2.103    // Region covering the object.
   2.104 -  size_t region_index = addr_to_region_idx(addr);
   2.105 -  const RegionData* const region_ptr = region(region_index);
   2.106 -  HeapWord* const region_addr = region_align_down(addr);
   2.107 -
   2.108 -  assert(addr < region_addr + RegionSize, "Region does not cover object");
   2.109 -  assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
   2.110 -
   2.111 +  RegionData* const region_ptr = addr_to_region_ptr(addr);
   2.112    HeapWord* result = region_ptr->destination();
   2.113  
   2.114 -  // If all the data in the region is live, then the new location of the object
   2.115 -  // can be calculated from the destination of the region plus the offset of the
   2.116 -  // object in the region.
   2.117 +  // If the entire Region is live, the new location is region->destination + the
    2.118 +  // offset of the object within the Region.
   2.119 +
   2.120 +  // Run some performance tests to determine if this special case pays off.  It
   2.121 +  // is worth it for pointers into the dense prefix.  If the optimization to
   2.122 +  // avoid pointer updates in regions that only point to the dense prefix is
   2.123 +  // ever implemented, this should be revisited.
   2.124    if (region_ptr->data_size() == RegionSize) {
   2.125 -    result += pointer_delta(addr, region_addr);
   2.126 -    DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
   2.127 +    result += region_offset(addr);
   2.128      return result;
   2.129    }
   2.130  
   2.131 -  // The new location of the object is
   2.132 -  //    region destination +
   2.133 -  //    size of the partial object extending onto the region +
   2.134 -  //    sizes of the live objects in the Region that are to the left of addr
   2.135 -  const size_t partial_obj_size = region_ptr->partial_obj_size();
   2.136 -  HeapWord* const search_start = region_addr + partial_obj_size;
   2.137 +  // Otherwise, the new location is region->destination + block offset + the
   2.138 +  // number of live words in the Block that are (a) to the left of addr and (b)
   2.139 +  // due to objects that start in the Block.
   2.140 +
   2.141 +  // Fill in the block table if necessary.  This is unsynchronized, so multiple
   2.142 +  // threads may fill the block table for a region (harmless, since it is
   2.143 +  // idempotent).
   2.144 +  if (!region_ptr->blocks_filled()) {
   2.145 +    PSParallelCompact::fill_blocks(addr_to_region_idx(addr));
   2.146 +    region_ptr->set_blocks_filled();
   2.147 +  }
   2.148 +
   2.149 +  HeapWord* const search_start = block_align_down(addr);
   2.150 +  const size_t block_offset = addr_to_block_ptr(addr)->offset();
   2.151  
   2.152    const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
   2.153 -  size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
   2.154 -
   2.155 -  result += partial_obj_size + live_to_left;
   2.156 -  DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
   2.157 +  const size_t live = bitmap->live_words_in_range(search_start, oop(addr));
   2.158 +  result += block_offset + live;
   2.159 +  DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result));
   2.160    return result;
   2.161  }
   2.162  
   2.163 -#ifdef  ASSERT
   2.164 +#ifdef ASSERT
   2.165  void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace)
   2.166  {
   2.167    const size_t* const beg = (const size_t*)vspace->committed_low_addr();
   2.168 @@ -762,16 +795,10 @@
   2.169  void ParallelCompactData::verify_clear()
   2.170  {
   2.171    verify_clear(_region_vspace);
   2.172 +  verify_clear(_block_vspace);
   2.173  }
   2.174  #endif  // #ifdef ASSERT
   2.175  
   2.176 -#ifdef NOT_PRODUCT
   2.177 -ParallelCompactData::RegionData* debug_region(size_t region_index) {
   2.178 -  ParallelCompactData& sd = PSParallelCompact::summary_data();
   2.179 -  return sd.region(region_index);
   2.180 -}
   2.181 -#endif
   2.182 -
   2.183  elapsedTimer        PSParallelCompact::_accumulated_time;
   2.184  unsigned int        PSParallelCompact::_total_invocations = 0;
   2.185  unsigned int        PSParallelCompact::_maximum_compaction_gc_num = 0;
   2.186 @@ -1961,11 +1988,6 @@
   2.187                                        maximum_heap_compaction);
   2.188  }
   2.189  
   2.190 -bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
   2.191 -  size_t addr_region_index = addr_to_region_idx(addr);
   2.192 -  return region_index == addr_region_index;
   2.193 -}
   2.194 -
   2.195  // This method contains no policy. You should probably
   2.196  // be calling invoke() instead.
   2.197  bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
   2.198 @@ -2627,6 +2649,41 @@
   2.199    }
   2.200  }
   2.201  
   2.202 +#ifdef ASSERT
   2.203 +// Write a histogram of the number of times the block table was filled for a
   2.204 +// region.
   2.205 +void PSParallelCompact::write_block_fill_histogram(outputStream* const out)
   2.206 +{
   2.207 +  if (!TraceParallelOldGCCompactionPhase) return;
   2.208 +
   2.209 +  typedef ParallelCompactData::RegionData rd_t;
   2.210 +  ParallelCompactData& sd = summary_data();
   2.211 +
   2.212 +  for (unsigned int id = old_space_id; id < last_space_id; ++id) {
   2.213 +    MutableSpace* const spc = _space_info[id].space();
   2.214 +    if (spc->bottom() != spc->top()) {
   2.215 +      const rd_t* const beg = sd.addr_to_region_ptr(spc->bottom());
   2.216 +      HeapWord* const top_aligned_up = sd.region_align_up(spc->top());
   2.217 +      const rd_t* const end = sd.addr_to_region_ptr(top_aligned_up);
   2.218 +
   2.219 +      size_t histo[5] = { 0, 0, 0, 0, 0 };
   2.220 +      const size_t histo_len = sizeof(histo) / sizeof(size_t);
   2.221 +      const size_t region_cnt = pointer_delta(end, beg, sizeof(rd_t));
   2.222 +
   2.223 +      for (const rd_t* cur = beg; cur < end; ++cur) {
   2.224 +        ++histo[MIN2(cur->blocks_filled_count(), histo_len - 1)];
   2.225 +      }
   2.226 +      out->print("%u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt);
   2.227 +      for (size_t i = 0; i < histo_len; ++i) {
   2.228 +        out->print(" " SIZE_FORMAT_W(5) " %5.1f%%",
   2.229 +                   histo[i], 100.0 * histo[i] / region_cnt);
   2.230 +      }
   2.231 +      out->cr();
   2.232 +    }
   2.233 +  }
   2.234 +}
   2.235 +#endif // #ifdef ASSERT
   2.236 +
   2.237  void PSParallelCompact::compact() {
   2.238    // trace("5");
   2.239    TraceTime tm("compaction phase", print_phases(), true, gclog_or_tty);
   2.240 @@ -2666,6 +2723,8 @@
   2.241        update_deferred_objects(cm, SpaceId(id));
   2.242      }
   2.243    }
   2.244 +
   2.245 +  DEBUG_ONLY(write_block_fill_histogram(gclog_or_tty));
   2.246  }
   2.247  
   2.248  #ifdef  ASSERT
   2.249 @@ -3130,6 +3189,57 @@
   2.250    } while (true);
   2.251  }
   2.252  
   2.253 +void PSParallelCompact::fill_blocks(size_t region_idx)
   2.254 +{
   2.255 +  // Fill in the block table elements for the specified region.  Each block
   2.256 +  // table element holds the number of live words in the region that are to the
   2.257 +  // left of the first object that starts in the block.  Thus only blocks in
   2.258 +  // which an object starts need to be filled.
   2.259 +  //
   2.260 +  // The algorithm scans the section of the bitmap that corresponds to the
   2.261 +  // region, keeping a running total of the live words.  When an object start is
   2.262 +  // found, if it's the first to start in the block that contains it, the
   2.263 +  // current total is written to the block table element.
   2.264 +  const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize;
   2.265 +  const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize;
   2.266 +  const size_t RegionSize = ParallelCompactData::RegionSize;
   2.267 +
   2.268 +  ParallelCompactData& sd = summary_data();
   2.269 +  const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size();
   2.270 +  if (partial_obj_size >= RegionSize) {
   2.271 +    return; // No objects start in this region.
   2.272 +  }
   2.273 +
   2.274 +  // Ensure the first loop iteration decides that the block has changed.
   2.275 +  size_t cur_block = sd.block_count();
   2.276 +
   2.277 +  const ParMarkBitMap* const bitmap = mark_bitmap();
   2.278 +
   2.279 +  const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment;
   2.280 +  assert((size_t)1 << Log2BitsPerBlock ==
   2.281 +         bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity");
   2.282 +
   2.283 +  size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize);
   2.284 +  const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize);
   2.285 +  size_t live_bits = bitmap->words_to_bits(partial_obj_size);
   2.286 +  beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end);
   2.287 +  while (beg_bit < range_end) {
   2.288 +    const size_t new_block = beg_bit >> Log2BitsPerBlock;
   2.289 +    if (new_block != cur_block) {
   2.290 +      cur_block = new_block;
   2.291 +      sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits));
   2.292 +    }
   2.293 +
   2.294 +    const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end);
   2.295 +    if (end_bit < range_end - 1) {
   2.296 +      live_bits += end_bit - beg_bit + 1;
   2.297 +      beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end);
   2.298 +    } else {
   2.299 +      return;
   2.300 +    }
   2.301 +  }
   2.302 +}
   2.303 +
   2.304  void
   2.305  PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) {
   2.306    const MutableSpace* sp = space(space_id);
     3.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Mon May 27 12:58:42 2013 +0200
     3.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu May 30 13:04:51 2013 -0700
     3.3 @@ -220,6 +220,17 @@
     3.4    // Mask for the bits in a pointer to get the address of the start of a region.
     3.5    static const size_t RegionAddrMask;
     3.6  
     3.7 +  static const size_t Log2BlockSize;
     3.8 +  static const size_t BlockSize;
     3.9 +  static const size_t BlockSizeBytes;
    3.10 +
    3.11 +  static const size_t BlockSizeOffsetMask;
    3.12 +  static const size_t BlockAddrOffsetMask;
    3.13 +  static const size_t BlockAddrMask;
    3.14 +
    3.15 +  static const size_t BlocksPerRegion;
    3.16 +  static const size_t Log2BlocksPerRegion;
    3.17 +
    3.18    class RegionData
    3.19    {
    3.20    public:
    3.21 @@ -272,6 +283,12 @@
    3.22      inline uint destination_count() const;
    3.23      inline uint destination_count_raw() const;
    3.24  
    3.25 +    // Whether the block table for this region has been filled.
    3.26 +    inline bool blocks_filled() const;
    3.27 +
    3.28 +    // Number of times the block table was filled.
    3.29 +    DEBUG_ONLY(inline size_t blocks_filled_count() const;)
    3.30 +
    3.31      // The location of the java heap data that corresponds to this region.
    3.32      inline HeapWord* data_location() const;
    3.33  
    3.34 @@ -296,6 +313,7 @@
    3.35      void set_partial_obj_size(size_t words)    {
    3.36        _partial_obj_size = (region_sz_t) words;
    3.37      }
    3.38 +    inline void set_blocks_filled();
    3.39  
    3.40      inline void set_destination_count(uint count);
    3.41      inline void set_live_obj_size(size_t words);
    3.42 @@ -328,7 +346,11 @@
    3.43      HeapWord*            _partial_obj_addr;
    3.44      region_sz_t          _partial_obj_size;
    3.45      region_sz_t volatile _dc_and_los;
    3.46 +    bool                 _blocks_filled;
    3.47 +
    3.48  #ifdef ASSERT
    3.49 +    size_t               _blocks_filled_count;   // Number of block table fills.
    3.50 +
    3.51      // These enable optimizations that are only partially implemented.  Use
    3.52      // debug builds to prevent the code fragments from breaking.
    3.53      HeapWord*            _data_location;
    3.54 @@ -337,11 +359,26 @@
    3.55  
    3.56  #ifdef ASSERT
    3.57     public:
    3.58 -    uint            _pushed;   // 0 until region is pushed onto a worker's stack
    3.59 +    uint                 _pushed;   // 0 until region is pushed onto a stack
    3.60     private:
    3.61  #endif
    3.62    };
    3.63  
    3.64 +  // "Blocks" allow shorter sections of the bitmap to be searched.  Each Block
    3.65 +  // holds an offset, which is the amount of live data in the Region to the left
    3.66 +  // of the first live object that starts in the Block.
    3.67 +  class BlockData
    3.68 +  {
    3.69 +  public:
    3.70 +    typedef unsigned short int blk_ofs_t;
    3.71 +
    3.72 +    blk_ofs_t offset() const    { return _offset; }
    3.73 +    void set_offset(size_t val) { _offset = (blk_ofs_t)val; }
    3.74 +
    3.75 +  private:
    3.76 +    blk_ofs_t _offset;
    3.77 +  };
    3.78 +
    3.79  public:
    3.80    ParallelCompactData();
    3.81    bool initialize(MemRegion covered_region);
    3.82 @@ -353,8 +390,9 @@
    3.83    inline RegionData* region(size_t region_idx) const;
    3.84    inline size_t     region(const RegionData* const region_ptr) const;
    3.85  
    3.86 -  // Returns true if the given address is contained within the region
    3.87 -  bool region_contains(size_t region_index, HeapWord* addr);
    3.88 +  size_t block_count() const { return _block_count; }
    3.89 +  inline BlockData* block(size_t block_idx) const;
    3.90 +  inline size_t     block(const BlockData* block_ptr) const;
    3.91  
    3.92    void add_obj(HeapWord* addr, size_t len);
    3.93    void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }
    3.94 @@ -394,11 +432,24 @@
    3.95    inline HeapWord*  region_align_up(HeapWord* addr) const;
    3.96    inline bool       is_region_aligned(HeapWord* addr) const;
    3.97  
    3.98 +  // Analogous to region_offset() for blocks.
    3.99 +  size_t     block_offset(const HeapWord* addr) const;
   3.100 +  size_t     addr_to_block_idx(const HeapWord* addr) const;
   3.101 +  size_t     addr_to_block_idx(const oop obj) const {
   3.102 +    return addr_to_block_idx((HeapWord*) obj);
   3.103 +  }
   3.104 +  inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
   3.105 +  inline HeapWord*  block_to_addr(size_t block) const;
   3.106 +  inline size_t     region_to_block_idx(size_t region) const;
   3.107 +
   3.108 +  inline HeapWord*  block_align_down(HeapWord* addr) const;
   3.109 +  inline HeapWord*  block_align_up(HeapWord* addr) const;
   3.110 +  inline bool       is_block_aligned(HeapWord* addr) const;
   3.111 +
   3.112    // Return the address one past the end of the partial object.
   3.113    HeapWord* partial_obj_end(size_t region_idx) const;
   3.114  
   3.115 -  // Return the new location of the object p after the
   3.116 -  // the compaction.
   3.117 +  // Return the location of the object after compaction.
   3.118    HeapWord* calc_new_pointer(HeapWord* addr);
   3.119  
   3.120    HeapWord* calc_new_pointer(oop p) {
   3.121 @@ -411,6 +462,7 @@
   3.122  #endif  // #ifdef ASSERT
   3.123  
   3.124  private:
   3.125 +  bool initialize_block_data();
   3.126    bool initialize_region_data(size_t region_size);
   3.127    PSVirtualSpace* create_vspace(size_t count, size_t element_size);
   3.128  
   3.129 @@ -424,6 +476,10 @@
   3.130    size_t          _reserved_byte_size;
   3.131    RegionData*     _region_data;
   3.132    size_t          _region_count;
   3.133 +
   3.134 +  PSVirtualSpace* _block_vspace;
   3.135 +  BlockData*      _block_data;
   3.136 +  size_t          _block_count;
   3.137  };
   3.138  
   3.139  inline uint
   3.140 @@ -438,6 +494,28 @@
   3.141    return destination_count_raw() >> dc_shift;
   3.142  }
   3.143  
   3.144 +inline bool
   3.145 +ParallelCompactData::RegionData::blocks_filled() const
   3.146 +{
   3.147 +  return _blocks_filled;
   3.148 +}
   3.149 +
   3.150 +#ifdef ASSERT
   3.151 +inline size_t
   3.152 +ParallelCompactData::RegionData::blocks_filled_count() const
   3.153 +{
   3.154 +  return _blocks_filled_count;
   3.155 +}
   3.156 +#endif // #ifdef ASSERT
   3.157 +
   3.158 +inline void
   3.159 +ParallelCompactData::RegionData::set_blocks_filled()
   3.160 +{
   3.161 +  _blocks_filled = true;
   3.162 +  // Debug builds count the number of times the table was filled.
   3.163 +  DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count));
   3.164 +}
   3.165 +
   3.166  inline void
   3.167  ParallelCompactData::RegionData::set_destination_count(uint count)
   3.168  {
   3.169 @@ -532,6 +610,12 @@
   3.170    return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
   3.171  }
   3.172  
   3.173 +inline ParallelCompactData::BlockData*
   3.174 +ParallelCompactData::block(size_t n) const {
   3.175 +  assert(n < block_count(), "bad arg");
   3.176 +  return _block_data + n;
   3.177 +}
   3.178 +
   3.179  inline size_t
   3.180  ParallelCompactData::region_offset(const HeapWord* addr) const
   3.181  {
   3.182 @@ -598,6 +682,63 @@
   3.183    return region_offset(addr) == 0;
   3.184  }
   3.185  
   3.186 +inline size_t
   3.187 +ParallelCompactData::block_offset(const HeapWord* addr) const
   3.188 +{
   3.189 +  assert(addr >= _region_start, "bad addr");
   3.190 +  assert(addr <= _region_end, "bad addr");
   3.191 +  return (size_t(addr) & BlockAddrOffsetMask) >> LogHeapWordSize;
   3.192 +}
   3.193 +
   3.194 +inline size_t
   3.195 +ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
   3.196 +{
   3.197 +  assert(addr >= _region_start, "bad addr");
   3.198 +  assert(addr <= _region_end, "bad addr");
   3.199 +  return pointer_delta(addr, _region_start) >> Log2BlockSize;
   3.200 +}
   3.201 +
   3.202 +inline ParallelCompactData::BlockData*
   3.203 +ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
   3.204 +{
   3.205 +  return block(addr_to_block_idx(addr));
   3.206 +}
   3.207 +
   3.208 +inline HeapWord*
   3.209 +ParallelCompactData::block_to_addr(size_t block) const
   3.210 +{
   3.211 +  assert(block < _block_count, "block out of range");
   3.212 +  return _region_start + (block << Log2BlockSize);
   3.213 +}
   3.214 +
   3.215 +inline size_t
   3.216 +ParallelCompactData::region_to_block_idx(size_t region) const
   3.217 +{
   3.218 +  return region << Log2BlocksPerRegion;
   3.219 +}
   3.220 +
   3.221 +inline HeapWord*
   3.222 +ParallelCompactData::block_align_down(HeapWord* addr) const
   3.223 +{
   3.224 +  assert(addr >= _region_start, "bad addr");
   3.225 +  assert(addr < _region_end + RegionSize, "bad addr");
   3.226 +  return (HeapWord*)(size_t(addr) & BlockAddrMask);
   3.227 +}
   3.228 +
   3.229 +inline HeapWord*
   3.230 +ParallelCompactData::block_align_up(HeapWord* addr) const
   3.231 +{
   3.232 +  assert(addr >= _region_start, "bad addr");
   3.233 +  assert(addr <= _region_end, "bad addr");
   3.234 +  return block_align_down(addr + BlockSizeOffsetMask);
   3.235 +}
   3.236 +
   3.237 +inline bool
   3.238 +ParallelCompactData::is_block_aligned(HeapWord* addr) const
   3.239 +{
   3.240 +  return block_offset(addr) == 0;
   3.241 +}
   3.242 +
   3.243  // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
   3.244  // do_addr() method.
   3.245  //
   3.246 @@ -775,6 +916,7 @@
   3.247    // Convenient access to type names.
   3.248    typedef ParMarkBitMap::idx_t idx_t;
   3.249    typedef ParallelCompactData::RegionData RegionData;
   3.250 +  typedef ParallelCompactData::BlockData BlockData;
   3.251  
   3.252    typedef enum {
   3.253      old_space_id, eden_space_id,
   3.254 @@ -962,6 +1104,8 @@
   3.255    // Adjust addresses in roots.  Does not adjust addresses in heap.
   3.256    static void adjust_roots();
   3.257  
   3.258 +  DEBUG_ONLY(static void write_block_fill_histogram(outputStream* const out);)
   3.259 +
   3.260    // Move objects to new locations.
   3.261    static void compact_perm(ParCompactionManager* cm);
   3.262    static void compact();
   3.263 @@ -1128,6 +1272,9 @@
   3.264      fill_region(cm, region);
   3.265    }
   3.266  
   3.267 +  // Fill in the block table for the specified region.
   3.268 +  static void fill_blocks(size_t region_idx);
   3.269 +
   3.270    // Update the deferred objects in the space.
   3.271    static void update_deferred_objects(ParCompactionManager* cm, SpaceId id);
   3.272  

mercurial