--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Sat Dec 01 00:00:00 2007 +0000
@@ -0,0 +1,198 @@
+/*
+ * Copyright 2006-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * The NUMA-aware allocator (MutableNUMASpace) is basically a modification
+ * of MutableSpace which preserves interfaces but implements different
+ * functionality. The space is split into chunks, one per locality group
+ * (resizing for adaptive size policy is also supported). Each thread
+ * allocates from the chunk corresponding to its home locality group.
+ * Whenever any chunk fills up, a young generation collection occurs.
+ * The chunks can also be adaptively resized. The idea behind the adaptive
+ * sizing is to reduce the loss of space in eden due to fragmentation. The
+ * main cause of fragmentation is uneven allocation rates among threads.
+ * The difference in allocation rates between locality groups may be caused
+ * either by application specifics or by uneven LWP distribution by the OS.
+ * Moreover, the application can have fewer threads than the number of
+ * locality groups.
+ * In order to resize the chunks we measure the allocation rate of the
+ * application between collections. We then reshape the chunks to reflect
+ * the allocation rate pattern. The AdaptiveWeightedAverage exponentially
+ * decaying average is used to smooth the measurements. The
+ * NUMASpaceResizeRate parameter controls the adaptation speed by
+ * restricting the number of bytes that can be moved during the adaptation
+ * phase.
+ * Chunks may contain pages from the wrong locality group. The page-scanner
+ * has been introduced to address this problem. Remote pages typically
+ * appear due to memory shortage in the target locality group. Also,
+ * Solaris would allocate a large page from a remote locality group even if
+ * small local pages are available. The page-scanner scans the pages right
+ * after the collection and frees remote pages in the hope that subsequent
+ * reallocation will be more successful. This approach proved to be useful
+ * on heavily loaded systems where multiple processes compete for memory.
+ */
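+// Illustrative sketch (not part of the original change): one plausible way
+// the adaptive resizing described above could turn smoothed per-group
+// allocation rates into a target chunk size. The function and its
+// parameters are hypothetical; the real policy is declared below as
+// adaptive_chunk_size() and is additionally bounded by NUMASpaceResizeRate.
+static inline size_t proportional_chunk_size(size_t eden_size, int lgrp_count,
+                                             float group_rate, float total_rate) {
+  // With no samples yet, divide the space equally (the "default chunk
+  // size" mentioned below).
+  if (total_rate == 0.0f) return eden_size / lgrp_count;
+  // Otherwise give each locality group a share of eden proportional to
+  // its smoothed allocation rate, so faster-allocating groups get more room.
+  return (size_t)((double)eden_size * (group_rate / total_rate));
+}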
+class MutableNUMASpace : public MutableSpace {
+  friend class VMStructs;
+
+  class LGRPSpace : public CHeapObj {
+    int _lgrp_id;
+    MutableSpace* _space;
+    MemRegion _invalid_region;
+    AdaptiveWeightedAverage* _alloc_rate;
+
+    struct SpaceStats {
+      size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
+      size_t _large_pages, _small_pages;
+
+      SpaceStats() {
+        _local_space = 0;
+        _remote_space = 0;
+        _unbiased_space = 0;
+        _uncommited_space = 0;
+        _large_pages = 0;
+        _small_pages = 0;
+      }
+    };
+
+    SpaceStats _space_stats;
+
+    char* _last_page_scanned;
+    char* last_page_scanned()           { return _last_page_scanned; }
+    void set_last_page_scanned(char* p) { _last_page_scanned = p; }
+   public:
+    LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL) {
+      _space = new MutableSpace();
+      _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
+    }
+    ~LGRPSpace() {
+      delete _space;
+      delete _alloc_rate;
+    }
+
+    // Grow the invalid region to the smallest single region covering both
+    // the existing invalid region and r.
+    void add_invalid_region(MemRegion r) {
+      if (!_invalid_region.is_empty()) {
+        _invalid_region.set_start(MIN2(_invalid_region.start(), r.start()));
+        _invalid_region.set_end(MAX2(_invalid_region.end(), r.end()));
+      } else {
+        _invalid_region = r;
+      }
+    }
+
+    static bool equals(void* lgrp_id_value, LGRPSpace* p) {
+      return *(int*)lgrp_id_value == p->lgrp_id();
+    }
+
+    // Fold the current occupancy into the decaying allocation rate average.
+    void sample() {
+      alloc_rate()->sample(space()->used_in_bytes());
+    }
+
+    MemRegion invalid_region() const            { return _invalid_region; }
+    void set_invalid_region(MemRegion r)        { _invalid_region = r; }
+    int lgrp_id() const                         { return _lgrp_id; }
+    MutableSpace* space() const                 { return _space; }
+    AdaptiveWeightedAverage* alloc_rate() const { return _alloc_rate; }
+    SpaceStats* space_stats()                   { return &_space_stats; }
+    void clear_space_stats()                    { _space_stats = SpaceStats(); }
+
+    void accumulate_statistics(size_t page_size);
+    void scan_pages(size_t page_size, size_t page_count);
+  };
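+  // Worked example (illustrative, not part of the original change): if
+  // NUMAChunkResizeWeight is the percentage weight of the
+  // AdaptiveWeightedAverage, each LGRPSpace::sample() call above updates
+  // the running rate roughly as
+  //
+  //   avg = (weight / 100.0) * used_in_bytes + (1.0 - weight / 100.0) * avg
+  //
+  // so recent collections dominate and old measurements decay
+  // exponentially. This is the smoothing referred to in the file header
+  // comment.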
+  GrowableArray<LGRPSpace*>* _lgrp_spaces;
+  size_t _page_size;
+  unsigned _adaptation_cycles, _samples_count;
+
+  void set_page_size(size_t psz) { _page_size = psz; }
+  size_t page_size() const       { return _page_size; }
+
+  unsigned adaptation_cycles()      { return _adaptation_cycles; }
+  void set_adaptation_cycles(int v) { _adaptation_cycles = v; }
+
+  unsigned samples_count()          { return _samples_count; }
+  void increment_samples_count()    { ++_samples_count; }
+
+  size_t _base_space_size;
+  void set_base_space_size(size_t v) { _base_space_size = v; }
+  size_t base_space_size() const     { return _base_space_size; }
+
+  // Check if the NUMA topology has changed. Add and remove spaces if needed.
+  // The update can be forced by setting the force parameter to true.
+  bool update_layout(bool force);
+  // Bias the region towards the first-touching lgrp.
+  void bias_region(MemRegion mr);
+  // Free pages in a given region.
+  void free_region(MemRegion mr);
+  // Get the current chunk size.
+  size_t current_chunk_size(int i);
+  // Get the default chunk size (the space divided equally among the
+  // locality groups).
+  size_t default_chunk_size();
+  // Adapt the chunk size to follow the allocation rate.
+  size_t adaptive_chunk_size(int i, size_t limit);
+  // Scan and free invalid pages.
+  void scan_pages(size_t page_count);
+  // Return the bottom_region and the top_region, aligned to the page_size()
+  // boundary.
+  // |------------------new_region---------------------------------|
+  // |----bottom_region--|---intersection---|------top_region------|
+  void select_tails(MemRegion new_region, MemRegion intersection,
+                    MemRegion* bottom_region, MemRegion* top_region);
+  // Try to merge the invalid region with the bottom or top region by
+  // decreasing the intersection area. If the invalid region lies inside
+  // the intersection, return it non-empty and aligned to the page_size()
+  // boundary.
+  // |------------------new_region---------------------------------|
+  // |----------------|-------invalid---|--------------------------|
+  // |----bottom_region--|---intersection---|------top_region------|
+  void merge_regions(MemRegion new_region, MemRegion* intersection,
+                     MemRegion* invalid_region);
+
+ public:
+  GrowableArray<LGRPSpace*>* lgrp_spaces() const { return _lgrp_spaces; }
+  MutableNUMASpace();
+  virtual ~MutableNUMASpace();
+  // Space initialization.
+  virtual void initialize(MemRegion mr, bool clear_space);
+  // Update the space layout if necessary, and do all the adaptive
+  // resizing work.
+  virtual void update();
+  // Update allocation rate averages.
+  virtual void accumulate_statistics();
+
+  virtual void clear();
+  virtual void mangle_unused_area();
+  virtual void ensure_parsability();
+  virtual size_t used_in_words() const;
+  virtual size_t free_in_words() const;
+  virtual size_t tlab_capacity(Thread* thr) const;
+  virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;
+
+  // Allocation (returns NULL if full).
+  virtual HeapWord* allocate(size_t word_size);
+  virtual HeapWord* cas_allocate(size_t word_size);
+
+  // Debugging
+  virtual void print_on(outputStream* st) const;
+  virtual void print_short_on(outputStream* st) const;
+  virtual void verify(bool allow_dirty) const;
+
+  virtual void set_top(HeapWord* value);
+};
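+// Illustrative usage sketch (not part of the original change): one
+// plausible sequence in which a collector could drive this space. The mr
+// and word_size variables are hypothetical; the calls are the virtual
+// interface declared above.
+//
+//   MutableNUMASpace* eden = new MutableNUMASpace();
+//   eden->initialize(mr, true);    // split mr into per-lgrp chunks
+//
+//   // Mutator fast path: allocate from the calling thread's home chunk;
+//   // NULL means the chunk is full and a young collection is due.
+//   HeapWord* obj = eden->cas_allocate(word_size);
+//
+//   // Around a collection:
+//   eden->accumulate_statistics(); // sample per-lgrp allocation rates
+//   eden->update();                // adapt layout and chunk sizes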