1.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Wed Apr 27 01:25:04 2016 +0800 1.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Fri Apr 29 00:06:10 2016 +0800 1.3 @@ -22,12 +22,19 @@ 1.4 * 1.5 */ 1.6 1.7 +/* 1.8 + * This file has been modified by Loongson Technology in 2015. These 1.9 + * modifications are Copyright (c) 2015 Loongson Technology, and are made 1.10 + * available on the same license terms set forth above. 1.11 + */ 1.12 + 1.13 #ifndef SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONMANAGER_INLINE_HPP 1.14 #define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONMANAGER_INLINE_HPP 1.15 1.16 #include "gc_implementation/parallelScavenge/psOldGen.hpp" 1.17 #include "gc_implementation/parallelScavenge/psPromotionManager.hpp" 1.18 #include "gc_implementation/parallelScavenge/psScavenge.hpp" 1.19 +#include "gc_implementation/shared/mutableNUMASpace.hpp" 1.20 #include "oops/oop.psgc.inline.hpp" 1.21 1.22 inline PSPromotionManager* PSPromotionManager::manager_array(int index) { 1.23 @@ -68,6 +75,11 @@ 1.24 // into smaller submethods, but we need to be careful not to hurt 1.25 // performance. 1.26 // 1.27 + 1.28 +extern int node_ex; 1.29 +extern int each_gc_copy_fre[16]; 1.30 +extern float each_gc_copy_time[16]; 1.31 + 1.32 template<bool promote_immediately> 1.33 oop PSPromotionManager::copy_to_survivor_space(oop o) { 1.34 assert(PSScavenge::should_scavenge(&o), "Sanity"); 1.35 @@ -83,6 +95,10 @@ 1.36 if (!test_mark->is_marked()) { 1.37 bool new_obj_is_tenured = false; 1.38 size_t new_obj_size = o->size(); 1.39 + 1.40 + if(UseStasticScavenge) { 1.41 + stastic_scavenge(o); 1.42 + } 1.43 1.44 if (!promote_immediately) { 1.45 // Find the objects age, MT safe. 1.46 @@ -122,53 +138,154 @@ 1.47 } 1.48 #endif // #ifndef PRODUCT 1.49 1.50 - new_obj = (oop) _old_lab.allocate(new_obj_size); 1.51 - new_obj_is_tenured = true; 1.52 + if(UseOldNUMA) { 1.53 +/* 2014/7/7 Liao: Copy objects to the same node of current GC thread */ 1.54 + if(UseNUMAGC) { 1.55 + new_obj = (oop) _old_lab_oldnuma[os::numa_get_group_id()].allocate(new_obj_size); 1.56 + new_obj_is_tenured = true; 1.57 1.58 - if (new_obj == NULL) { 1.59 - if (!_old_gen_is_full) { 1.60 - // Do we allocate directly, or flush and refill? 1.61 - if (new_obj_size > (OldPLABSize / 2)) { 1.62 - // Allocate this object directly 1.63 - new_obj = (oop)old_gen()->cas_allocate(new_obj_size); 1.64 - } else { 1.65 - // Flush and fill 1.66 - _old_lab.flush(); 1.67 + if (new_obj == NULL) { 1.68 + if (!_old_gen_is_full) { 1.69 + // Do we allocate directly, or flush and refill? 1.70 + if (new_obj_size > (OldPLABSize / 2)) { 1.71 + // Allocate this object directly 1.72 + new_obj = (oop)old_gen()->cas_allocate(new_obj_size, os::numa_get_group_id()); 1.73 + } else { 1.74 + // Flush and fill 1.75 + _old_lab_oldnuma[os::numa_get_group_id()].flush(); 1.76 1.77 - HeapWord* lab_base = old_gen()->cas_allocate(OldPLABSize); 1.78 - if(lab_base != NULL) { 1.79 -#ifdef ASSERT 1.80 - // Delay the initialization of the promotion lab (plab). 1.81 - // This exposes uninitialized plabs to card table processing. 1.82 - if (GCWorkerDelayMillis > 0) { 1.83 - os::sleep(Thread::current(), GCWorkerDelayMillis, false); 1.84 + HeapWord* lab_base = old_gen()->cas_allocate(OldPLABSize, os::numa_get_group_id()); 1.85 + if(lab_base != NULL) { 1.86 + _old_lab_oldnuma[os::numa_get_group_id()].initialize(MemRegion(lab_base, OldPLABSize)); 1.87 + // Try the old lab allocation again. 1.88 + new_obj = (oop) _old_lab_oldnuma[os::numa_get_group_id()].allocate(new_obj_size); 1.89 + } 1.90 } 1.91 -#endif 1.92 - _old_lab.initialize(MemRegion(lab_base, OldPLABSize)); 1.93 - // Try the old lab allocation again. 1.94 - new_obj = (oop) _old_lab.allocate(new_obj_size); 1.95 + } 1.96 + 1.97 + // This is the promotion failed test, and code handling. 1.98 + // The code belongs here for two reasons. It is slightly 1.99 + // different than the code below, and cannot share the 1.100 + // CAS testing code. Keeping the code here also minimizes 1.101 + // the impact on the common case fast path code. 1.102 + 1.103 + if (new_obj == NULL) { 1.104 + _old_gen_is_full = true; 1.105 + return oop_promotion_failed(o, test_mark); 1.106 } 1.107 } 1.108 } 1.109 + else { 1.110 + ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); 1.111 + MutableNUMASpace* s = (MutableNUMASpace*) heap->old_gen()->object_space(); 1.112 + int i = s->lgrp_spaces()->length(); 1.113 + int node; 1.114 + if(i > 1) { 1.115 + node = node_ex % (i - 1) + 1; 1.116 + node_ex++; 1.117 + } 1.118 + else 1.119 + node = 0; 1.120 1.121 - // This is the promotion failed test, and code handling. 1.122 - // The code belongs here for two reasons. It is slightly 1.123 - // different than the code below, and cannot share the 1.124 - // CAS testing code. Keeping the code here also minimizes 1.125 - // the impact on the common case fast path code. 1.126 + new_obj = (oop) _old_lab_oldnuma[node].allocate(new_obj_size); 1.127 + new_obj_is_tenured = true; 1.128 + 1.129 + if (new_obj == NULL) { 1.130 + if (!_old_gen_is_full) { 1.131 + // Do we allocate directly, or flush and refill? 1.132 + if (new_obj_size > (OldPLABSize / 2)) { 1.133 + // Allocate this object directly 1.134 + new_obj = (oop)old_gen()->cas_allocate(new_obj_size, node); 1.135 + } else { 1.136 + // Flush and fill 1.137 + _old_lab_oldnuma[node].flush(); 1.138 + 1.139 + HeapWord* lab_base = old_gen()->cas_allocate(OldPLABSize, node); 1.140 + if(lab_base != NULL) { 1.141 + _old_lab_oldnuma[node].initialize(MemRegion(lab_base, OldPLABSize)); 1.142 + // Try the old lab allocation again. 1.143 + new_obj = (oop) _old_lab_oldnuma[node].allocate(new_obj_size); 1.144 + } 1.145 + } 1.146 + } 1.147 + 1.148 + // This is the promotion failed test, and code handling. 1.149 + // The code belongs here for two reasons. It is slightly 1.150 + // different than the code below, and cannot share the 1.151 + // CAS testing code. Keeping the code here also minimizes 1.152 + // the impact on the common case fast path code. 1.153 + 1.154 + if (new_obj == NULL) { 1.155 + _old_gen_is_full = true; 1.156 + return oop_promotion_failed(o, test_mark); 1.157 + } 1.158 + } 1.159 + } 1.160 + } 1.161 + else { 1.162 + new_obj = (oop) _old_lab.allocate(new_obj_size); 1.163 + new_obj_is_tenured = true; 1.164 1.165 if (new_obj == NULL) { 1.166 - _old_gen_is_full = true; 1.167 - return oop_promotion_failed(o, test_mark); 1.168 + if (!_old_gen_is_full) { 1.169 + // Do we allocate directly, or flush and refill? 1.170 + if (new_obj_size > (OldPLABSize / 2)) { 1.171 + // Allocate this object directly 1.172 + new_obj = (oop)old_gen()->cas_allocate(new_obj_size, 0); 1.173 + } else { 1.174 + // Flush and fill 1.175 + _old_lab.flush(); 1.176 + 1.177 + HeapWord* lab_base = old_gen()->cas_allocate(OldPLABSize, 0); 1.178 + if(lab_base != NULL) { 1.179 +#ifdef ASSERT 1.180 + // Delay the initialization of the promotion lab (plab). 1.181 + // This exposes uninitialized plabs to card table processing. 1.182 + if (GCWorkerDelayMillis > 0) { 1.183 + os::sleep(Thread::current(), GCWorkerDelayMillis, false); 1.184 + } 1.185 +#endif 1.186 + _old_lab.initialize(MemRegion(lab_base, OldPLABSize)); 1.187 + // Try the old lab allocation again. 1.188 + new_obj = (oop) _old_lab.allocate(new_obj_size); 1.189 + } 1.190 + } 1.191 + } 1.192 + 1.193 + // This is the promotion failed test, and code handling. 1.194 + // The code belongs here for two reasons. It is slightly 1.195 + // different than the code below, and cannot share the 1.196 + // CAS testing code. Keeping the code here also minimizes 1.197 + // the impact on the common case fast path code. 1.198 + 1.199 + if (new_obj == NULL) { 1.200 + _old_gen_is_full = true; 1.201 + return oop_promotion_failed(o, test_mark); 1.202 + } 1.203 } 1.204 } 1.205 } 1.206 1.207 assert(new_obj != NULL, "allocation should have succeeded"); 1.208 1.209 + TimeStamp before_copy, after_copy; 1.210 + 1.211 + if(UseStasticCopy) { 1.212 + before_copy.update(); 1.213 + } 1.214 + 1.215 // Copy obj 1.216 Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size); 1.217 1.218 + if(UseStasticCopy) { 1.219 + after_copy.update(); 1.220 + } 1.221 + 1.222 + if(UseStasticCopy) { 1.223 + each_gc_copy_time[os::numa_get_cpu_id()] += after_copy.ticks() - before_copy.ticks(); 1.224 + each_gc_copy_fre[os::numa_get_cpu_id()]++; 1.225 + } 1.226 + 1.227 // Now we have to CAS in the header. 1.228 if (o->cas_forward_to(new_obj, test_mark)) { 1.229 // We won any races, we "own" this object. 1.230 @@ -205,8 +322,20 @@ 1.231 // deallocate it, so we have to test. If the deallocation fails, 1.232 // overwrite with a filler object. 1.233 if (new_obj_is_tenured) { 1.234 - if (!_old_lab.unallocate_object((HeapWord*) new_obj, new_obj_size)) { 1.235 - CollectedHeap::fill_with_object((HeapWord*) new_obj, new_obj_size); 1.236 + if(UseOldNUMA) { 1.237 + ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); 1.238 + MutableNUMASpace* s = (MutableNUMASpace*) heap->old_gen()->object_space(); 1.239 + int i; 1.240 + for(i = 0; i < s->lgrp_spaces()->length(); i++) { 1.241 + if (!_old_lab_oldnuma[i].unallocate_object((HeapWord*) new_obj, new_obj_size)) { 1.242 + CollectedHeap::fill_with_object((HeapWord*) new_obj, new_obj_size); 1.243 + } 1.244 + } 1.245 + } 1.246 + else { 1.247 + if (!_old_lab.unallocate_object((HeapWord*) new_obj, new_obj_size)) { 1.248 + CollectedHeap::fill_with_object((HeapWord*) new_obj, new_obj_size); 1.249 + } 1.250 } 1.251 } else if (!_young_lab.unallocate_object((HeapWord*) new_obj, new_obj_size)) { 1.252 CollectedHeap::fill_with_object((HeapWord*) new_obj, new_obj_size); 1.253 @@ -233,7 +362,6 @@ 1.254 return new_obj; 1.255 } 1.256 1.257 - 1.258 inline void PSPromotionManager::process_popped_location_depth(StarTask p) { 1.259 if (is_oop_masked(p)) { 1.260 assert(PSChunkLargeArrays, "invariant");