1.1 --- a/src/share/vm/classfile/symbolTable.cpp Mon Jun 11 13:10:14 2012 -0400 1.2 +++ b/src/share/vm/classfile/symbolTable.cpp Wed Jun 13 19:52:59 2012 -0400 1.3 @@ -23,6 +23,7 @@ 1.4 */ 1.5 1.6 #include "precompiled.hpp" 1.7 +#include "classfile/altHashing.hpp" 1.8 #include "classfile/javaClasses.hpp" 1.9 #include "classfile/symbolTable.hpp" 1.10 #include "classfile/systemDictionary.hpp" 1.11 @@ -34,12 +35,15 @@ 1.12 #include "oops/oop.inline2.hpp" 1.13 #include "runtime/mutexLocker.hpp" 1.14 #include "utilities/hashtable.inline.hpp" 1.15 +#include "utilities/numberSeq.hpp" 1.16 1.17 // -------------------------------------------------------------------------- 1.18 1.19 SymbolTable* SymbolTable::_the_table = NULL; 1.20 // Static arena for symbols that are not deallocated 1.21 Arena* SymbolTable::_arena = NULL; 1.22 +bool SymbolTable::_needs_rehashing = false; 1.23 +jint SymbolTable::_seed = 0; 1.24 1.25 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) { 1.26 // Don't allow symbols to be created which cannot fit in a Symbol*. 1.27 @@ -121,12 +125,41 @@ 1.28 } 1.29 } 1.30 1.31 +unsigned int SymbolTable::new_hash(Symbol* sym) { 1.32 + ResourceMark rm; 1.33 + // Use alternate hashing algorithm on this symbol. 1.34 + return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length()); 1.35 +} 1.36 + 1.37 +// Create a new table and using alternate hash code, populate the new table 1.38 +// with the existing strings. Set flag to use the alternate hash code afterwards. 1.39 +void SymbolTable::rehash_table() { 1.40 + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); 1.41 + assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump"); 1.42 + // Create a new symbol table 1.43 + SymbolTable* new_table = new SymbolTable(); 1.44 + 1.45 + // Initialize the global seed for hashing. 1.46 + _seed = AltHashing::compute_seed(); 1.47 + assert(seed() != 0, "shouldn't be zero"); 1.48 + 1.49 + the_table()->move_to(new_table); 1.50 + 1.51 + // Delete the table and buckets (entries are reused in new table). 1.52 + delete _the_table; 1.53 + // Don't check if we need rehashing until the table gets unbalanced again. 1.54 + // Then rehash with a new global seed. 1.55 + _needs_rehashing = false; 1.56 + _the_table = new_table; 1.57 +} 1.58 1.59 // Lookup a symbol in a bucket. 1.60 1.61 Symbol* SymbolTable::lookup(int index, const char* name, 1.62 int len, unsigned int hash) { 1.63 + int count = 0; 1.64 for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) { 1.65 + count++; // count all entries in this bucket, not just ones with same hash 1.66 if (e->hash() == hash) { 1.67 Symbol* sym = e->literal(); 1.68 if (sym->equals(name, len)) { 1.69 @@ -136,9 +169,21 @@ 1.70 } 1.71 } 1.72 } 1.73 + // If the bucket size is too deep check if this hash code is insufficient. 1.74 + if (count >= BasicHashtable::rehash_count && !needs_rehashing()) { 1.75 + _needs_rehashing = check_rehash_table(count); 1.76 + } 1.77 return NULL; 1.78 } 1.79 1.80 +// Pick hashing algorithm, but return value already given if not using a new 1.81 +// hash algorithm. 1.82 +unsigned int SymbolTable::hash_symbol(const char* s, int len, unsigned int hashValue) { 1.83 + return use_alternate_hashcode() ? 1.84 + AltHashing::murmur3_32(seed(), (const jbyte*)s, len) : 1.85 + (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len)); 1.86 +} 1.87 + 1.88 1.89 // We take care not to be blocking while holding the 1.90 // SymbolTable_lock. Otherwise, the system might deadlock, since the 1.91 @@ -287,13 +332,17 @@ 1.92 } 1.93 1.94 Symbol* SymbolTable::basic_add(int index, u1 *name, int len, 1.95 - unsigned int hashValue, bool c_heap, TRAPS) { 1.96 + unsigned int hashValue_arg, bool c_heap, TRAPS) { 1.97 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), 1.98 "proposed name of symbol must be stable"); 1.99 1.100 // Grab SymbolTable_lock first. 1.101 MutexLocker ml(SymbolTable_lock, THREAD); 1.102 1.103 + // Check if the symbol table has been rehashed, if so, need to recalculate 1.104 + // the hash value. 1.105 + unsigned int hashValue = hash_symbol((const char*)name, len, hashValue_arg); 1.106 + 1.107 // Since look-up was done lock-free, we need to check if another 1.108 // thread beat us in the race to insert the symbol. 1.109 Symbol* test = lookup(index, (char*)name, len, hashValue); 1.110 @@ -332,10 +381,13 @@ 1.111 MutexLocker ml(SymbolTable_lock, THREAD); 1.112 1.113 for (int i=0; i<names_count; i++) { 1.114 + // Check if the symbol table has been rehashed, if so, need to recalculate 1.115 + // the hash value. 1.116 + unsigned int hashValue = hash_symbol(names[i], lengths[i], hashValues[i]); 1.117 // Since look-up was done lock-free, we need to check if another 1.118 // thread beat us in the race to insert the symbol. 1.119 - int index = hash_to_index(hashValues[i]); 1.120 - Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]); 1.121 + int index = hash_to_index(hashValue); 1.122 + Symbol* test = lookup(index, names[i], lengths[i], hashValue); 1.123 if (test != NULL) { 1.124 // A race occurred and another thread introduced the symbol, this one 1.125 // will be dropped and collected. Use test instead. 1.126 @@ -347,7 +399,7 @@ 1.127 bool c_heap = class_loader() != NULL; 1.128 Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false)); 1.129 assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be??? 1.130 - HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym); 1.131 + HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym); 1.132 add_entry(index, entry); 1.133 cp->symbol_at_put(cp_indices[i], sym); 1.134 } 1.135 @@ -370,6 +422,24 @@ 1.136 } 1.137 } 1.138 1.139 +void SymbolTable::dump(outputStream* st) { 1.140 + NumberSeq summary; 1.141 + for (int i = 0; i < the_table()->table_size(); ++i) { 1.142 + int count = 0; 1.143 + for (HashtableEntry<Symbol*>* e = the_table()->bucket(i); 1.144 + e != NULL; e = e->next()) { 1.145 + count++; 1.146 + } 1.147 + summary.add((double)count); 1.148 + } 1.149 + st->print_cr("SymbolTable statistics:"); 1.150 + st->print_cr("Number of buckets : %7d", summary.num()); 1.151 + st->print_cr("Average bucket size : %7.0f", summary.avg()); 1.152 + st->print_cr("Variance of bucket size : %7.0f", summary.variance()); 1.153 + st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd()); 1.154 + st->print_cr("Maximum bucket size : %7.0f", summary.maximum()); 1.155 +} 1.156 + 1.157 1.158 //--------------------------------------------------------------------------- 1.159 // Non-product code 1.160 @@ -468,7 +538,6 @@ 1.161 } 1.162 } 1.163 } 1.164 - 1.165 #endif // PRODUCT 1.166 1.167 // -------------------------------------------------------------------------- 1.168 @@ -514,21 +583,36 @@ 1.169 // -------------------------------------------------------------------------- 1.170 StringTable* StringTable::_the_table = NULL; 1.171 1.172 +bool StringTable::_needs_rehashing = false; 1.173 +jint StringTable::_seed = 0; 1.174 + 1.175 +// Pick hashing algorithm 1.176 +unsigned int StringTable::hash_string(const jchar* s, int len, unsigned int hashValue) { 1.177 + return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) : 1.178 + (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len)); 1.179 +} 1.180 + 1.181 oop StringTable::lookup(int index, jchar* name, 1.182 int len, unsigned int hash) { 1.183 + int count = 0; 1.184 for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) { 1.185 + count++; 1.186 if (l->hash() == hash) { 1.187 if (java_lang_String::equals(l->literal(), name, len)) { 1.188 return l->literal(); 1.189 } 1.190 } 1.191 } 1.192 + // If the bucket size is too deep check if this hash code is insufficient. 1.193 + if (count >= BasicHashtable::rehash_count && !needs_rehashing()) { 1.194 + _needs_rehashing = check_rehash_table(count); 1.195 + } 1.196 return NULL; 1.197 } 1.198 1.199 1.200 oop StringTable::basic_add(int index, Handle string_or_null, jchar* name, 1.201 - int len, unsigned int hashValue, TRAPS) { 1.202 + int len, unsigned int hashValue_arg, TRAPS) { 1.203 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0]))); 1.204 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), 1.205 "proposed name of symbol must be stable"); 1.206 @@ -547,6 +631,10 @@ 1.207 assert(java_lang_String::equals(string(), name, len), 1.208 "string must be properly initialized"); 1.209 1.210 + // Check if the symbol table has been rehashed, if so, need to recalculate 1.211 + // the hash value before second lookup. 1.212 + unsigned int hashValue = hash_string(name, len, hashValue_arg); 1.213 + 1.214 // Since look-up was done lock-free, we need to check if another 1.215 // thread beat us in the race to insert the symbol. 1.216 1.217 @@ -566,7 +654,7 @@ 1.218 ResourceMark rm; 1.219 int length; 1.220 jchar* chars = symbol->as_unicode(length); 1.221 - unsigned int hashValue = java_lang_String::hash_string(chars, length); 1.222 + unsigned int hashValue = hash_string(chars, length); 1.223 int index = the_table()->hash_to_index(hashValue); 1.224 return the_table()->lookup(index, chars, length, hashValue); 1.225 } 1.226 @@ -574,7 +662,7 @@ 1.227 1.228 oop StringTable::intern(Handle string_or_null, jchar* name, 1.229 int len, TRAPS) { 1.230 - unsigned int hashValue = java_lang_String::hash_string(name, len); 1.231 + unsigned int hashValue = hash_string(name, len); 1.232 int index = the_table()->hash_to_index(hashValue); 1.233 oop string = the_table()->lookup(index, name, len, hashValue); 1.234 1.235 @@ -675,3 +763,52 @@ 1.236 } 1.237 } 1.238 } 1.239 + 1.240 +void StringTable::dump(outputStream* st) { 1.241 + NumberSeq summary; 1.242 + for (int i = 0; i < the_table()->table_size(); ++i) { 1.243 + HashtableEntry<oop>* p = the_table()->bucket(i); 1.244 + int count = 0; 1.245 + for ( ; p != NULL; p = p->next()) { 1.246 + count++; 1.247 + } 1.248 + summary.add((double)count); 1.249 + } 1.250 + st->print_cr("StringTable statistics:"); 1.251 + st->print_cr("Number of buckets : %7d", summary.num()); 1.252 + st->print_cr("Average bucket size : %7.0f", summary.avg()); 1.253 + st->print_cr("Variance of bucket size : %7.0f", summary.variance()); 1.254 + st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd()); 1.255 + st->print_cr("Maximum bucket size : %7.0f", summary.maximum()); 1.256 +} 1.257 + 1.258 + 1.259 +unsigned int StringTable::new_hash(oop string) { 1.260 + ResourceMark rm; 1.261 + int length; 1.262 + jchar* chars = java_lang_String::as_unicode_string(string, length); 1.263 + // Use alternate hashing algorithm on the string 1.264 + return AltHashing::murmur3_32(seed(), chars, length); 1.265 +} 1.266 + 1.267 +// Create a new table and using alternate hash code, populate the new table 1.268 +// with the existing strings. Set flag to use the alternate hash code afterwards. 1.269 +void StringTable::rehash_table() { 1.270 + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); 1.271 + assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump"); 1.272 + StringTable* new_table = new StringTable(); 1.273 + 1.274 + // Initialize new global seed for hashing. 1.275 + _seed = AltHashing::compute_seed(); 1.276 + assert(seed() != 0, "shouldn't be zero"); 1.277 + 1.278 + // Rehash the table 1.279 + the_table()->move_to(new_table); 1.280 + 1.281 + // Delete the table and buckets (entries are reused in new table). 1.282 + delete _the_table; 1.283 + // Don't check if we need rehashing until the table gets unbalanced again. 1.284 + // Then rehash with a new global seed. 1.285 + _needs_rehashing = false; 1.286 + _the_table = new_table; 1.287 +}