Wed, 13 Jun 2012 19:52:59 -0400
7158800: Improve storage of symbol tables
Summary: Use an alternate hashing algorithm for the symbol and string tables once a bucket grows past a certain size, to improve performance
Reviewed-by: pbk, kamg, dlong, kvn, fparain
1 /*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/altHashing.hpp"
27 #include "classfile/javaClasses.hpp"
28 #include "classfile/symbolTable.hpp"
29 #include "classfile/systemDictionary.hpp"
30 #include "gc_interface/collectedHeap.inline.hpp"
31 #include "memory/allocation.inline.hpp"
32 #include "memory/filemap.hpp"
33 #include "memory/gcLocker.inline.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "oops/oop.inline2.hpp"
36 #include "runtime/mutexLocker.hpp"
37 #include "utilities/hashtable.inline.hpp"
38 #include "utilities/numberSeq.hpp"
40 // --------------------------------------------------------------------------
42 SymbolTable* SymbolTable::_the_table = NULL;
43 // Static arena for symbols that are not deallocated
44 Arena* SymbolTable::_arena = NULL;
45 bool SymbolTable::_needs_rehashing = false;
46 jint SymbolTable::_seed = 0;
48 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
49 // Don't allow symbols to be created which cannot fit in a Symbol*.
50 if (len > Symbol::max_length()) {
51 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
52 "name is too long to represent");
53 }
54 Symbol* sym;
55 // Allocate symbols in the C heap when dumping shared spaces in case there
56 // are temporary symbols we can remove.
57 if (c_heap || DumpSharedSpaces) {
58 // refcount starts as 1
59 sym = new (len, THREAD) Symbol(name, len, 1);
60 } else {
61 sym = new (len, arena(), THREAD) Symbol(name, len, -1);
62 }
63 assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
64 return sym;
65 }
67 void SymbolTable::initialize_symbols(int arena_alloc_size) {
68 // Initialize the arena for global symbols, size passed in depends on CDS.
69 if (arena_alloc_size == 0) {
70 _arena = new Arena();
71 } else {
72 _arena = new Arena(arena_alloc_size);
73 }
74 }
76 // Call function for all symbols in the symbol table.
77 void SymbolTable::symbols_do(SymbolClosure *cl) {
78 const int n = the_table()->table_size();
79 for (int i = 0; i < n; i++) {
80 for (HashtableEntry<Symbol*>* p = the_table()->bucket(i);
81 p != NULL;
82 p = p->next()) {
83 cl->do_symbol(p->literal_addr());
84 }
85 }
86 }
88 int SymbolTable::symbols_removed = 0;
89 int SymbolTable::symbols_counted = 0;
91 // Remove unreferenced symbols from the symbol table
92 // This is done late during GC.
93 void SymbolTable::unlink() {
94 int removed = 0;
95 int total = 0;
96 size_t memory_total = 0;
97 for (int i = 0; i < the_table()->table_size(); ++i) {
98 for (HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i); *p != NULL; ) {
99 HashtableEntry<Symbol*>* entry = *p;
100 if (entry->is_shared()) {
101 break;
102 }
103 Symbol* s = entry->literal();
104 memory_total += s->object_size();
105 total++;
106 assert(s != NULL, "just checking");
107 // If reference count is zero, remove.
108 if (s->refcount() == 0) {
109 delete s;
110 removed++;
111 *p = entry->next();
112 the_table()->free_entry(entry);
113 } else {
114 p = entry->next_addr();
115 }
116 }
117 }
118 symbols_removed += removed;
119 symbols_counted += total;
120 // Exclude printing for normal PrintGCDetails because people parse
121 // this output.
122 if (PrintGCDetails && Verbose && WizardMode) {
123 gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", total,
124 (memory_total*HeapWordSize)/1024);
125 }
126 }
128 unsigned int SymbolTable::new_hash(Symbol* sym) {
129 ResourceMark rm;
130 // Use alternate hashing algorithm on this symbol.
131 return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length());
132 }
134 // Create a new table and using alternate hash code, populate the new table
135 // with the existing strings. Set flag to use the alternate hash code afterwards.
136 void SymbolTable::rehash_table() {
137 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
138 assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
139 // Create a new symbol table
140 SymbolTable* new_table = new SymbolTable();
142 // Initialize the global seed for hashing.
143 _seed = AltHashing::compute_seed();
144 assert(seed() != 0, "shouldn't be zero");
146 the_table()->move_to(new_table);
148 // Delete the table and buckets (entries are reused in new table).
149 delete _the_table;
150 // Don't check if we need rehashing until the table gets unbalanced again.
151 // Then rehash with a new global seed.
152 _needs_rehashing = false;
153 _the_table = new_table;
154 }
156 // Lookup a symbol in a bucket.
158 Symbol* SymbolTable::lookup(int index, const char* name,
159 int len, unsigned int hash) {
160 int count = 0;
161 for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
162 count++; // count all entries in this bucket, not just ones with same hash
163 if (e->hash() == hash) {
164 Symbol* sym = e->literal();
165 if (sym->equals(name, len)) {
166 // something is referencing this symbol now.
167 sym->increment_refcount();
168 return sym;
169 }
170 }
171 }
172 // If the bucket size is too deep check if this hash code is insufficient.
173 if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
174 _needs_rehashing = check_rehash_table(count);
175 }
176 return NULL;
177 }
179 // Pick hashing algorithm, but return value already given if not using a new
180 // hash algorithm.
181 unsigned int SymbolTable::hash_symbol(const char* s, int len, unsigned int hashValue) {
182 return use_alternate_hashcode() ?
183 AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
184 (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
185 }
188 // We take care not to be blocking while holding the
189 // SymbolTable_lock. Otherwise, the system might deadlock, since the
190 // symboltable is used during compilation (VM_thread) The lock free
191 // synchronization is simplified by the fact that we do not delete
192 // entries in the symbol table during normal execution (only during
193 // safepoints).
195 Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
196 unsigned int hashValue = hash_symbol(name, len);
197 int index = the_table()->hash_to_index(hashValue);
199 Symbol* s = the_table()->lookup(index, name, len, hashValue);
201 // Found
202 if (s != NULL) return s;
204 // Otherwise, add to symbol to table
205 return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
206 }
208 Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
209 char* buffer;
210 int index, len;
211 unsigned int hashValue;
212 char* name;
213 {
214 debug_only(No_Safepoint_Verifier nsv;)
216 name = (char*)sym->base() + begin;
217 len = end - begin;
218 hashValue = hash_symbol(name, len);
219 index = the_table()->hash_to_index(hashValue);
220 Symbol* s = the_table()->lookup(index, name, len, hashValue);
222 // Found
223 if (s != NULL) return s;
224 }
226 // Otherwise, add to symbol to table. Copy to a C string first.
227 char stack_buf[128];
228 ResourceMark rm(THREAD);
229 if (len <= 128) {
230 buffer = stack_buf;
231 } else {
232 buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
233 }
234 for (int i=0; i<len; i++) {
235 buffer[i] = name[i];
236 }
237 // Make sure there is no safepoint in the code above since name can't move.
238 // We can't include the code in No_Safepoint_Verifier because of the
239 // ResourceMark.
241 return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
242 }
244 Symbol* SymbolTable::lookup_only(const char* name, int len,
245 unsigned int& hash) {
246 hash = hash_symbol(name, len);
247 int index = the_table()->hash_to_index(hash);
249 Symbol* s = the_table()->lookup(index, name, len, hash);
250 return s;
251 }
253 // Look up the address of the literal in the SymbolTable for this Symbol*
254 // Do not create any new symbols
255 // Do not increment the reference count to keep this alive
256 Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
257 unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
258 int index = the_table()->hash_to_index(hash);
260 for (HashtableEntry<Symbol*>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
261 if (e->hash() == hash) {
262 Symbol* literal_sym = e->literal();
263 if (sym == literal_sym) {
264 return e->literal_addr();
265 }
266 }
267 }
268 return NULL;
269 }
271 // Suggestion: Push unicode-based lookup all the way into the hashing
272 // and probing logic, so there is no need for convert_to_utf8 until
273 // an actual new Symbol* is created.
274 Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
275 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
276 char stack_buf[128];
277 if (utf8_length < (int) sizeof(stack_buf)) {
278 char* chars = stack_buf;
279 UNICODE::convert_to_utf8(name, utf16_length, chars);
280 return lookup(chars, utf8_length, THREAD);
281 } else {
282 ResourceMark rm(THREAD);
283 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
284 UNICODE::convert_to_utf8(name, utf16_length, chars);
285 return lookup(chars, utf8_length, THREAD);
286 }
287 }
289 Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
290 unsigned int& hash) {
291 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
292 char stack_buf[128];
293 if (utf8_length < (int) sizeof(stack_buf)) {
294 char* chars = stack_buf;
295 UNICODE::convert_to_utf8(name, utf16_length, chars);
296 return lookup_only(chars, utf8_length, hash);
297 } else {
298 ResourceMark rm;
299 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
300 UNICODE::convert_to_utf8(name, utf16_length, chars);
301 return lookup_only(chars, utf8_length, hash);
302 }
303 }
305 void SymbolTable::add(Handle class_loader, constantPoolHandle cp,
306 int names_count,
307 const char** names, int* lengths, int* cp_indices,
308 unsigned int* hashValues, TRAPS) {
309 SymbolTable* table = the_table();
310 bool added = table->basic_add(class_loader, cp, names_count, names, lengths,
311 cp_indices, hashValues, CHECK);
312 if (!added) {
313 // do it the hard way
314 for (int i=0; i<names_count; i++) {
315 int index = table->hash_to_index(hashValues[i]);
316 bool c_heap = class_loader() != NULL;
317 Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
318 cp->symbol_at_put(cp_indices[i], sym);
319 }
320 }
321 }
323 Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
324 unsigned int hash;
325 Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
326 if (result != NULL) {
327 return result;
328 }
329 SymbolTable* table = the_table();
330 int index = table->hash_to_index(hash);
331 return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
332 }
334 Symbol* SymbolTable::basic_add(int index, u1 *name, int len,
335 unsigned int hashValue_arg, bool c_heap, TRAPS) {
336 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
337 "proposed name of symbol must be stable");
339 // Grab SymbolTable_lock first.
340 MutexLocker ml(SymbolTable_lock, THREAD);
342 // Check if the symbol table has been rehashed, if so, need to recalculate
343 // the hash value.
344 unsigned int hashValue = hash_symbol((const char*)name, len, hashValue_arg);
346 // Since look-up was done lock-free, we need to check if another
347 // thread beat us in the race to insert the symbol.
348 Symbol* test = lookup(index, (char*)name, len, hashValue);
349 if (test != NULL) {
350 // A race occurred and another thread introduced the symbol.
351 assert(test->refcount() != 0, "lookup should have incremented the count");
352 return test;
353 }
355 // Create a new symbol.
356 Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
357 assert(sym->equals((char*)name, len), "symbol must be properly initialized");
359 HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
360 add_entry(index, entry);
361 return sym;
362 }
364 // This version of basic_add adds symbols in batch from the constant pool
365 // parsing.
366 bool SymbolTable::basic_add(Handle class_loader, constantPoolHandle cp,
367 int names_count,
368 const char** names, int* lengths,
369 int* cp_indices, unsigned int* hashValues,
370 TRAPS) {
372 // Check symbol names are not too long. If any are too long, don't add any.
373 for (int i = 0; i< names_count; i++) {
374 if (lengths[i] > Symbol::max_length()) {
375 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
376 "name is too long to represent");
377 }
378 }
380 // Hold SymbolTable_lock through the symbol creation
381 MutexLocker ml(SymbolTable_lock, THREAD);
383 for (int i=0; i<names_count; i++) {
384 // Check if the symbol table has been rehashed, if so, need to recalculate
385 // the hash value.
386 unsigned int hashValue = hash_symbol(names[i], lengths[i], hashValues[i]);
387 // Since look-up was done lock-free, we need to check if another
388 // thread beat us in the race to insert the symbol.
389 int index = hash_to_index(hashValue);
390 Symbol* test = lookup(index, names[i], lengths[i], hashValue);
391 if (test != NULL) {
392 // A race occurred and another thread introduced the symbol, this one
393 // will be dropped and collected. Use test instead.
394 cp->symbol_at_put(cp_indices[i], test);
395 assert(test->refcount() != 0, "lookup should have incremented the count");
396 } else {
397 // Create a new symbol. The null class loader is never unloaded so these
398 // are allocated specially in a permanent arena.
399 bool c_heap = class_loader() != NULL;
400 Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
401 assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be???
402 HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
403 add_entry(index, entry);
404 cp->symbol_at_put(cp_indices[i], sym);
405 }
406 }
407 return true;
408 }
411 void SymbolTable::verify() {
412 for (int i = 0; i < the_table()->table_size(); ++i) {
413 HashtableEntry<Symbol*>* p = the_table()->bucket(i);
414 for ( ; p != NULL; p = p->next()) {
415 Symbol* s = (Symbol*)(p->literal());
416 guarantee(s != NULL, "symbol is NULL");
417 unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
418 guarantee(p->hash() == h, "broken hash in symbol table entry");
419 guarantee(the_table()->hash_to_index(h) == i,
420 "wrong index in symbol table");
421 }
422 }
423 }
425 void SymbolTable::dump(outputStream* st) {
426 NumberSeq summary;
427 for (int i = 0; i < the_table()->table_size(); ++i) {
428 int count = 0;
429 for (HashtableEntry<Symbol*>* e = the_table()->bucket(i);
430 e != NULL; e = e->next()) {
431 count++;
432 }
433 summary.add((double)count);
434 }
435 st->print_cr("SymbolTable statistics:");
436 st->print_cr("Number of buckets : %7d", summary.num());
437 st->print_cr("Average bucket size : %7.0f", summary.avg());
438 st->print_cr("Variance of bucket size : %7.0f", summary.variance());
439 st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
440 st->print_cr("Maximum bucket size : %7.0f", summary.maximum());
441 }
444 //---------------------------------------------------------------------------
445 // Non-product code
447 #ifndef PRODUCT
449 void SymbolTable::print_histogram() {
450 MutexLocker ml(SymbolTable_lock);
451 const int results_length = 100;
452 int results[results_length];
453 int i,j;
455 // initialize results to zero
456 for (j = 0; j < results_length; j++) {
457 results[j] = 0;
458 }
460 int total = 0;
461 int max_symbols = 0;
462 int out_of_range = 0;
463 int memory_total = 0;
464 int count = 0;
465 for (i = 0; i < the_table()->table_size(); i++) {
466 HashtableEntry<Symbol*>* p = the_table()->bucket(i);
467 for ( ; p != NULL; p = p->next()) {
468 memory_total += p->literal()->object_size();
469 count++;
470 int counter = p->literal()->utf8_length();
471 total += counter;
472 if (counter < results_length) {
473 results[counter]++;
474 } else {
475 out_of_range++;
476 }
477 max_symbols = MAX2(max_symbols, counter);
478 }
479 }
480 tty->print_cr("Symbol Table:");
481 tty->print_cr("Total number of symbols %5d", count);
482 tty->print_cr("Total size in memory %5dK",
483 (memory_total*HeapWordSize)/1024);
484 tty->print_cr("Total counted %5d", symbols_counted);
485 tty->print_cr("Total removed %5d", symbols_removed);
486 if (symbols_counted > 0) {
487 tty->print_cr("Percent removed %3.2f",
488 ((float)symbols_removed/(float)symbols_counted)* 100);
489 }
490 tty->print_cr("Reference counts %5d", Symbol::_total_count);
491 tty->print_cr("Symbol arena size %5d used %5d",
492 arena()->size_in_bytes(), arena()->used());
493 tty->print_cr("Histogram of symbol length:");
494 tty->print_cr("%8s %5d", "Total ", total);
495 tty->print_cr("%8s %5d", "Maximum", max_symbols);
496 tty->print_cr("%8s %3.2f", "Average",
497 ((float) total / (float) the_table()->table_size()));
498 tty->print_cr("%s", "Histogram:");
499 tty->print_cr(" %s %29s", "Length", "Number chains that length");
500 for (i = 0; i < results_length; i++) {
501 if (results[i] > 0) {
502 tty->print_cr("%6d %10d", i, results[i]);
503 }
504 }
505 if (Verbose) {
506 int line_length = 70;
507 tty->print_cr("%s %30s", " Length", "Number chains that length");
508 for (i = 0; i < results_length; i++) {
509 if (results[i] > 0) {
510 tty->print("%4d", i);
511 for (j = 0; (j < results[i]) && (j < line_length); j++) {
512 tty->print("%1s", "*");
513 }
514 if (j == line_length) {
515 tty->print("%1s", "+");
516 }
517 tty->cr();
518 }
519 }
520 }
521 tty->print_cr(" %s %d: %d\n", "Number chains longer than",
522 results_length, out_of_range);
523 }
525 void SymbolTable::print() {
526 for (int i = 0; i < the_table()->table_size(); ++i) {
527 HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i);
528 HashtableEntry<Symbol*>* entry = the_table()->bucket(i);
529 if (entry != NULL) {
530 while (entry != NULL) {
531 tty->print(PTR_FORMAT " ", entry->literal());
532 entry->literal()->print();
533 tty->print(" %d", entry->literal()->refcount());
534 p = entry->next_addr();
535 entry = (HashtableEntry<Symbol*>*)HashtableEntry<Symbol*>::make_ptr(*p);
536 }
537 tty->cr();
538 }
539 }
540 }
541 #endif // PRODUCT
543 // --------------------------------------------------------------------------
545 #ifdef ASSERT
546 class StableMemoryChecker : public StackObj {
547 enum { _bufsize = wordSize*4 };
549 address _region;
550 jint _size;
551 u1 _save_buf[_bufsize];
553 int sample(u1* save_buf) {
554 if (_size <= _bufsize) {
555 memcpy(save_buf, _region, _size);
556 return _size;
557 } else {
558 // copy head and tail
559 memcpy(&save_buf[0], _region, _bufsize/2);
560 memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
561 return (_bufsize/2)*2;
562 }
563 }
565 public:
566 StableMemoryChecker(const void* region, jint size) {
567 _region = (address) region;
568 _size = size;
569 sample(_save_buf);
570 }
572 bool verify() {
573 u1 check_buf[sizeof(_save_buf)];
574 int check_size = sample(check_buf);
575 return (0 == memcmp(_save_buf, check_buf, check_size));
576 }
578 void set_region(const void* region) { _region = (address) region; }
579 };
580 #endif
583 // --------------------------------------------------------------------------
584 StringTable* StringTable::_the_table = NULL;
586 bool StringTable::_needs_rehashing = false;
587 jint StringTable::_seed = 0;
589 // Pick hashing algorithm
590 unsigned int StringTable::hash_string(const jchar* s, int len, unsigned int hashValue) {
591 return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
592 (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
593 }
595 oop StringTable::lookup(int index, jchar* name,
596 int len, unsigned int hash) {
597 int count = 0;
598 for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
599 count++;
600 if (l->hash() == hash) {
601 if (java_lang_String::equals(l->literal(), name, len)) {
602 return l->literal();
603 }
604 }
605 }
606 // If the bucket size is too deep check if this hash code is insufficient.
607 if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
608 _needs_rehashing = check_rehash_table(count);
609 }
610 return NULL;
611 }
614 oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
615 int len, unsigned int hashValue_arg, TRAPS) {
616 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
617 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
618 "proposed name of symbol must be stable");
620 Handle string;
621 // try to reuse the string if possible
622 if (!string_or_null.is_null() && (!JavaObjectsInPerm || string_or_null()->is_perm())) {
623 string = string_or_null;
624 } else {
625 string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
626 }
628 // Allocation must be done before grapping the SymbolTable_lock lock
629 MutexLocker ml(StringTable_lock, THREAD);
631 assert(java_lang_String::equals(string(), name, len),
632 "string must be properly initialized");
634 // Check if the symbol table has been rehashed, if so, need to recalculate
635 // the hash value before second lookup.
636 unsigned int hashValue = hash_string(name, len, hashValue_arg);
638 // Since look-up was done lock-free, we need to check if another
639 // thread beat us in the race to insert the symbol.
641 oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
642 if (test != NULL) {
643 // Entry already added
644 return test;
645 }
647 HashtableEntry<oop>* entry = new_entry(hashValue, string());
648 add_entry(index, entry);
649 return string();
650 }
653 oop StringTable::lookup(Symbol* symbol) {
654 ResourceMark rm;
655 int length;
656 jchar* chars = symbol->as_unicode(length);
657 unsigned int hashValue = hash_string(chars, length);
658 int index = the_table()->hash_to_index(hashValue);
659 return the_table()->lookup(index, chars, length, hashValue);
660 }
663 oop StringTable::intern(Handle string_or_null, jchar* name,
664 int len, TRAPS) {
665 unsigned int hashValue = hash_string(name, len);
666 int index = the_table()->hash_to_index(hashValue);
667 oop string = the_table()->lookup(index, name, len, hashValue);
669 // Found
670 if (string != NULL) return string;
672 // Otherwise, add to symbol to table
673 return the_table()->basic_add(index, string_or_null, name, len,
674 hashValue, CHECK_NULL);
675 }
677 oop StringTable::intern(Symbol* symbol, TRAPS) {
678 if (symbol == NULL) return NULL;
679 ResourceMark rm(THREAD);
680 int length;
681 jchar* chars = symbol->as_unicode(length);
682 Handle string;
683 oop result = intern(string, chars, length, CHECK_NULL);
684 return result;
685 }
688 oop StringTable::intern(oop string, TRAPS)
689 {
690 if (string == NULL) return NULL;
691 ResourceMark rm(THREAD);
692 int length;
693 Handle h_string (THREAD, string);
694 jchar* chars = java_lang_String::as_unicode_string(string, length);
695 oop result = intern(h_string, chars, length, CHECK_NULL);
696 return result;
697 }
700 oop StringTable::intern(const char* utf8_string, TRAPS) {
701 if (utf8_string == NULL) return NULL;
702 ResourceMark rm(THREAD);
703 int length = UTF8::unicode_length(utf8_string);
704 jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
705 UTF8::convert_to_unicode(utf8_string, chars, length);
706 Handle string;
707 oop result = intern(string, chars, length, CHECK_NULL);
708 return result;
709 }
711 void StringTable::unlink(BoolObjectClosure* is_alive) {
712 // Readers of the table are unlocked, so we should only be removing
713 // entries at a safepoint.
714 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
715 for (int i = 0; i < the_table()->table_size(); ++i) {
716 for (HashtableEntry<oop>** p = the_table()->bucket_addr(i); *p != NULL; ) {
717 HashtableEntry<oop>* entry = *p;
718 if (entry->is_shared()) {
719 break;
720 }
721 assert(entry->literal() != NULL, "just checking");
722 if (is_alive->do_object_b(entry->literal())) {
723 p = entry->next_addr();
724 } else {
725 *p = entry->next();
726 the_table()->free_entry(entry);
727 }
728 }
729 }
730 }
732 void StringTable::oops_do(OopClosure* f) {
733 for (int i = 0; i < the_table()->table_size(); ++i) {
734 HashtableEntry<oop>** p = the_table()->bucket_addr(i);
735 HashtableEntry<oop>* entry = the_table()->bucket(i);
736 while (entry != NULL) {
737 f->do_oop((oop*)entry->literal_addr());
739 // Did the closure remove the literal from the table?
740 if (entry->literal() == NULL) {
741 assert(!entry->is_shared(), "immutable hashtable entry?");
742 *p = entry->next();
743 the_table()->free_entry(entry);
744 } else {
745 p = entry->next_addr();
746 }
747 entry = (HashtableEntry<oop>*)HashtableEntry<oop>::make_ptr(*p);
748 }
749 }
750 }
752 void StringTable::verify() {
753 for (int i = 0; i < the_table()->table_size(); ++i) {
754 HashtableEntry<oop>* p = the_table()->bucket(i);
755 for ( ; p != NULL; p = p->next()) {
756 oop s = p->literal();
757 guarantee(s != NULL, "interned string is NULL");
758 guarantee(s->is_perm() || !JavaObjectsInPerm, "interned string not in permspace");
759 unsigned int h = java_lang_String::hash_string(s);
760 guarantee(p->hash() == h, "broken hash in string table entry");
761 guarantee(the_table()->hash_to_index(h) == i,
762 "wrong index in string table");
763 }
764 }
765 }
767 void StringTable::dump(outputStream* st) {
768 NumberSeq summary;
769 for (int i = 0; i < the_table()->table_size(); ++i) {
770 HashtableEntry<oop>* p = the_table()->bucket(i);
771 int count = 0;
772 for ( ; p != NULL; p = p->next()) {
773 count++;
774 }
775 summary.add((double)count);
776 }
777 st->print_cr("StringTable statistics:");
778 st->print_cr("Number of buckets : %7d", summary.num());
779 st->print_cr("Average bucket size : %7.0f", summary.avg());
780 st->print_cr("Variance of bucket size : %7.0f", summary.variance());
781 st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
782 st->print_cr("Maximum bucket size : %7.0f", summary.maximum());
783 }
786 unsigned int StringTable::new_hash(oop string) {
787 ResourceMark rm;
788 int length;
789 jchar* chars = java_lang_String::as_unicode_string(string, length);
790 // Use alternate hashing algorithm on the string
791 return AltHashing::murmur3_32(seed(), chars, length);
792 }
794 // Create a new table and using alternate hash code, populate the new table
795 // with the existing strings. Set flag to use the alternate hash code afterwards.
796 void StringTable::rehash_table() {
797 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
798 assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
799 StringTable* new_table = new StringTable();
801 // Initialize new global seed for hashing.
802 _seed = AltHashing::compute_seed();
803 assert(seed() != 0, "shouldn't be zero");
805 // Rehash the table
806 the_table()->move_to(new_table);
808 // Delete the table and buckets (entries are reused in new table).
809 delete _the_table;
810 // Don't check if we need rehashing until the table gets unbalanced again.
811 // Then rehash with a new global seed.
812 _needs_rehashing = false;
813 _the_table = new_table;
814 }