Tue, 18 Mar 2014 19:07:22 +0100
8029075: String deduplication in G1
Summary: Implementation of JEP 192, http://openjdk.java.net/jeps/192
Reviewed-by: brutisso, tschatzl, coleenp
1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/altHashing.hpp"
27 #include "classfile/javaClasses.hpp"
28 #include "classfile/symbolTable.hpp"
29 #include "classfile/systemDictionary.hpp"
30 #include "gc_interface/collectedHeap.inline.hpp"
31 #include "memory/allocation.inline.hpp"
32 #include "memory/filemap.hpp"
33 #include "memory/gcLocker.inline.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "oops/oop.inline2.hpp"
36 #include "runtime/mutexLocker.hpp"
37 #include "utilities/hashtable.inline.hpp"
38 #if INCLUDE_ALL_GCS
39 #include "gc_implementation/g1/g1StringDedup.hpp"
40 #endif
42 // --------------------------------------------------------------------------
44 // the number of buckets a thread claims
45 const int ClaimChunkSize = 32;
47 SymbolTable* SymbolTable::_the_table = NULL;
48 // Static arena for symbols that are not deallocated
49 Arena* SymbolTable::_arena = NULL;
50 bool SymbolTable::_needs_rehashing = false;
52 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
53 assert (len <= Symbol::max_length(), "should be checked by caller");
55 Symbol* sym;
57 if (DumpSharedSpaces) {
58 // Allocate all symbols to CLD shared metaspace
59 sym = new (len, ClassLoaderData::the_null_class_loader_data(), THREAD) Symbol(name, len, -1);
60 } else if (c_heap) {
61 // refcount starts as 1
62 sym = new (len, THREAD) Symbol(name, len, 1);
63 assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
64 } else {
65 // Allocate to global arena
66 sym = new (len, arena(), THREAD) Symbol(name, len, -1);
67 }
68 return sym;
69 }
71 void SymbolTable::initialize_symbols(int arena_alloc_size) {
72 // Initialize the arena for global symbols, size passed in depends on CDS.
73 if (arena_alloc_size == 0) {
74 _arena = new (mtSymbol) Arena();
75 } else {
76 _arena = new (mtSymbol) Arena(arena_alloc_size);
77 }
78 }
80 // Call function for all symbols in the symbol table.
81 void SymbolTable::symbols_do(SymbolClosure *cl) {
82 const int n = the_table()->table_size();
83 for (int i = 0; i < n; i++) {
84 for (HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
85 p != NULL;
86 p = p->next()) {
87 cl->do_symbol(p->literal_addr());
88 }
89 }
90 }
92 int SymbolTable::_symbols_removed = 0;
93 int SymbolTable::_symbols_counted = 0;
94 volatile int SymbolTable::_parallel_claimed_idx = 0;
96 void SymbolTable::buckets_unlink(int start_idx, int end_idx, int* processed, int* removed, size_t* memory_total) {
97 for (int i = start_idx; i < end_idx; ++i) {
98 HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
99 HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
100 while (entry != NULL) {
101 // Shared entries are normally at the end of the bucket and if we run into
102 // a shared entry, then there is nothing more to remove. However, if we
103 // have rehashed the table, then the shared entries are no longer at the
104 // end of the bucket.
105 if (entry->is_shared() && !use_alternate_hashcode()) {
106 break;
107 }
108 Symbol* s = entry->literal();
109 (*memory_total) += s->size();
110 (*processed)++;
111 assert(s != NULL, "just checking");
112 // If reference count is zero, remove.
113 if (s->refcount() == 0) {
114 assert(!entry->is_shared(), "shared entries should be kept live");
115 delete s;
116 (*removed)++;
117 *p = entry->next();
118 the_table()->free_entry(entry);
119 } else {
120 p = entry->next_addr();
121 }
122 // get next entry
123 entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
124 }
125 }
126 }
128 // Remove unreferenced symbols from the symbol table
129 // This is done late during GC.
130 void SymbolTable::unlink(int* processed, int* removed) {
131 size_t memory_total = 0;
132 buckets_unlink(0, the_table()->table_size(), processed, removed, &memory_total);
133 _symbols_removed += *removed;
134 _symbols_counted += *processed;
135 // Exclude printing for normal PrintGCDetails because people parse
136 // this output.
137 if (PrintGCDetails && Verbose && WizardMode) {
138 gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", *processed,
139 (memory_total*HeapWordSize)/1024);
140 }
141 }
143 void SymbolTable::possibly_parallel_unlink(int* processed, int* removed) {
144 const int limit = the_table()->table_size();
146 size_t memory_total = 0;
148 for (;;) {
149 // Grab next set of buckets to scan
150 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
151 if (start_idx >= limit) {
152 // End of table
153 break;
154 }
156 int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
157 buckets_unlink(start_idx, end_idx, processed, removed, &memory_total);
158 }
159 Atomic::add(*processed, &_symbols_counted);
160 Atomic::add(*removed, &_symbols_removed);
161 // Exclude printing for normal PrintGCDetails because people parse
162 // this output.
163 if (PrintGCDetails && Verbose && WizardMode) {
164 gclog_or_tty->print(" [Symbols: scanned=%d removed=%d size=" SIZE_FORMAT "K] ", *processed, *removed,
165 (memory_total*HeapWordSize)/1024);
166 }
167 }
169 // Create a new table and using alternate hash code, populate the new table
170 // with the existing strings. Set flag to use the alternate hash code afterwards.
171 void SymbolTable::rehash_table() {
172 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
173 // This should never happen with -Xshare:dump but it might in testing mode.
174 if (DumpSharedSpaces) return;
175 // Create a new symbol table
176 SymbolTable* new_table = new SymbolTable();
178 the_table()->move_to(new_table);
180 // Delete the table and buckets (entries are reused in new table).
181 delete _the_table;
182 // Don't check if we need rehashing until the table gets unbalanced again.
183 // Then rehash with a new global seed.
184 _needs_rehashing = false;
185 _the_table = new_table;
186 }
188 // Lookup a symbol in a bucket.
190 Symbol* SymbolTable::lookup(int index, const char* name,
191 int len, unsigned int hash) {
192 int count = 0;
193 for (HashtableEntry<Symbol*, mtSymbol>* e = bucket(index); e != NULL; e = e->next()) {
194 count++; // count all entries in this bucket, not just ones with same hash
195 if (e->hash() == hash) {
196 Symbol* sym = e->literal();
197 if (sym->equals(name, len)) {
198 // something is referencing this symbol now.
199 sym->increment_refcount();
200 return sym;
201 }
202 }
203 }
204 // If the bucket size is too deep check if this hash code is insufficient.
205 if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
206 _needs_rehashing = check_rehash_table(count);
207 }
208 return NULL;
209 }
211 // Pick hashing algorithm.
212 unsigned int SymbolTable::hash_symbol(const char* s, int len) {
213 return use_alternate_hashcode() ?
214 AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
215 java_lang_String::hash_code(s, len);
216 }
219 // We take care not to be blocking while holding the
220 // SymbolTable_lock. Otherwise, the system might deadlock, since the
221 // symboltable is used during compilation (VM_thread) The lock free
222 // synchronization is simplified by the fact that we do not delete
223 // entries in the symbol table during normal execution (only during
224 // safepoints).
226 Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
227 unsigned int hashValue = hash_symbol(name, len);
228 int index = the_table()->hash_to_index(hashValue);
230 Symbol* s = the_table()->lookup(index, name, len, hashValue);
232 // Found
233 if (s != NULL) return s;
235 // Grab SymbolTable_lock first.
236 MutexLocker ml(SymbolTable_lock, THREAD);
238 // Otherwise, add to symbol to table
239 return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
240 }
242 Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
243 char* buffer;
244 int index, len;
245 unsigned int hashValue;
246 char* name;
247 {
248 debug_only(No_Safepoint_Verifier nsv;)
250 name = (char*)sym->base() + begin;
251 len = end - begin;
252 hashValue = hash_symbol(name, len);
253 index = the_table()->hash_to_index(hashValue);
254 Symbol* s = the_table()->lookup(index, name, len, hashValue);
256 // Found
257 if (s != NULL) return s;
258 }
260 // Otherwise, add to symbol to table. Copy to a C string first.
261 char stack_buf[128];
262 ResourceMark rm(THREAD);
263 if (len <= 128) {
264 buffer = stack_buf;
265 } else {
266 buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
267 }
268 for (int i=0; i<len; i++) {
269 buffer[i] = name[i];
270 }
271 // Make sure there is no safepoint in the code above since name can't move.
272 // We can't include the code in No_Safepoint_Verifier because of the
273 // ResourceMark.
275 // Grab SymbolTable_lock first.
276 MutexLocker ml(SymbolTable_lock, THREAD);
278 return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
279 }
281 Symbol* SymbolTable::lookup_only(const char* name, int len,
282 unsigned int& hash) {
283 hash = hash_symbol(name, len);
284 int index = the_table()->hash_to_index(hash);
286 Symbol* s = the_table()->lookup(index, name, len, hash);
287 return s;
288 }
290 // Look up the address of the literal in the SymbolTable for this Symbol*
291 // Do not create any new symbols
292 // Do not increment the reference count to keep this alive
293 Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
294 unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
295 int index = the_table()->hash_to_index(hash);
297 for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
298 if (e->hash() == hash) {
299 Symbol* literal_sym = e->literal();
300 if (sym == literal_sym) {
301 return e->literal_addr();
302 }
303 }
304 }
305 return NULL;
306 }
308 // Suggestion: Push unicode-based lookup all the way into the hashing
309 // and probing logic, so there is no need for convert_to_utf8 until
310 // an actual new Symbol* is created.
311 Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
312 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
313 char stack_buf[128];
314 if (utf8_length < (int) sizeof(stack_buf)) {
315 char* chars = stack_buf;
316 UNICODE::convert_to_utf8(name, utf16_length, chars);
317 return lookup(chars, utf8_length, THREAD);
318 } else {
319 ResourceMark rm(THREAD);
320 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
321 UNICODE::convert_to_utf8(name, utf16_length, chars);
322 return lookup(chars, utf8_length, THREAD);
323 }
324 }
326 Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
327 unsigned int& hash) {
328 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
329 char stack_buf[128];
330 if (utf8_length < (int) sizeof(stack_buf)) {
331 char* chars = stack_buf;
332 UNICODE::convert_to_utf8(name, utf16_length, chars);
333 return lookup_only(chars, utf8_length, hash);
334 } else {
335 ResourceMark rm;
336 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
337 UNICODE::convert_to_utf8(name, utf16_length, chars);
338 return lookup_only(chars, utf8_length, hash);
339 }
340 }
342 void SymbolTable::add(ClassLoaderData* loader_data, constantPoolHandle cp,
343 int names_count,
344 const char** names, int* lengths, int* cp_indices,
345 unsigned int* hashValues, TRAPS) {
346 // Grab SymbolTable_lock first.
347 MutexLocker ml(SymbolTable_lock, THREAD);
349 SymbolTable* table = the_table();
350 bool added = table->basic_add(loader_data, cp, names_count, names, lengths,
351 cp_indices, hashValues, CHECK);
352 if (!added) {
353 // do it the hard way
354 for (int i=0; i<names_count; i++) {
355 int index = table->hash_to_index(hashValues[i]);
356 bool c_heap = !loader_data->is_the_null_class_loader_data();
357 Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
358 cp->symbol_at_put(cp_indices[i], sym);
359 }
360 }
361 }
363 Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
364 unsigned int hash;
365 Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
366 if (result != NULL) {
367 return result;
368 }
369 // Grab SymbolTable_lock first.
370 MutexLocker ml(SymbolTable_lock, THREAD);
372 SymbolTable* table = the_table();
373 int index = table->hash_to_index(hash);
374 return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
375 }
377 Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
378 unsigned int hashValue_arg, bool c_heap, TRAPS) {
379 assert(!Universe::heap()->is_in_reserved(name),
380 "proposed name of symbol must be stable");
382 // Don't allow symbols to be created which cannot fit in a Symbol*.
383 if (len > Symbol::max_length()) {
384 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
385 "name is too long to represent");
386 }
388 // Cannot hit a safepoint in this function because the "this" pointer can move.
389 No_Safepoint_Verifier nsv;
391 // Check if the symbol table has been rehashed, if so, need to recalculate
392 // the hash value and index.
393 unsigned int hashValue;
394 int index;
395 if (use_alternate_hashcode()) {
396 hashValue = hash_symbol((const char*)name, len);
397 index = hash_to_index(hashValue);
398 } else {
399 hashValue = hashValue_arg;
400 index = index_arg;
401 }
403 // Since look-up was done lock-free, we need to check if another
404 // thread beat us in the race to insert the symbol.
405 Symbol* test = lookup(index, (char*)name, len, hashValue);
406 if (test != NULL) {
407 // A race occurred and another thread introduced the symbol.
408 assert(test->refcount() != 0, "lookup should have incremented the count");
409 return test;
410 }
412 // Create a new symbol.
413 Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
414 assert(sym->equals((char*)name, len), "symbol must be properly initialized");
416 HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
417 add_entry(index, entry);
418 return sym;
419 }
421 // This version of basic_add adds symbols in batch from the constant pool
422 // parsing.
423 bool SymbolTable::basic_add(ClassLoaderData* loader_data, constantPoolHandle cp,
424 int names_count,
425 const char** names, int* lengths,
426 int* cp_indices, unsigned int* hashValues,
427 TRAPS) {
429 // Check symbol names are not too long. If any are too long, don't add any.
430 for (int i = 0; i< names_count; i++) {
431 if (lengths[i] > Symbol::max_length()) {
432 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
433 "name is too long to represent");
434 }
435 }
437 // Cannot hit a safepoint in this function because the "this" pointer can move.
438 No_Safepoint_Verifier nsv;
440 for (int i=0; i<names_count; i++) {
441 // Check if the symbol table has been rehashed, if so, need to recalculate
442 // the hash value.
443 unsigned int hashValue;
444 if (use_alternate_hashcode()) {
445 hashValue = hash_symbol(names[i], lengths[i]);
446 } else {
447 hashValue = hashValues[i];
448 }
449 // Since look-up was done lock-free, we need to check if another
450 // thread beat us in the race to insert the symbol.
451 int index = hash_to_index(hashValue);
452 Symbol* test = lookup(index, names[i], lengths[i], hashValue);
453 if (test != NULL) {
454 // A race occurred and another thread introduced the symbol, this one
455 // will be dropped and collected. Use test instead.
456 cp->symbol_at_put(cp_indices[i], test);
457 assert(test->refcount() != 0, "lookup should have incremented the count");
458 } else {
459 // Create a new symbol. The null class loader is never unloaded so these
460 // are allocated specially in a permanent arena.
461 bool c_heap = !loader_data->is_the_null_class_loader_data();
462 Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
463 assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be???
464 HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
465 add_entry(index, entry);
466 cp->symbol_at_put(cp_indices[i], sym);
467 }
468 }
469 return true;
470 }
473 void SymbolTable::verify() {
474 for (int i = 0; i < the_table()->table_size(); ++i) {
475 HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
476 for ( ; p != NULL; p = p->next()) {
477 Symbol* s = (Symbol*)(p->literal());
478 guarantee(s != NULL, "symbol is NULL");
479 unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
480 guarantee(p->hash() == h, "broken hash in symbol table entry");
481 guarantee(the_table()->hash_to_index(h) == i,
482 "wrong index in symbol table");
483 }
484 }
485 }
487 void SymbolTable::dump(outputStream* st) {
488 the_table()->dump_table(st, "SymbolTable");
489 }
492 //---------------------------------------------------------------------------
493 // Non-product code
495 #ifndef PRODUCT
497 void SymbolTable::print_histogram() {
498 MutexLocker ml(SymbolTable_lock);
499 const int results_length = 100;
500 int results[results_length];
501 int i,j;
503 // initialize results to zero
504 for (j = 0; j < results_length; j++) {
505 results[j] = 0;
506 }
508 int total = 0;
509 int max_symbols = 0;
510 int out_of_range = 0;
511 int memory_total = 0;
512 int count = 0;
513 for (i = 0; i < the_table()->table_size(); i++) {
514 HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
515 for ( ; p != NULL; p = p->next()) {
516 memory_total += p->literal()->size();
517 count++;
518 int counter = p->literal()->utf8_length();
519 total += counter;
520 if (counter < results_length) {
521 results[counter]++;
522 } else {
523 out_of_range++;
524 }
525 max_symbols = MAX2(max_symbols, counter);
526 }
527 }
528 tty->print_cr("Symbol Table:");
529 tty->print_cr("Total number of symbols %5d", count);
530 tty->print_cr("Total size in memory %5dK",
531 (memory_total*HeapWordSize)/1024);
532 tty->print_cr("Total counted %5d", _symbols_counted);
533 tty->print_cr("Total removed %5d", _symbols_removed);
534 if (_symbols_counted > 0) {
535 tty->print_cr("Percent removed %3.2f",
536 ((float)_symbols_removed/(float)_symbols_counted)* 100);
537 }
538 tty->print_cr("Reference counts %5d", Symbol::_total_count);
539 tty->print_cr("Symbol arena size %5d used %5d",
540 arena()->size_in_bytes(), arena()->used());
541 tty->print_cr("Histogram of symbol length:");
542 tty->print_cr("%8s %5d", "Total ", total);
543 tty->print_cr("%8s %5d", "Maximum", max_symbols);
544 tty->print_cr("%8s %3.2f", "Average",
545 ((float) total / (float) the_table()->table_size()));
546 tty->print_cr("%s", "Histogram:");
547 tty->print_cr(" %s %29s", "Length", "Number chains that length");
548 for (i = 0; i < results_length; i++) {
549 if (results[i] > 0) {
550 tty->print_cr("%6d %10d", i, results[i]);
551 }
552 }
553 if (Verbose) {
554 int line_length = 70;
555 tty->print_cr("%s %30s", " Length", "Number chains that length");
556 for (i = 0; i < results_length; i++) {
557 if (results[i] > 0) {
558 tty->print("%4d", i);
559 for (j = 0; (j < results[i]) && (j < line_length); j++) {
560 tty->print("%1s", "*");
561 }
562 if (j == line_length) {
563 tty->print("%1s", "+");
564 }
565 tty->cr();
566 }
567 }
568 }
569 tty->print_cr(" %s %d: %d\n", "Number chains longer than",
570 results_length, out_of_range);
571 }
573 void SymbolTable::print() {
574 for (int i = 0; i < the_table()->table_size(); ++i) {
575 HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
576 HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
577 if (entry != NULL) {
578 while (entry != NULL) {
579 tty->print(PTR_FORMAT " ", entry->literal());
580 entry->literal()->print();
581 tty->print(" %d", entry->literal()->refcount());
582 p = entry->next_addr();
583 entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
584 }
585 tty->cr();
586 }
587 }
588 }
589 #endif // PRODUCT
591 // --------------------------------------------------------------------------
593 #ifdef ASSERT
594 class StableMemoryChecker : public StackObj {
595 enum { _bufsize = wordSize*4 };
597 address _region;
598 jint _size;
599 u1 _save_buf[_bufsize];
601 int sample(u1* save_buf) {
602 if (_size <= _bufsize) {
603 memcpy(save_buf, _region, _size);
604 return _size;
605 } else {
606 // copy head and tail
607 memcpy(&save_buf[0], _region, _bufsize/2);
608 memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
609 return (_bufsize/2)*2;
610 }
611 }
613 public:
614 StableMemoryChecker(const void* region, jint size) {
615 _region = (address) region;
616 _size = size;
617 sample(_save_buf);
618 }
620 bool verify() {
621 u1 check_buf[sizeof(_save_buf)];
622 int check_size = sample(check_buf);
623 return (0 == memcmp(_save_buf, check_buf, check_size));
624 }
626 void set_region(const void* region) { _region = (address) region; }
627 };
628 #endif
631 // --------------------------------------------------------------------------
632 StringTable* StringTable::_the_table = NULL;
634 bool StringTable::_needs_rehashing = false;
636 volatile int StringTable::_parallel_claimed_idx = 0;
638 // Pick hashing algorithm
639 unsigned int StringTable::hash_string(const jchar* s, int len) {
640 return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
641 java_lang_String::hash_code(s, len);
642 }
644 oop StringTable::lookup(int index, jchar* name,
645 int len, unsigned int hash) {
646 int count = 0;
647 for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) {
648 count++;
649 if (l->hash() == hash) {
650 if (java_lang_String::equals(l->literal(), name, len)) {
651 return l->literal();
652 }
653 }
654 }
655 // If the bucket size is too deep check if this hash code is insufficient.
656 if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
657 _needs_rehashing = check_rehash_table(count);
658 }
659 return NULL;
660 }
663 oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
664 int len, unsigned int hashValue_arg, TRAPS) {
666 assert(java_lang_String::equals(string(), name, len),
667 "string must be properly initialized");
668 // Cannot hit a safepoint in this function because the "this" pointer can move.
669 No_Safepoint_Verifier nsv;
671 // Check if the symbol table has been rehashed, if so, need to recalculate
672 // the hash value and index before second lookup.
673 unsigned int hashValue;
674 int index;
675 if (use_alternate_hashcode()) {
676 hashValue = hash_string(name, len);
677 index = hash_to_index(hashValue);
678 } else {
679 hashValue = hashValue_arg;
680 index = index_arg;
681 }
683 // Since look-up was done lock-free, we need to check if another
684 // thread beat us in the race to insert the symbol.
686 oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
687 if (test != NULL) {
688 // Entry already added
689 return test;
690 }
692 HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string());
693 add_entry(index, entry);
694 return string();
695 }
698 oop StringTable::lookup(Symbol* symbol) {
699 ResourceMark rm;
700 int length;
701 jchar* chars = symbol->as_unicode(length);
702 return lookup(chars, length);
703 }
706 oop StringTable::lookup(jchar* name, int len) {
707 unsigned int hash = hash_string(name, len);
708 int index = the_table()->hash_to_index(hash);
709 return the_table()->lookup(index, name, len, hash);
710 }
713 oop StringTable::intern(Handle string_or_null, jchar* name,
714 int len, TRAPS) {
715 unsigned int hashValue = hash_string(name, len);
716 int index = the_table()->hash_to_index(hashValue);
717 oop found_string = the_table()->lookup(index, name, len, hashValue);
719 // Found
720 if (found_string != NULL) return found_string;
722 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
723 assert(!Universe::heap()->is_in_reserved(name),
724 "proposed name of symbol must be stable");
726 Handle string;
727 // try to reuse the string if possible
728 if (!string_or_null.is_null()) {
729 string = string_or_null;
730 } else {
731 string = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
732 }
734 #if INCLUDE_ALL_GCS
735 if (G1StringDedup::is_enabled()) {
736 // Deduplicate the string before it is interned. Note that we should never
737 // deduplicate a string after it has been interned. Doing so will counteract
738 // compiler optimizations done on e.g. interned string literals.
739 G1StringDedup::deduplicate(string());
740 }
741 #endif
743 // Grab the StringTable_lock before getting the_table() because it could
744 // change at safepoint.
745 MutexLocker ml(StringTable_lock, THREAD);
747 // Otherwise, add to symbol to table
748 return the_table()->basic_add(index, string, name, len,
749 hashValue, CHECK_NULL);
750 }
752 oop StringTable::intern(Symbol* symbol, TRAPS) {
753 if (symbol == NULL) return NULL;
754 ResourceMark rm(THREAD);
755 int length;
756 jchar* chars = symbol->as_unicode(length);
757 Handle string;
758 oop result = intern(string, chars, length, CHECK_NULL);
759 return result;
760 }
763 oop StringTable::intern(oop string, TRAPS)
764 {
765 if (string == NULL) return NULL;
766 ResourceMark rm(THREAD);
767 int length;
768 Handle h_string (THREAD, string);
769 jchar* chars = java_lang_String::as_unicode_string(string, length, CHECK_NULL);
770 oop result = intern(h_string, chars, length, CHECK_NULL);
771 return result;
772 }
775 oop StringTable::intern(const char* utf8_string, TRAPS) {
776 if (utf8_string == NULL) return NULL;
777 ResourceMark rm(THREAD);
778 int length = UTF8::unicode_length(utf8_string);
779 jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
780 UTF8::convert_to_unicode(utf8_string, chars, length);
781 Handle string;
782 oop result = intern(string, chars, length, CHECK_NULL);
783 return result;
784 }
786 void StringTable::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
787 buckets_unlink_or_oops_do(is_alive, f, 0, the_table()->table_size(), processed, removed);
788 }
790 void StringTable::possibly_parallel_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
791 // Readers of the table are unlocked, so we should only be removing
792 // entries at a safepoint.
793 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
794 const int limit = the_table()->table_size();
796 for (;;) {
797 // Grab next set of buckets to scan
798 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
799 if (start_idx >= limit) {
800 // End of table
801 break;
802 }
804 int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
805 buckets_unlink_or_oops_do(is_alive, f, start_idx, end_idx, processed, removed);
806 }
807 }
809 void StringTable::buckets_oops_do(OopClosure* f, int start_idx, int end_idx) {
810 const int limit = the_table()->table_size();
812 assert(0 <= start_idx && start_idx <= limit,
813 err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
814 assert(0 <= end_idx && end_idx <= limit,
815 err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
816 assert(start_idx <= end_idx,
817 err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
818 start_idx, end_idx));
820 for (int i = start_idx; i < end_idx; i += 1) {
821 HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
822 while (entry != NULL) {
823 assert(!entry->is_shared(), "CDS not used for the StringTable");
825 f->do_oop((oop*)entry->literal_addr());
827 entry = entry->next();
828 }
829 }
830 }
832 void StringTable::buckets_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int start_idx, int end_idx, int* processed, int* removed) {
833 const int limit = the_table()->table_size();
835 assert(0 <= start_idx && start_idx <= limit,
836 err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
837 assert(0 <= end_idx && end_idx <= limit,
838 err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
839 assert(start_idx <= end_idx,
840 err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
841 start_idx, end_idx));
843 for (int i = start_idx; i < end_idx; ++i) {
844 HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
845 HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
846 while (entry != NULL) {
847 assert(!entry->is_shared(), "CDS not used for the StringTable");
849 if (is_alive->do_object_b(entry->literal())) {
850 if (f != NULL) {
851 f->do_oop((oop*)entry->literal_addr());
852 }
853 p = entry->next_addr();
854 } else {
855 *p = entry->next();
856 the_table()->free_entry(entry);
857 (*removed)++;
858 }
859 (*processed)++;
860 entry = *p;
861 }
862 }
863 }
865 void StringTable::oops_do(OopClosure* f) {
866 buckets_oops_do(f, 0, the_table()->table_size());
867 }
869 void StringTable::possibly_parallel_oops_do(OopClosure* f) {
870 const int limit = the_table()->table_size();
872 for (;;) {
873 // Grab next set of buckets to scan
874 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
875 if (start_idx >= limit) {
876 // End of table
877 break;
878 }
880 int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
881 buckets_oops_do(f, start_idx, end_idx);
882 }
883 }
885 // This verification is part of Universe::verify() and needs to be quick.
886 // See StringTable::verify_and_compare() below for exhaustive verification.
887 void StringTable::verify() {
888 for (int i = 0; i < the_table()->table_size(); ++i) {
889 HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
890 for ( ; p != NULL; p = p->next()) {
891 oop s = p->literal();
892 guarantee(s != NULL, "interned string is NULL");
893 unsigned int h = java_lang_String::hash_string(s);
894 guarantee(p->hash() == h, "broken hash in string table entry");
895 guarantee(the_table()->hash_to_index(h) == i,
896 "wrong index in string table");
897 }
898 }
899 }
901 void StringTable::dump(outputStream* st) {
902 the_table()->dump_table(st, "StringTable");
903 }
905 StringTable::VerifyRetTypes StringTable::compare_entries(
906 int bkt1, int e_cnt1,
907 HashtableEntry<oop, mtSymbol>* e_ptr1,
908 int bkt2, int e_cnt2,
909 HashtableEntry<oop, mtSymbol>* e_ptr2) {
910 // These entries are sanity checked by verify_and_compare_entries()
911 // before this function is called.
912 oop str1 = e_ptr1->literal();
913 oop str2 = e_ptr2->literal();
915 if (str1 == str2) {
916 tty->print_cr("ERROR: identical oop values (0x" PTR_FORMAT ") "
917 "in entry @ bucket[%d][%d] and entry @ bucket[%d][%d]",
918 (void *)str1, bkt1, e_cnt1, bkt2, e_cnt2);
919 return _verify_fail_continue;
920 }
922 if (java_lang_String::equals(str1, str2)) {
923 tty->print_cr("ERROR: identical String values in entry @ "
924 "bucket[%d][%d] and entry @ bucket[%d][%d]",
925 bkt1, e_cnt1, bkt2, e_cnt2);
926 return _verify_fail_continue;
927 }
929 return _verify_pass;
930 }
932 StringTable::VerifyRetTypes StringTable::verify_entry(int bkt, int e_cnt,
933 HashtableEntry<oop, mtSymbol>* e_ptr,
934 StringTable::VerifyMesgModes mesg_mode) {
936 VerifyRetTypes ret = _verify_pass; // be optimistic
938 oop str = e_ptr->literal();
939 if (str == NULL) {
940 if (mesg_mode == _verify_with_mesgs) {
941 tty->print_cr("ERROR: NULL oop value in entry @ bucket[%d][%d]", bkt,
942 e_cnt);
943 }
944 // NULL oop means no more verifications are possible
945 return _verify_fail_done;
946 }
948 if (str->klass() != SystemDictionary::String_klass()) {
949 if (mesg_mode == _verify_with_mesgs) {
950 tty->print_cr("ERROR: oop is not a String in entry @ bucket[%d][%d]",
951 bkt, e_cnt);
952 }
953 // not a String means no more verifications are possible
954 return _verify_fail_done;
955 }
957 unsigned int h = java_lang_String::hash_string(str);
958 if (e_ptr->hash() != h) {
959 if (mesg_mode == _verify_with_mesgs) {
960 tty->print_cr("ERROR: broken hash value in entry @ bucket[%d][%d], "
961 "bkt_hash=%d, str_hash=%d", bkt, e_cnt, e_ptr->hash(), h);
962 }
963 ret = _verify_fail_continue;
964 }
966 if (the_table()->hash_to_index(h) != bkt) {
967 if (mesg_mode == _verify_with_mesgs) {
968 tty->print_cr("ERROR: wrong index value for entry @ bucket[%d][%d], "
969 "str_hash=%d, hash_to_index=%d", bkt, e_cnt, h,
970 the_table()->hash_to_index(h));
971 }
972 ret = _verify_fail_continue;
973 }
975 return ret;
976 }
978 // See StringTable::verify() above for the quick verification that is
979 // part of Universe::verify(). This verification is exhaustive and
980 // reports on every issue that is found. StringTable::verify() only
981 // reports on the first issue that is found.
982 //
983 // StringTable::verify_entry() checks:
984 // - oop value != NULL (same as verify())
985 // - oop value is a String
986 // - hash(String) == hash in entry (same as verify())
987 // - index for hash == index of entry (same as verify())
988 //
989 // StringTable::compare_entries() checks:
990 // - oops are unique across all entries
991 // - String values are unique across all entries
992 //
993 int StringTable::verify_and_compare_entries() {
994 assert(StringTable_lock->is_locked(), "sanity check");
996 int fail_cnt = 0;
998 // first, verify all the entries individually:
999 for (int bkt = 0; bkt < the_table()->table_size(); bkt++) {
1000 HashtableEntry<oop, mtSymbol>* e_ptr = the_table()->bucket(bkt);
1001 for (int e_cnt = 0; e_ptr != NULL; e_ptr = e_ptr->next(), e_cnt++) {
1002 VerifyRetTypes ret = verify_entry(bkt, e_cnt, e_ptr, _verify_with_mesgs);
1003 if (ret != _verify_pass) {
1004 fail_cnt++;
1005 }
1006 }
1007 }
1009 // Optimization: if the above check did not find any failures, then
1010 // the comparison loop below does not need to call verify_entry()
1011 // before calling compare_entries(). If there were failures, then we
1012 // have to call verify_entry() to see if the entry can be passed to
1013 // compare_entries() safely. When we call verify_entry() in the loop
1014 // below, we do so quietly to void duplicate messages and we don't
1015 // increment fail_cnt because the failures have already been counted.
1016 bool need_entry_verify = (fail_cnt != 0);
1018 // second, verify all entries relative to each other:
1019 for (int bkt1 = 0; bkt1 < the_table()->table_size(); bkt1++) {
1020 HashtableEntry<oop, mtSymbol>* e_ptr1 = the_table()->bucket(bkt1);
1021 for (int e_cnt1 = 0; e_ptr1 != NULL; e_ptr1 = e_ptr1->next(), e_cnt1++) {
1022 if (need_entry_verify) {
1023 VerifyRetTypes ret = verify_entry(bkt1, e_cnt1, e_ptr1,
1024 _verify_quietly);
1025 if (ret == _verify_fail_done) {
1026 // cannot use the current entry to compare against other entries
1027 continue;
1028 }
1029 }
1031 for (int bkt2 = bkt1; bkt2 < the_table()->table_size(); bkt2++) {
1032 HashtableEntry<oop, mtSymbol>* e_ptr2 = the_table()->bucket(bkt2);
1033 int e_cnt2;
1034 for (e_cnt2 = 0; e_ptr2 != NULL; e_ptr2 = e_ptr2->next(), e_cnt2++) {
1035 if (bkt1 == bkt2 && e_cnt2 <= e_cnt1) {
1036 // skip the entries up to and including the one that
1037 // we're comparing against
1038 continue;
1039 }
1041 if (need_entry_verify) {
1042 VerifyRetTypes ret = verify_entry(bkt2, e_cnt2, e_ptr2,
1043 _verify_quietly);
1044 if (ret == _verify_fail_done) {
1045 // cannot compare against this entry
1046 continue;
1047 }
1048 }
1050 // compare two entries, report and count any failures:
1051 if (compare_entries(bkt1, e_cnt1, e_ptr1, bkt2, e_cnt2, e_ptr2)
1052 != _verify_pass) {
1053 fail_cnt++;
1054 }
1055 }
1056 }
1057 }
1058 }
1059 return fail_cnt;
1060 }
1062 // Create a new table and using alternate hash code, populate the new table
1063 // with the existing strings. Set flag to use the alternate hash code afterwards.
1064 void StringTable::rehash_table() {
1065 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
1066 // This should never happen with -Xshare:dump but it might in testing mode.
1067 if (DumpSharedSpaces) return;
1068 StringTable* new_table = new StringTable();
1070 // Rehash the table
1071 the_table()->move_to(new_table);
1073 // Delete the table and buckets (entries are reused in new table).
1074 delete _the_table;
1075 // Don't check if we need rehashing until the table gets unbalanced again.
1076 // Then rehash with a new global seed.
1077 _needs_rehashing = false;
1078 _the_table = new_table;
1079 }