Thu, 24 Mar 2016 21:38:15 -0700
8150752: Share Class Data
Reviewed-by: acorn, hseigel, mschoene
1 /*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/altHashing.hpp"
27 #include "classfile/javaClasses.hpp"
28 #include "classfile/symbolTable.hpp"
29 #include "classfile/systemDictionary.hpp"
30 #include "gc_interface/collectedHeap.inline.hpp"
31 #include "memory/allocation.inline.hpp"
32 #include "memory/filemap.hpp"
33 #include "memory/gcLocker.inline.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "oops/oop.inline2.hpp"
36 #include "runtime/mutexLocker.hpp"
37 #include "utilities/hashtable.inline.hpp"
38 #if INCLUDE_ALL_GCS
39 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
40 #include "gc_implementation/g1/g1StringDedup.hpp"
41 #endif
43 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
45 // --------------------------------------------------------------------------
47 // The number of buckets a thread claims in one Atomic::add step of the possibly_parallel_* table walks in this file.
48 const int ClaimChunkSize = 32;
50 SymbolTable* SymbolTable::_the_table = NULL; // the global symbol table; replaced wholesale by rehash_table()
51 // Static arena for symbols that are not deallocated
52 Arena* SymbolTable::_arena = NULL; // created by initialize_symbols()
53 bool SymbolTable::_needs_rehashing = false; // set by the bucket lookup when a chain reaches rehash_count entries
55 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
56 assert (len <= Symbol::max_length(), "should be checked by caller");
58 Symbol* sym;
60 if (DumpSharedSpaces) {
61 // Allocate all symbols to CLD shared metaspace
62 sym = new (len, ClassLoaderData::the_null_class_loader_data(), THREAD) Symbol(name, len, -1);
63 } else if (c_heap) {
64 // refcount starts as 1
65 sym = new (len, THREAD) Symbol(name, len, 1);
66 assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
67 } else {
68 // Allocate to global arena
69 sym = new (len, arena(), THREAD) Symbol(name, len, -1);
70 }
71 return sym;
72 }
74 void SymbolTable::initialize_symbols(int arena_alloc_size) {
75 // Initialize the arena for global symbols, size passed in depends on CDS.
76 if (arena_alloc_size == 0) {
77 _arena = new (mtSymbol) Arena(mtSymbol);
78 } else {
79 _arena = new (mtSymbol) Arena(mtSymbol, arena_alloc_size);
80 }
81 }
83 // Call function for all symbols in the symbol table.
84 void SymbolTable::symbols_do(SymbolClosure *cl) {
85 const int n = the_table()->table_size();
86 for (int i = 0; i < n; i++) {
87 for (HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
88 p != NULL;
89 p = p->next()) {
90 cl->do_symbol(p->literal_addr());
91 }
92 }
93 }
95 int SymbolTable::_symbols_removed = 0; // running total of entries removed by unlink()/possibly_parallel_unlink()
96 int SymbolTable::_symbols_counted = 0; // running total of entries examined by unlink()/possibly_parallel_unlink()
97 volatile int SymbolTable::_parallel_claimed_idx = 0; // bucket cursor advanced with Atomic::add by possibly_parallel_unlink()
99 void SymbolTable::buckets_unlink(int start_idx, int end_idx, int* processed, int* removed, size_t* memory_total) {
100 for (int i = start_idx; i < end_idx; ++i) {
101 HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
102 HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
103 while (entry != NULL) {
104 // Shared entries are normally at the end of the bucket and if we run into
105 // a shared entry, then there is nothing more to remove. However, if we
106 // have rehashed the table, then the shared entries are no longer at the
107 // end of the bucket.
108 if (entry->is_shared() && !use_alternate_hashcode()) {
109 break;
110 }
111 Symbol* s = entry->literal();
112 (*memory_total) += s->size();
113 (*processed)++;
114 assert(s != NULL, "just checking");
115 // If reference count is zero, remove.
116 if (s->refcount() == 0) {
117 assert(!entry->is_shared(), "shared entries should be kept live");
118 delete s;
119 (*removed)++;
120 *p = entry->next();
121 the_table()->free_entry(entry);
122 } else {
123 p = entry->next_addr();
124 }
125 // get next entry
126 entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
127 }
128 }
129 }
131 // Remove unreferenced symbols from the symbol table
132 // This is done late during GC.
133 void SymbolTable::unlink(int* processed, int* removed) {
134 size_t memory_total = 0;
135 buckets_unlink(0, the_table()->table_size(), processed, removed, &memory_total);
136 _symbols_removed += *removed;
137 _symbols_counted += *processed;
138 // Exclude printing for normal PrintGCDetails because people parse
139 // this output.
140 if (PrintGCDetails && Verbose && WizardMode) {
141 gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", *processed,
142 (memory_total*HeapWordSize)/1024);
143 }
144 }
146 void SymbolTable::possibly_parallel_unlink(int* processed, int* removed) {
147 const int limit = the_table()->table_size();
149 size_t memory_total = 0;
151 for (;;) {
152 // Grab next set of buckets to scan
153 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
154 if (start_idx >= limit) {
155 // End of table
156 break;
157 }
159 int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
160 buckets_unlink(start_idx, end_idx, processed, removed, &memory_total);
161 }
162 Atomic::add(*processed, &_symbols_counted);
163 Atomic::add(*removed, &_symbols_removed);
164 // Exclude printing for normal PrintGCDetails because people parse
165 // this output.
166 if (PrintGCDetails && Verbose && WizardMode) {
167 gclog_or_tty->print(" [Symbols: scanned=%d removed=%d size=" SIZE_FORMAT "K] ", *processed, *removed,
168 (memory_total*HeapWordSize)/1024);
169 }
170 }
172 // Create a new table and using alternate hash code, populate the new table
173 // with the existing strings. Set flag to use the alternate hash code afterwards.
174 void SymbolTable::rehash_table() {
175 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
176 // This should never happen with -Xshare:dump but it might in testing mode.
177 if (DumpSharedSpaces) return;
178 // Create a new symbol table
179 SymbolTable* new_table = new SymbolTable();
181 the_table()->move_to(new_table);
183 // Delete the table and buckets (entries are reused in new table).
184 delete _the_table;
185 // Don't check if we need rehashing until the table gets unbalanced again.
186 // Then rehash with a new global seed.
187 _needs_rehashing = false;
188 _the_table = new_table;
189 }
191 // Lookup a symbol in a bucket.
193 Symbol* SymbolTable::lookup(int index, const char* name,
194 int len, unsigned int hash) {
195 int count = 0;
196 for (HashtableEntry<Symbol*, mtSymbol>* e = bucket(index); e != NULL; e = e->next()) {
197 count++; // count all entries in this bucket, not just ones with same hash
198 if (e->hash() == hash) {
199 Symbol* sym = e->literal();
200 if (sym->equals(name, len)) {
201 // something is referencing this symbol now.
202 sym->increment_refcount();
203 return sym;
204 }
205 }
206 }
207 // If the bucket size is too deep check if this hash code is insufficient.
208 if (count >= rehash_count && !needs_rehashing()) {
209 _needs_rehashing = check_rehash_table(count);
210 }
211 return NULL;
212 }
214 // Pick hashing algorithm.
215 unsigned int SymbolTable::hash_symbol(const char* s, int len) {
216 return use_alternate_hashcode() ?
217 AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
218 java_lang_String::hash_code(s, len);
219 }
222 // We take care not to block while holding the SymbolTable_lock.
223 // Otherwise, the system might deadlock, since the symbol table is
224 // used during compilation (VM_thread). The lock-free
225 // synchronization is simplified by the fact that we do not delete
226 // entries in the symbol table during normal execution (only during
227 // safepoints).
229 Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
230 unsigned int hashValue = hash_symbol(name, len);
231 int index = the_table()->hash_to_index(hashValue);
233 Symbol* s = the_table()->lookup(index, name, len, hashValue);
235 // Found
236 if (s != NULL) return s;
238 // Grab SymbolTable_lock first.
239 MutexLocker ml(SymbolTable_lock, THREAD);
241 // Otherwise, add to symbol to table
242 return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
243 }
245 Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
246 char* buffer;
247 int index, len;
248 unsigned int hashValue;
249 char* name;
250 {
251 debug_only(No_Safepoint_Verifier nsv;)
253 name = (char*)sym->base() + begin;
254 len = end - begin;
255 hashValue = hash_symbol(name, len);
256 index = the_table()->hash_to_index(hashValue);
257 Symbol* s = the_table()->lookup(index, name, len, hashValue);
259 // Found
260 if (s != NULL) return s;
261 }
263 // Otherwise, add to symbol to table. Copy to a C string first.
264 char stack_buf[128];
265 ResourceMark rm(THREAD);
266 if (len <= 128) {
267 buffer = stack_buf;
268 } else {
269 buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
270 }
271 for (int i=0; i<len; i++) {
272 buffer[i] = name[i];
273 }
274 // Make sure there is no safepoint in the code above since name can't move.
275 // We can't include the code in No_Safepoint_Verifier because of the
276 // ResourceMark.
278 // Grab SymbolTable_lock first.
279 MutexLocker ml(SymbolTable_lock, THREAD);
281 return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
282 }
284 Symbol* SymbolTable::lookup_only(const char* name, int len,
285 unsigned int& hash) {
286 hash = hash_symbol(name, len);
287 int index = the_table()->hash_to_index(hash);
289 Symbol* s = the_table()->lookup(index, name, len, hash);
290 return s;
291 }
293 // Look up the address of the literal in the SymbolTable for this Symbol*
294 // Do not create any new symbols
295 // Do not increment the reference count to keep this alive
296 Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
297 unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
298 int index = the_table()->hash_to_index(hash);
300 for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
301 if (e->hash() == hash) {
302 Symbol* literal_sym = e->literal();
303 if (sym == literal_sym) {
304 return e->literal_addr();
305 }
306 }
307 }
308 return NULL;
309 }
311 // Suggestion: Push unicode-based lookup all the way into the hashing
312 // and probing logic, so there is no need for convert_to_utf8 until
313 // an actual new Symbol* is created.
314 Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
315 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
316 char stack_buf[128];
317 if (utf8_length < (int) sizeof(stack_buf)) {
318 char* chars = stack_buf;
319 UNICODE::convert_to_utf8(name, utf16_length, chars);
320 return lookup(chars, utf8_length, THREAD);
321 } else {
322 ResourceMark rm(THREAD);
323 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
324 UNICODE::convert_to_utf8(name, utf16_length, chars);
325 return lookup(chars, utf8_length, THREAD);
326 }
327 }
329 Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
330 unsigned int& hash) {
331 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
332 char stack_buf[128];
333 if (utf8_length < (int) sizeof(stack_buf)) {
334 char* chars = stack_buf;
335 UNICODE::convert_to_utf8(name, utf16_length, chars);
336 return lookup_only(chars, utf8_length, hash);
337 } else {
338 ResourceMark rm;
339 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
340 UNICODE::convert_to_utf8(name, utf16_length, chars);
341 return lookup_only(chars, utf8_length, hash);
342 }
343 }
345 void SymbolTable::add(ClassLoaderData* loader_data, constantPoolHandle cp,
346 int names_count,
347 const char** names, int* lengths, int* cp_indices,
348 unsigned int* hashValues, TRAPS) {
349 // Grab SymbolTable_lock first.
350 MutexLocker ml(SymbolTable_lock, THREAD);
352 SymbolTable* table = the_table();
353 bool added = table->basic_add(loader_data, cp, names_count, names, lengths,
354 cp_indices, hashValues, CHECK);
355 if (!added) {
356 // do it the hard way
357 for (int i=0; i<names_count; i++) {
358 int index = table->hash_to_index(hashValues[i]);
359 bool c_heap = !loader_data->is_the_null_class_loader_data();
360 Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
361 cp->symbol_at_put(cp_indices[i], sym);
362 }
363 }
364 }
366 Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
367 unsigned int hash;
368 Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
369 if (result != NULL) {
370 return result;
371 }
372 // Grab SymbolTable_lock first.
373 MutexLocker ml(SymbolTable_lock, THREAD);
375 SymbolTable* table = the_table();
376 int index = table->hash_to_index(hash);
377 return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
378 }
380 Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
381 unsigned int hashValue_arg, bool c_heap, TRAPS) {
382 assert(!Universe::heap()->is_in_reserved(name),
383 "proposed name of symbol must be stable");
385 // Don't allow symbols to be created which cannot fit in a Symbol*.
386 if (len > Symbol::max_length()) {
387 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
388 "name is too long to represent");
389 }
391 // Cannot hit a safepoint in this function because the "this" pointer can move.
392 No_Safepoint_Verifier nsv;
394 // Check if the symbol table has been rehashed, if so, need to recalculate
395 // the hash value and index.
396 unsigned int hashValue;
397 int index;
398 if (use_alternate_hashcode()) {
399 hashValue = hash_symbol((const char*)name, len);
400 index = hash_to_index(hashValue);
401 } else {
402 hashValue = hashValue_arg;
403 index = index_arg;
404 }
406 // Since look-up was done lock-free, we need to check if another
407 // thread beat us in the race to insert the symbol.
408 Symbol* test = lookup(index, (char*)name, len, hashValue);
409 if (test != NULL) {
410 // A race occurred and another thread introduced the symbol.
411 assert(test->refcount() != 0, "lookup should have incremented the count");
412 return test;
413 }
415 // Create a new symbol.
416 Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
417 assert(sym->equals((char*)name, len), "symbol must be properly initialized");
419 HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
420 add_entry(index, entry);
421 return sym;
422 }
424 // This version of basic_add adds symbols in batch from the constant pool
425 // parsing.
426 bool SymbolTable::basic_add(ClassLoaderData* loader_data, constantPoolHandle cp,
427 int names_count,
428 const char** names, int* lengths,
429 int* cp_indices, unsigned int* hashValues,
430 TRAPS) {
432 // Check symbol names are not too long. If any are too long, don't add any.
433 for (int i = 0; i< names_count; i++) {
434 if (lengths[i] > Symbol::max_length()) {
435 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
436 "name is too long to represent");
437 }
438 }
440 // Cannot hit a safepoint in this function because the "this" pointer can move.
441 No_Safepoint_Verifier nsv;
443 for (int i=0; i<names_count; i++) {
444 // Check if the symbol table has been rehashed, if so, need to recalculate
445 // the hash value.
446 unsigned int hashValue;
447 if (use_alternate_hashcode()) {
448 hashValue = hash_symbol(names[i], lengths[i]);
449 } else {
450 hashValue = hashValues[i];
451 }
452 // Since look-up was done lock-free, we need to check if another
453 // thread beat us in the race to insert the symbol.
454 int index = hash_to_index(hashValue);
455 Symbol* test = lookup(index, names[i], lengths[i], hashValue);
456 if (test != NULL) {
457 // A race occurred and another thread introduced the symbol, this one
458 // will be dropped and collected. Use test instead.
459 cp->symbol_at_put(cp_indices[i], test);
460 assert(test->refcount() != 0, "lookup should have incremented the count");
461 } else {
462 // Create a new symbol. The null class loader is never unloaded so these
463 // are allocated specially in a permanent arena.
464 bool c_heap = !loader_data->is_the_null_class_loader_data();
465 Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
466 assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be???
467 HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
468 add_entry(index, entry);
469 cp->symbol_at_put(cp_indices[i], sym);
470 }
471 }
472 return true;
473 }
476 void SymbolTable::verify() {
477 for (int i = 0; i < the_table()->table_size(); ++i) {
478 HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
479 for ( ; p != NULL; p = p->next()) {
480 Symbol* s = (Symbol*)(p->literal());
481 guarantee(s != NULL, "symbol is NULL");
482 unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
483 guarantee(p->hash() == h, "broken hash in symbol table entry");
484 guarantee(the_table()->hash_to_index(h) == i,
485 "wrong index in symbol table");
486 }
487 }
488 }
490 void SymbolTable::dump(outputStream* st) {
491 the_table()->dump_table(st, "SymbolTable");
492 }
495 //---------------------------------------------------------------------------
496 // Non-product code
498 #ifndef PRODUCT
500 void SymbolTable::print_histogram() {
501 MutexLocker ml(SymbolTable_lock);
502 const int results_length = 100;
503 int results[results_length];
504 int i,j;
506 // initialize results to zero
507 for (j = 0; j < results_length; j++) {
508 results[j] = 0;
509 }
511 int total = 0;
512 int max_symbols = 0;
513 int out_of_range = 0;
514 int memory_total = 0;
515 int count = 0;
516 for (i = 0; i < the_table()->table_size(); i++) {
517 HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
518 for ( ; p != NULL; p = p->next()) {
519 memory_total += p->literal()->size();
520 count++;
521 int counter = p->literal()->utf8_length();
522 total += counter;
523 if (counter < results_length) {
524 results[counter]++;
525 } else {
526 out_of_range++;
527 }
528 max_symbols = MAX2(max_symbols, counter);
529 }
530 }
531 tty->print_cr("Symbol Table:");
532 tty->print_cr("Total number of symbols %5d", count);
533 tty->print_cr("Total size in memory %5dK",
534 (memory_total*HeapWordSize)/1024);
535 tty->print_cr("Total counted %5d", _symbols_counted);
536 tty->print_cr("Total removed %5d", _symbols_removed);
537 if (_symbols_counted > 0) {
538 tty->print_cr("Percent removed %3.2f",
539 ((float)_symbols_removed/(float)_symbols_counted)* 100);
540 }
541 tty->print_cr("Reference counts %5d", Symbol::_total_count);
542 tty->print_cr("Symbol arena size %5d used %5d",
543 arena()->size_in_bytes(), arena()->used());
544 tty->print_cr("Histogram of symbol length:");
545 tty->print_cr("%8s %5d", "Total ", total);
546 tty->print_cr("%8s %5d", "Maximum", max_symbols);
547 tty->print_cr("%8s %3.2f", "Average",
548 ((float) total / (float) the_table()->table_size()));
549 tty->print_cr("%s", "Histogram:");
550 tty->print_cr(" %s %29s", "Length", "Number chains that length");
551 for (i = 0; i < results_length; i++) {
552 if (results[i] > 0) {
553 tty->print_cr("%6d %10d", i, results[i]);
554 }
555 }
556 if (Verbose) {
557 int line_length = 70;
558 tty->print_cr("%s %30s", " Length", "Number chains that length");
559 for (i = 0; i < results_length; i++) {
560 if (results[i] > 0) {
561 tty->print("%4d", i);
562 for (j = 0; (j < results[i]) && (j < line_length); j++) {
563 tty->print("%1s", "*");
564 }
565 if (j == line_length) {
566 tty->print("%1s", "+");
567 }
568 tty->cr();
569 }
570 }
571 }
572 tty->print_cr(" %s %d: %d\n", "Number chains longer than",
573 results_length, out_of_range);
574 }
576 void SymbolTable::print() {
577 for (int i = 0; i < the_table()->table_size(); ++i) {
578 HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
579 HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
580 if (entry != NULL) {
581 while (entry != NULL) {
582 tty->print(PTR_FORMAT " ", entry->literal());
583 entry->literal()->print();
584 tty->print(" %d", entry->literal()->refcount());
585 p = entry->next_addr();
586 entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
587 }
588 tty->cr();
589 }
590 }
591 }
592 #endif // PRODUCT
594 // --------------------------------------------------------------------------
596 #ifdef ASSERT
597 class StableMemoryChecker : public StackObj {
598 enum { _bufsize = wordSize*4 };
600 address _region;
601 jint _size;
602 u1 _save_buf[_bufsize];
604 int sample(u1* save_buf) {
605 if (_size <= _bufsize) {
606 memcpy(save_buf, _region, _size);
607 return _size;
608 } else {
609 // copy head and tail
610 memcpy(&save_buf[0], _region, _bufsize/2);
611 memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
612 return (_bufsize/2)*2;
613 }
614 }
616 public:
617 StableMemoryChecker(const void* region, jint size) {
618 _region = (address) region;
619 _size = size;
620 sample(_save_buf);
621 }
623 bool verify() {
624 u1 check_buf[sizeof(_save_buf)];
625 int check_size = sample(check_buf);
626 return (0 == memcmp(_save_buf, check_buf, check_size));
627 }
629 void set_region(const void* region) { _region = (address) region; }
630 };
631 #endif
634 // --------------------------------------------------------------------------
635 StringTable* StringTable::_the_table = NULL; // the global string table
637 bool StringTable::_needs_rehashing = false; // set by the bucket lookup when a chain reaches rehash_count entries
639 volatile int StringTable::_parallel_claimed_idx = 0; // bucket cursor advanced with Atomic::add by the possibly_parallel_* walkers
641 // Pick hashing algorithm
642 unsigned int StringTable::hash_string(const jchar* s, int len) {
643 return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
644 java_lang_String::hash_code(s, len);
645 }
647 oop StringTable::lookup(int index, jchar* name,
648 int len, unsigned int hash) {
649 int count = 0;
650 for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) {
651 count++;
652 if (l->hash() == hash) {
653 if (java_lang_String::equals(l->literal(), name, len)) {
654 return l->literal();
655 }
656 }
657 }
658 // If the bucket size is too deep check if this hash code is insufficient.
659 if (count >= rehash_count && !needs_rehashing()) {
660 _needs_rehashing = check_rehash_table(count);
661 }
662 return NULL;
663 }
666 oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
667 int len, unsigned int hashValue_arg, TRAPS) {
669 assert(java_lang_String::equals(string(), name, len),
670 "string must be properly initialized");
671 // Cannot hit a safepoint in this function because the "this" pointer can move.
672 No_Safepoint_Verifier nsv;
674 // Check if the symbol table has been rehashed, if so, need to recalculate
675 // the hash value and index before second lookup.
676 unsigned int hashValue;
677 int index;
678 if (use_alternate_hashcode()) {
679 hashValue = hash_string(name, len);
680 index = hash_to_index(hashValue);
681 } else {
682 hashValue = hashValue_arg;
683 index = index_arg;
684 }
686 // Since look-up was done lock-free, we need to check if another
687 // thread beat us in the race to insert the symbol.
689 oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
690 if (test != NULL) {
691 // Entry already added
692 return test;
693 }
695 HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string());
696 add_entry(index, entry);
697 return string();
698 }
701 oop StringTable::lookup(Symbol* symbol) {
702 ResourceMark rm;
703 int length;
704 jchar* chars = symbol->as_unicode(length);
705 return lookup(chars, length);
706 }
708 // Tell the GC that this string was looked up in the StringTable.
709 static void ensure_string_alive(oop string) {
710 // A lookup in the StringTable could return an object that was previously
711 // considered dead. The SATB part of G1 needs to get notified about this
712 // potential resurrection, otherwise the marking might not find the object.
713 #if INCLUDE_ALL_GCS
714 if (UseG1GC && string != NULL) {
715 G1SATBCardTableModRefBS::enqueue(string);
716 }
717 #endif
718 }
720 oop StringTable::lookup(jchar* name, int len) {
721 unsigned int hash = hash_string(name, len);
722 int index = the_table()->hash_to_index(hash);
723 oop string = the_table()->lookup(index, name, len, hash);
725 ensure_string_alive(string);
727 return string;
728 }
731 oop StringTable::intern(Handle string_or_null, jchar* name,
732 int len, TRAPS) {
733 unsigned int hashValue = hash_string(name, len);
734 int index = the_table()->hash_to_index(hashValue);
735 oop found_string = the_table()->lookup(index, name, len, hashValue);
737 // Found
738 if (found_string != NULL) {
739 ensure_string_alive(found_string);
740 return found_string;
741 }
743 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
744 assert(!Universe::heap()->is_in_reserved(name),
745 "proposed name of symbol must be stable");
747 Handle string;
748 // try to reuse the string if possible
749 if (!string_or_null.is_null()) {
750 string = string_or_null;
751 } else {
752 string = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
753 }
755 #if INCLUDE_ALL_GCS
756 if (G1StringDedup::is_enabled()) {
757 // Deduplicate the string before it is interned. Note that we should never
758 // deduplicate a string after it has been interned. Doing so will counteract
759 // compiler optimizations done on e.g. interned string literals.
760 G1StringDedup::deduplicate(string());
761 }
762 #endif
764 // Grab the StringTable_lock before getting the_table() because it could
765 // change at safepoint.
766 oop added_or_found;
767 {
768 MutexLocker ml(StringTable_lock, THREAD);
769 // Otherwise, add to symbol to table
770 added_or_found = the_table()->basic_add(index, string, name, len,
771 hashValue, CHECK_NULL);
772 }
774 ensure_string_alive(added_or_found);
776 return added_or_found;
777 }
779 oop StringTable::intern(Symbol* symbol, TRAPS) {
780 if (symbol == NULL) return NULL;
781 ResourceMark rm(THREAD);
782 int length;
783 jchar* chars = symbol->as_unicode(length);
784 Handle string;
785 oop result = intern(string, chars, length, CHECK_NULL);
786 return result;
787 }
790 oop StringTable::intern(oop string, TRAPS)
791 {
792 if (string == NULL) return NULL;
793 ResourceMark rm(THREAD);
794 int length;
795 Handle h_string (THREAD, string);
796 jchar* chars = java_lang_String::as_unicode_string(string, length, CHECK_NULL);
797 oop result = intern(h_string, chars, length, CHECK_NULL);
798 return result;
799 }
802 oop StringTable::intern(const char* utf8_string, TRAPS) {
803 if (utf8_string == NULL) return NULL;
804 ResourceMark rm(THREAD);
805 int length = UTF8::unicode_length(utf8_string);
806 jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
807 UTF8::convert_to_unicode(utf8_string, chars, length);
808 Handle string;
809 oop result = intern(string, chars, length, CHECK_NULL);
810 return result;
811 }
813 void StringTable::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
814 buckets_unlink_or_oops_do(is_alive, f, 0, the_table()->table_size(), processed, removed);
815 }
817 void StringTable::possibly_parallel_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
818 // Readers of the table are unlocked, so we should only be removing
819 // entries at a safepoint.
820 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
821 const int limit = the_table()->table_size();
823 for (;;) {
824 // Grab next set of buckets to scan
825 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
826 if (start_idx >= limit) {
827 // End of table
828 break;
829 }
831 int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
832 buckets_unlink_or_oops_do(is_alive, f, start_idx, end_idx, processed, removed);
833 }
834 }
836 void StringTable::buckets_oops_do(OopClosure* f, int start_idx, int end_idx) {
837 const int limit = the_table()->table_size();
839 assert(0 <= start_idx && start_idx <= limit,
840 err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
841 assert(0 <= end_idx && end_idx <= limit,
842 err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
843 assert(start_idx <= end_idx,
844 err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
845 start_idx, end_idx));
847 for (int i = start_idx; i < end_idx; i += 1) {
848 HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
849 while (entry != NULL) {
850 assert(!entry->is_shared(), "CDS not used for the StringTable");
852 f->do_oop((oop*)entry->literal_addr());
854 entry = entry->next();
855 }
856 }
857 }
859 void StringTable::buckets_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int start_idx, int end_idx, int* processed, int* removed) {
860 const int limit = the_table()->table_size();
862 assert(0 <= start_idx && start_idx <= limit,
863 err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
864 assert(0 <= end_idx && end_idx <= limit,
865 err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
866 assert(start_idx <= end_idx,
867 err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
868 start_idx, end_idx));
870 for (int i = start_idx; i < end_idx; ++i) {
871 HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
872 HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
873 while (entry != NULL) {
874 assert(!entry->is_shared(), "CDS not used for the StringTable");
876 if (is_alive->do_object_b(entry->literal())) {
877 if (f != NULL) {
878 f->do_oop((oop*)entry->literal_addr());
879 }
880 p = entry->next_addr();
881 } else {
882 *p = entry->next();
883 the_table()->free_entry(entry);
884 (*removed)++;
885 }
886 (*processed)++;
887 entry = *p;
888 }
889 }
890 }
892 void StringTable::oops_do(OopClosure* f) {
893 buckets_oops_do(f, 0, the_table()->table_size());
894 }
896 void StringTable::possibly_parallel_oops_do(OopClosure* f) {
897 const int limit = the_table()->table_size();
899 for (;;) {
900 // Grab next set of buckets to scan
901 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
902 if (start_idx >= limit) {
903 // End of table
904 break;
905 }
907 int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
908 buckets_oops_do(f, start_idx, end_idx);
909 }
910 }
912 // This verification is part of Universe::verify() and needs to be quick.
913 // See StringTable::verify_and_compare() below for exhaustive verification.
914 void StringTable::verify() {
915 for (int i = 0; i < the_table()->table_size(); ++i) {
916 HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
917 for ( ; p != NULL; p = p->next()) {
918 oop s = p->literal();
919 guarantee(s != NULL, "interned string is NULL");
920 unsigned int h = java_lang_String::hash_string(s);
921 guarantee(p->hash() == h, "broken hash in string table entry");
922 guarantee(the_table()->hash_to_index(h) == i,
923 "wrong index in string table");
924 }
925 }
926 }
928 void StringTable::dump(outputStream* st) {
929 the_table()->dump_table(st, "StringTable");
930 }
932 StringTable::VerifyRetTypes StringTable::compare_entries(
933 int bkt1, int e_cnt1,
934 HashtableEntry<oop, mtSymbol>* e_ptr1,
935 int bkt2, int e_cnt2,
936 HashtableEntry<oop, mtSymbol>* e_ptr2) {
937 // These entries are sanity checked by verify_and_compare_entries()
938 // before this function is called.
939 oop str1 = e_ptr1->literal();
940 oop str2 = e_ptr2->literal();
942 if (str1 == str2) {
943 tty->print_cr("ERROR: identical oop values (0x" PTR_FORMAT ") "
944 "in entry @ bucket[%d][%d] and entry @ bucket[%d][%d]",
945 (void *)str1, bkt1, e_cnt1, bkt2, e_cnt2);
946 return _verify_fail_continue;
947 }
949 if (java_lang_String::equals(str1, str2)) {
950 tty->print_cr("ERROR: identical String values in entry @ "
951 "bucket[%d][%d] and entry @ bucket[%d][%d]",
952 bkt1, e_cnt1, bkt2, e_cnt2);
953 return _verify_fail_continue;
954 }
956 return _verify_pass;
957 }
959 StringTable::VerifyRetTypes StringTable::verify_entry(int bkt, int e_cnt,
960 HashtableEntry<oop, mtSymbol>* e_ptr,
961 StringTable::VerifyMesgModes mesg_mode) {
963 VerifyRetTypes ret = _verify_pass; // be optimistic
965 oop str = e_ptr->literal();
966 if (str == NULL) {
967 if (mesg_mode == _verify_with_mesgs) {
968 tty->print_cr("ERROR: NULL oop value in entry @ bucket[%d][%d]", bkt,
969 e_cnt);
970 }
971 // NULL oop means no more verifications are possible
972 return _verify_fail_done;
973 }
975 if (str->klass() != SystemDictionary::String_klass()) {
976 if (mesg_mode == _verify_with_mesgs) {
977 tty->print_cr("ERROR: oop is not a String in entry @ bucket[%d][%d]",
978 bkt, e_cnt);
979 }
980 // not a String means no more verifications are possible
981 return _verify_fail_done;
982 }
984 unsigned int h = java_lang_String::hash_string(str);
985 if (e_ptr->hash() != h) {
986 if (mesg_mode == _verify_with_mesgs) {
987 tty->print_cr("ERROR: broken hash value in entry @ bucket[%d][%d], "
988 "bkt_hash=%d, str_hash=%d", bkt, e_cnt, e_ptr->hash(), h);
989 }
990 ret = _verify_fail_continue;
991 }
993 if (the_table()->hash_to_index(h) != bkt) {
994 if (mesg_mode == _verify_with_mesgs) {
995 tty->print_cr("ERROR: wrong index value for entry @ bucket[%d][%d], "
996 "str_hash=%d, hash_to_index=%d", bkt, e_cnt, h,
997 the_table()->hash_to_index(h));
998 }
999 ret = _verify_fail_continue;
1000 }
1002 return ret;
1003 }
1005 // See StringTable::verify() above for the quick verification that is
1006 // part of Universe::verify(). This verification is exhaustive and
1007 // reports on every issue that is found. StringTable::verify() only
1008 // reports on the first issue that is found.
1009 //
1010 // StringTable::verify_entry() checks:
1011 // - oop value != NULL (same as verify())
1012 // - oop value is a String
1013 // - hash(String) == hash in entry (same as verify())
1014 // - index for hash == index of entry (same as verify())
1015 //
1016 // StringTable::compare_entries() checks:
1017 // - oops are unique across all entries
1018 // - String values are unique across all entries
1019 //
1020 int StringTable::verify_and_compare_entries() {
1021 assert(StringTable_lock->is_locked(), "sanity check");
1023 int fail_cnt = 0;
1025 // first, verify all the entries individually:
1026 for (int bkt = 0; bkt < the_table()->table_size(); bkt++) {
1027 HashtableEntry<oop, mtSymbol>* e_ptr = the_table()->bucket(bkt);
1028 for (int e_cnt = 0; e_ptr != NULL; e_ptr = e_ptr->next(), e_cnt++) {
1029 VerifyRetTypes ret = verify_entry(bkt, e_cnt, e_ptr, _verify_with_mesgs);
1030 if (ret != _verify_pass) {
1031 fail_cnt++;
1032 }
1033 }
1034 }
1036 // Optimization: if the above check did not find any failures, then
1037 // the comparison loop below does not need to call verify_entry()
1038 // before calling compare_entries(). If there were failures, then we
1039 // have to call verify_entry() to see if the entry can be passed to
1040 // compare_entries() safely. When we call verify_entry() in the loop
1041 // below, we do so quietly to void duplicate messages and we don't
1042 // increment fail_cnt because the failures have already been counted.
1043 bool need_entry_verify = (fail_cnt != 0);
1045 // second, verify all entries relative to each other:
1046 for (int bkt1 = 0; bkt1 < the_table()->table_size(); bkt1++) {
1047 HashtableEntry<oop, mtSymbol>* e_ptr1 = the_table()->bucket(bkt1);
1048 for (int e_cnt1 = 0; e_ptr1 != NULL; e_ptr1 = e_ptr1->next(), e_cnt1++) {
1049 if (need_entry_verify) {
1050 VerifyRetTypes ret = verify_entry(bkt1, e_cnt1, e_ptr1,
1051 _verify_quietly);
1052 if (ret == _verify_fail_done) {
1053 // cannot use the current entry to compare against other entries
1054 continue;
1055 }
1056 }
1058 for (int bkt2 = bkt1; bkt2 < the_table()->table_size(); bkt2++) {
1059 HashtableEntry<oop, mtSymbol>* e_ptr2 = the_table()->bucket(bkt2);
1060 int e_cnt2;
1061 for (e_cnt2 = 0; e_ptr2 != NULL; e_ptr2 = e_ptr2->next(), e_cnt2++) {
1062 if (bkt1 == bkt2 && e_cnt2 <= e_cnt1) {
1063 // skip the entries up to and including the one that
1064 // we're comparing against
1065 continue;
1066 }
1068 if (need_entry_verify) {
1069 VerifyRetTypes ret = verify_entry(bkt2, e_cnt2, e_ptr2,
1070 _verify_quietly);
1071 if (ret == _verify_fail_done) {
1072 // cannot compare against this entry
1073 continue;
1074 }
1075 }
1077 // compare two entries, report and count any failures:
1078 if (compare_entries(bkt1, e_cnt1, e_ptr1, bkt2, e_cnt2, e_ptr2)
1079 != _verify_pass) {
1080 fail_cnt++;
1081 }
1082 }
1083 }
1084 }
1085 }
1086 return fail_cnt;
1087 }
1089 // Create a new table and using alternate hash code, populate the new table
1090 // with the existing strings. Set flag to use the alternate hash code afterwards.
1091 void StringTable::rehash_table() {
1092 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
1093 // This should never happen with -Xshare:dump but it might in testing mode.
1094 if (DumpSharedSpaces) return;
1095 StringTable* new_table = new StringTable();
1097 // Rehash the table
1098 the_table()->move_to(new_table);
1100 // Delete the table and buckets (entries are reused in new table).
1101 delete _the_table;
1102 // Don't check if we need rehashing until the table gets unbalanced again.
1103 // Then rehash with a new global seed.
1104 _needs_rehashing = false;
1105 _the_table = new_table;
1106 }