Wed, 03 Jul 2019 20:42:37 +0800
Merge
1 /*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/altHashing.hpp"
27 #include "classfile/javaClasses.hpp"
28 #include "classfile/symbolTable.hpp"
29 #include "classfile/systemDictionary.hpp"
30 #include "gc_interface/collectedHeap.inline.hpp"
31 #include "memory/allocation.inline.hpp"
32 #include "memory/filemap.hpp"
33 #include "memory/gcLocker.inline.hpp"
34 #include "oops/oop.inline.hpp"
35 #include "oops/oop.inline2.hpp"
36 #include "runtime/mutexLocker.hpp"
37 #include "utilities/hashtable.inline.hpp"
38 #if INCLUDE_ALL_GCS
39 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
40 #include "gc_implementation/g1/g1StringDedup.hpp"
41 #endif
43 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
45 // --------------------------------------------------------------------------
// Number of consecutive buckets a thread claims with a single atomic add on
// a table's _parallel_claimed_idx (see the possibly_parallel_* functions).
47 // the number of buckets a thread claims
48 const int ClaimChunkSize = 32;
// The symbol table instance used by the static SymbolTable entry points;
// replaced by rehash_table().
50 SymbolTable* SymbolTable::_the_table = NULL;
51 // Static arena for symbols that are not deallocated
// Created by initialize_symbols().
52 Arena* SymbolTable::_arena = NULL;
// Set by the bucket lookup when check_rehash_table() reports an overly deep
// bucket; cleared again by rehash_table().
53 bool SymbolTable::_needs_rehashing = false;
55 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
56 assert (len <= Symbol::max_length(), "should be checked by caller");
58 Symbol* sym;
60 if (DumpSharedSpaces) {
61 // Allocate all symbols to CLD shared metaspace
62 sym = new (len, ClassLoaderData::the_null_class_loader_data(), THREAD) Symbol(name, len, -1);
63 } else if (c_heap) {
64 // refcount starts as 1
65 sym = new (len, THREAD) Symbol(name, len, 1);
66 assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
67 } else {
68 // Allocate to global arena
69 sym = new (len, arena(), THREAD) Symbol(name, len, -1);
70 }
71 return sym;
72 }
74 void SymbolTable::initialize_symbols(int arena_alloc_size) {
75 // Initialize the arena for global symbols, size passed in depends on CDS.
76 if (arena_alloc_size == 0) {
77 _arena = new (mtSymbol) Arena(mtSymbol);
78 } else {
79 _arena = new (mtSymbol) Arena(mtSymbol, arena_alloc_size);
80 }
81 }
83 // Call function for all symbols in the symbol table.
84 void SymbolTable::symbols_do(SymbolClosure *cl) {
85 const int n = the_table()->table_size();
86 for (int i = 0; i < n; i++) {
87 for (HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
88 p != NULL;
89 p = p->next()) {
90 cl->do_symbol(p->literal_addr());
91 }
92 }
93 }
// Statistics from unlinking: overwritten by unlink(), accumulated atomically
// by possibly_parallel_unlink(), and reported by print_histogram().
95 int SymbolTable::_symbols_removed = 0;
96 int SymbolTable::_symbols_counted = 0;
// Next bucket index to hand out; advanced by ClaimChunkSize with Atomic::add
// in possibly_parallel_unlink().
97 volatile int SymbolTable::_parallel_claimed_idx = 0;
99 void SymbolTable::buckets_unlink(int start_idx, int end_idx, BucketUnlinkContext* context, size_t* memory_total) {
100 for (int i = start_idx; i < end_idx; ++i) {
101 HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
102 HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
103 while (entry != NULL) {
104 // Shared entries are normally at the end of the bucket and if we run into
105 // a shared entry, then there is nothing more to remove. However, if we
106 // have rehashed the table, then the shared entries are no longer at the
107 // end of the bucket.
108 if (entry->is_shared() && !use_alternate_hashcode()) {
109 break;
110 }
111 Symbol* s = entry->literal();
112 (*memory_total) += s->size();
113 context->_num_processed++;
114 assert(s != NULL, "just checking");
115 // If reference count is zero, remove.
116 if (s->refcount() == 0) {
117 assert(!entry->is_shared(), "shared entries should be kept live");
118 delete s;
119 *p = entry->next();
120 context->free_entry(entry);
121 } else {
122 p = entry->next_addr();
123 }
124 // get next entry
125 entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
126 }
127 }
128 }
130 // Remove unreferenced symbols from the symbol table
131 // This is done late during GC.
132 void SymbolTable::unlink(int* processed, int* removed) {
133 size_t memory_total = 0;
134 BucketUnlinkContext context;
135 buckets_unlink(0, the_table()->table_size(), &context, &memory_total);
136 _the_table->bulk_free_entries(&context);
137 *processed = context._num_processed;
138 *removed = context._num_removed;
140 _symbols_removed = context._num_removed;
141 _symbols_counted = context._num_processed;
142 // Exclude printing for normal PrintGCDetails because people parse
143 // this output.
144 if (PrintGCDetails && Verbose && WizardMode) {
145 gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", *processed,
146 (memory_total*HeapWordSize)/1024);
147 }
148 }
150 void SymbolTable::possibly_parallel_unlink(int* processed, int* removed) {
151 const int limit = the_table()->table_size();
153 size_t memory_total = 0;
155 BucketUnlinkContext context;
156 for (;;) {
157 // Grab next set of buckets to scan
158 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
159 if (start_idx >= limit) {
160 // End of table
161 break;
162 }
164 int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
165 buckets_unlink(start_idx, end_idx, &context, &memory_total);
166 }
168 _the_table->bulk_free_entries(&context);
169 *processed = context._num_processed;
170 *removed = context._num_removed;
172 Atomic::add(context._num_processed, &_symbols_counted);
173 Atomic::add(context._num_removed, &_symbols_removed);
174 // Exclude printing for normal PrintGCDetails because people parse
175 // this output.
176 if (PrintGCDetails && Verbose && WizardMode) {
177 gclog_or_tty->print(" [Symbols: scanned=%d removed=%d size=" SIZE_FORMAT "K] ", *processed, *removed,
178 (memory_total*HeapWordSize)/1024);
179 }
180 }
182 // Create a new table and using alternate hash code, populate the new table
183 // with the existing strings. Set flag to use the alternate hash code afterwards.
184 void SymbolTable::rehash_table() {
185 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
186 // This should never happen with -Xshare:dump but it might in testing mode.
187 if (DumpSharedSpaces) return;
188 // Create a new symbol table
189 SymbolTable* new_table = new SymbolTable();
191 the_table()->move_to(new_table);
193 // Delete the table and buckets (entries are reused in new table).
194 delete _the_table;
195 // Don't check if we need rehashing until the table gets unbalanced again.
196 // Then rehash with a new global seed.
197 _needs_rehashing = false;
198 _the_table = new_table;
199 }
201 // Lookup a symbol in a bucket.
203 Symbol* SymbolTable::lookup(int index, const char* name,
204 int len, unsigned int hash) {
205 int count = 0;
206 for (HashtableEntry<Symbol*, mtSymbol>* e = bucket(index); e != NULL; e = e->next()) {
207 count++; // count all entries in this bucket, not just ones with same hash
208 if (e->hash() == hash) {
209 Symbol* sym = e->literal();
210 if (sym->equals(name, len)) {
211 // something is referencing this symbol now.
212 sym->increment_refcount();
213 return sym;
214 }
215 }
216 }
217 // If the bucket size is too deep check if this hash code is insufficient.
218 if (count >= rehash_count && !needs_rehashing()) {
219 _needs_rehashing = check_rehash_table(count);
220 }
221 return NULL;
222 }
224 // Pick hashing algorithm.
225 unsigned int SymbolTable::hash_symbol(const char* s, int len) {
226 return use_alternate_hashcode() ?
227 AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
228 java_lang_String::hash_code(s, len);
229 }
232 // We take care not to be blocking while holding the
233 // SymbolTable_lock. Otherwise, the system might deadlock, since the
234 // symboltable is used during compilation (VM_thread) The lock free
235 // synchronization is simplified by the fact that we do not delete
236 // entries in the symbol table during normal execution (only during
237 // safepoints).
239 Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
240 unsigned int hashValue = hash_symbol(name, len);
241 int index = the_table()->hash_to_index(hashValue);
243 Symbol* s = the_table()->lookup(index, name, len, hashValue);
245 // Found
246 if (s != NULL) return s;
248 // Grab SymbolTable_lock first.
249 MutexLocker ml(SymbolTable_lock, THREAD);
251 // Otherwise, add to symbol to table
252 return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
253 }
255 Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
256 char* buffer;
257 int index, len;
258 unsigned int hashValue;
259 char* name;
260 {
261 debug_only(No_Safepoint_Verifier nsv;)
263 name = (char*)sym->base() + begin;
264 len = end - begin;
265 hashValue = hash_symbol(name, len);
266 index = the_table()->hash_to_index(hashValue);
267 Symbol* s = the_table()->lookup(index, name, len, hashValue);
269 // Found
270 if (s != NULL) return s;
271 }
273 // Otherwise, add to symbol to table. Copy to a C string first.
274 char stack_buf[128];
275 ResourceMark rm(THREAD);
276 if (len <= 128) {
277 buffer = stack_buf;
278 } else {
279 buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
280 }
281 for (int i=0; i<len; i++) {
282 buffer[i] = name[i];
283 }
284 // Make sure there is no safepoint in the code above since name can't move.
285 // We can't include the code in No_Safepoint_Verifier because of the
286 // ResourceMark.
288 // Grab SymbolTable_lock first.
289 MutexLocker ml(SymbolTable_lock, THREAD);
291 return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
292 }
294 Symbol* SymbolTable::lookup_only(const char* name, int len,
295 unsigned int& hash) {
296 hash = hash_symbol(name, len);
297 int index = the_table()->hash_to_index(hash);
299 Symbol* s = the_table()->lookup(index, name, len, hash);
300 return s;
301 }
303 // Look up the address of the literal in the SymbolTable for this Symbol*
304 // Do not create any new symbols
305 // Do not increment the reference count to keep this alive
306 Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
307 unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
308 int index = the_table()->hash_to_index(hash);
310 for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
311 if (e->hash() == hash) {
312 Symbol* literal_sym = e->literal();
313 if (sym == literal_sym) {
314 return e->literal_addr();
315 }
316 }
317 }
318 return NULL;
319 }
321 // Suggestion: Push unicode-based lookup all the way into the hashing
322 // and probing logic, so there is no need for convert_to_utf8 until
323 // an actual new Symbol* is created.
324 Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
325 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
326 char stack_buf[128];
327 if (utf8_length < (int) sizeof(stack_buf)) {
328 char* chars = stack_buf;
329 UNICODE::convert_to_utf8(name, utf16_length, chars);
330 return lookup(chars, utf8_length, THREAD);
331 } else {
332 ResourceMark rm(THREAD);
333 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
334 UNICODE::convert_to_utf8(name, utf16_length, chars);
335 return lookup(chars, utf8_length, THREAD);
336 }
337 }
339 Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
340 unsigned int& hash) {
341 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
342 char stack_buf[128];
343 if (utf8_length < (int) sizeof(stack_buf)) {
344 char* chars = stack_buf;
345 UNICODE::convert_to_utf8(name, utf16_length, chars);
346 return lookup_only(chars, utf8_length, hash);
347 } else {
348 ResourceMark rm;
349 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
350 UNICODE::convert_to_utf8(name, utf16_length, chars);
351 return lookup_only(chars, utf8_length, hash);
352 }
353 }
355 void SymbolTable::add(ClassLoaderData* loader_data, constantPoolHandle cp,
356 int names_count,
357 const char** names, int* lengths, int* cp_indices,
358 unsigned int* hashValues, TRAPS) {
359 // Grab SymbolTable_lock first.
360 MutexLocker ml(SymbolTable_lock, THREAD);
362 SymbolTable* table = the_table();
363 bool added = table->basic_add(loader_data, cp, names_count, names, lengths,
364 cp_indices, hashValues, CHECK);
365 if (!added) {
366 // do it the hard way
367 for (int i=0; i<names_count; i++) {
368 int index = table->hash_to_index(hashValues[i]);
369 bool c_heap = !loader_data->is_the_null_class_loader_data();
370 Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
371 cp->symbol_at_put(cp_indices[i], sym);
372 }
373 }
374 }
376 Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
377 unsigned int hash;
378 Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
379 if (result != NULL) {
380 return result;
381 }
382 // Grab SymbolTable_lock first.
383 MutexLocker ml(SymbolTable_lock, THREAD);
385 SymbolTable* table = the_table();
386 int index = table->hash_to_index(hash);
387 return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
388 }
390 Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
391 unsigned int hashValue_arg, bool c_heap, TRAPS) {
392 assert(!Universe::heap()->is_in_reserved(name),
393 "proposed name of symbol must be stable");
395 // Don't allow symbols to be created which cannot fit in a Symbol*.
396 if (len > Symbol::max_length()) {
397 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
398 "name is too long to represent");
399 }
401 // Cannot hit a safepoint in this function because the "this" pointer can move.
402 No_Safepoint_Verifier nsv;
404 // Check if the symbol table has been rehashed, if so, need to recalculate
405 // the hash value and index.
406 unsigned int hashValue;
407 int index;
408 if (use_alternate_hashcode()) {
409 hashValue = hash_symbol((const char*)name, len);
410 index = hash_to_index(hashValue);
411 } else {
412 hashValue = hashValue_arg;
413 index = index_arg;
414 }
416 // Since look-up was done lock-free, we need to check if another
417 // thread beat us in the race to insert the symbol.
418 Symbol* test = lookup(index, (char*)name, len, hashValue);
419 if (test != NULL) {
420 // A race occurred and another thread introduced the symbol.
421 assert(test->refcount() != 0, "lookup should have incremented the count");
422 return test;
423 }
425 // Create a new symbol.
426 Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
427 assert(sym->equals((char*)name, len), "symbol must be properly initialized");
429 HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
430 add_entry(index, entry);
431 return sym;
432 }
434 // This version of basic_add adds symbols in batch from the constant pool
435 // parsing.
436 bool SymbolTable::basic_add(ClassLoaderData* loader_data, constantPoolHandle cp,
437 int names_count,
438 const char** names, int* lengths,
439 int* cp_indices, unsigned int* hashValues,
440 TRAPS) {
442 // Check symbol names are not too long. If any are too long, don't add any.
443 for (int i = 0; i< names_count; i++) {
444 if (lengths[i] > Symbol::max_length()) {
445 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
446 "name is too long to represent");
447 }
448 }
450 // Cannot hit a safepoint in this function because the "this" pointer can move.
451 No_Safepoint_Verifier nsv;
453 for (int i=0; i<names_count; i++) {
454 // Check if the symbol table has been rehashed, if so, need to recalculate
455 // the hash value.
456 unsigned int hashValue;
457 if (use_alternate_hashcode()) {
458 hashValue = hash_symbol(names[i], lengths[i]);
459 } else {
460 hashValue = hashValues[i];
461 }
462 // Since look-up was done lock-free, we need to check if another
463 // thread beat us in the race to insert the symbol.
464 int index = hash_to_index(hashValue);
465 Symbol* test = lookup(index, names[i], lengths[i], hashValue);
466 if (test != NULL) {
467 // A race occurred and another thread introduced the symbol, this one
468 // will be dropped and collected. Use test instead.
469 cp->symbol_at_put(cp_indices[i], test);
470 assert(test->refcount() != 0, "lookup should have incremented the count");
471 } else {
472 // Create a new symbol. The null class loader is never unloaded so these
473 // are allocated specially in a permanent arena.
474 bool c_heap = !loader_data->is_the_null_class_loader_data();
475 Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
476 assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be???
477 HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
478 add_entry(index, entry);
479 cp->symbol_at_put(cp_indices[i], sym);
480 }
481 }
482 return true;
483 }
486 void SymbolTable::verify() {
487 for (int i = 0; i < the_table()->table_size(); ++i) {
488 HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
489 for ( ; p != NULL; p = p->next()) {
490 Symbol* s = (Symbol*)(p->literal());
491 guarantee(s != NULL, "symbol is NULL");
492 unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
493 guarantee(p->hash() == h, "broken hash in symbol table entry");
494 guarantee(the_table()->hash_to_index(h) == i,
495 "wrong index in symbol table");
496 }
497 }
498 }
500 void SymbolTable::dump(outputStream* st) {
501 the_table()->dump_table(st, "SymbolTable");
502 }
505 //---------------------------------------------------------------------------
506 // Non-product code
508 #ifndef PRODUCT
510 void SymbolTable::print_histogram() {
511 MutexLocker ml(SymbolTable_lock);
512 const int results_length = 100;
513 int results[results_length];
514 int i,j;
516 // initialize results to zero
517 for (j = 0; j < results_length; j++) {
518 results[j] = 0;
519 }
521 int total = 0;
522 int max_symbols = 0;
523 int out_of_range = 0;
524 int memory_total = 0;
525 int count = 0;
526 for (i = 0; i < the_table()->table_size(); i++) {
527 HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
528 for ( ; p != NULL; p = p->next()) {
529 memory_total += p->literal()->size();
530 count++;
531 int counter = p->literal()->utf8_length();
532 total += counter;
533 if (counter < results_length) {
534 results[counter]++;
535 } else {
536 out_of_range++;
537 }
538 max_symbols = MAX2(max_symbols, counter);
539 }
540 }
541 tty->print_cr("Symbol Table:");
542 tty->print_cr("Total number of symbols %5d", count);
543 tty->print_cr("Total size in memory %5dK",
544 (memory_total*HeapWordSize)/1024);
545 tty->print_cr("Total counted %5d", _symbols_counted);
546 tty->print_cr("Total removed %5d", _symbols_removed);
547 if (_symbols_counted > 0) {
548 tty->print_cr("Percent removed %3.2f",
549 ((float)_symbols_removed/(float)_symbols_counted)* 100);
550 }
551 tty->print_cr("Reference counts %5d", Symbol::_total_count);
552 tty->print_cr("Symbol arena size %5d used %5d",
553 arena()->size_in_bytes(), arena()->used());
554 tty->print_cr("Histogram of symbol length:");
555 tty->print_cr("%8s %5d", "Total ", total);
556 tty->print_cr("%8s %5d", "Maximum", max_symbols);
557 tty->print_cr("%8s %3.2f", "Average",
558 ((float) total / (float) the_table()->table_size()));
559 tty->print_cr("%s", "Histogram:");
560 tty->print_cr(" %s %29s", "Length", "Number chains that length");
561 for (i = 0; i < results_length; i++) {
562 if (results[i] > 0) {
563 tty->print_cr("%6d %10d", i, results[i]);
564 }
565 }
566 if (Verbose) {
567 int line_length = 70;
568 tty->print_cr("%s %30s", " Length", "Number chains that length");
569 for (i = 0; i < results_length; i++) {
570 if (results[i] > 0) {
571 tty->print("%4d", i);
572 for (j = 0; (j < results[i]) && (j < line_length); j++) {
573 tty->print("%1s", "*");
574 }
575 if (j == line_length) {
576 tty->print("%1s", "+");
577 }
578 tty->cr();
579 }
580 }
581 }
582 tty->print_cr(" %s %d: %d\n", "Number chains longer than",
583 results_length, out_of_range);
584 }
586 void SymbolTable::print() {
587 for (int i = 0; i < the_table()->table_size(); ++i) {
588 HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
589 HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
590 if (entry != NULL) {
591 while (entry != NULL) {
592 tty->print(PTR_FORMAT " ", entry->literal());
593 entry->literal()->print();
594 tty->print(" %d", entry->literal()->refcount());
595 p = entry->next_addr();
596 entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
597 }
598 tty->cr();
599 }
600 }
601 }
602 #endif // PRODUCT
604 // --------------------------------------------------------------------------
606 #ifdef ASSERT
607 class StableMemoryChecker : public StackObj {
608 enum { _bufsize = wordSize*4 };
610 address _region;
611 jint _size;
612 u1 _save_buf[_bufsize];
614 int sample(u1* save_buf) {
615 if (_size <= _bufsize) {
616 memcpy(save_buf, _region, _size);
617 return _size;
618 } else {
619 // copy head and tail
620 memcpy(&save_buf[0], _region, _bufsize/2);
621 memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
622 return (_bufsize/2)*2;
623 }
624 }
626 public:
627 StableMemoryChecker(const void* region, jint size) {
628 _region = (address) region;
629 _size = size;
630 sample(_save_buf);
631 }
633 bool verify() {
634 u1 check_buf[sizeof(_save_buf)];
635 int check_size = sample(check_buf);
636 return (0 == memcmp(_save_buf, check_buf, check_size));
637 }
639 void set_region(const void* region) { _region = (address) region; }
640 };
641 #endif
644 // --------------------------------------------------------------------------
// The string table instance used by the static StringTable entry points.
645 StringTable* StringTable::_the_table = NULL;
// Set by the bucket lookup when check_rehash_table() reports an overly deep
// bucket.
647 bool StringTable::_needs_rehashing = false;
// Next bucket index to hand out; advanced by ClaimChunkSize with Atomic::add
// in the possibly_parallel_* functions.
649 volatile int StringTable::_parallel_claimed_idx = 0;
651 // Pick hashing algorithm
652 unsigned int StringTable::hash_string(const jchar* s, int len) {
653 return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
654 java_lang_String::hash_code(s, len);
655 }
657 oop StringTable::lookup(int index, jchar* name,
658 int len, unsigned int hash) {
659 int count = 0;
660 for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) {
661 count++;
662 if (l->hash() == hash) {
663 if (java_lang_String::equals(l->literal(), name, len)) {
664 return l->literal();
665 }
666 }
667 }
668 // If the bucket size is too deep check if this hash code is insufficient.
669 if (count >= rehash_count && !needs_rehashing()) {
670 _needs_rehashing = check_rehash_table(count);
671 }
672 return NULL;
673 }
676 oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
677 int len, unsigned int hashValue_arg, TRAPS) {
679 assert(java_lang_String::equals(string(), name, len),
680 "string must be properly initialized");
681 // Cannot hit a safepoint in this function because the "this" pointer can move.
682 No_Safepoint_Verifier nsv;
684 // Check if the symbol table has been rehashed, if so, need to recalculate
685 // the hash value and index before second lookup.
686 unsigned int hashValue;
687 int index;
688 if (use_alternate_hashcode()) {
689 hashValue = hash_string(name, len);
690 index = hash_to_index(hashValue);
691 } else {
692 hashValue = hashValue_arg;
693 index = index_arg;
694 }
696 // Since look-up was done lock-free, we need to check if another
697 // thread beat us in the race to insert the symbol.
699 oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
700 if (test != NULL) {
701 // Entry already added
702 return test;
703 }
705 HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string());
706 add_entry(index, entry);
707 return string();
708 }
711 oop StringTable::lookup(Symbol* symbol) {
712 ResourceMark rm;
713 int length;
714 jchar* chars = symbol->as_unicode(length);
715 return lookup(chars, length);
716 }
718 // Tell the GC that this string was looked up in the StringTable.
719 static void ensure_string_alive(oop string) {
720 // A lookup in the StringTable could return an object that was previously
721 // considered dead. The SATB part of G1 needs to get notified about this
722 // potential resurrection, otherwise the marking might not find the object.
723 #if INCLUDE_ALL_GCS
724 if (UseG1GC && string != NULL) {
725 G1SATBCardTableModRefBS::enqueue(string);
726 }
727 #endif
728 }
730 oop StringTable::lookup(jchar* name, int len) {
731 unsigned int hash = hash_string(name, len);
732 int index = the_table()->hash_to_index(hash);
733 oop string = the_table()->lookup(index, name, len, hash);
735 ensure_string_alive(string);
737 return string;
738 }
741 oop StringTable::intern(Handle string_or_null, jchar* name,
742 int len, TRAPS) {
743 unsigned int hashValue = hash_string(name, len);
744 int index = the_table()->hash_to_index(hashValue);
745 oop found_string = the_table()->lookup(index, name, len, hashValue);
747 // Found
748 if (found_string != NULL) {
749 ensure_string_alive(found_string);
750 return found_string;
751 }
753 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
754 assert(!Universe::heap()->is_in_reserved(name),
755 "proposed name of symbol must be stable");
757 Handle string;
758 // try to reuse the string if possible
759 if (!string_or_null.is_null()) {
760 string = string_or_null;
761 } else {
762 string = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
763 }
765 #if INCLUDE_ALL_GCS
766 if (G1StringDedup::is_enabled()) {
767 // Deduplicate the string before it is interned. Note that we should never
768 // deduplicate a string after it has been interned. Doing so will counteract
769 // compiler optimizations done on e.g. interned string literals.
770 G1StringDedup::deduplicate(string());
771 }
772 #endif
774 // Grab the StringTable_lock before getting the_table() because it could
775 // change at safepoint.
776 oop added_or_found;
777 {
778 MutexLocker ml(StringTable_lock, THREAD);
779 // Otherwise, add to symbol to table
780 added_or_found = the_table()->basic_add(index, string, name, len,
781 hashValue, CHECK_NULL);
782 }
784 ensure_string_alive(added_or_found);
786 return added_or_found;
787 }
789 oop StringTable::intern(Symbol* symbol, TRAPS) {
790 if (symbol == NULL) return NULL;
791 ResourceMark rm(THREAD);
792 int length;
793 jchar* chars = symbol->as_unicode(length);
794 Handle string;
795 oop result = intern(string, chars, length, CHECK_NULL);
796 return result;
797 }
800 oop StringTable::intern(oop string, TRAPS)
801 {
802 if (string == NULL) return NULL;
803 ResourceMark rm(THREAD);
804 int length;
805 Handle h_string (THREAD, string);
806 jchar* chars = java_lang_String::as_unicode_string(string, length, CHECK_NULL);
807 oop result = intern(h_string, chars, length, CHECK_NULL);
808 return result;
809 }
812 oop StringTable::intern(const char* utf8_string, TRAPS) {
813 if (utf8_string == NULL) return NULL;
814 ResourceMark rm(THREAD);
815 int length = UTF8::unicode_length(utf8_string);
816 jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
817 UTF8::convert_to_unicode(utf8_string, chars, length);
818 Handle string;
819 oop result = intern(string, chars, length, CHECK_NULL);
820 return result;
821 }
823 void StringTable::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
824 BucketUnlinkContext context;
825 buckets_unlink_or_oops_do(is_alive, f, 0, the_table()->table_size(), &context);
826 _the_table->bulk_free_entries(&context);
827 *processed = context._num_processed;
828 *removed = context._num_removed;
829 }
831 void StringTable::possibly_parallel_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
832 // Readers of the table are unlocked, so we should only be removing
833 // entries at a safepoint.
834 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
835 const int limit = the_table()->table_size();
837 BucketUnlinkContext context;
838 for (;;) {
839 // Grab next set of buckets to scan
840 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
841 if (start_idx >= limit) {
842 // End of table
843 break;
844 }
846 int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
847 buckets_unlink_or_oops_do(is_alive, f, start_idx, end_idx, &context);
848 }
849 _the_table->bulk_free_entries(&context);
850 *processed = context._num_processed;
851 *removed = context._num_removed;
852 }
854 void StringTable::buckets_oops_do(OopClosure* f, int start_idx, int end_idx) {
855 const int limit = the_table()->table_size();
857 assert(0 <= start_idx && start_idx <= limit,
858 err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
859 assert(0 <= end_idx && end_idx <= limit,
860 err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
861 assert(start_idx <= end_idx,
862 err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
863 start_idx, end_idx));
865 for (int i = start_idx; i < end_idx; i += 1) {
866 HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
867 while (entry != NULL) {
868 assert(!entry->is_shared(), "CDS not used for the StringTable");
870 f->do_oop((oop*)entry->literal_addr());
872 entry = entry->next();
873 }
874 }
875 }
877 void StringTable::buckets_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int start_idx, int end_idx, BucketUnlinkContext* context) {
878 const int limit = the_table()->table_size();
880 assert(0 <= start_idx && start_idx <= limit,
881 err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
882 assert(0 <= end_idx && end_idx <= limit,
883 err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
884 assert(start_idx <= end_idx,
885 err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
886 start_idx, end_idx));
888 for (int i = start_idx; i < end_idx; ++i) {
889 HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
890 HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
891 while (entry != NULL) {
892 assert(!entry->is_shared(), "CDS not used for the StringTable");
894 if (is_alive->do_object_b(entry->literal())) {
895 if (f != NULL) {
896 f->do_oop((oop*)entry->literal_addr());
897 }
898 p = entry->next_addr();
899 } else {
900 *p = entry->next();
901 context->free_entry(entry);
902 }
903 context->_num_processed++;
904 entry = *p;
905 }
906 }
907 }
909 void StringTable::oops_do(OopClosure* f) {
910 buckets_oops_do(f, 0, the_table()->table_size());
911 }
913 void StringTable::possibly_parallel_oops_do(OopClosure* f) {
914 const int limit = the_table()->table_size();
916 for (;;) {
917 // Grab next set of buckets to scan
918 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
919 if (start_idx >= limit) {
920 // End of table
921 break;
922 }
924 int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
925 buckets_oops_do(f, start_idx, end_idx);
926 }
927 }
929 // This verification is part of Universe::verify() and needs to be quick.
930 // See StringTable::verify_and_compare() below for exhaustive verification.
931 void StringTable::verify() {
932 for (int i = 0; i < the_table()->table_size(); ++i) {
933 HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
934 for ( ; p != NULL; p = p->next()) {
935 oop s = p->literal();
936 guarantee(s != NULL, "interned string is NULL");
937 unsigned int h = java_lang_String::hash_string(s);
938 guarantee(p->hash() == h, "broken hash in string table entry");
939 guarantee(the_table()->hash_to_index(h) == i,
940 "wrong index in string table");
941 }
942 }
943 }
945 void StringTable::dump(outputStream* st) {
946 the_table()->dump_table(st, "StringTable");
947 }
949 StringTable::VerifyRetTypes StringTable::compare_entries(
950 int bkt1, int e_cnt1,
951 HashtableEntry<oop, mtSymbol>* e_ptr1,
952 int bkt2, int e_cnt2,
953 HashtableEntry<oop, mtSymbol>* e_ptr2) {
954 // These entries are sanity checked by verify_and_compare_entries()
955 // before this function is called.
956 oop str1 = e_ptr1->literal();
957 oop str2 = e_ptr2->literal();
959 if (str1 == str2) {
960 tty->print_cr("ERROR: identical oop values (0x" PTR_FORMAT ") "
961 "in entry @ bucket[%d][%d] and entry @ bucket[%d][%d]",
962 (void *)str1, bkt1, e_cnt1, bkt2, e_cnt2);
963 return _verify_fail_continue;
964 }
966 if (java_lang_String::equals(str1, str2)) {
967 tty->print_cr("ERROR: identical String values in entry @ "
968 "bucket[%d][%d] and entry @ bucket[%d][%d]",
969 bkt1, e_cnt1, bkt2, e_cnt2);
970 return _verify_fail_continue;
971 }
973 return _verify_pass;
974 }
976 StringTable::VerifyRetTypes StringTable::verify_entry(int bkt, int e_cnt,
977 HashtableEntry<oop, mtSymbol>* e_ptr,
978 StringTable::VerifyMesgModes mesg_mode) {
980 VerifyRetTypes ret = _verify_pass; // be optimistic
982 oop str = e_ptr->literal();
983 if (str == NULL) {
984 if (mesg_mode == _verify_with_mesgs) {
985 tty->print_cr("ERROR: NULL oop value in entry @ bucket[%d][%d]", bkt,
986 e_cnt);
987 }
988 // NULL oop means no more verifications are possible
989 return _verify_fail_done;
990 }
992 if (str->klass() != SystemDictionary::String_klass()) {
993 if (mesg_mode == _verify_with_mesgs) {
994 tty->print_cr("ERROR: oop is not a String in entry @ bucket[%d][%d]",
995 bkt, e_cnt);
996 }
997 // not a String means no more verifications are possible
998 return _verify_fail_done;
999 }
1001 unsigned int h = java_lang_String::hash_string(str);
1002 if (e_ptr->hash() != h) {
1003 if (mesg_mode == _verify_with_mesgs) {
1004 tty->print_cr("ERROR: broken hash value in entry @ bucket[%d][%d], "
1005 "bkt_hash=%d, str_hash=%d", bkt, e_cnt, e_ptr->hash(), h);
1006 }
1007 ret = _verify_fail_continue;
1008 }
1010 if (the_table()->hash_to_index(h) != bkt) {
1011 if (mesg_mode == _verify_with_mesgs) {
1012 tty->print_cr("ERROR: wrong index value for entry @ bucket[%d][%d], "
1013 "str_hash=%d, hash_to_index=%d", bkt, e_cnt, h,
1014 the_table()->hash_to_index(h));
1015 }
1016 ret = _verify_fail_continue;
1017 }
1019 return ret;
1020 }
1022 // See StringTable::verify() above for the quick verification that is
1023 // part of Universe::verify(). This verification is exhaustive and
1024 // reports on every issue that is found. StringTable::verify() only
1025 // reports on the first issue that is found.
1026 //
1027 // StringTable::verify_entry() checks:
1028 // - oop value != NULL (same as verify())
1029 // - oop value is a String
1030 // - hash(String) == hash in entry (same as verify())
1031 // - index for hash == index of entry (same as verify())
1032 //
1033 // StringTable::compare_entries() checks:
1034 // - oops are unique across all entries
1035 // - String values are unique across all entries
1036 //
1037 int StringTable::verify_and_compare_entries() {
1038 assert(StringTable_lock->is_locked(), "sanity check");
1040 int fail_cnt = 0;
1042 // first, verify all the entries individually:
1043 for (int bkt = 0; bkt < the_table()->table_size(); bkt++) {
1044 HashtableEntry<oop, mtSymbol>* e_ptr = the_table()->bucket(bkt);
1045 for (int e_cnt = 0; e_ptr != NULL; e_ptr = e_ptr->next(), e_cnt++) {
1046 VerifyRetTypes ret = verify_entry(bkt, e_cnt, e_ptr, _verify_with_mesgs);
1047 if (ret != _verify_pass) {
1048 fail_cnt++;
1049 }
1050 }
1051 }
1053 // Optimization: if the above check did not find any failures, then
1054 // the comparison loop below does not need to call verify_entry()
1055 // before calling compare_entries(). If there were failures, then we
1056 // have to call verify_entry() to see if the entry can be passed to
1057 // compare_entries() safely. When we call verify_entry() in the loop
1058 // below, we do so quietly to void duplicate messages and we don't
1059 // increment fail_cnt because the failures have already been counted.
1060 bool need_entry_verify = (fail_cnt != 0);
1062 // second, verify all entries relative to each other:
1063 for (int bkt1 = 0; bkt1 < the_table()->table_size(); bkt1++) {
1064 HashtableEntry<oop, mtSymbol>* e_ptr1 = the_table()->bucket(bkt1);
1065 for (int e_cnt1 = 0; e_ptr1 != NULL; e_ptr1 = e_ptr1->next(), e_cnt1++) {
1066 if (need_entry_verify) {
1067 VerifyRetTypes ret = verify_entry(bkt1, e_cnt1, e_ptr1,
1068 _verify_quietly);
1069 if (ret == _verify_fail_done) {
1070 // cannot use the current entry to compare against other entries
1071 continue;
1072 }
1073 }
1075 for (int bkt2 = bkt1; bkt2 < the_table()->table_size(); bkt2++) {
1076 HashtableEntry<oop, mtSymbol>* e_ptr2 = the_table()->bucket(bkt2);
1077 int e_cnt2;
1078 for (e_cnt2 = 0; e_ptr2 != NULL; e_ptr2 = e_ptr2->next(), e_cnt2++) {
1079 if (bkt1 == bkt2 && e_cnt2 <= e_cnt1) {
1080 // skip the entries up to and including the one that
1081 // we're comparing against
1082 continue;
1083 }
1085 if (need_entry_verify) {
1086 VerifyRetTypes ret = verify_entry(bkt2, e_cnt2, e_ptr2,
1087 _verify_quietly);
1088 if (ret == _verify_fail_done) {
1089 // cannot compare against this entry
1090 continue;
1091 }
1092 }
1094 // compare two entries, report and count any failures:
1095 if (compare_entries(bkt1, e_cnt1, e_ptr1, bkt2, e_cnt2, e_ptr2)
1096 != _verify_pass) {
1097 fail_cnt++;
1098 }
1099 }
1100 }
1101 }
1102 }
1103 return fail_cnt;
1104 }
1106 // Create a new table and using alternate hash code, populate the new table
1107 // with the existing strings. Set flag to use the alternate hash code afterwards.
1108 void StringTable::rehash_table() {
1109 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
1110 // This should never happen with -Xshare:dump but it might in testing mode.
1111 if (DumpSharedSpaces) return;
1112 StringTable* new_table = new StringTable();
1114 // Rehash the table
1115 the_table()->move_to(new_table);
1117 // Delete the table and buckets (entries are reused in new table).
1118 delete _the_table;
1119 // Don't check if we need rehashing until the table gets unbalanced again.
1120 // Then rehash with a new global seed.
1121 _needs_rehashing = false;
1122 _the_table = new_table;
1123 }