src/share/vm/classfile/symbolTable.cpp

Tue, 18 Mar 2014 19:07:22 +0100

author
pliden
date
Tue, 18 Mar 2014 19:07:22 +0100
changeset 6413
595c0f60d50d
parent 6229
5a32d2a3cc1e
child 6680
78bbf4d43a14
permissions
-rw-r--r--

8029075: String deduplication in G1
Summary: Implementation of JEP 192, http://openjdk.java.net/jeps/192
Reviewed-by: brutisso, tschatzl, coleenp

     1 /*
     2  * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.
     8  *
     9  * This code is distributed in the hope that it will be useful, but WITHOUT
    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    12  * version 2 for more details (a copy is included in the LICENSE file that
    13  * accompanied this code).
    14  *
    15  * You should have received a copy of the GNU General Public License version
    16  * 2 along with this work; if not, write to the Free Software Foundation,
    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    18  *
    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    20  * or visit www.oracle.com if you need additional information or have any
    21  * questions.
    22  *
    23  */
    25 #include "precompiled.hpp"
    26 #include "classfile/altHashing.hpp"
    27 #include "classfile/javaClasses.hpp"
    28 #include "classfile/symbolTable.hpp"
    29 #include "classfile/systemDictionary.hpp"
    30 #include "gc_interface/collectedHeap.inline.hpp"
    31 #include "memory/allocation.inline.hpp"
    32 #include "memory/filemap.hpp"
    33 #include "memory/gcLocker.inline.hpp"
    34 #include "oops/oop.inline.hpp"
    35 #include "oops/oop.inline2.hpp"
    36 #include "runtime/mutexLocker.hpp"
    37 #include "utilities/hashtable.inline.hpp"
    38 #if INCLUDE_ALL_GCS
    39 #include "gc_implementation/g1/g1StringDedup.hpp"
    40 #endif
    42 // --------------------------------------------------------------------------
    44 // the number of buckets a thread claims
    45 const int ClaimChunkSize = 32;
    47 SymbolTable* SymbolTable::_the_table = NULL;
    48 // Static arena for symbols that are not deallocated
    49 Arena* SymbolTable::_arena = NULL;
    50 bool SymbolTable::_needs_rehashing = false;
    52 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
    53   assert (len <= Symbol::max_length(), "should be checked by caller");
    55   Symbol* sym;
    57   if (DumpSharedSpaces) {
    58     // Allocate all symbols to CLD shared metaspace
    59     sym = new (len, ClassLoaderData::the_null_class_loader_data(), THREAD) Symbol(name, len, -1);
    60   } else if (c_heap) {
    61     // refcount starts as 1
    62     sym = new (len, THREAD) Symbol(name, len, 1);
    63     assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
    64   } else {
    65     // Allocate to global arena
    66     sym = new (len, arena(), THREAD) Symbol(name, len, -1);
    67   }
    68   return sym;
    69 }
    71 void SymbolTable::initialize_symbols(int arena_alloc_size) {
    72   // Initialize the arena for global symbols, size passed in depends on CDS.
    73   if (arena_alloc_size == 0) {
    74     _arena = new (mtSymbol) Arena();
    75   } else {
    76     _arena = new (mtSymbol) Arena(arena_alloc_size);
    77   }
    78 }
    80 // Call function for all symbols in the symbol table.
    81 void SymbolTable::symbols_do(SymbolClosure *cl) {
    82   const int n = the_table()->table_size();
    83   for (int i = 0; i < n; i++) {
    84     for (HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
    85          p != NULL;
    86          p = p->next()) {
    87       cl->do_symbol(p->literal_addr());
    88     }
    89   }
    90 }
    92 int SymbolTable::_symbols_removed = 0;
    93 int SymbolTable::_symbols_counted = 0;
    94 volatile int SymbolTable::_parallel_claimed_idx = 0;
    96 void SymbolTable::buckets_unlink(int start_idx, int end_idx, int* processed, int* removed, size_t* memory_total) {
    97   for (int i = start_idx; i < end_idx; ++i) {
    98     HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
    99     HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
   100     while (entry != NULL) {
   101       // Shared entries are normally at the end of the bucket and if we run into
   102       // a shared entry, then there is nothing more to remove. However, if we
   103       // have rehashed the table, then the shared entries are no longer at the
   104       // end of the bucket.
   105       if (entry->is_shared() && !use_alternate_hashcode()) {
   106         break;
   107       }
   108       Symbol* s = entry->literal();
   109       (*memory_total) += s->size();
   110       (*processed)++;
   111       assert(s != NULL, "just checking");
   112       // If reference count is zero, remove.
   113       if (s->refcount() == 0) {
   114         assert(!entry->is_shared(), "shared entries should be kept live");
   115         delete s;
   116         (*removed)++;
   117         *p = entry->next();
   118         the_table()->free_entry(entry);
   119       } else {
   120         p = entry->next_addr();
   121       }
   122       // get next entry
   123       entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
   124     }
   125   }
   126 }
   128 // Remove unreferenced symbols from the symbol table
   129 // This is done late during GC.
   130 void SymbolTable::unlink(int* processed, int* removed) {
   131   size_t memory_total = 0;
   132   buckets_unlink(0, the_table()->table_size(), processed, removed, &memory_total);
   133   _symbols_removed += *removed;
   134   _symbols_counted += *processed;
   135   // Exclude printing for normal PrintGCDetails because people parse
   136   // this output.
   137   if (PrintGCDetails && Verbose && WizardMode) {
   138     gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", *processed,
   139                         (memory_total*HeapWordSize)/1024);
   140   }
   141 }
   143 void SymbolTable::possibly_parallel_unlink(int* processed, int* removed) {
   144   const int limit = the_table()->table_size();
   146   size_t memory_total = 0;
   148   for (;;) {
   149     // Grab next set of buckets to scan
   150     int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
   151     if (start_idx >= limit) {
   152       // End of table
   153       break;
   154     }
   156     int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
   157     buckets_unlink(start_idx, end_idx, processed, removed, &memory_total);
   158   }
   159   Atomic::add(*processed, &_symbols_counted);
   160   Atomic::add(*removed, &_symbols_removed);
   161   // Exclude printing for normal PrintGCDetails because people parse
   162   // this output.
   163   if (PrintGCDetails && Verbose && WizardMode) {
   164     gclog_or_tty->print(" [Symbols: scanned=%d removed=%d size=" SIZE_FORMAT "K] ", *processed, *removed,
   165                         (memory_total*HeapWordSize)/1024);
   166   }
   167 }
   169 // Create a new table and using alternate hash code, populate the new table
   170 // with the existing strings.   Set flag to use the alternate hash code afterwards.
   171 void SymbolTable::rehash_table() {
   172   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
   173   // This should never happen with -Xshare:dump but it might in testing mode.
   174   if (DumpSharedSpaces) return;
   175   // Create a new symbol table
   176   SymbolTable* new_table = new SymbolTable();
   178   the_table()->move_to(new_table);
   180   // Delete the table and buckets (entries are reused in new table).
   181   delete _the_table;
   182   // Don't check if we need rehashing until the table gets unbalanced again.
   183   // Then rehash with a new global seed.
   184   _needs_rehashing = false;
   185   _the_table = new_table;
   186 }
   188 // Lookup a symbol in a bucket.
   190 Symbol* SymbolTable::lookup(int index, const char* name,
   191                               int len, unsigned int hash) {
   192   int count = 0;
   193   for (HashtableEntry<Symbol*, mtSymbol>* e = bucket(index); e != NULL; e = e->next()) {
   194     count++;  // count all entries in this bucket, not just ones with same hash
   195     if (e->hash() == hash) {
   196       Symbol* sym = e->literal();
   197       if (sym->equals(name, len)) {
   198         // something is referencing this symbol now.
   199         sym->increment_refcount();
   200         return sym;
   201       }
   202     }
   203   }
   204   // If the bucket size is too deep check if this hash code is insufficient.
   205   if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
   206     _needs_rehashing = check_rehash_table(count);
   207   }
   208   return NULL;
   209 }
   211 // Pick hashing algorithm.
   212 unsigned int SymbolTable::hash_symbol(const char* s, int len) {
   213   return use_alternate_hashcode() ?
   214            AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
   215            java_lang_String::hash_code(s, len);
   216 }
   219 // We take care not to be blocking while holding the
   220 // SymbolTable_lock. Otherwise, the system might deadlock, since the
   221 // symboltable is used during compilation (VM_thread) The lock free
   222 // synchronization is simplified by the fact that we do not delete
   223 // entries in the symbol table during normal execution (only during
   224 // safepoints).
   226 Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
   227   unsigned int hashValue = hash_symbol(name, len);
   228   int index = the_table()->hash_to_index(hashValue);
   230   Symbol* s = the_table()->lookup(index, name, len, hashValue);
   232   // Found
   233   if (s != NULL) return s;
   235   // Grab SymbolTable_lock first.
   236   MutexLocker ml(SymbolTable_lock, THREAD);
   238   // Otherwise, add to symbol to table
   239   return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
   240 }
   242 Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
   243   char* buffer;
   244   int index, len;
   245   unsigned int hashValue;
   246   char* name;
   247   {
   248     debug_only(No_Safepoint_Verifier nsv;)
   250     name = (char*)sym->base() + begin;
   251     len = end - begin;
   252     hashValue = hash_symbol(name, len);
   253     index = the_table()->hash_to_index(hashValue);
   254     Symbol* s = the_table()->lookup(index, name, len, hashValue);
   256     // Found
   257     if (s != NULL) return s;
   258   }
   260   // Otherwise, add to symbol to table. Copy to a C string first.
   261   char stack_buf[128];
   262   ResourceMark rm(THREAD);
   263   if (len <= 128) {
   264     buffer = stack_buf;
   265   } else {
   266     buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
   267   }
   268   for (int i=0; i<len; i++) {
   269     buffer[i] = name[i];
   270   }
   271   // Make sure there is no safepoint in the code above since name can't move.
   272   // We can't include the code in No_Safepoint_Verifier because of the
   273   // ResourceMark.
   275   // Grab SymbolTable_lock first.
   276   MutexLocker ml(SymbolTable_lock, THREAD);
   278   return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
   279 }
   281 Symbol* SymbolTable::lookup_only(const char* name, int len,
   282                                    unsigned int& hash) {
   283   hash = hash_symbol(name, len);
   284   int index = the_table()->hash_to_index(hash);
   286   Symbol* s = the_table()->lookup(index, name, len, hash);
   287   return s;
   288 }
   290 // Look up the address of the literal in the SymbolTable for this Symbol*
   291 // Do not create any new symbols
   292 // Do not increment the reference count to keep this alive
   293 Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){
   294   unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length());
   295   int index = the_table()->hash_to_index(hash);
   297   for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(index); e != NULL; e = e->next()) {
   298     if (e->hash() == hash) {
   299       Symbol* literal_sym = e->literal();
   300       if (sym == literal_sym) {
   301         return e->literal_addr();
   302       }
   303     }
   304   }
   305   return NULL;
   306 }
   308 // Suggestion: Push unicode-based lookup all the way into the hashing
   309 // and probing logic, so there is no need for convert_to_utf8 until
   310 // an actual new Symbol* is created.
   311 Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
   312   int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
   313   char stack_buf[128];
   314   if (utf8_length < (int) sizeof(stack_buf)) {
   315     char* chars = stack_buf;
   316     UNICODE::convert_to_utf8(name, utf16_length, chars);
   317     return lookup(chars, utf8_length, THREAD);
   318   } else {
   319     ResourceMark rm(THREAD);
   320     char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
   321     UNICODE::convert_to_utf8(name, utf16_length, chars);
   322     return lookup(chars, utf8_length, THREAD);
   323   }
   324 }
   326 Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
   327                                            unsigned int& hash) {
   328   int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
   329   char stack_buf[128];
   330   if (utf8_length < (int) sizeof(stack_buf)) {
   331     char* chars = stack_buf;
   332     UNICODE::convert_to_utf8(name, utf16_length, chars);
   333     return lookup_only(chars, utf8_length, hash);
   334   } else {
   335     ResourceMark rm;
   336     char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
   337     UNICODE::convert_to_utf8(name, utf16_length, chars);
   338     return lookup_only(chars, utf8_length, hash);
   339   }
   340 }
   342 void SymbolTable::add(ClassLoaderData* loader_data, constantPoolHandle cp,
   343                       int names_count,
   344                       const char** names, int* lengths, int* cp_indices,
   345                       unsigned int* hashValues, TRAPS) {
   346   // Grab SymbolTable_lock first.
   347   MutexLocker ml(SymbolTable_lock, THREAD);
   349   SymbolTable* table = the_table();
   350   bool added = table->basic_add(loader_data, cp, names_count, names, lengths,
   351                                 cp_indices, hashValues, CHECK);
   352   if (!added) {
   353     // do it the hard way
   354     for (int i=0; i<names_count; i++) {
   355       int index = table->hash_to_index(hashValues[i]);
   356       bool c_heap = !loader_data->is_the_null_class_loader_data();
   357       Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK);
   358       cp->symbol_at_put(cp_indices[i], sym);
   359     }
   360   }
   361 }
   363 Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
   364   unsigned int hash;
   365   Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash);
   366   if (result != NULL) {
   367     return result;
   368   }
   369   // Grab SymbolTable_lock first.
   370   MutexLocker ml(SymbolTable_lock, THREAD);
   372   SymbolTable* table = the_table();
   373   int index = table->hash_to_index(hash);
   374   return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
   375 }
   377 Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
   378                                unsigned int hashValue_arg, bool c_heap, TRAPS) {
   379   assert(!Universe::heap()->is_in_reserved(name),
   380          "proposed name of symbol must be stable");
   382   // Don't allow symbols to be created which cannot fit in a Symbol*.
   383   if (len > Symbol::max_length()) {
   384     THROW_MSG_0(vmSymbols::java_lang_InternalError(),
   385                 "name is too long to represent");
   386   }
   388   // Cannot hit a safepoint in this function because the "this" pointer can move.
   389   No_Safepoint_Verifier nsv;
   391   // Check if the symbol table has been rehashed, if so, need to recalculate
   392   // the hash value and index.
   393   unsigned int hashValue;
   394   int index;
   395   if (use_alternate_hashcode()) {
   396     hashValue = hash_symbol((const char*)name, len);
   397     index = hash_to_index(hashValue);
   398   } else {
   399     hashValue = hashValue_arg;
   400     index = index_arg;
   401   }
   403   // Since look-up was done lock-free, we need to check if another
   404   // thread beat us in the race to insert the symbol.
   405   Symbol* test = lookup(index, (char*)name, len, hashValue);
   406   if (test != NULL) {
   407     // A race occurred and another thread introduced the symbol.
   408     assert(test->refcount() != 0, "lookup should have incremented the count");
   409     return test;
   410   }
   412   // Create a new symbol.
   413   Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL);
   414   assert(sym->equals((char*)name, len), "symbol must be properly initialized");
   416   HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
   417   add_entry(index, entry);
   418   return sym;
   419 }
   421 // This version of basic_add adds symbols in batch from the constant pool
   422 // parsing.
   423 bool SymbolTable::basic_add(ClassLoaderData* loader_data, constantPoolHandle cp,
   424                             int names_count,
   425                             const char** names, int* lengths,
   426                             int* cp_indices, unsigned int* hashValues,
   427                             TRAPS) {
   429   // Check symbol names are not too long.  If any are too long, don't add any.
   430   for (int i = 0; i< names_count; i++) {
   431     if (lengths[i] > Symbol::max_length()) {
   432       THROW_MSG_0(vmSymbols::java_lang_InternalError(),
   433                   "name is too long to represent");
   434     }
   435   }
   437   // Cannot hit a safepoint in this function because the "this" pointer can move.
   438   No_Safepoint_Verifier nsv;
   440   for (int i=0; i<names_count; i++) {
   441     // Check if the symbol table has been rehashed, if so, need to recalculate
   442     // the hash value.
   443     unsigned int hashValue;
   444     if (use_alternate_hashcode()) {
   445       hashValue = hash_symbol(names[i], lengths[i]);
   446     } else {
   447       hashValue = hashValues[i];
   448     }
   449     // Since look-up was done lock-free, we need to check if another
   450     // thread beat us in the race to insert the symbol.
   451     int index = hash_to_index(hashValue);
   452     Symbol* test = lookup(index, names[i], lengths[i], hashValue);
   453     if (test != NULL) {
   454       // A race occurred and another thread introduced the symbol, this one
   455       // will be dropped and collected. Use test instead.
   456       cp->symbol_at_put(cp_indices[i], test);
   457       assert(test->refcount() != 0, "lookup should have incremented the count");
   458     } else {
   459       // Create a new symbol.  The null class loader is never unloaded so these
   460       // are allocated specially in a permanent arena.
   461       bool c_heap = !loader_data->is_the_null_class_loader_data();
   462       Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
   463       assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
   464       HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym);
   465       add_entry(index, entry);
   466       cp->symbol_at_put(cp_indices[i], sym);
   467     }
   468   }
   469   return true;
   470 }
   473 void SymbolTable::verify() {
   474   for (int i = 0; i < the_table()->table_size(); ++i) {
   475     HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
   476     for ( ; p != NULL; p = p->next()) {
   477       Symbol* s = (Symbol*)(p->literal());
   478       guarantee(s != NULL, "symbol is NULL");
   479       unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
   480       guarantee(p->hash() == h, "broken hash in symbol table entry");
   481       guarantee(the_table()->hash_to_index(h) == i,
   482                 "wrong index in symbol table");
   483     }
   484   }
   485 }
   487 void SymbolTable::dump(outputStream* st) {
   488   the_table()->dump_table(st, "SymbolTable");
   489 }
   492 //---------------------------------------------------------------------------
   493 // Non-product code
   495 #ifndef PRODUCT
   497 void SymbolTable::print_histogram() {
   498   MutexLocker ml(SymbolTable_lock);
   499   const int results_length = 100;
   500   int results[results_length];
   501   int i,j;
   503   // initialize results to zero
   504   for (j = 0; j < results_length; j++) {
   505     results[j] = 0;
   506   }
   508   int total = 0;
   509   int max_symbols = 0;
   510   int out_of_range = 0;
   511   int memory_total = 0;
   512   int count = 0;
   513   for (i = 0; i < the_table()->table_size(); i++) {
   514     HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i);
   515     for ( ; p != NULL; p = p->next()) {
   516       memory_total += p->literal()->size();
   517       count++;
   518       int counter = p->literal()->utf8_length();
   519       total += counter;
   520       if (counter < results_length) {
   521         results[counter]++;
   522       } else {
   523         out_of_range++;
   524       }
   525       max_symbols = MAX2(max_symbols, counter);
   526     }
   527   }
   528   tty->print_cr("Symbol Table:");
   529   tty->print_cr("Total number of symbols  %5d", count);
   530   tty->print_cr("Total size in memory     %5dK",
   531           (memory_total*HeapWordSize)/1024);
   532   tty->print_cr("Total counted            %5d", _symbols_counted);
   533   tty->print_cr("Total removed            %5d", _symbols_removed);
   534   if (_symbols_counted > 0) {
   535     tty->print_cr("Percent removed          %3.2f",
   536           ((float)_symbols_removed/(float)_symbols_counted)* 100);
   537   }
   538   tty->print_cr("Reference counts         %5d", Symbol::_total_count);
   539   tty->print_cr("Symbol arena size        %5d used %5d",
   540                  arena()->size_in_bytes(), arena()->used());
   541   tty->print_cr("Histogram of symbol length:");
   542   tty->print_cr("%8s %5d", "Total  ", total);
   543   tty->print_cr("%8s %5d", "Maximum", max_symbols);
   544   tty->print_cr("%8s %3.2f", "Average",
   545           ((float) total / (float) the_table()->table_size()));
   546   tty->print_cr("%s", "Histogram:");
   547   tty->print_cr(" %s %29s", "Length", "Number chains that length");
   548   for (i = 0; i < results_length; i++) {
   549     if (results[i] > 0) {
   550       tty->print_cr("%6d %10d", i, results[i]);
   551     }
   552   }
   553   if (Verbose) {
   554     int line_length = 70;
   555     tty->print_cr("%s %30s", " Length", "Number chains that length");
   556     for (i = 0; i < results_length; i++) {
   557       if (results[i] > 0) {
   558         tty->print("%4d", i);
   559         for (j = 0; (j < results[i]) && (j < line_length);  j++) {
   560           tty->print("%1s", "*");
   561         }
   562         if (j == line_length) {
   563           tty->print("%1s", "+");
   564         }
   565         tty->cr();
   566       }
   567     }
   568   }
   569   tty->print_cr(" %s %d: %d\n", "Number chains longer than",
   570                     results_length, out_of_range);
   571 }
   573 void SymbolTable::print() {
   574   for (int i = 0; i < the_table()->table_size(); ++i) {
   575     HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
   576     HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
   577     if (entry != NULL) {
   578       while (entry != NULL) {
   579         tty->print(PTR_FORMAT " ", entry->literal());
   580         entry->literal()->print();
   581         tty->print(" %d", entry->literal()->refcount());
   582         p = entry->next_addr();
   583         entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
   584       }
   585       tty->cr();
   586     }
   587   }
   588 }
   589 #endif // PRODUCT
   591 // --------------------------------------------------------------------------
   593 #ifdef ASSERT
   594 class StableMemoryChecker : public StackObj {
   595   enum { _bufsize = wordSize*4 };
   597   address _region;
   598   jint    _size;
   599   u1      _save_buf[_bufsize];
   601   int sample(u1* save_buf) {
   602     if (_size <= _bufsize) {
   603       memcpy(save_buf, _region, _size);
   604       return _size;
   605     } else {
   606       // copy head and tail
   607       memcpy(&save_buf[0],          _region,                      _bufsize/2);
   608       memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
   609       return (_bufsize/2)*2;
   610     }
   611   }
   613  public:
   614   StableMemoryChecker(const void* region, jint size) {
   615     _region = (address) region;
   616     _size   = size;
   617     sample(_save_buf);
   618   }
   620   bool verify() {
   621     u1 check_buf[sizeof(_save_buf)];
   622     int check_size = sample(check_buf);
   623     return (0 == memcmp(_save_buf, check_buf, check_size));
   624   }
   626   void set_region(const void* region) { _region = (address) region; }
   627 };
   628 #endif
   631 // --------------------------------------------------------------------------
   632 StringTable* StringTable::_the_table = NULL;
   634 bool StringTable::_needs_rehashing = false;
   636 volatile int StringTable::_parallel_claimed_idx = 0;
   638 // Pick hashing algorithm
   639 unsigned int StringTable::hash_string(const jchar* s, int len) {
   640   return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
   641                                     java_lang_String::hash_code(s, len);
   642 }
   644 oop StringTable::lookup(int index, jchar* name,
   645                         int len, unsigned int hash) {
   646   int count = 0;
   647   for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) {
   648     count++;
   649     if (l->hash() == hash) {
   650       if (java_lang_String::equals(l->literal(), name, len)) {
   651         return l->literal();
   652       }
   653     }
   654   }
   655   // If the bucket size is too deep check if this hash code is insufficient.
   656   if (count >= BasicHashtable<mtSymbol>::rehash_count && !needs_rehashing()) {
   657     _needs_rehashing = check_rehash_table(count);
   658   }
   659   return NULL;
   660 }
   663 oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
   664                            int len, unsigned int hashValue_arg, TRAPS) {
   666   assert(java_lang_String::equals(string(), name, len),
   667          "string must be properly initialized");
   668   // Cannot hit a safepoint in this function because the "this" pointer can move.
   669   No_Safepoint_Verifier nsv;
   671   // Check if the symbol table has been rehashed, if so, need to recalculate
   672   // the hash value and index before second lookup.
   673   unsigned int hashValue;
   674   int index;
   675   if (use_alternate_hashcode()) {
   676     hashValue = hash_string(name, len);
   677     index = hash_to_index(hashValue);
   678   } else {
   679     hashValue = hashValue_arg;
   680     index = index_arg;
   681   }
   683   // Since look-up was done lock-free, we need to check if another
   684   // thread beat us in the race to insert the symbol.
   686   oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
   687   if (test != NULL) {
   688     // Entry already added
   689     return test;
   690   }
   692   HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string());
   693   add_entry(index, entry);
   694   return string();
   695 }
   698 oop StringTable::lookup(Symbol* symbol) {
   699   ResourceMark rm;
   700   int length;
   701   jchar* chars = symbol->as_unicode(length);
   702   return lookup(chars, length);
   703 }
   706 oop StringTable::lookup(jchar* name, int len) {
   707   unsigned int hash = hash_string(name, len);
   708   int index = the_table()->hash_to_index(hash);
   709   return the_table()->lookup(index, name, len, hash);
   710 }
   713 oop StringTable::intern(Handle string_or_null, jchar* name,
   714                         int len, TRAPS) {
   715   unsigned int hashValue = hash_string(name, len);
   716   int index = the_table()->hash_to_index(hashValue);
   717   oop found_string = the_table()->lookup(index, name, len, hashValue);
   719   // Found
   720   if (found_string != NULL) return found_string;
   722   debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
   723   assert(!Universe::heap()->is_in_reserved(name),
   724          "proposed name of symbol must be stable");
   726   Handle string;
   727   // try to reuse the string if possible
   728   if (!string_or_null.is_null()) {
   729     string = string_or_null;
   730   } else {
   731     string = java_lang_String::create_from_unicode(name, len, CHECK_NULL);
   732   }
   734 #if INCLUDE_ALL_GCS
   735   if (G1StringDedup::is_enabled()) {
   736     // Deduplicate the string before it is interned. Note that we should never
   737     // deduplicate a string after it has been interned. Doing so will counteract
   738     // compiler optimizations done on e.g. interned string literals.
   739     G1StringDedup::deduplicate(string());
   740   }
   741 #endif
   743   // Grab the StringTable_lock before getting the_table() because it could
   744   // change at safepoint.
   745   MutexLocker ml(StringTable_lock, THREAD);
   747   // Otherwise, add to symbol to table
   748   return the_table()->basic_add(index, string, name, len,
   749                                 hashValue, CHECK_NULL);
   750 }
   752 oop StringTable::intern(Symbol* symbol, TRAPS) {
   753   if (symbol == NULL) return NULL;
   754   ResourceMark rm(THREAD);
   755   int length;
   756   jchar* chars = symbol->as_unicode(length);
   757   Handle string;
   758   oop result = intern(string, chars, length, CHECK_NULL);
   759   return result;
   760 }
   763 oop StringTable::intern(oop string, TRAPS)
   764 {
   765   if (string == NULL) return NULL;
   766   ResourceMark rm(THREAD);
   767   int length;
   768   Handle h_string (THREAD, string);
   769   jchar* chars = java_lang_String::as_unicode_string(string, length, CHECK_NULL);
   770   oop result = intern(h_string, chars, length, CHECK_NULL);
   771   return result;
   772 }
   775 oop StringTable::intern(const char* utf8_string, TRAPS) {
   776   if (utf8_string == NULL) return NULL;
   777   ResourceMark rm(THREAD);
   778   int length = UTF8::unicode_length(utf8_string);
   779   jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
   780   UTF8::convert_to_unicode(utf8_string, chars, length);
   781   Handle string;
   782   oop result = intern(string, chars, length, CHECK_NULL);
   783   return result;
   784 }
   786 void StringTable::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
   787   buckets_unlink_or_oops_do(is_alive, f, 0, the_table()->table_size(), processed, removed);
   788 }
   790 void StringTable::possibly_parallel_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) {
   791   // Readers of the table are unlocked, so we should only be removing
   792   // entries at a safepoint.
   793   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
   794   const int limit = the_table()->table_size();
   796   for (;;) {
   797     // Grab next set of buckets to scan
   798     int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
   799     if (start_idx >= limit) {
   800       // End of table
   801       break;
   802     }
   804     int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
   805     buckets_unlink_or_oops_do(is_alive, f, start_idx, end_idx, processed, removed);
   806   }
   807 }
   809 void StringTable::buckets_oops_do(OopClosure* f, int start_idx, int end_idx) {
   810   const int limit = the_table()->table_size();
   812   assert(0 <= start_idx && start_idx <= limit,
   813          err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
   814   assert(0 <= end_idx && end_idx <= limit,
   815          err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
   816   assert(start_idx <= end_idx,
   817          err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
   818                  start_idx, end_idx));
   820   for (int i = start_idx; i < end_idx; i += 1) {
   821     HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
   822     while (entry != NULL) {
   823       assert(!entry->is_shared(), "CDS not used for the StringTable");
   825       f->do_oop((oop*)entry->literal_addr());
   827       entry = entry->next();
   828     }
   829   }
   830 }
   832 void StringTable::buckets_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int start_idx, int end_idx, int* processed, int* removed) {
   833   const int limit = the_table()->table_size();
   835   assert(0 <= start_idx && start_idx <= limit,
   836          err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
   837   assert(0 <= end_idx && end_idx <= limit,
   838          err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
   839   assert(start_idx <= end_idx,
   840          err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
   841                  start_idx, end_idx));
   843   for (int i = start_idx; i < end_idx; ++i) {
   844     HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
   845     HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
   846     while (entry != NULL) {
   847       assert(!entry->is_shared(), "CDS not used for the StringTable");
   849       if (is_alive->do_object_b(entry->literal())) {
   850         if (f != NULL) {
   851           f->do_oop((oop*)entry->literal_addr());
   852         }
   853         p = entry->next_addr();
   854       } else {
   855         *p = entry->next();
   856         the_table()->free_entry(entry);
   857         (*removed)++;
   858       }
   859       (*processed)++;
   860       entry = *p;
   861     }
   862   }
   863 }
   865 void StringTable::oops_do(OopClosure* f) {
   866   buckets_oops_do(f, 0, the_table()->table_size());
   867 }
   869 void StringTable::possibly_parallel_oops_do(OopClosure* f) {
   870   const int limit = the_table()->table_size();
   872   for (;;) {
   873     // Grab next set of buckets to scan
   874     int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
   875     if (start_idx >= limit) {
   876       // End of table
   877       break;
   878     }
   880     int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
   881     buckets_oops_do(f, start_idx, end_idx);
   882   }
   883 }
   885 // This verification is part of Universe::verify() and needs to be quick.
   886 // See StringTable::verify_and_compare() below for exhaustive verification.
   887 void StringTable::verify() {
   888   for (int i = 0; i < the_table()->table_size(); ++i) {
   889     HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i);
   890     for ( ; p != NULL; p = p->next()) {
   891       oop s = p->literal();
   892       guarantee(s != NULL, "interned string is NULL");
   893       unsigned int h = java_lang_String::hash_string(s);
   894       guarantee(p->hash() == h, "broken hash in string table entry");
   895       guarantee(the_table()->hash_to_index(h) == i,
   896                 "wrong index in string table");
   897     }
   898   }
   899 }
   901 void StringTable::dump(outputStream* st) {
   902   the_table()->dump_table(st, "StringTable");
   903 }
   905 StringTable::VerifyRetTypes StringTable::compare_entries(
   906                                       int bkt1, int e_cnt1,
   907                                       HashtableEntry<oop, mtSymbol>* e_ptr1,
   908                                       int bkt2, int e_cnt2,
   909                                       HashtableEntry<oop, mtSymbol>* e_ptr2) {
   910   // These entries are sanity checked by verify_and_compare_entries()
   911   // before this function is called.
   912   oop str1 = e_ptr1->literal();
   913   oop str2 = e_ptr2->literal();
   915   if (str1 == str2) {
   916     tty->print_cr("ERROR: identical oop values (0x" PTR_FORMAT ") "
   917                   "in entry @ bucket[%d][%d] and entry @ bucket[%d][%d]",
   918                   (void *)str1, bkt1, e_cnt1, bkt2, e_cnt2);
   919     return _verify_fail_continue;
   920   }
   922   if (java_lang_String::equals(str1, str2)) {
   923     tty->print_cr("ERROR: identical String values in entry @ "
   924                   "bucket[%d][%d] and entry @ bucket[%d][%d]",
   925                   bkt1, e_cnt1, bkt2, e_cnt2);
   926     return _verify_fail_continue;
   927   }
   929   return _verify_pass;
   930 }
   932 StringTable::VerifyRetTypes StringTable::verify_entry(int bkt, int e_cnt,
   933                                       HashtableEntry<oop, mtSymbol>* e_ptr,
   934                                       StringTable::VerifyMesgModes mesg_mode) {
   936   VerifyRetTypes ret = _verify_pass;  // be optimistic
   938   oop str = e_ptr->literal();
   939   if (str == NULL) {
   940     if (mesg_mode == _verify_with_mesgs) {
   941       tty->print_cr("ERROR: NULL oop value in entry @ bucket[%d][%d]", bkt,
   942                     e_cnt);
   943     }
   944     // NULL oop means no more verifications are possible
   945     return _verify_fail_done;
   946   }
   948   if (str->klass() != SystemDictionary::String_klass()) {
   949     if (mesg_mode == _verify_with_mesgs) {
   950       tty->print_cr("ERROR: oop is not a String in entry @ bucket[%d][%d]",
   951                     bkt, e_cnt);
   952     }
   953     // not a String means no more verifications are possible
   954     return _verify_fail_done;
   955   }
   957   unsigned int h = java_lang_String::hash_string(str);
   958   if (e_ptr->hash() != h) {
   959     if (mesg_mode == _verify_with_mesgs) {
   960       tty->print_cr("ERROR: broken hash value in entry @ bucket[%d][%d], "
   961                     "bkt_hash=%d, str_hash=%d", bkt, e_cnt, e_ptr->hash(), h);
   962     }
   963     ret = _verify_fail_continue;
   964   }
   966   if (the_table()->hash_to_index(h) != bkt) {
   967     if (mesg_mode == _verify_with_mesgs) {
   968       tty->print_cr("ERROR: wrong index value for entry @ bucket[%d][%d], "
   969                     "str_hash=%d, hash_to_index=%d", bkt, e_cnt, h,
   970                     the_table()->hash_to_index(h));
   971     }
   972     ret = _verify_fail_continue;
   973   }
   975   return ret;
   976 }
   978 // See StringTable::verify() above for the quick verification that is
   979 // part of Universe::verify(). This verification is exhaustive and
   980 // reports on every issue that is found. StringTable::verify() only
   981 // reports on the first issue that is found.
   982 //
   983 // StringTable::verify_entry() checks:
   984 // - oop value != NULL (same as verify())
   985 // - oop value is a String
   986 // - hash(String) == hash in entry (same as verify())
   987 // - index for hash == index of entry (same as verify())
   988 //
   989 // StringTable::compare_entries() checks:
   990 // - oops are unique across all entries
   991 // - String values are unique across all entries
   992 //
   993 int StringTable::verify_and_compare_entries() {
   994   assert(StringTable_lock->is_locked(), "sanity check");
   996   int  fail_cnt = 0;
   998   // first, verify all the entries individually:
   999   for (int bkt = 0; bkt < the_table()->table_size(); bkt++) {
  1000     HashtableEntry<oop, mtSymbol>* e_ptr = the_table()->bucket(bkt);
  1001     for (int e_cnt = 0; e_ptr != NULL; e_ptr = e_ptr->next(), e_cnt++) {
  1002       VerifyRetTypes ret = verify_entry(bkt, e_cnt, e_ptr, _verify_with_mesgs);
  1003       if (ret != _verify_pass) {
  1004         fail_cnt++;
  1009   // Optimization: if the above check did not find any failures, then
  1010   // the comparison loop below does not need to call verify_entry()
  1011   // before calling compare_entries(). If there were failures, then we
  1012   // have to call verify_entry() to see if the entry can be passed to
  1013   // compare_entries() safely. When we call verify_entry() in the loop
  1014   // below, we do so quietly to void duplicate messages and we don't
  1015   // increment fail_cnt because the failures have already been counted.
  1016   bool need_entry_verify = (fail_cnt != 0);
  1018   // second, verify all entries relative to each other:
  1019   for (int bkt1 = 0; bkt1 < the_table()->table_size(); bkt1++) {
  1020     HashtableEntry<oop, mtSymbol>* e_ptr1 = the_table()->bucket(bkt1);
  1021     for (int e_cnt1 = 0; e_ptr1 != NULL; e_ptr1 = e_ptr1->next(), e_cnt1++) {
  1022       if (need_entry_verify) {
  1023         VerifyRetTypes ret = verify_entry(bkt1, e_cnt1, e_ptr1,
  1024                                           _verify_quietly);
  1025         if (ret == _verify_fail_done) {
  1026           // cannot use the current entry to compare against other entries
  1027           continue;
  1031       for (int bkt2 = bkt1; bkt2 < the_table()->table_size(); bkt2++) {
  1032         HashtableEntry<oop, mtSymbol>* e_ptr2 = the_table()->bucket(bkt2);
  1033         int e_cnt2;
  1034         for (e_cnt2 = 0; e_ptr2 != NULL; e_ptr2 = e_ptr2->next(), e_cnt2++) {
  1035           if (bkt1 == bkt2 && e_cnt2 <= e_cnt1) {
  1036             // skip the entries up to and including the one that
  1037             // we're comparing against
  1038             continue;
  1041           if (need_entry_verify) {
  1042             VerifyRetTypes ret = verify_entry(bkt2, e_cnt2, e_ptr2,
  1043                                               _verify_quietly);
  1044             if (ret == _verify_fail_done) {
  1045               // cannot compare against this entry
  1046               continue;
  1050           // compare two entries, report and count any failures:
  1051           if (compare_entries(bkt1, e_cnt1, e_ptr1, bkt2, e_cnt2, e_ptr2)
  1052               != _verify_pass) {
  1053             fail_cnt++;
  1059   return fail_cnt;
  1062 // Create a new table and using alternate hash code, populate the new table
  1063 // with the existing strings.   Set flag to use the alternate hash code afterwards.
  1064 void StringTable::rehash_table() {
  1065   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
  1066   // This should never happen with -Xshare:dump but it might in testing mode.
  1067   if (DumpSharedSpaces) return;
  1068   StringTable* new_table = new StringTable();
  1070   // Rehash the table
  1071   the_table()->move_to(new_table);
  1073   // Delete the table and buckets (entries are reused in new table).
  1074   delete _the_table;
  1075   // Don't check if we need rehashing until the table gets unbalanced again.
  1076   // Then rehash with a new global seed.
  1077   _needs_rehashing = false;
  1078   _the_table = new_table;

mercurial