src/share/vm/classfile/symbolTable.cpp

changeset 3865
e9140bf80b4a
parent 3682
fc9d8850ab8b
child 3875
246d977b51f2
     1.1 --- a/src/share/vm/classfile/symbolTable.cpp	Mon Jun 11 13:10:14 2012 -0400
     1.2 +++ b/src/share/vm/classfile/symbolTable.cpp	Wed Jun 13 19:52:59 2012 -0400
     1.3 @@ -23,6 +23,7 @@
     1.4   */
     1.5  
     1.6  #include "precompiled.hpp"
     1.7 +#include "classfile/altHashing.hpp"
     1.8  #include "classfile/javaClasses.hpp"
     1.9  #include "classfile/symbolTable.hpp"
    1.10  #include "classfile/systemDictionary.hpp"
    1.11 @@ -34,12 +35,15 @@
    1.12  #include "oops/oop.inline2.hpp"
    1.13  #include "runtime/mutexLocker.hpp"
    1.14  #include "utilities/hashtable.inline.hpp"
    1.15 +#include "utilities/numberSeq.hpp"
    1.16  
    1.17  // --------------------------------------------------------------------------
    1.18  
    1.19  SymbolTable* SymbolTable::_the_table = NULL;
    1.20  // Static arena for symbols that are not deallocated
    1.21  Arena* SymbolTable::_arena = NULL;
    1.22 +bool SymbolTable::_needs_rehashing = false;
    1.23 +jint SymbolTable::_seed = 0;
    1.24  
    1.25  Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
    1.26    // Don't allow symbols to be created which cannot fit in a Symbol*.
    1.27 @@ -121,12 +125,41 @@
    1.28    }
    1.29  }
    1.30  
    1.31 +unsigned int SymbolTable::new_hash(Symbol* sym) {
    1.32 +  ResourceMark rm;
    1.33 +  // Use alternate hashing algorithm on this symbol.
    1.34 +  return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length());
    1.35 +}
    1.36 +
    1.37 +// Create a new table and using alternate hash code, populate the new table
    1.38 +// with the existing strings.   Set flag to use the alternate hash code afterwards.
    1.39 +void SymbolTable::rehash_table() {
    1.40 +  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
    1.41 +  assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
    1.42 +  // Create a new symbol table
    1.43 +  SymbolTable* new_table = new SymbolTable();
    1.44 +
    1.45 +  // Initialize the global seed for hashing.
    1.46 +  _seed = AltHashing::compute_seed();
    1.47 +  assert(seed() != 0, "shouldn't be zero");
    1.48 +
    1.49 +  the_table()->move_to(new_table);
    1.50 +
    1.51 +  // Delete the table and buckets (entries are reused in new table).
    1.52 +  delete _the_table;
    1.53 +  // Don't check if we need rehashing until the table gets unbalanced again.
    1.54 +  // Then rehash with a new global seed.
    1.55 +  _needs_rehashing = false;
    1.56 +  _the_table = new_table;
    1.57 +}
    1.58  
    1.59  // Lookup a symbol in a bucket.
    1.60  
    1.61  Symbol* SymbolTable::lookup(int index, const char* name,
    1.62                                int len, unsigned int hash) {
    1.63 +  int count = 0;
    1.64    for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
    1.65 +    count++;  // count all entries in this bucket, not just ones with same hash
    1.66      if (e->hash() == hash) {
    1.67        Symbol* sym = e->literal();
    1.68        if (sym->equals(name, len)) {
    1.69 @@ -136,9 +169,21 @@
    1.70        }
    1.71      }
    1.72    }
    1.73 +  // If the bucket size is too deep check if this hash code is insufficient.
    1.74 +  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
    1.75 +    _needs_rehashing = check_rehash_table(count);
    1.76 +  }
    1.77    return NULL;
    1.78  }
    1.79  
    1.80 +// Pick hashing algorithm, but return value already given if not using a new
    1.81 +// hash algorithm.
    1.82 +unsigned int SymbolTable::hash_symbol(const char* s, int len, unsigned int hashValue) {
    1.83 +  return use_alternate_hashcode() ?
    1.84 +           AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
    1.85 +           (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
    1.86 +}
    1.87 +
    1.88  
    1.89  // We take care not to be blocking while holding the
    1.90  // SymbolTable_lock. Otherwise, the system might deadlock, since the
    1.91 @@ -287,13 +332,17 @@
    1.92  }
    1.93  
    1.94  Symbol* SymbolTable::basic_add(int index, u1 *name, int len,
    1.95 -                               unsigned int hashValue, bool c_heap, TRAPS) {
    1.96 +                               unsigned int hashValue_arg, bool c_heap, TRAPS) {
    1.97    assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
    1.98           "proposed name of symbol must be stable");
    1.99  
   1.100    // Grab SymbolTable_lock first.
   1.101    MutexLocker ml(SymbolTable_lock, THREAD);
   1.102  
   1.103 +  // Check if the symbol table has been rehashed, if so, need to recalculate
   1.104 +  // the hash value.
   1.105 +  unsigned int hashValue = hash_symbol((const char*)name, len, hashValue_arg);
   1.106 +
   1.107    // Since look-up was done lock-free, we need to check if another
   1.108    // thread beat us in the race to insert the symbol.
   1.109    Symbol* test = lookup(index, (char*)name, len, hashValue);
   1.110 @@ -332,10 +381,13 @@
   1.111    MutexLocker ml(SymbolTable_lock, THREAD);
   1.112  
   1.113    for (int i=0; i<names_count; i++) {
   1.114 +    // Check if the symbol table has been rehashed, if so, need to recalculate
   1.115 +    // the hash value.
   1.116 +    unsigned int hashValue = hash_symbol(names[i], lengths[i], hashValues[i]);
   1.117      // Since look-up was done lock-free, we need to check if another
   1.118      // thread beat us in the race to insert the symbol.
   1.119 -    int index = hash_to_index(hashValues[i]);
   1.120 -    Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]);
   1.121 +    int index = hash_to_index(hashValue);
   1.122 +    Symbol* test = lookup(index, names[i], lengths[i], hashValue);
   1.123      if (test != NULL) {
   1.124        // A race occurred and another thread introduced the symbol, this one
   1.125        // will be dropped and collected. Use test instead.
   1.126 @@ -347,7 +399,7 @@
   1.127        bool c_heap = class_loader() != NULL;
   1.128        Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
   1.129        assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
   1.130 -      HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym);
   1.131 +      HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
   1.132        add_entry(index, entry);
   1.133        cp->symbol_at_put(cp_indices[i], sym);
   1.134      }
   1.135 @@ -370,6 +422,24 @@
   1.136    }
   1.137  }
   1.138  
   1.139 +void SymbolTable::dump(outputStream* st) {
   1.140 +  NumberSeq summary;
   1.141 +  for (int i = 0; i < the_table()->table_size(); ++i) {
   1.142 +    int count = 0;
   1.143 +    for (HashtableEntry<Symbol*>* e = the_table()->bucket(i);
   1.144 +       e != NULL; e = e->next()) {
   1.145 +      count++;
   1.146 +    }
   1.147 +    summary.add((double)count);
   1.148 +  }
   1.149 +  st->print_cr("SymbolTable statistics:");
   1.150 +  st->print_cr("Number of buckets       : %7d", summary.num());
   1.151 +  st->print_cr("Average bucket size     : %7.0f", summary.avg());
   1.152 +  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
   1.153 +  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
   1.154 +  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
   1.155 +}
   1.156 +
   1.157  
   1.158  //---------------------------------------------------------------------------
   1.159  // Non-product code
   1.160 @@ -468,7 +538,6 @@
   1.161      }
   1.162    }
   1.163  }
   1.164 -
   1.165  #endif // PRODUCT
   1.166  
   1.167  // --------------------------------------------------------------------------
   1.168 @@ -514,21 +583,36 @@
   1.169  // --------------------------------------------------------------------------
   1.170  StringTable* StringTable::_the_table = NULL;
   1.171  
   1.172 +bool StringTable::_needs_rehashing = false;
   1.173 +jint StringTable::_seed = 0;
   1.174 +
   1.175 +// Pick hashing algorithm
   1.176 +unsigned int StringTable::hash_string(const jchar* s, int len, unsigned int hashValue) {
   1.177 +  return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
   1.178 +            (hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
   1.179 +}
   1.180 +
   1.181  oop StringTable::lookup(int index, jchar* name,
   1.182                          int len, unsigned int hash) {
   1.183 +  int count = 0;
   1.184    for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
   1.185 +    count++;
   1.186      if (l->hash() == hash) {
   1.187        if (java_lang_String::equals(l->literal(), name, len)) {
   1.188          return l->literal();
   1.189        }
   1.190      }
   1.191    }
   1.192 +  // If the bucket size is too deep check if this hash code is insufficient.
   1.193 +  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
   1.194 +    _needs_rehashing = check_rehash_table(count);
   1.195 +  }
   1.196    return NULL;
   1.197  }
   1.198  
   1.199  
   1.200  oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
   1.201 -                           int len, unsigned int hashValue, TRAPS) {
   1.202 +                           int len, unsigned int hashValue_arg, TRAPS) {
   1.203    debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
   1.204    assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
   1.205           "proposed name of symbol must be stable");
   1.206 @@ -547,6 +631,10 @@
   1.207    assert(java_lang_String::equals(string(), name, len),
   1.208           "string must be properly initialized");
   1.209  
   1.210 +  // Check if the symbol table has been rehashed, if so, need to recalculate
   1.211 +  // the hash value before second lookup.
   1.212 +  unsigned int hashValue = hash_string(name, len, hashValue_arg);
   1.213 +
   1.214    // Since look-up was done lock-free, we need to check if another
   1.215    // thread beat us in the race to insert the symbol.
   1.216  
   1.217 @@ -566,7 +654,7 @@
   1.218    ResourceMark rm;
   1.219    int length;
   1.220    jchar* chars = symbol->as_unicode(length);
   1.221 -  unsigned int hashValue = java_lang_String::hash_string(chars, length);
   1.222 +  unsigned int hashValue = hash_string(chars, length);
   1.223    int index = the_table()->hash_to_index(hashValue);
   1.224    return the_table()->lookup(index, chars, length, hashValue);
   1.225  }
   1.226 @@ -574,7 +662,7 @@
   1.227  
   1.228  oop StringTable::intern(Handle string_or_null, jchar* name,
   1.229                          int len, TRAPS) {
   1.230 -  unsigned int hashValue = java_lang_String::hash_string(name, len);
   1.231 +  unsigned int hashValue = hash_string(name, len);
   1.232    int index = the_table()->hash_to_index(hashValue);
   1.233    oop string = the_table()->lookup(index, name, len, hashValue);
   1.234  
   1.235 @@ -675,3 +763,52 @@
   1.236      }
   1.237    }
   1.238  }
   1.239 +
   1.240 +void StringTable::dump(outputStream* st) {
   1.241 +  NumberSeq summary;
   1.242 +  for (int i = 0; i < the_table()->table_size(); ++i) {
   1.243 +    HashtableEntry<oop>* p = the_table()->bucket(i);
   1.244 +    int count = 0;
   1.245 +    for ( ; p != NULL; p = p->next()) {
   1.246 +      count++;
   1.247 +    }
   1.248 +    summary.add((double)count);
   1.249 +  }
   1.250 +  st->print_cr("StringTable statistics:");
   1.251 +  st->print_cr("Number of buckets       : %7d", summary.num());
   1.252 +  st->print_cr("Average bucket size     : %7.0f", summary.avg());
   1.253 +  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
   1.254 +  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
   1.255 +  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
   1.256 +}
   1.257 +
   1.258 +
   1.259 +unsigned int StringTable::new_hash(oop string) {
   1.260 +  ResourceMark rm;
   1.261 +  int length;
   1.262 +  jchar* chars = java_lang_String::as_unicode_string(string, length);
   1.263 +  // Use alternate hashing algorithm on the string
   1.264 +  return AltHashing::murmur3_32(seed(), chars, length);
   1.265 +}
   1.266 +
   1.267 +// Create a new table and using alternate hash code, populate the new table
   1.268 +// with the existing strings.   Set flag to use the alternate hash code afterwards.
   1.269 +void StringTable::rehash_table() {
   1.270 +  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
   1.271 +  assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
   1.272 +  StringTable* new_table = new StringTable();
   1.273 +
   1.274 +  // Initialize new global seed for hashing.
   1.275 +  _seed = AltHashing::compute_seed();
   1.276 +  assert(seed() != 0, "shouldn't be zero");
   1.277 +
   1.278 +  // Rehash the table
   1.279 +  the_table()->move_to(new_table);
   1.280 +
   1.281 +  // Delete the table and buckets (entries are reused in new table).
   1.282 +  delete _the_table;
   1.283 +  // Don't check if we need rehashing until the table gets unbalanced again.
   1.284 +  // Then rehash with a new global seed.
   1.285 +  _needs_rehashing = false;
   1.286 +  _the_table = new_table;
   1.287 +}

mercurial