Thu, 27 Jan 2011 16:11:27 -0800
6990754: Use native memory and reference counting to implement SymbolTable
Summary: move symbols from permgen into C heap and reference count them
Reviewed-by: never, acorn, jmasa, stefank
1 /*
2 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/javaClasses.hpp"
27 #include "classfile/symbolTable.hpp"
28 #include "classfile/systemDictionary.hpp"
29 #include "gc_interface/collectedHeap.inline.hpp"
30 #include "memory/filemap.hpp"
31 #include "memory/gcLocker.inline.hpp"
32 #include "oops/oop.inline.hpp"
33 #include "oops/oop.inline2.hpp"
34 #include "runtime/mutexLocker.hpp"
35 #include "utilities/hashtable.inline.hpp"
37 // --------------------------------------------------------------------------
39 SymbolTable* SymbolTable::_the_table = NULL;
41 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, TRAPS) {
42 // Don't allow symbols to be created which cannot fit in a Symbol*.
43 if (len > Symbol::max_length()) {
44 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
45 "name is too long to represent");
46 }
47 Symbol* sym = new (len) Symbol(name, len);
48 assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted");
49 return sym;
50 }
52 bool SymbolTable::allocate_symbols(int names_count, const u1** names,
53 int* lengths, Symbol** syms, TRAPS) {
54 for (int i = 0; i< names_count; i++) {
55 if (lengths[i] > Symbol::max_length()) {
56 THROW_MSG_0(vmSymbols::java_lang_InternalError(),
57 "name is too long to represent");
58 }
59 }
61 for (int i = 0; i< names_count; i++) {
62 int len = lengths[i];
63 syms[i] = new (len) Symbol(names[i], len);
64 assert(syms[i] != NULL, "new should call vm_exit_out_of_memory if "
65 "C_HEAP is exhausted");
66 }
67 return true;
68 }
70 // Call function for all symbols in the symbol table.
71 void SymbolTable::symbols_do(SymbolClosure *cl) {
72 const int n = the_table()->table_size();
73 for (int i = 0; i < n; i++) {
74 for (HashtableEntry<Symbol*>* p = the_table()->bucket(i);
75 p != NULL;
76 p = p->next()) {
77 cl->do_symbol(p->literal_addr());
78 }
79 }
80 }
82 int SymbolTable::symbols_removed = 0;
83 int SymbolTable::symbols_counted = 0;
85 // Remove unreferenced symbols from the symbol table
86 // This is done late during GC. This doesn't use the hash table unlink because
87 // it assumes that the literals are oops.
88 void SymbolTable::unlink() {
89 int removed = 0;
90 int total = 0;
91 int memory_total = 0;
92 for (int i = 0; i < the_table()->table_size(); ++i) {
93 for (HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i); *p != NULL; ) {
94 HashtableEntry<Symbol*>* entry = *p;
95 if (entry->is_shared()) {
96 break;
97 }
98 Symbol* s = entry->literal();
99 memory_total += s->object_size();
100 total++;
101 assert(s != NULL, "just checking");
102 // If reference count is zero, remove.
103 if (s->refcount() == 0) {
104 delete s;
105 removed++;
106 *p = entry->next();
107 the_table()->free_entry(entry);
108 } else {
109 p = entry->next_addr();
110 }
111 }
112 }
113 symbols_removed += removed;
114 symbols_counted += total;
115 if (PrintGCDetails) {
116 gclog_or_tty->print(" [Symbols=%d size=%dK] ", total,
117 (memory_total*HeapWordSize)/1024);
118 }
119 }
122 // Lookup a symbol in a bucket.
124 Symbol* SymbolTable::lookup(int index, const char* name,
125 int len, unsigned int hash) {
126 for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
127 if (e->hash() == hash) {
128 Symbol* sym = e->literal();
129 if (sym->equals(name, len)) {
130 // something is referencing this symbol now.
131 sym->increment_refcount();
132 return sym;
133 }
134 }
135 }
136 return NULL;
137 }
140 // We take care not to be blocking while holding the
141 // SymbolTable_lock. Otherwise, the system might deadlock, since the
142 // symboltable is used during compilation (VM_thread) The lock free
143 // synchronization is simplified by the fact that we do not delete
144 // entries in the symbol table during normal execution (only during
145 // safepoints).
147 Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) {
148 unsigned int hashValue = hash_symbol(name, len);
149 int index = the_table()->hash_to_index(hashValue);
151 Symbol* s = the_table()->lookup(index, name, len, hashValue);
153 // Found
154 if (s != NULL) return s;
156 // Otherwise, add to symbol to table
157 return the_table()->basic_add(index, (u1*)name, len, hashValue, CHECK_NULL);
158 }
160 Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) {
161 char* buffer;
162 int index, len;
163 unsigned int hashValue;
164 char* name;
165 {
166 debug_only(No_Safepoint_Verifier nsv;)
168 name = (char*)sym->base() + begin;
169 len = end - begin;
170 hashValue = hash_symbol(name, len);
171 index = the_table()->hash_to_index(hashValue);
172 Symbol* s = the_table()->lookup(index, name, len, hashValue);
174 // Found
175 if (s != NULL) return s;
176 }
178 // Otherwise, add to symbol to table. Copy to a C string first.
179 char stack_buf[128];
180 ResourceMark rm(THREAD);
181 if (len <= 128) {
182 buffer = stack_buf;
183 } else {
184 buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
185 }
186 for (int i=0; i<len; i++) {
187 buffer[i] = name[i];
188 }
189 // Make sure there is no safepoint in the code above since name can't move.
190 // We can't include the code in No_Safepoint_Verifier because of the
191 // ResourceMark.
193 return the_table()->basic_add(index, (u1*)buffer, len, hashValue, CHECK_NULL);
194 }
196 Symbol* SymbolTable::lookup_only(const char* name, int len,
197 unsigned int& hash) {
198 hash = hash_symbol(name, len);
199 int index = the_table()->hash_to_index(hash);
201 Symbol* s = the_table()->lookup(index, name, len, hash);
202 return s;
203 }
205 // Suggestion: Push unicode-based lookup all the way into the hashing
206 // and probing logic, so there is no need for convert_to_utf8 until
207 // an actual new Symbol* is created.
208 Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
209 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
210 char stack_buf[128];
211 if (utf8_length < (int) sizeof(stack_buf)) {
212 char* chars = stack_buf;
213 UNICODE::convert_to_utf8(name, utf16_length, chars);
214 return lookup(chars, utf8_length, THREAD);
215 } else {
216 ResourceMark rm(THREAD);
217 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
218 UNICODE::convert_to_utf8(name, utf16_length, chars);
219 return lookup(chars, utf8_length, THREAD);
220 }
221 }
223 Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
224 unsigned int& hash) {
225 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
226 char stack_buf[128];
227 if (utf8_length < (int) sizeof(stack_buf)) {
228 char* chars = stack_buf;
229 UNICODE::convert_to_utf8(name, utf16_length, chars);
230 return lookup_only(chars, utf8_length, hash);
231 } else {
232 ResourceMark rm;
233 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
234 UNICODE::convert_to_utf8(name, utf16_length, chars);
235 return lookup_only(chars, utf8_length, hash);
236 }
237 }
239 void SymbolTable::add(constantPoolHandle cp, int names_count,
240 const char** names, int* lengths, int* cp_indices,
241 unsigned int* hashValues, TRAPS) {
242 SymbolTable* table = the_table();
243 bool added = table->basic_add(cp, names_count, names, lengths,
244 cp_indices, hashValues, CHECK);
245 if (!added) {
246 // do it the hard way
247 for (int i=0; i<names_count; i++) {
248 int index = table->hash_to_index(hashValues[i]);
249 Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i],
250 hashValues[i], CHECK);
251 cp->symbol_at_put(cp_indices[i], sym);
252 }
253 }
254 }
256 Symbol* SymbolTable::basic_add(int index, u1 *name, int len,
257 unsigned int hashValue, TRAPS) {
258 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
259 "proposed name of symbol must be stable");
261 // We assume that lookup() has been called already, that it failed,
262 // and symbol was not found. We create the symbol here.
263 Symbol* sym = allocate_symbol(name, len, CHECK_NULL);
265 // Allocation must be done before grabbing the SymbolTable_lock lock
266 MutexLocker ml(SymbolTable_lock, THREAD);
268 assert(sym->equals((char*)name, len), "symbol must be properly initialized");
270 // Since look-up was done lock-free, we need to check if another
271 // thread beat us in the race to insert the symbol.
273 Symbol* test = lookup(index, (char*)name, len, hashValue);
274 if (test != NULL) {
275 // A race occurred and another thread introduced the symbol, this one
276 // will be dropped and collected.
277 delete sym;
278 assert(test->refcount() != 0, "lookup should have incremented the count");
279 return test;
280 }
282 HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
283 sym->increment_refcount();
284 add_entry(index, entry);
285 return sym;
286 }
288 bool SymbolTable::basic_add(constantPoolHandle cp, int names_count,
289 const char** names, int* lengths,
290 int* cp_indices, unsigned int* hashValues,
291 TRAPS) {
292 Symbol* syms[symbol_alloc_batch_size];
293 bool allocated = allocate_symbols(names_count, (const u1**)names, lengths,
294 syms, CHECK_false);
295 if (!allocated) {
296 return false;
297 }
299 // Allocation must be done before grabbing the SymbolTable_lock lock
300 MutexLocker ml(SymbolTable_lock, THREAD);
302 for (int i=0; i<names_count; i++) {
303 assert(syms[i]->equals(names[i], lengths[i]), "symbol must be properly initialized");
304 // Since look-up was done lock-free, we need to check if another
305 // thread beat us in the race to insert the symbol.
306 int index = hash_to_index(hashValues[i]);
307 Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]);
308 if (test != NULL) {
309 // A race occurred and another thread introduced the symbol, this one
310 // will be dropped and collected. Use test instead.
311 cp->symbol_at_put(cp_indices[i], test);
312 assert(test->refcount() != 0, "lookup should have incremented the count");
313 delete syms[i];
314 } else {
315 Symbol* sym = syms[i];
316 HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym);
317 sym->increment_refcount(); // increment refcount in external hashtable
318 add_entry(index, entry);
319 cp->symbol_at_put(cp_indices[i], sym);
320 }
321 }
323 return true;
324 }
327 void SymbolTable::verify() {
328 for (int i = 0; i < the_table()->table_size(); ++i) {
329 HashtableEntry<Symbol*>* p = the_table()->bucket(i);
330 for ( ; p != NULL; p = p->next()) {
331 Symbol* s = (Symbol*)(p->literal());
332 guarantee(s != NULL, "symbol is NULL");
333 unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
334 guarantee(p->hash() == h, "broken hash in symbol table entry");
335 guarantee(the_table()->hash_to_index(h) == i,
336 "wrong index in symbol table");
337 }
338 }
339 }
342 //---------------------------------------------------------------------------
343 // Non-product code
345 #ifndef PRODUCT
347 void SymbolTable::print_histogram() {
348 MutexLocker ml(SymbolTable_lock);
349 const int results_length = 100;
350 int results[results_length];
351 int i,j;
353 // initialize results to zero
354 for (j = 0; j < results_length; j++) {
355 results[j] = 0;
356 }
358 int total = 0;
359 int max_symbols = 0;
360 int out_of_range = 0;
361 int memory_total = 0;
362 int count = 0;
363 for (i = 0; i < the_table()->table_size(); i++) {
364 HashtableEntry<Symbol*>* p = the_table()->bucket(i);
365 for ( ; p != NULL; p = p->next()) {
366 memory_total += p->literal()->object_size();
367 count++;
368 int counter = p->literal()->utf8_length();
369 total += counter;
370 if (counter < results_length) {
371 results[counter]++;
372 } else {
373 out_of_range++;
374 }
375 max_symbols = MAX2(max_symbols, counter);
376 }
377 }
378 tty->print_cr("Symbol Table:");
379 tty->print_cr("Total number of symbols %5d", count);
380 tty->print_cr("Total size in memory %5dK",
381 (memory_total*HeapWordSize)/1024);
382 tty->print_cr("Total counted %5d", symbols_counted);
383 tty->print_cr("Total removed %5d", symbols_removed);
384 if (symbols_counted > 0) {
385 tty->print_cr("Percent removed %3.2f",
386 ((float)symbols_removed/(float)symbols_counted)* 100);
387 }
388 tty->print_cr("Reference counts %5d", Symbol::_total_count);
389 tty->print_cr("Histogram of symbol length:");
390 tty->print_cr("%8s %5d", "Total ", total);
391 tty->print_cr("%8s %5d", "Maximum", max_symbols);
392 tty->print_cr("%8s %3.2f", "Average",
393 ((float) total / (float) the_table()->table_size()));
394 tty->print_cr("%s", "Histogram:");
395 tty->print_cr(" %s %29s", "Length", "Number chains that length");
396 for (i = 0; i < results_length; i++) {
397 if (results[i] > 0) {
398 tty->print_cr("%6d %10d", i, results[i]);
399 }
400 }
401 if (Verbose) {
402 int line_length = 70;
403 tty->print_cr("%s %30s", " Length", "Number chains that length");
404 for (i = 0; i < results_length; i++) {
405 if (results[i] > 0) {
406 tty->print("%4d", i);
407 for (j = 0; (j < results[i]) && (j < line_length); j++) {
408 tty->print("%1s", "*");
409 }
410 if (j == line_length) {
411 tty->print("%1s", "+");
412 }
413 tty->cr();
414 }
415 }
416 }
417 tty->print_cr(" %s %d: %d\n", "Number chains longer than",
418 results_length, out_of_range);
419 }
421 void SymbolTable::print() {
422 for (int i = 0; i < the_table()->table_size(); ++i) {
423 HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i);
424 HashtableEntry<Symbol*>* entry = the_table()->bucket(i);
425 if (entry != NULL) {
426 while (entry != NULL) {
427 tty->print(PTR_FORMAT " ", entry->literal());
428 entry->literal()->print();
429 tty->print(" %d", entry->literal()->refcount());
430 p = entry->next_addr();
431 entry = (HashtableEntry<Symbol*>*)HashtableEntry<Symbol*>::make_ptr(*p);
432 }
433 tty->cr();
434 }
435 }
436 }
438 #endif // PRODUCT
440 // --------------------------------------------------------------------------
442 #ifdef ASSERT
443 class StableMemoryChecker : public StackObj {
444 enum { _bufsize = wordSize*4 };
446 address _region;
447 jint _size;
448 u1 _save_buf[_bufsize];
450 int sample(u1* save_buf) {
451 if (_size <= _bufsize) {
452 memcpy(save_buf, _region, _size);
453 return _size;
454 } else {
455 // copy head and tail
456 memcpy(&save_buf[0], _region, _bufsize/2);
457 memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
458 return (_bufsize/2)*2;
459 }
460 }
462 public:
463 StableMemoryChecker(const void* region, jint size) {
464 _region = (address) region;
465 _size = size;
466 sample(_save_buf);
467 }
469 bool verify() {
470 u1 check_buf[sizeof(_save_buf)];
471 int check_size = sample(check_buf);
472 return (0 == memcmp(_save_buf, check_buf, check_size));
473 }
475 void set_region(const void* region) { _region = (address) region; }
476 };
477 #endif
480 // --------------------------------------------------------------------------
483 // Compute the hash value for a java.lang.String object which would
484 // contain the characters passed in. This hash value is used for at
485 // least two purposes.
486 //
487 // (a) As the hash value used by the StringTable for bucket selection
488 // and comparison (stored in the HashtableEntry structures). This
489 // is used in the String.intern() method.
490 //
491 // (b) As the hash value used by the String object itself, in
492 // String.hashCode(). This value is normally calculate in Java code
493 // in the String.hashCode method(), but is precomputed for String
494 // objects in the shared archive file.
495 //
496 // For this reason, THIS ALGORITHM MUST MATCH String.hashCode().
498 int StringTable::hash_string(jchar* s, int len) {
499 unsigned h = 0;
500 while (len-- > 0) {
501 h = 31*h + (unsigned) *s;
502 s++;
503 }
504 return h;
505 }
508 StringTable* StringTable::_the_table = NULL;
510 oop StringTable::lookup(int index, jchar* name,
511 int len, unsigned int hash) {
512 for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
513 if (l->hash() == hash) {
514 if (java_lang_String::equals(l->literal(), name, len)) {
515 return l->literal();
516 }
517 }
518 }
519 return NULL;
520 }
523 oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
524 int len, unsigned int hashValue, TRAPS) {
525 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
526 assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
527 "proposed name of symbol must be stable");
529 Handle string;
530 // try to reuse the string if possible
531 if (!string_or_null.is_null() && string_or_null()->is_perm()) {
532 string = string_or_null;
533 } else {
534 string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
535 }
537 // Allocation must be done before grapping the SymbolTable_lock lock
538 MutexLocker ml(StringTable_lock, THREAD);
540 assert(java_lang_String::equals(string(), name, len),
541 "string must be properly initialized");
543 // Since look-up was done lock-free, we need to check if another
544 // thread beat us in the race to insert the symbol.
546 oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
547 if (test != NULL) {
548 // Entry already added
549 return test;
550 }
552 HashtableEntry<oop>* entry = new_entry(hashValue, string());
553 add_entry(index, entry);
554 return string();
555 }
558 oop StringTable::lookup(Symbol* symbol) {
559 ResourceMark rm;
560 int length;
561 jchar* chars = symbol->as_unicode(length);
562 unsigned int hashValue = hash_string(chars, length);
563 int index = the_table()->hash_to_index(hashValue);
564 return the_table()->lookup(index, chars, length, hashValue);
565 }
568 oop StringTable::intern(Handle string_or_null, jchar* name,
569 int len, TRAPS) {
570 unsigned int hashValue = hash_string(name, len);
571 int index = the_table()->hash_to_index(hashValue);
572 oop string = the_table()->lookup(index, name, len, hashValue);
574 // Found
575 if (string != NULL) return string;
577 // Otherwise, add to symbol to table
578 return the_table()->basic_add(index, string_or_null, name, len,
579 hashValue, CHECK_NULL);
580 }
582 oop StringTable::intern(Symbol* symbol, TRAPS) {
583 if (symbol == NULL) return NULL;
584 ResourceMark rm(THREAD);
585 int length;
586 jchar* chars = symbol->as_unicode(length);
587 Handle string;
588 oop result = intern(string, chars, length, CHECK_NULL);
589 return result;
590 }
593 oop StringTable::intern(oop string, TRAPS)
594 {
595 if (string == NULL) return NULL;
596 ResourceMark rm(THREAD);
597 int length;
598 Handle h_string (THREAD, string);
599 jchar* chars = java_lang_String::as_unicode_string(string, length);
600 oop result = intern(h_string, chars, length, CHECK_NULL);
601 return result;
602 }
605 oop StringTable::intern(const char* utf8_string, TRAPS) {
606 if (utf8_string == NULL) return NULL;
607 ResourceMark rm(THREAD);
608 int length = UTF8::unicode_length(utf8_string);
609 jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
610 UTF8::convert_to_unicode(utf8_string, chars, length);
611 Handle string;
612 oop result = intern(string, chars, length, CHECK_NULL);
613 return result;
614 }
616 void StringTable::unlink(BoolObjectClosure* is_alive) {
617 // Readers of the table are unlocked, so we should only be removing
618 // entries at a safepoint.
619 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
620 for (int i = 0; i < the_table()->table_size(); ++i) {
621 for (HashtableEntry<oop>** p = the_table()->bucket_addr(i); *p != NULL; ) {
622 HashtableEntry<oop>* entry = *p;
623 if (entry->is_shared()) {
624 break;
625 }
626 assert(entry->literal() != NULL, "just checking");
627 if (is_alive->do_object_b(entry->literal())) {
628 p = entry->next_addr();
629 } else {
630 *p = entry->next();
631 the_table()->free_entry(entry);
632 }
633 }
634 }
635 }
637 void StringTable::oops_do(OopClosure* f) {
638 for (int i = 0; i < the_table()->table_size(); ++i) {
639 HashtableEntry<oop>** p = the_table()->bucket_addr(i);
640 HashtableEntry<oop>* entry = the_table()->bucket(i);
641 while (entry != NULL) {
642 f->do_oop((oop*)entry->literal_addr());
644 // Did the closure remove the literal from the table?
645 if (entry->literal() == NULL) {
646 assert(!entry->is_shared(), "immutable hashtable entry?");
647 *p = entry->next();
648 the_table()->free_entry(entry);
649 } else {
650 p = entry->next_addr();
651 }
652 entry = (HashtableEntry<oop>*)HashtableEntry<oop>::make_ptr(*p);
653 }
654 }
655 }
657 void StringTable::verify() {
658 for (int i = 0; i < the_table()->table_size(); ++i) {
659 HashtableEntry<oop>* p = the_table()->bucket(i);
660 for ( ; p != NULL; p = p->next()) {
661 oop s = p->literal();
662 guarantee(s != NULL, "interned string is NULL");
663 guarantee(s->is_perm(), "interned string not in permspace");
665 int length;
666 jchar* chars = java_lang_String::as_unicode_string(s, length);
667 unsigned int h = hash_string(chars, length);
668 guarantee(p->hash() == h, "broken hash in string table entry");
669 guarantee(the_table()->hash_to_index(h) == i,
670 "wrong index in string table");
671 }
672 }
673 }