1 /* 2 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/altHashing.hpp" 27 #include "classfile/javaClasses.hpp" 28 #include "classfile/symbolTable.hpp" 29 #include "classfile/systemDictionary.hpp" 30 #include "gc_interface/collectedHeap.inline.hpp" 31 #include "memory/allocation.inline.hpp" 32 #include "memory/filemap.hpp" 33 #include "memory/gcLocker.inline.hpp" 34 #include "oops/oop.inline.hpp" 35 #include "oops/oop.inline2.hpp" 36 #include "runtime/mutexLocker.hpp" 37 #include "utilities/hashtable.inline.hpp" 38 #if INCLUDE_ALL_GCS 39 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 40 #include "gc_implementation/g1/g1StringDedup.hpp" 41 #endif 42 43 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC 44 45 // -------------------------------------------------------------------------- 46 47 // the number of buckets a thread claims 48 const int ClaimChunkSize = 32; 49 50 SymbolTable* SymbolTable::_the_table = NULL; 51 // Static arena for symbols that are not deallocated 52 Arena* SymbolTable::_arena = NULL; 53 bool SymbolTable::_needs_rehashing = false; 54 55 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) { 56 assert (len <= Symbol::max_length(), "should be checked by caller"); 57 58 Symbol* sym; 59 60 if (DumpSharedSpaces) { 61 // Allocate all symbols to CLD shared metaspace 62 sym = new (len, ClassLoaderData::the_null_class_loader_data(), THREAD) Symbol(name, len, -1); 63 } else if (c_heap) { 64 // refcount starts as 1 65 sym = new (len, THREAD) Symbol(name, len, 1); 66 assert(sym != NULL, "new should call vm_exit_out_of_memory if C_HEAP is exhausted"); 67 } else { 68 // Allocate to global arena 69 sym = new (len, arena(), THREAD) Symbol(name, len, -1); 70 } 71 return sym; 72 } 73 74 void SymbolTable::initialize_symbols(int arena_alloc_size) { 75 // Initialize the arena for global symbols, size passed in depends on CDS. 76 if (arena_alloc_size == 0) { 77 _arena = new (mtSymbol) Arena(mtSymbol); 78 } else { 79 _arena = new (mtSymbol) Arena(mtSymbol, arena_alloc_size); 80 } 81 } 82 83 // Call function for all symbols in the symbol table. 84 void SymbolTable::symbols_do(SymbolClosure *cl) { 85 const int n = the_table()->table_size(); 86 for (int i = 0; i < n; i++) { 87 for (HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i); 88 p != NULL; 89 p = p->next()) { 90 cl->do_symbol(p->literal_addr()); 91 } 92 } 93 } 94 95 int SymbolTable::_symbols_removed = 0; 96 int SymbolTable::_symbols_counted = 0; 97 volatile int SymbolTable::_parallel_claimed_idx = 0; 98 99 void SymbolTable::buckets_unlink(int start_idx, int end_idx, BucketUnlinkContext* context, size_t* memory_total) { 100 for (int i = start_idx; i < end_idx; ++i) { 101 HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i); 102 HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i); 103 while (entry != NULL) { 104 // Shared entries are normally at the end of the bucket and if we run into 105 // a shared entry, then there is nothing more to remove. However, if we 106 // have rehashed the table, then the shared entries are no longer at the 107 // end of the bucket. 108 if (entry->is_shared() && !use_alternate_hashcode()) { 109 break; 110 } 111 Symbol* s = entry->literal(); 112 (*memory_total) += s->size(); 113 context->_num_processed++; 114 assert(s != NULL, "just checking"); 115 // If reference count is zero, remove. 116 if (s->refcount() == 0) { 117 assert(!entry->is_shared(), "shared entries should be kept live"); 118 delete s; 119 *p = entry->next(); 120 context->free_entry(entry); 121 } else { 122 p = entry->next_addr(); 123 } 124 // get next entry 125 entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p); 126 } 127 } 128 } 129 130 // Remove unreferenced symbols from the symbol table 131 // This is done late during GC. 132 void SymbolTable::unlink(int* processed, int* removed) { 133 size_t memory_total = 0; 134 BucketUnlinkContext context; 135 buckets_unlink(0, the_table()->table_size(), &context, &memory_total); 136 _the_table->bulk_free_entries(&context); 137 *processed = context._num_processed; 138 *removed = context._num_removed; 139 140 _symbols_removed = context._num_removed; 141 _symbols_counted = context._num_processed; 142 // Exclude printing for normal PrintGCDetails because people parse 143 // this output. 144 if (PrintGCDetails && Verbose && WizardMode) { 145 gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", *processed, 146 (memory_total*HeapWordSize)/1024); 147 } 148 } 149 150 void SymbolTable::possibly_parallel_unlink(int* processed, int* removed) { 151 const int limit = the_table()->table_size(); 152 153 size_t memory_total = 0; 154 155 BucketUnlinkContext context; 156 for (;;) { 157 // Grab next set of buckets to scan 158 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize; 159 if (start_idx >= limit) { 160 // End of table 161 break; 162 } 163 164 int end_idx = MIN2(limit, start_idx + ClaimChunkSize); 165 buckets_unlink(start_idx, end_idx, &context, &memory_total); 166 } 167 168 _the_table->bulk_free_entries(&context); 169 *processed = context._num_processed; 170 *removed = context._num_removed; 171 172 Atomic::add(context._num_processed, &_symbols_counted); 173 Atomic::add(context._num_removed, &_symbols_removed); 174 // Exclude printing for normal PrintGCDetails because people parse 175 // this output. 176 if (PrintGCDetails && Verbose && WizardMode) { 177 gclog_or_tty->print(" [Symbols: scanned=%d removed=%d size=" SIZE_FORMAT "K] ", *processed, *removed, 178 (memory_total*HeapWordSize)/1024); 179 } 180 } 181 182 // Create a new table and using alternate hash code, populate the new table 183 // with the existing strings. Set flag to use the alternate hash code afterwards. 184 void SymbolTable::rehash_table() { 185 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); 186 // This should never happen with -Xshare:dump but it might in testing mode. 187 if (DumpSharedSpaces) return; 188 // Create a new symbol table 189 SymbolTable* new_table = new SymbolTable(); 190 191 the_table()->move_to(new_table); 192 193 // Delete the table and buckets (entries are reused in new table). 194 delete _the_table; 195 // Don't check if we need rehashing until the table gets unbalanced again. 196 // Then rehash with a new global seed. 197 _needs_rehashing = false; 198 _the_table = new_table; 199 } 200 201 // Lookup a symbol in a bucket. 202 203 Symbol* SymbolTable::lookup(int index, const char* name, 204 int len, unsigned int hash) { 205 int count = 0; 206 for (HashtableEntry<Symbol*, mtSymbol>* e = bucket(index); e != NULL; e = e->next()) { 207 count++; // count all entries in this bucket, not just ones with same hash 208 if (e->hash() == hash) { 209 Symbol* sym = e->literal(); 210 if (sym->equals(name, len)) { 211 // something is referencing this symbol now. 212 sym->increment_refcount(); 213 return sym; 214 } 215 } 216 } 217 // If the bucket size is too deep check if this hash code is insufficient. 218 if (count >= rehash_count && !needs_rehashing()) { 219 _needs_rehashing = check_rehash_table(count); 220 } 221 return NULL; 222 } 223 224 // Pick hashing algorithm. 225 unsigned int SymbolTable::hash_symbol(const char* s, int len) { 226 return use_alternate_hashcode() ? 227 AltHashing::halfsiphash_32(seed(), (const uint8_t*)s, len) : 228 java_lang_String::hash_code(s, len); 229 } 230 231 232 // We take care not to be blocking while holding the 233 // SymbolTable_lock. Otherwise, the system might deadlock, since the 234 // symboltable is used during compilation (VM_thread) The lock free 235 // synchronization is simplified by the fact that we do not delete 236 // entries in the symbol table during normal execution (only during 237 // safepoints). 238 239 Symbol* SymbolTable::lookup(const char* name, int len, TRAPS) { 240 unsigned int hashValue = hash_symbol(name, len); 241 int index = the_table()->hash_to_index(hashValue); 242 243 Symbol* s = the_table()->lookup(index, name, len, hashValue); 244 245 // Found 246 if (s != NULL) return s; 247 248 // Grab SymbolTable_lock first. 249 MutexLocker ml(SymbolTable_lock, THREAD); 250 251 // Otherwise, add to symbol to table 252 return the_table()->basic_add(index, (u1*)name, len, hashValue, true, THREAD); 253 } 254 255 Symbol* SymbolTable::lookup(const Symbol* sym, int begin, int end, TRAPS) { 256 char* buffer; 257 int index, len; 258 unsigned int hashValue; 259 char* name; 260 { 261 debug_only(No_Safepoint_Verifier nsv;) 262 263 name = (char*)sym->base() + begin; 264 len = end - begin; 265 hashValue = hash_symbol(name, len); 266 index = the_table()->hash_to_index(hashValue); 267 Symbol* s = the_table()->lookup(index, name, len, hashValue); 268 269 // Found 270 if (s != NULL) return s; 271 } 272 273 // Otherwise, add to symbol to table. Copy to a C string first. 274 char stack_buf[128]; 275 ResourceMark rm(THREAD); 276 if (len <= 128) { 277 buffer = stack_buf; 278 } else { 279 buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len); 280 } 281 for (int i=0; i<len; i++) { 282 buffer[i] = name[i]; 283 } 284 // Make sure there is no safepoint in the code above since name can't move. 285 // We can't include the code in No_Safepoint_Verifier because of the 286 // ResourceMark. 287 288 // Grab SymbolTable_lock first. 289 MutexLocker ml(SymbolTable_lock, THREAD); 290 291 return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, THREAD); 292 } 293 294 Symbol* SymbolTable::lookup_only(const char* name, int len, 295 unsigned int& hash) { 296 hash = hash_symbol(name, len); 297 int index = the_table()->hash_to_index(hash); 298 299 Symbol* s = the_table()->lookup(index, name, len, hash); 300 return s; 301 } 302 303 // Look up the address of the literal in the SymbolTable for this Symbol* 304 // Do not create any new symbols 305 // Do not increment the reference count to keep this alive 306 Symbol** SymbolTable::lookup_symbol_addr(Symbol* sym){ 307 unsigned int hash = hash_symbol((char*)sym->bytes(), sym->utf8_length()); 308 int index = the_table()->hash_to_index(hash); 309 310 for (HashtableEntry<Symbol*, mtSymbol>* e = the_table()->bucket(index); e != NULL; e = e->next()) { 311 if (e->hash() == hash) { 312 Symbol* literal_sym = e->literal(); 313 if (sym == literal_sym) { 314 return e->literal_addr(); 315 } 316 } 317 } 318 return NULL; 319 } 320 321 // Suggestion: Push unicode-based lookup all the way into the hashing 322 // and probing logic, so there is no need for convert_to_utf8 until 323 // an actual new Symbol* is created. 324 Symbol* SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) { 325 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length); 326 char stack_buf[128]; 327 if (utf8_length < (int) sizeof(stack_buf)) { 328 char* chars = stack_buf; 329 UNICODE::convert_to_utf8(name, utf16_length, chars); 330 return lookup(chars, utf8_length, THREAD); 331 } else { 332 ResourceMark rm(THREAD); 333 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);; 334 UNICODE::convert_to_utf8(name, utf16_length, chars); 335 return lookup(chars, utf8_length, THREAD); 336 } 337 } 338 339 Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length, 340 unsigned int& hash) { 341 int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length); 342 char stack_buf[128]; 343 if (utf8_length < (int) sizeof(stack_buf)) { 344 char* chars = stack_buf; 345 UNICODE::convert_to_utf8(name, utf16_length, chars); 346 return lookup_only(chars, utf8_length, hash); 347 } else { 348 ResourceMark rm; 349 char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);; 350 UNICODE::convert_to_utf8(name, utf16_length, chars); 351 return lookup_only(chars, utf8_length, hash); 352 } 353 } 354 355 void SymbolTable::add(ClassLoaderData* loader_data, constantPoolHandle cp, 356 int names_count, 357 const char** names, int* lengths, int* cp_indices, 358 unsigned int* hashValues, TRAPS) { 359 // Grab SymbolTable_lock first. 360 MutexLocker ml(SymbolTable_lock, THREAD); 361 362 SymbolTable* table = the_table(); 363 bool added = table->basic_add(loader_data, cp, names_count, names, lengths, 364 cp_indices, hashValues, CHECK); 365 if (!added) { 366 // do it the hard way 367 for (int i=0; i<names_count; i++) { 368 int index = table->hash_to_index(hashValues[i]); 369 bool c_heap = !loader_data->is_the_null_class_loader_data(); 370 Symbol* sym = table->basic_add(index, (u1*)names[i], lengths[i], hashValues[i], c_heap, CHECK); 371 cp->symbol_at_put(cp_indices[i], sym); 372 } 373 } 374 } 375 376 Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) { 377 unsigned int hash; 378 Symbol* result = SymbolTable::lookup_only((char*)name, (int)strlen(name), hash); 379 if (result != NULL) { 380 return result; 381 } 382 // Grab SymbolTable_lock first. 383 MutexLocker ml(SymbolTable_lock, THREAD); 384 385 SymbolTable* table = the_table(); 386 int index = table->hash_to_index(hash); 387 return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD); 388 } 389 390 Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len, 391 unsigned int hashValue_arg, bool c_heap, TRAPS) { 392 assert(!Universe::heap()->is_in_reserved(name), 393 "proposed name of symbol must be stable"); 394 395 // Don't allow symbols to be created which cannot fit in a Symbol*. 396 if (len > Symbol::max_length()) { 397 THROW_MSG_0(vmSymbols::java_lang_InternalError(), 398 "name is too long to represent"); 399 } 400 401 // Cannot hit a safepoint in this function because the "this" pointer can move. 402 No_Safepoint_Verifier nsv; 403 404 // Check if the symbol table has been rehashed, if so, need to recalculate 405 // the hash value and index. 406 unsigned int hashValue; 407 int index; 408 if (use_alternate_hashcode()) { 409 hashValue = hash_symbol((const char*)name, len); 410 index = hash_to_index(hashValue); 411 } else { 412 hashValue = hashValue_arg; 413 index = index_arg; 414 } 415 416 // Since look-up was done lock-free, we need to check if another 417 // thread beat us in the race to insert the symbol. 418 Symbol* test = lookup(index, (char*)name, len, hashValue); 419 if (test != NULL) { 420 // A race occurred and another thread introduced the symbol. 421 assert(test->refcount() != 0, "lookup should have incremented the count"); 422 return test; 423 } 424 425 // Create a new symbol. 426 Symbol* sym = allocate_symbol(name, len, c_heap, CHECK_NULL); 427 assert(sym->equals((char*)name, len), "symbol must be properly initialized"); 428 429 HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym); 430 add_entry(index, entry); 431 return sym; 432 } 433 434 // This version of basic_add adds symbols in batch from the constant pool 435 // parsing. 436 bool SymbolTable::basic_add(ClassLoaderData* loader_data, constantPoolHandle cp, 437 int names_count, 438 const char** names, int* lengths, 439 int* cp_indices, unsigned int* hashValues, 440 TRAPS) { 441 442 // Check symbol names are not too long. If any are too long, don't add any. 443 for (int i = 0; i< names_count; i++) { 444 if (lengths[i] > Symbol::max_length()) { 445 THROW_MSG_0(vmSymbols::java_lang_InternalError(), 446 "name is too long to represent"); 447 } 448 } 449 450 // Cannot hit a safepoint in this function because the "this" pointer can move. 451 No_Safepoint_Verifier nsv; 452 453 for (int i=0; i<names_count; i++) { 454 // Check if the symbol table has been rehashed, if so, need to recalculate 455 // the hash value. 456 unsigned int hashValue; 457 if (use_alternate_hashcode()) { 458 hashValue = hash_symbol(names[i], lengths[i]); 459 } else { 460 hashValue = hashValues[i]; 461 } 462 // Since look-up was done lock-free, we need to check if another 463 // thread beat us in the race to insert the symbol. 464 int index = hash_to_index(hashValue); 465 Symbol* test = lookup(index, names[i], lengths[i], hashValue); 466 if (test != NULL) { 467 // A race occurred and another thread introduced the symbol, this one 468 // will be dropped and collected. Use test instead. 469 cp->symbol_at_put(cp_indices[i], test); 470 assert(test->refcount() != 0, "lookup should have incremented the count"); 471 } else { 472 // Create a new symbol. The null class loader is never unloaded so these 473 // are allocated specially in a permanent arena. 474 bool c_heap = !loader_data->is_the_null_class_loader_data(); 475 Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false)); 476 assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be??? 477 HashtableEntry<Symbol*, mtSymbol>* entry = new_entry(hashValue, sym); 478 add_entry(index, entry); 479 cp->symbol_at_put(cp_indices[i], sym); 480 } 481 } 482 return true; 483 } 484 485 486 void SymbolTable::verify() { 487 for (int i = 0; i < the_table()->table_size(); ++i) { 488 HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i); 489 for ( ; p != NULL; p = p->next()) { 490 Symbol* s = (Symbol*)(p->literal()); 491 guarantee(s != NULL, "symbol is NULL"); 492 unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length()); 493 guarantee(p->hash() == h, "broken hash in symbol table entry"); 494 guarantee(the_table()->hash_to_index(h) == i, 495 "wrong index in symbol table"); 496 } 497 } 498 } 499 500 void SymbolTable::dump(outputStream* st) { 501 the_table()->dump_table(st, "SymbolTable"); 502 } 503 504 505 //--------------------------------------------------------------------------- 506 // Non-product code 507 508 #ifndef PRODUCT 509 510 void SymbolTable::print_histogram() { 511 MutexLocker ml(SymbolTable_lock); 512 const int results_length = 100; 513 int results[results_length]; 514 int i,j; 515 516 // initialize results to zero 517 for (j = 0; j < results_length; j++) { 518 results[j] = 0; 519 } 520 521 int total = 0; 522 int max_symbols = 0; 523 int out_of_range = 0; 524 int memory_total = 0; 525 int count = 0; 526 for (i = 0; i < the_table()->table_size(); i++) { 527 HashtableEntry<Symbol*, mtSymbol>* p = the_table()->bucket(i); 528 for ( ; p != NULL; p = p->next()) { 529 memory_total += p->literal()->size(); 530 count++; 531 int counter = p->literal()->utf8_length(); 532 total += counter; 533 if (counter < results_length) { 534 results[counter]++; 535 } else { 536 out_of_range++; 537 } 538 max_symbols = MAX2(max_symbols, counter); 539 } 540 } 541 tty->print_cr("Symbol Table:"); 542 tty->print_cr("Total number of symbols %5d", count); 543 tty->print_cr("Total size in memory %5dK", 544 (memory_total*HeapWordSize)/1024); 545 tty->print_cr("Total counted %5d", _symbols_counted); 546 tty->print_cr("Total removed %5d", _symbols_removed); 547 if (_symbols_counted > 0) { 548 tty->print_cr("Percent removed %3.2f", 549 ((float)_symbols_removed/(float)_symbols_counted)* 100); 550 } 551 tty->print_cr("Reference counts %5d", Symbol::_total_count); 552 tty->print_cr("Symbol arena size %5d used %5d", 553 arena()->size_in_bytes(), arena()->used()); 554 tty->print_cr("Histogram of symbol length:"); 555 tty->print_cr("%8s %5d", "Total ", total); 556 tty->print_cr("%8s %5d", "Maximum", max_symbols); 557 tty->print_cr("%8s %3.2f", "Average", 558 ((float) total / (float) the_table()->table_size())); 559 tty->print_cr("%s", "Histogram:"); 560 tty->print_cr(" %s %29s", "Length", "Number chains that length"); 561 for (i = 0; i < results_length; i++) { 562 if (results[i] > 0) { 563 tty->print_cr("%6d %10d", i, results[i]); 564 } 565 } 566 if (Verbose) { 567 int line_length = 70; 568 tty->print_cr("%s %30s", " Length", "Number chains that length"); 569 for (i = 0; i < results_length; i++) { 570 if (results[i] > 0) { 571 tty->print("%4d", i); 572 for (j = 0; (j < results[i]) && (j < line_length); j++) { 573 tty->print("%1s", "*"); 574 } 575 if (j == line_length) { 576 tty->print("%1s", "+"); 577 } 578 tty->cr(); 579 } 580 } 581 } 582 tty->print_cr(" %s %d: %d\n", "Number chains longer than", 583 results_length, out_of_range); 584 } 585 586 void SymbolTable::print() { 587 for (int i = 0; i < the_table()->table_size(); ++i) { 588 HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i); 589 HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i); 590 if (entry != NULL) { 591 while (entry != NULL) { 592 tty->print(PTR_FORMAT " ", entry->literal()); 593 entry->literal()->print(); 594 tty->print(" %d", entry->literal()->refcount()); 595 p = entry->next_addr(); 596 entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p); 597 } 598 tty->cr(); 599 } 600 } 601 } 602 #endif // PRODUCT 603 604 // -------------------------------------------------------------------------- 605 606 #ifdef ASSERT 607 class StableMemoryChecker : public StackObj { 608 enum { _bufsize = wordSize*4 }; 609 610 address _region; 611 jint _size; 612 u1 _save_buf[_bufsize]; 613 614 int sample(u1* save_buf) { 615 if (_size <= _bufsize) { 616 memcpy(save_buf, _region, _size); 617 return _size; 618 } else { 619 // copy head and tail 620 memcpy(&save_buf[0], _region, _bufsize/2); 621 memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2); 622 return (_bufsize/2)*2; 623 } 624 } 625 626 public: 627 StableMemoryChecker(const void* region, jint size) { 628 _region = (address) region; 629 _size = size; 630 sample(_save_buf); 631 } 632 633 bool verify() { 634 u1 check_buf[sizeof(_save_buf)]; 635 int check_size = sample(check_buf); 636 return (0 == memcmp(_save_buf, check_buf, check_size)); 637 } 638 639 void set_region(const void* region) { _region = (address) region; } 640 }; 641 #endif 642 643 644 // -------------------------------------------------------------------------- 645 StringTable* StringTable::_the_table = NULL; 646 647 bool StringTable::_needs_rehashing = false; 648 649 volatile int StringTable::_parallel_claimed_idx = 0; 650 651 // Pick hashing algorithm 652 unsigned int StringTable::hash_string(const jchar* s, int len) { 653 return use_alternate_hashcode() ? AltHashing::halfsiphash_32(seed(), s, len) : 654 java_lang_String::hash_code(s, len); 655 } 656 657 oop StringTable::lookup(int index, jchar* name, 658 int len, unsigned int hash) { 659 int count = 0; 660 for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) { 661 count++; 662 if (l->hash() == hash) { 663 if (java_lang_String::equals(l->literal(), name, len)) { 664 return l->literal(); 665 } 666 } 667 } 668 // If the bucket size is too deep check if this hash code is insufficient. 669 if (count >= rehash_count && !needs_rehashing()) { 670 _needs_rehashing = check_rehash_table(count); 671 } 672 return NULL; 673 } 674 675 676 oop StringTable::basic_add(int index_arg, Handle string, jchar* name, 677 int len, unsigned int hashValue_arg, TRAPS) { 678 679 assert(java_lang_String::equals(string(), name, len), 680 "string must be properly initialized"); 681 // Cannot hit a safepoint in this function because the "this" pointer can move. 682 No_Safepoint_Verifier nsv; 683 684 // Check if the symbol table has been rehashed, if so, need to recalculate 685 // the hash value and index before second lookup. 686 unsigned int hashValue; 687 int index; 688 if (use_alternate_hashcode()) { 689 hashValue = hash_string(name, len); 690 index = hash_to_index(hashValue); 691 } else { 692 hashValue = hashValue_arg; 693 index = index_arg; 694 } 695 696 // Since look-up was done lock-free, we need to check if another 697 // thread beat us in the race to insert the symbol. 698 699 oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int) 700 if (test != NULL) { 701 // Entry already added 702 return test; 703 } 704 705 HashtableEntry<oop, mtSymbol>* entry = new_entry(hashValue, string()); 706 add_entry(index, entry); 707 return string(); 708 } 709 710 711 oop StringTable::lookup(Symbol* symbol) { 712 ResourceMark rm; 713 int length; 714 jchar* chars = symbol->as_unicode(length); 715 return lookup(chars, length); 716 } 717 718 // Tell the GC that this string was looked up in the StringTable. 719 static void ensure_string_alive(oop string) { 720 // A lookup in the StringTable could return an object that was previously 721 // considered dead. The SATB part of G1 needs to get notified about this 722 // potential resurrection, otherwise the marking might not find the object. 723 #if INCLUDE_ALL_GCS 724 if (UseG1GC && string != NULL) { 725 G1SATBCardTableModRefBS::enqueue(string); 726 } 727 #endif 728 } 729 730 oop StringTable::lookup(jchar* name, int len) { 731 unsigned int hash = hash_string(name, len); 732 int index = the_table()->hash_to_index(hash); 733 oop string = the_table()->lookup(index, name, len, hash); 734 735 ensure_string_alive(string); 736 737 return string; 738 } 739 740 741 oop StringTable::intern(Handle string_or_null, jchar* name, 742 int len, TRAPS) { 743 unsigned int hashValue = hash_string(name, len); 744 int index = the_table()->hash_to_index(hashValue); 745 oop found_string = the_table()->lookup(index, name, len, hashValue); 746 747 // Found 748 if (found_string != NULL) { 749 ensure_string_alive(found_string); 750 return found_string; 751 } 752 753 debug_only(StableMemoryChecker smc(name, len * sizeof(name[0]))); 754 assert(!Universe::heap()->is_in_reserved(name), 755 "proposed name of symbol must be stable"); 756 757 Handle string; 758 // try to reuse the string if possible 759 if (!string_or_null.is_null()) { 760 string = string_or_null; 761 } else { 762 string = java_lang_String::create_from_unicode(name, len, CHECK_NULL); 763 } 764 765 #if INCLUDE_ALL_GCS 766 if (G1StringDedup::is_enabled()) { 767 // Deduplicate the string before it is interned. Note that we should never 768 // deduplicate a string after it has been interned. Doing so will counteract 769 // compiler optimizations done on e.g. interned string literals. 770 G1StringDedup::deduplicate(string()); 771 } 772 #endif 773 774 // Grab the StringTable_lock before getting the_table() because it could 775 // change at safepoint. 776 oop added_or_found; 777 { 778 MutexLocker ml(StringTable_lock, THREAD); 779 // Otherwise, add to symbol to table 780 added_or_found = the_table()->basic_add(index, string, name, len, 781 hashValue, CHECK_NULL); 782 } 783 784 ensure_string_alive(added_or_found); 785 786 return added_or_found; 787 } 788 789 oop StringTable::intern(Symbol* symbol, TRAPS) { 790 if (symbol == NULL) return NULL; 791 ResourceMark rm(THREAD); 792 int length; 793 jchar* chars = symbol->as_unicode(length); 794 Handle string; 795 oop result = intern(string, chars, length, CHECK_NULL); 796 return result; 797 } 798 799 800 oop StringTable::intern(oop string, TRAPS) 801 { 802 if (string == NULL) return NULL; 803 ResourceMark rm(THREAD); 804 int length; 805 Handle h_string (THREAD, string); 806 jchar* chars = java_lang_String::as_unicode_string(string, length, CHECK_NULL); 807 oop result = intern(h_string, chars, length, CHECK_NULL); 808 return result; 809 } 810 811 812 oop StringTable::intern(const char* utf8_string, TRAPS) { 813 if (utf8_string == NULL) return NULL; 814 ResourceMark rm(THREAD); 815 int length = UTF8::unicode_length(utf8_string); 816 jchar* chars = NEW_RESOURCE_ARRAY(jchar, length); 817 UTF8::convert_to_unicode(utf8_string, chars, length); 818 Handle string; 819 oop result = intern(string, chars, length, CHECK_NULL); 820 return result; 821 } 822 823 void StringTable::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) { 824 BucketUnlinkContext context; 825 buckets_unlink_or_oops_do(is_alive, f, 0, the_table()->table_size(), &context); 826 _the_table->bulk_free_entries(&context); 827 *processed = context._num_processed; 828 *removed = context._num_removed; 829 } 830 831 void StringTable::possibly_parallel_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int* processed, int* removed) { 832 // Readers of the table are unlocked, so we should only be removing 833 // entries at a safepoint. 834 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); 835 const int limit = the_table()->table_size(); 836 837 BucketUnlinkContext context; 838 for (;;) { 839 // Grab next set of buckets to scan 840 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize; 841 if (start_idx >= limit) { 842 // End of table 843 break; 844 } 845 846 int end_idx = MIN2(limit, start_idx + ClaimChunkSize); 847 buckets_unlink_or_oops_do(is_alive, f, start_idx, end_idx, &context); 848 } 849 _the_table->bulk_free_entries(&context); 850 *processed = context._num_processed; 851 *removed = context._num_removed; 852 } 853 854 void StringTable::buckets_oops_do(OopClosure* f, int start_idx, int end_idx) { 855 const int limit = the_table()->table_size(); 856 857 assert(0 <= start_idx && start_idx <= limit, 858 err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx)); 859 assert(0 <= end_idx && end_idx <= limit, 860 err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx)); 861 assert(start_idx <= end_idx, 862 err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT, 863 start_idx, end_idx)); 864 865 for (int i = start_idx; i < end_idx; i += 1) { 866 HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i); 867 while (entry != NULL) { 868 assert(!entry->is_shared(), "CDS not used for the StringTable"); 869 870 f->do_oop((oop*)entry->literal_addr()); 871 872 entry = entry->next(); 873 } 874 } 875 } 876 877 void StringTable::buckets_unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* f, int start_idx, int end_idx, BucketUnlinkContext* context) { 878 const int limit = the_table()->table_size(); 879 880 assert(0 <= start_idx && start_idx <= limit, 881 err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx)); 882 assert(0 <= end_idx && end_idx <= limit, 883 err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx)); 884 assert(start_idx <= end_idx, 885 err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT, 886 start_idx, end_idx)); 887 888 for (int i = start_idx; i < end_idx; ++i) { 889 HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i); 890 HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i); 891 while (entry != NULL) { 892 assert(!entry->is_shared(), "CDS not used for the StringTable"); 893 894 if (is_alive->do_object_b(entry->literal())) { 895 if (f != NULL) { 896 f->do_oop((oop*)entry->literal_addr()); 897 } 898 p = entry->next_addr(); 899 } else { 900 *p = entry->next(); 901 context->free_entry(entry); 902 } 903 context->_num_processed++; 904 entry = *p; 905 } 906 } 907 } 908 909 void StringTable::oops_do(OopClosure* f) { 910 buckets_oops_do(f, 0, the_table()->table_size()); 911 } 912 913 void StringTable::possibly_parallel_oops_do(OopClosure* f) { 914 const int limit = the_table()->table_size(); 915 916 for (;;) { 917 // Grab next set of buckets to scan 918 int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize; 919 if (start_idx >= limit) { 920 // End of table 921 break; 922 } 923 924 int end_idx = MIN2(limit, start_idx + ClaimChunkSize); 925 buckets_oops_do(f, start_idx, end_idx); 926 } 927 } 928 929 // This verification is part of Universe::verify() and needs to be quick. 930 // See StringTable::verify_and_compare() below for exhaustive verification. 931 void StringTable::verify() { 932 for (int i = 0; i < the_table()->table_size(); ++i) { 933 HashtableEntry<oop, mtSymbol>* p = the_table()->bucket(i); 934 for ( ; p != NULL; p = p->next()) { 935 oop s = p->literal(); 936 guarantee(s != NULL, "interned string is NULL"); 937 unsigned int h = java_lang_String::hash_string(s); 938 guarantee(p->hash() == h, "broken hash in string table entry"); 939 guarantee(the_table()->hash_to_index(h) == i, 940 "wrong index in string table"); 941 } 942 } 943 } 944 945 void StringTable::dump(outputStream* st) { 946 the_table()->dump_table(st, "StringTable"); 947 } 948 949 StringTable::VerifyRetTypes StringTable::compare_entries( 950 int bkt1, int e_cnt1, 951 HashtableEntry<oop, mtSymbol>* e_ptr1, 952 int bkt2, int e_cnt2, 953 HashtableEntry<oop, mtSymbol>* e_ptr2) { 954 // These entries are sanity checked by verify_and_compare_entries() 955 // before this function is called. 956 oop str1 = e_ptr1->literal(); 957 oop str2 = e_ptr2->literal(); 958 959 if (str1 == str2) { 960 tty->print_cr("ERROR: identical oop values (0x" PTR_FORMAT ") " 961 "in entry @ bucket[%d][%d] and entry @ bucket[%d][%d]", 962 (void *)str1, bkt1, e_cnt1, bkt2, e_cnt2); 963 return _verify_fail_continue; 964 } 965 966 if (java_lang_String::equals(str1, str2)) { 967 tty->print_cr("ERROR: identical String values in entry @ " 968 "bucket[%d][%d] and entry @ bucket[%d][%d]", 969 bkt1, e_cnt1, bkt2, e_cnt2); 970 return _verify_fail_continue; 971 } 972 973 return _verify_pass; 974 } 975 976 StringTable::VerifyRetTypes StringTable::verify_entry(int bkt, int e_cnt, 977 HashtableEntry<oop, mtSymbol>* e_ptr, 978 StringTable::VerifyMesgModes mesg_mode) { 979 980 VerifyRetTypes ret = _verify_pass; // be optimistic 981 982 oop str = e_ptr->literal(); 983 if (str == NULL) { 984 if (mesg_mode == _verify_with_mesgs) { 985 tty->print_cr("ERROR: NULL oop value in entry @ bucket[%d][%d]", bkt, 986 e_cnt); 987 } 988 // NULL oop means no more verifications are possible 989 return _verify_fail_done; 990 } 991 992 if (str->klass() != SystemDictionary::String_klass()) { 993 if (mesg_mode == _verify_with_mesgs) { 994 tty->print_cr("ERROR: oop is not a String in entry @ bucket[%d][%d]", 995 bkt, e_cnt); 996 } 997 // not a String means no more verifications are possible 998 return _verify_fail_done; 999 } 1000 1001 unsigned int h = java_lang_String::hash_string(str); 1002 if (e_ptr->hash() != h) { 1003 if (mesg_mode == _verify_with_mesgs) { 1004 tty->print_cr("ERROR: broken hash value in entry @ bucket[%d][%d], " 1005 "bkt_hash=%d, str_hash=%d", bkt, e_cnt, e_ptr->hash(), h); 1006 } 1007 ret = _verify_fail_continue; 1008 } 1009 1010 if (the_table()->hash_to_index(h) != bkt) { 1011 if (mesg_mode == _verify_with_mesgs) { 1012 tty->print_cr("ERROR: wrong index value for entry @ bucket[%d][%d], " 1013 "str_hash=%d, hash_to_index=%d", bkt, e_cnt, h, 1014 the_table()->hash_to_index(h)); 1015 } 1016 ret = _verify_fail_continue; 1017 } 1018 1019 return ret; 1020 } 1021 1022 // See StringTable::verify() above for the quick verification that is 1023 // part of Universe::verify(). This verification is exhaustive and 1024 // reports on every issue that is found. StringTable::verify() only 1025 // reports on the first issue that is found. 1026 // 1027 // StringTable::verify_entry() checks: 1028 // - oop value != NULL (same as verify()) 1029 // - oop value is a String 1030 // - hash(String) == hash in entry (same as verify()) 1031 // - index for hash == index of entry (same as verify()) 1032 // 1033 // StringTable::compare_entries() checks: 1034 // - oops are unique across all entries 1035 // - String values are unique across all entries 1036 // 1037 int StringTable::verify_and_compare_entries() { 1038 assert(StringTable_lock->is_locked(), "sanity check"); 1039 1040 int fail_cnt = 0; 1041 1042 // first, verify all the entries individually: 1043 for (int bkt = 0; bkt < the_table()->table_size(); bkt++) { 1044 HashtableEntry<oop, mtSymbol>* e_ptr = the_table()->bucket(bkt); 1045 for (int e_cnt = 0; e_ptr != NULL; e_ptr = e_ptr->next(), e_cnt++) { 1046 VerifyRetTypes ret = verify_entry(bkt, e_cnt, e_ptr, _verify_with_mesgs); 1047 if (ret != _verify_pass) { 1048 fail_cnt++; 1049 } 1050 } 1051 } 1052 1053 // Optimization: if the above check did not find any failures, then 1054 // the comparison loop below does not need to call verify_entry() 1055 // before calling compare_entries(). If there were failures, then we 1056 // have to call verify_entry() to see if the entry can be passed to 1057 // compare_entries() safely. When we call verify_entry() in the loop 1058 // below, we do so quietly to void duplicate messages and we don't 1059 // increment fail_cnt because the failures have already been counted. 1060 bool need_entry_verify = (fail_cnt != 0); 1061 1062 // second, verify all entries relative to each other: 1063 for (int bkt1 = 0; bkt1 < the_table()->table_size(); bkt1++) { 1064 HashtableEntry<oop, mtSymbol>* e_ptr1 = the_table()->bucket(bkt1); 1065 for (int e_cnt1 = 0; e_ptr1 != NULL; e_ptr1 = e_ptr1->next(), e_cnt1++) { 1066 if (need_entry_verify) { 1067 VerifyRetTypes ret = verify_entry(bkt1, e_cnt1, e_ptr1, 1068 _verify_quietly); 1069 if (ret == _verify_fail_done) { 1070 // cannot use the current entry to compare against other entries 1071 continue; 1072 } 1073 } 1074 1075 for (int bkt2 = bkt1; bkt2 < the_table()->table_size(); bkt2++) { 1076 HashtableEntry<oop, mtSymbol>* e_ptr2 = the_table()->bucket(bkt2); 1077 int e_cnt2; 1078 for (e_cnt2 = 0; e_ptr2 != NULL; e_ptr2 = e_ptr2->next(), e_cnt2++) { 1079 if (bkt1 == bkt2 && e_cnt2 <= e_cnt1) { 1080 // skip the entries up to and including the one that 1081 // we're comparing against 1082 continue; 1083 } 1084 1085 if (need_entry_verify) { 1086 VerifyRetTypes ret = verify_entry(bkt2, e_cnt2, e_ptr2, 1087 _verify_quietly); 1088 if (ret == _verify_fail_done) { 1089 // cannot compare against this entry 1090 continue; 1091 } 1092 } 1093 1094 // compare two entries, report and count any failures: 1095 if (compare_entries(bkt1, e_cnt1, e_ptr1, bkt2, e_cnt2, e_ptr2) 1096 != _verify_pass) { 1097 fail_cnt++; 1098 } 1099 } 1100 } 1101 } 1102 } 1103 return fail_cnt; 1104 } 1105 1106 // Create a new table and using alternate hash code, populate the new table 1107 // with the existing strings. Set flag to use the alternate hash code afterwards. 1108 void StringTable::rehash_table() { 1109 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); 1110 // This should never happen with -Xshare:dump but it might in testing mode. 1111 if (DumpSharedSpaces) return; 1112 StringTable* new_table = new StringTable(); 1113 1114 // Rehash the table 1115 the_table()->move_to(new_table); 1116 1117 // Delete the table and buckets (entries are reused in new table). 1118 delete _the_table; 1119 // Don't check if we need rehashing until the table gets unbalanced again. 1120 // Then rehash with a new global seed. 1121 _needs_rehashing = false; 1122 _the_table = new_table; 1123 }