1 /*
  2  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 #include "precompiled.hpp"
 26 #include "cds/archiveBuilder.hpp"
 27 #include "cds/cdsConfig.hpp"
 28 #include "cds/dynamicArchive.hpp"
 29 #include "classfile/altHashing.hpp"
 30 #include "classfile/classLoaderData.hpp"
 31 #include "classfile/compactHashtable.hpp"
 32 #include "classfile/javaClasses.hpp"
 33 #include "classfile/symbolTable.hpp"
 34 #include "memory/allocation.inline.hpp"
 35 #include "memory/metaspaceClosure.hpp"
 36 #include "memory/resourceArea.hpp"
 37 #include "oops/oop.inline.hpp"
 38 #include "runtime/atomic.hpp"
 39 #include "runtime/interfaceSupport.inline.hpp"
 40 #include "runtime/timerTrace.hpp"
 41 #include "runtime/trimNativeHeap.hpp"
 42 #include "services/diagnosticCommand.hpp"
 43 #include "utilities/concurrentHashTable.inline.hpp"
 44 #include "utilities/concurrentHashTableTasks.inline.hpp"
 45 #include "utilities/utf8.hpp"
 46 
// Preferred average bucket chain length. The load factor (items / table size)
// is compared against this value to decide whether concurrent work should be
// triggered and whether the table should grow.
// We used to not resize at all, so let's be conservative
// and not set it too short before we decide to resize,
// to match previous startup behavior
const double PREF_AVG_LIST_LEN = 8.0;
// Upper size bound (as a log2 value) handed to the SymbolTableHash constructor.
// 2^24 is max size, like StringTable.
const size_t END_SIZE = 24;
// Handed to the SymbolTableHash constructor; presumably the chain length at
// which the table reports that rehashing is advisable.
// If a chain gets to 100 something might be wrong
const size_t REHASH_LEN = 100;

// Size in bytes of the on-stack buffer used for UTF-16 -> UTF-8 conversion;
// longer results are placed in a resource-area allocation instead.
const size_t ON_STACK_BUFFER_LENGTH = 128;
 57 
 58 // --------------------------------------------------------------------------
 59 
 60 inline bool symbol_equals_compact_hashtable_entry(Symbol* value, const char* key, int len) {
 61   if (value->equals(key, len)) {
 62     return true;
 63   } else {
 64     return false;
 65   }
 66 }
 67 
// Compact hashtables holding archived (CDS) symbols, looked up by UTF-8 name.
//  - _shared_table:             header is read here for the static archive
//  - _dynamic_shared_table:     header is read here for the dynamic archive
//  - _shared_table_for_dumping: table that is filled and serialized when writing
static OffsetCompactHashtable<
  const char*, Symbol*,
  symbol_equals_compact_hashtable_entry
> _shared_table, _dynamic_shared_table, _shared_table_for_dumping;
 72 
 73 // --------------------------------------------------------------------------
 74 
// The runtime (non-archived) symbol table; created in SymbolTable::create_table()
// and replaced wholesale by SymbolTable::rehash_table().
typedef ConcurrentHashTable<SymbolTableConfig, mtSymbol> SymbolTableHash;
static SymbolTableHash* _local_table = nullptr;

// _has_work: set under Service_lock by trigger_cleanup(), cleared by
// do_concurrent_work() once the requested work has been carried out.
volatile bool SymbolTable::_has_work = 0;
// _needs_rehashing: set when a table operation reported a rehash warning.
volatile bool SymbolTable::_needs_rehashing = false;

// For statistics
static size_t _symbols_removed = 0;
static size_t _symbols_counted = 0;
// Cached bucket count (a power of two); refreshed after the table grows.
static size_t _current_size = 0;

// Number of nodes currently allocated for the runtime table.
static volatile size_t _items_count = 0;
// Hint that dead entries were observed and a cleaning pass is worthwhile.
static volatile bool   _has_items_to_clean = false;


// True once the table has switched to the seeded alternate hash function.
static volatile bool _alt_hash = false;

#ifdef USE_LIBRARY_BASED_TLS_ONLY
static volatile bool _lookup_shared_first = false;
#else
// "_lookup_shared_first" can get highly contended with many cores if multiple threads
// are updating "lookup success history" in a global shared variable. If built-in TLS is available, use it.
static THREAD_LOCAL bool _lookup_shared_first = false;
#endif

// Static arena for symbols that are not deallocated
Arena* SymbolTable::_arena = nullptr;

// Set once rehash_table() has run; maybe_rehash_table() refuses a second rehash.
static bool _rehashed = false;
// Seed for the alternate hash function; computed in rehash_table().
static uint64_t _alt_hash_seed = 0;
105 
// Trace-logs `msg` followed by the symbol's quoted-ASCII form in brackets.
// The whole body is compiled out in PRODUCT builds, making this a no-op there.
static inline void log_trace_symboltable_helper(Symbol* sym, const char* msg) {
#ifndef PRODUCT
  // as_quoted_ascii() presumably allocates in the resource area; the mark
  // releases that on return.
  ResourceMark rm;
  log_trace(symboltable)("%s [%s]", msg, sym->as_quoted_ascii());
#endif // PRODUCT
}
112 
113 // Pick hashing algorithm.
114 static unsigned int hash_symbol(const char* s, int len, bool useAlt) {
115   return useAlt ?
116   AltHashing::halfsiphash_32(_alt_hash_seed, (const uint8_t*)s, len) :
117   java_lang_String::hash_code((const jbyte*)s, len);
118 }
119 
120 #if INCLUDE_CDS
121 static unsigned int hash_shared_symbol(const char* s, int len) {
122   return java_lang_String::hash_code((const jbyte*)s, len);
123 }
124 #endif
125 
// Policy class plugged into ConcurrentHashTable for the runtime symbol table.
// It supplies the stored value type, the hash function, and node
// allocation/deallocation that keeps SymbolTable's item counters up to date.
class SymbolTableConfig : public AllStatic {

public:
  typedef Symbol Value;  // value of the Node in the hashtable

  // Returns the hash of `value` under the currently active hash function.
  // A symbol whose refcount is zero is reported through *is_dead and hashes to 0.
  static uintx get_hash(Value const& value, bool* is_dead) {
    *is_dead = (value.refcount() == 0);
    if (*is_dead) {
      return 0;
    } else {
      return hash_symbol((const char*)value.bytes(), value.utf8_length(), _alt_hash);
    }
  }
  // We use default allocation/deallocation but counted
  // Counts the new item, then allocates storage for a node holding `value`.
  // `context` and `size` are not used; the size is derived from the symbol.
  static void* allocate_node(void* context, size_t size, Value const& value) {
    SymbolTable::item_added();
    return allocate_node_impl(size, value);
  }
  // Releases the storage of a node and counts the removal. While dumping a
  // static CDS archive nothing is released and the removal is not counted.
  static void free_node(void* context, void* memory, Value & value) {
    // We get here because #1 some threads lost a race to insert a newly created Symbol
    // or #2 we're cleaning up unused symbol.
    // If #1, then the symbol can be either permanent,
    // or regular newly created one (refcount==1)
    // If #2, then the symbol is dead (refcount==0)
    assert(value.is_permanent() || (value.refcount() == 1) || (value.refcount() == 0),
           "refcount %d", value.refcount());
#if INCLUDE_CDS
    if (CDSConfig::is_dumping_static_archive()) {
      // We have allocated with MetaspaceShared::symbol_space_alloc(). No deallocation is needed.
      // Unreferenced Symbols will not be copied into the archive.
      return;
    }
#endif
    // Case #1 with a regular symbol: drop the creator's reference so the
    // symbol is dead before its memory is released.
    if (value.refcount() == 1) {
      value.decrement_refcount();
      assert(value.refcount() == 0, "expected dead symbol");
    }
    if (value.refcount() != PERM_REFCOUNT) {
      // Regular symbols live on the C heap.
      FreeHeap(memory);
    } else {
      // Permanent symbols live in the global arena.
      MutexLocker ml(SymbolArena_lock, Mutex::_no_safepoint_check_flag); // Protect arena
      // Deleting permanent symbol should not occur very often (insert race condition),
      // so log it.
      log_trace_symboltable_helper(&value, "Freeing permanent symbol");
      size_t alloc_size = SymbolTableHash::get_dynamic_node_size(value.byte_size());
      if (!SymbolTable::arena()->Afree(memory, alloc_size)) {
        // Can't access the symbol after Afree, but we just printed it above.
        NOT_PRODUCT(log_trace(symboltable)(" - Leaked permanent symbol");)
      }
    }
    SymbolTable::item_removed();
  }

private:
  // Chooses the backing store for a node: the CDS symbol space while dumping a
  // static archive, the C heap for regular symbols, the global arena for
  // permanent ones. The `size` argument is ignored; the node size is computed
  // from the symbol's byte size.
  static void* allocate_node_impl(size_t size, Value const& value) {
    size_t alloc_size = SymbolTableHash::get_dynamic_node_size(value.byte_size());
#if INCLUDE_CDS
    if (CDSConfig::is_dumping_static_archive()) {
      MutexLocker ml(DumpRegion_lock, Mutex::_no_safepoint_check_flag);
      // To get deterministic output from -Xshare:dump, we ensure that Symbols are allocated in
      // increasing addresses. When the symbols are copied into the archive, we preserve their
      // relative address order (sorted, see ArchiveBuilder::gather_klasses_and_symbols).
      //
      // We cannot use arena because arena chunks are allocated by the OS. As a result, for example,
      // the archived symbol of "java/lang/Object" may sometimes be lower than "java/lang/String", and
      // sometimes be higher. This would cause non-deterministic contents in the archive.
      // `last` exists only in debug builds and is used solely by the assert below.
      DEBUG_ONLY(static void* last = nullptr);
      void* p = (void*)MetaspaceShared::symbol_space_alloc(alloc_size);
      assert(p > last, "must increase monotonically");
      DEBUG_ONLY(last = p);
      return p;
    }
#endif
    if (value.refcount() != PERM_REFCOUNT) {
      return AllocateHeap(alloc_size, mtSymbol);
    } else {
      // Allocate to global arena
      MutexLocker ml(SymbolArena_lock, Mutex::_no_safepoint_check_flag); // Protect arena
      return SymbolTable::arena()->Amalloc(alloc_size);
    }
  }
};
208 
209 void SymbolTable::create_table ()  {
210   size_t start_size_log_2 = ceil_log2(SymbolTableSize);
211   _current_size = ((size_t)1) << start_size_log_2;
212   log_trace(symboltable)("Start size: " SIZE_FORMAT " (" SIZE_FORMAT ")",
213                          _current_size, start_size_log_2);
214   _local_table = new SymbolTableHash(start_size_log_2, END_SIZE, REHASH_LEN, true);
215 
216   // Initialize the arena for global symbols, size passed in depends on CDS.
217   if (symbol_alloc_arena_size == 0) {
218     _arena = new (mtSymbol) Arena(mtSymbol);
219   } else {
220     _arena = new (mtSymbol) Arena(mtSymbol, Arena::Tag::tag_other, symbol_alloc_arena_size);
221   }
222 }
223 
224 void SymbolTable::reset_has_items_to_clean() { Atomic::store(&_has_items_to_clean, false); }
225 void SymbolTable::mark_has_items_to_clean()  { Atomic::store(&_has_items_to_clean, true); }
226 bool SymbolTable::has_items_to_clean()       { return Atomic::load(&_has_items_to_clean); }
227 
// Atomically bumps the count of nodes in the runtime table; called from
// SymbolTableConfig::allocate_node().
void SymbolTable::item_added() {
  Atomic::inc(&_items_count);
}
231 
232 void SymbolTable::item_removed() {
233   Atomic::inc(&(_symbols_removed));
234   Atomic::dec(&_items_count);
235 }
236 
237 double SymbolTable::get_load_factor() {
238   return (double)_items_count/(double)_current_size;
239 }
240 
241 size_t SymbolTable::table_size() {
242   return ((size_t)1) << _local_table->get_size_log2(Thread::current());
243 }
244 
245 bool SymbolTable::has_work() { return Atomic::load_acquire(&_has_work); }
246 
247 void SymbolTable::trigger_cleanup() {
248   // Avoid churn on ServiceThread
249   if (!has_work()) {
250     MutexLocker ml(Service_lock, Mutex::_no_safepoint_check_flag);
251     _has_work = true;
252     Service_lock->notify_all();
253   }
254 }
255 
256 class SymbolsDo : StackObj {
257   SymbolClosure *_cl;
258 public:
259   SymbolsDo(SymbolClosure *cl) : _cl(cl) {}
260   bool operator()(Symbol* value) {
261     assert(value != nullptr, "expected valid value");
262     _cl->do_symbol(&value);
263     return true;
264   };
265 };
266 
267 class SharedSymbolIterator {
268   SymbolClosure* _symbol_closure;
269 public:
270   SharedSymbolIterator(SymbolClosure* f) : _symbol_closure(f) {}
271   void do_value(Symbol* symbol) {
272     _symbol_closure->do_symbol(&symbol);
273   }
274 };
275 
276 // Call function for all symbols in the symbol table.
277 void SymbolTable::symbols_do(SymbolClosure *cl) {
278   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint");
279   // all symbols from shared table
280   SharedSymbolIterator iter(cl);
281   _shared_table.iterate(&iter);
282   _dynamic_shared_table.iterate(&iter);
283 
284   // all symbols from the dynamic table
285   SymbolsDo sd(cl);
286   _local_table->do_safepoint_scan(sd);
287 }
288 
289 // Call function for all symbols in shared table. Used by -XX:+PrintSharedArchiveAndExit
290 void SymbolTable::shared_symbols_do(SymbolClosure *cl) {
291   SharedSymbolIterator iter(cl);
292   _shared_table.iterate(&iter);
293   _dynamic_shared_table.iterate(&iter);
294 }
295 
296 Symbol* SymbolTable::lookup_dynamic(const char* name,
297                                     int len, unsigned int hash) {
298   Symbol* sym = do_lookup(name, len, hash);
299   assert((sym == nullptr) || sym->refcount() != 0, "refcount must not be zero");
300   return sym;
301 }
302 
303 #if INCLUDE_CDS
304 Symbol* SymbolTable::lookup_shared(const char* name,
305                                    int len, unsigned int hash) {
306   Symbol* sym = nullptr;
307   if (!_shared_table.empty()) {
308     if (_alt_hash) {
309       // hash_code parameter may use alternate hashing algorithm but the shared table
310       // always uses the same original hash code.
311       hash = hash_shared_symbol(name, len);
312     }
313     sym = _shared_table.lookup(name, hash, len);
314     if (sym == nullptr && DynamicArchive::is_mapped()) {
315       sym = _dynamic_shared_table.lookup(name, hash, len);
316     }
317   }
318   return sym;
319 }
320 #endif
321 
322 Symbol* SymbolTable::lookup_common(const char* name,
323                             int len, unsigned int hash) {
324   Symbol* sym;
325   if (_lookup_shared_first) {
326     sym = lookup_shared(name, len, hash);
327     if (sym == nullptr) {
328       _lookup_shared_first = false;
329       sym = lookup_dynamic(name, len, hash);
330     }
331   } else {
332     sym = lookup_dynamic(name, len, hash);
333     if (sym == nullptr) {
334       sym = lookup_shared(name, len, hash);
335       if (sym != nullptr) {
336         _lookup_shared_first = true;
337       }
338     }
339   }
340   return sym;
341 }
342 
343 // Symbols should represent entities from the constant pool that are
344 // limited to <64K in length, but usage errors creep in allowing Symbols
345 // to be used for arbitrary strings. For debug builds we will assert if
346 // a string is too long, whereas product builds will truncate it.
347 static int check_length(const char* name, int len) {
348   assert(len >= 0, "negative length %d suggests integer overflow in the caller", len);
349   assert(len <= Symbol::max_length(),
350          "String length %d exceeds the maximum Symbol length of %d", len, Symbol::max_length());
351   if (len > Symbol::max_length()) {
352     warning("A string \"%.80s ... %.80s\" exceeds the maximum Symbol "
353             "length of %d and has been truncated", name, (name + len - 80), Symbol::max_length());
354     len = Symbol::max_length();
355   }
356   return len;
357 }
358 
359 Symbol* SymbolTable::new_symbol(const char* name, int len) {
360   len = check_length(name, len);
361   unsigned int hash = hash_symbol(name, len, _alt_hash);
362   Symbol* sym = lookup_common(name, len, hash);
363   if (sym == nullptr) {
364     sym = do_add_if_needed(name, len, hash, /* is_permanent */ false);
365   }
366   assert(sym->refcount() != 0, "lookup should have incremented the count");
367   assert(sym->equals(name, len), "symbol must be properly initialized");
368   return sym;
369 }
370 
371 Symbol* SymbolTable::new_symbol(const Symbol* sym, int begin, int end) {
372   assert(begin <= end && end <= sym->utf8_length(), "just checking");
373   assert(sym->refcount() != 0, "require a valid symbol");
374   const char* name = (const char*)sym->base() + begin;
375   int len = end - begin;
376   assert(len <= Symbol::max_length(), "sanity");
377   unsigned int hash = hash_symbol(name, len, _alt_hash);
378   Symbol* found = lookup_common(name, len, hash);
379   if (found == nullptr) {
380     found = do_add_if_needed(name, len, hash, /* is_permanent */ false);
381   }
382   return found;
383 }
384 
385 class SymbolTableLookup : StackObj {
386 private:
387   uintx _hash;
388   int _len;
389   const char* _str;
390 public:
391   SymbolTableLookup(const char* key, int len, uintx hash)
392   : _hash(hash), _len(len), _str(key) {}
393   uintx get_hash() const {
394     return _hash;
395   }
396   // Note: When equals() returns "true", the symbol's refcount is incremented. This is
397   // needed to ensure that the symbol is kept alive before equals() returns to the caller,
398   // so that another thread cannot clean the symbol up concurrently. The caller is
399   // responsible for decrementing the refcount, when the symbol is no longer needed.
400   bool equals(Symbol* value) {
401     assert(value != nullptr, "expected valid value");
402     Symbol *sym = value;
403     if (sym->equals(_str, _len)) {
404       if (sym->try_increment_refcount()) {
405         // something is referencing this symbol now.
406         return true;
407       } else {
408         assert(sym->refcount() == 0, "expected dead symbol");
409         return false;
410       }
411     } else {
412       return false;
413     }
414   }
415   bool is_dead(Symbol* value) {
416     return value->refcount() == 0;
417   }
418 };
419 
420 class SymbolTableGet : public StackObj {
421   Symbol* _return;
422 public:
423   SymbolTableGet() : _return(nullptr) {}
424   void operator()(Symbol* value) {
425     assert(value != nullptr, "expected valid value");
426     _return = value;
427   }
428   Symbol* get_res_sym() const {
429     return _return;
430   }
431 };
432 
433 void SymbolTable::update_needs_rehash(bool rehash) {
434   if (rehash) {
435     _needs_rehashing = true;
436     trigger_cleanup();
437   }
438 }
439 
440 Symbol* SymbolTable::do_lookup(const char* name, int len, uintx hash) {
441   Thread* thread = Thread::current();
442   SymbolTableLookup lookup(name, len, hash);
443   SymbolTableGet stg;
444   bool rehash_warning = false;
445   _local_table->get(thread, lookup, stg, &rehash_warning);
446   update_needs_rehash(rehash_warning);
447   Symbol* sym = stg.get_res_sym();
448   assert((sym == nullptr) || sym->refcount() != 0, "found dead symbol");
449   return sym;
450 }
451 
452 Symbol* SymbolTable::lookup_only(const char* name, int len, unsigned int& hash) {
453   hash = hash_symbol(name, len, _alt_hash);
454   return lookup_common(name, len, hash);
455 }
456 
457 // Suggestion: Push unicode-based lookup all the way into the hashing
458 // and probing logic, so there is no need for convert_to_utf8 until
459 // an actual new Symbol* is created.
460 Symbol* SymbolTable::new_symbol(const jchar* name, int utf16_length) {
461   size_t utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
462   char stack_buf[ON_STACK_BUFFER_LENGTH];
463   if (utf8_length < sizeof(stack_buf)) {
464     char* chars = stack_buf;
465     UNICODE::convert_to_utf8(name, utf16_length, chars);
466     return new_symbol(chars, checked_cast<int>(utf8_length));
467   } else {
468     ResourceMark rm;
469     char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);
470     UNICODE::convert_to_utf8(name, utf16_length, chars);
471     return new_symbol(chars, checked_cast<int>(utf8_length));
472   }
473 }
474 
475 Symbol* SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
476                                          unsigned int& hash) {
477   size_t utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
478   char stack_buf[ON_STACK_BUFFER_LENGTH];
479   if (utf8_length < sizeof(stack_buf)) {
480     char* chars = stack_buf;
481     UNICODE::convert_to_utf8(name, utf16_length, chars);
482     return lookup_only(chars, checked_cast<int>(utf8_length), hash);
483   } else {
484     ResourceMark rm;
485     char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);
486     UNICODE::convert_to_utf8(name, utf16_length, chars);
487     return lookup_only(chars, checked_cast<int>(utf8_length), hash);
488   }
489 }
490 
491 void SymbolTable::new_symbols(ClassLoaderData* loader_data, const constantPoolHandle& cp,
492                               int names_count, const char** names, int* lengths,
493                               int* cp_indices, unsigned int* hashValues) {
494   // Note that is_permanent will be false for non-strong hidden classes.
495   // even if their loader is the boot loader because they will have a different cld.
496   bool is_permanent = loader_data->is_the_null_class_loader_data();
497   for (int i = 0; i < names_count; i++) {
498     const char *name = names[i];
499     int len = lengths[i];
500     assert(len <= Symbol::max_length(), "must be - these come from the constant pool");
501     unsigned int hash = hashValues[i];
502     assert(lookup_shared(name, len, hash) == nullptr, "must have checked already");
503     Symbol* sym = do_add_if_needed(name, len, hash, is_permanent);
504     assert(sym->refcount() != 0, "lookup should have incremented the count");
505     cp->symbol_at_put(cp_indices[i], sym);
506   }
507 }
508 
// Inserts a symbol with the given bytes into the runtime table unless an equal
// live symbol is already there, and returns the symbol that ends up in the
// table. The returned symbol carries a reference owned by the caller (or is
// permanent).
//   name, len    - UTF-8 bytes of the symbol; len must not exceed Symbol::max_length()
//   hash         - hash of the bytes under the currently active hash function
//   is_permanent - create the symbol with PERM_REFCOUNT instead of refcount 1
Symbol* SymbolTable::do_add_if_needed(const char* name, int len, uintx hash, bool is_permanent) {
  assert(len <= Symbol::max_length(), "caller should have ensured this");
  SymbolTableLookup lookup(name, len, hash);
  SymbolTableGet stg;
  bool clean_hint = false;
  bool rehash_warning = false;
  Thread* current = Thread::current();
  Symbol* sym;

  // Build a temporary Symbol in the resource area; it serves as the value
  // handed to insert(). While dumping a static archive every symbol is created
  // with PERM_REFCOUNT.
  ResourceMark rm(current);
  const int alloc_size = Symbol::byte_size(len);
  u1* u1_buf = NEW_RESOURCE_ARRAY_IN_THREAD(current, u1, alloc_size);
  Symbol* tmp = ::new ((void*)u1_buf) Symbol((const u1*)name, len,
                                             (is_permanent || CDSConfig::is_dumping_static_archive()) ? PERM_REFCOUNT : 1);

  do {
    if (_local_table->insert(current, lookup, *tmp, &rehash_warning, &clean_hint)) {
      // Our insert succeeded; fetch the table's own copy of the symbol.
      if (_local_table->get(current, lookup, stg, &rehash_warning)) {
        sym = stg.get_res_sym();
        // The get adds one to ref count, but we inserted with our ref already included.
        // Therefore decrement with one.
        if (sym->refcount() != PERM_REFCOUNT) {
          sym->decrement_refcount();
        }
        break;
      }
    }

    // In case another thread did a concurrent add, return value already in the table.
    // This could fail if the symbol got deleted concurrently, so loop back until success.
    if (_local_table->get(current, lookup, stg, &rehash_warning)) {
      // The lookup added a refcount, which is ours.
      sym = stg.get_res_sym();
      break;
    }
  } while(true);

  update_needs_rehash(rehash_warning);

  // The table hinted at dead entries during the insert: remember that and see
  // whether concurrent work should be requested.
  if (clean_hint) {
    mark_has_items_to_clean();
    check_concurrent_work();
  }

  assert((sym == nullptr) || sym->refcount() != 0, "found dead symbol");
  return sym;
}
556 
557 Symbol* SymbolTable::new_permanent_symbol(const char* name) {
558   unsigned int hash = 0;
559   int len = check_length(name, (int)strlen(name));
560   Symbol* sym = SymbolTable::lookup_only(name, len, hash);
561   if (sym == nullptr) {
562     sym = do_add_if_needed(name, len, hash, /* is_permanent */ true);
563   }
564   if (!sym->is_permanent()) {
565     sym->make_permanent();
566     log_trace_symboltable_helper(sym, "Asked for a permanent symbol, but got a regular one");
567   }
568   return sym;
569 }
570 
571 struct SizeFunc : StackObj {
572   size_t operator()(Symbol* value) {
573     assert(value != nullptr, "expected valid value");
574     return (value)->size() * HeapWordSize;
575   };
576 };
577 
578 TableStatistics SymbolTable::get_table_statistics() {
579   static TableStatistics ts;
580   SizeFunc sz;
581   ts = _local_table->statistics_get(Thread::current(), sz, ts);
582   return ts;
583 }
584 
585 void SymbolTable::print_table_statistics(outputStream* st) {
586   SizeFunc sz;
587   _local_table->statistics_to(Thread::current(), sz, st, "SymbolTable");
588 
589   if (!_shared_table.empty()) {
590     _shared_table.print_table_statistics(st, "Shared Symbol Table");
591   }
592 
593   if (!_dynamic_shared_table.empty()) {
594     _dynamic_shared_table.print_table_statistics(st, "Dynamic Shared Symbol Table");
595   }
596 }
597 
598 // Verification
599 class VerifySymbols : StackObj {
600 public:
601   bool operator()(Symbol* value) {
602     guarantee(value != nullptr, "expected valid value");
603     Symbol* sym = value;
604     guarantee(sym->equals((const char*)sym->bytes(), sym->utf8_length()),
605               "symbol must be internally consistent");
606     return true;
607   };
608 };
609 
610 void SymbolTable::verify() {
611   Thread* thr = Thread::current();
612   VerifySymbols vs;
613   if (!_local_table->try_scan(thr, vs)) {
614     log_info(symboltable)("verify unavailable at this moment");
615   }
616 }
617 
618 static void print_symbol(outputStream* st, Symbol* sym) {
619   const char* utf8_string = (const char*)sym->bytes();
620   int utf8_length = sym->utf8_length();
621   st->print("%d %d: ", utf8_length, sym->refcount());
622   HashtableTextDump::put_utf8(st, utf8_string, utf8_length);
623   st->cr();
624 }
625 
626 // Dumping
627 class DumpSymbol : StackObj {
628   Thread* _thr;
629   outputStream* _st;
630 public:
631   DumpSymbol(Thread* thr, outputStream* st) : _thr(thr), _st(st) {}
632   bool operator()(Symbol* value) {
633     assert(value != nullptr, "expected valid value");
634     print_symbol(_st, value);
635     return true;
636   };
637 };
638 
639 class DumpSharedSymbol : StackObj {
640   outputStream* _st;
641 public:
642   DumpSharedSymbol(outputStream* st) : _st(st) {}
643   void do_value(Symbol* value) {
644     assert(value != nullptr, "value should point to a symbol");
645     print_symbol(_st, value);
646   };
647 };
648 
649 void SymbolTable::dump(outputStream* st, bool verbose) {
650   if (!verbose) {
651     print_table_statistics(st);
652   } else {
653     Thread* thr = Thread::current();
654     ResourceMark rm(thr);
655     st->print_cr("VERSION: 1.1");
656     DumpSymbol ds(thr, st);
657     if (!_local_table->try_scan(thr, ds)) {
658       log_info(symboltable)("dump unavailable at this moment");
659     }
660     if (!_shared_table.empty()) {
661       st->print_cr("#----------------");
662       st->print_cr("# Shared symbols:");
663       st->print_cr("#----------------");
664       DumpSharedSymbol dss(st);
665       _shared_table.iterate(&dss);
666     }
667     if (!_dynamic_shared_table.empty()) {
668       st->print_cr("#------------------------");
669       st->print_cr("# Dynamic shared symbols:");
670       st->print_cr("#------------------------");
671       DumpSharedSymbol dss(st);
672       _dynamic_shared_table.iterate(&dss);
673     }
674   }
675 }
676 
677 #if INCLUDE_CDS
678 void SymbolTable::copy_shared_symbol_table(GrowableArray<Symbol*>* symbols,
679                                            CompactHashtableWriter* writer) {
680   ArchiveBuilder* builder = ArchiveBuilder::current();
681   int len = symbols->length();
682   for (int i = 0; i < len; i++) {
683     Symbol* sym = ArchiveBuilder::get_buffered_symbol(symbols->at(i));
684     unsigned int fixed_hash = hash_shared_symbol((const char*)sym->bytes(), sym->utf8_length());
685     assert(fixed_hash == hash_symbol((const char*)sym->bytes(), sym->utf8_length(), false),
686            "must not rehash during dumping");
687     sym->set_permanent();
688     writer->add(fixed_hash, builder->buffer_to_offset_u4((address)sym));
689   }
690 }
691 
// Estimates the space the archived symbol table will need, based on the
// current item count. Terminates the VM via fatal() when the count does not
// fit into a jint, since the writer API takes an int.
size_t SymbolTable::estimate_size_for_archive() {
  if (_items_count > (size_t)max_jint) {
    fatal("Too many symbols to be archived: %zu", _items_count);
  }
  return CompactHashtableWriter::estimate_size(int(_items_count));
}
698 
699 void SymbolTable::write_to_archive(GrowableArray<Symbol*>* symbols) {
700   CompactHashtableWriter writer(int(_items_count), ArchiveBuilder::symbol_stats());
701   copy_shared_symbol_table(symbols, &writer);
702   _shared_table_for_dumping.reset();
703   writer.dump(&_shared_table_for_dumping, "symbol");
704 }
705 
706 void SymbolTable::serialize_shared_table_header(SerializeClosure* soc,
707                                                 bool is_static_archive) {
708   OffsetCompactHashtable<const char*, Symbol*, symbol_equals_compact_hashtable_entry> * table;
709   if (soc->reading()) {
710     if (is_static_archive) {
711       table = &_shared_table;
712     } else {
713       table = &_dynamic_shared_table;
714     }
715   } else {
716     table = &_shared_table_for_dumping;
717   }
718 
719   table->serialize_header(soc);
720 }
721 #endif //INCLUDE_CDS
722 
// Concurrent work
// Grows the runtime table in steps on the given thread and then refreshes the
// cached table size. Returns without doing anything when the grow task cannot
// be prepared.
void SymbolTable::grow(JavaThread* jt) {
  SymbolTableHash::GrowTask gt(_local_table);
  if (!gt.prepare(jt)) {
    return;
  }
  log_trace(symboltable)("Started to grow");
  {
    TraceTime timer("Grow", TRACETIME_LOG(Debug, symboltable, perf));
    // do_task() returns true while more work remains. Between steps the task
    // is paused and the thread briefly transitions to blocked and back --
    // presumably so that a pending safepoint can proceed (TODO confirm).
    while (gt.do_task(jt)) {
      gt.pause(jt);
      {
        ThreadBlockInVM tbivm(jt);
      }
      gt.cont(jt);
    }
  }
  gt.done(jt);
  // Refresh the cached size used by get_load_factor().
  _current_size = table_size();
  log_debug(symboltable)("Grown to size:" SIZE_FORMAT, _current_size);
}
744 
745 struct SymbolTableDoDelete : StackObj {
746   size_t _deleted;
747   SymbolTableDoDelete() : _deleted(0) {}
748   void operator()(Symbol* value) {
749     assert(value != nullptr, "expected valid value");
750     Symbol *sym = value;
751     assert(sym->refcount() == 0, "refcount");
752     _deleted++;
753   }
754 };
755 
756 struct SymbolTableDeleteCheck : StackObj {
757   size_t _processed;
758   SymbolTableDeleteCheck() : _processed(0) {}
759   bool operator()(Symbol* value) {
760     assert(value != nullptr, "expected valid value");
761     _processed++;
762     Symbol *sym = value;
763     return (sym->refcount() == 0);
764   }
765 };
766 
// Removes dead symbols (refcount zero) from the runtime table in steps on the
// given thread. Returns without doing anything when the bulk-delete task
// cannot be prepared.
void SymbolTable::clean_dead_entries(JavaThread* jt) {
  SymbolTableHash::BulkDeleteTask bdt(_local_table);
  if (!bdt.prepare(jt)) {
    return;
  }

  SymbolTableDeleteCheck stdc;  // selects dead entries, counts inspected ones
  SymbolTableDoDelete stdd;     // counts deleted entries
  // Judging by its name, suspends native heap trimming for this scope.
  NativeHeapTrimmer::SuspendMark sm("symboltable");
  {
    TraceTime timer("Clean", TRACETIME_LOG(Debug, symboltable, perf));
    // do_task() returns true while more work remains. Between steps the task
    // is paused and the thread briefly transitions to blocked and back --
    // presumably so that a pending safepoint can proceed (TODO confirm).
    while (bdt.do_task(jt, stdc, stdd)) {
      bdt.pause(jt);
      {
        ThreadBlockInVM tbivm(jt);
      }
      bdt.cont(jt);
    }
    // The hint is cleared before the task is finished off.
    reset_has_items_to_clean();
    bdt.done(jt);
  }

  Atomic::add(&_symbols_counted, stdc._processed);

  log_debug(symboltable)("Cleaned " SIZE_FORMAT " of " SIZE_FORMAT,
                         stdd._deleted, stdc._processed);
}
794 
795 void SymbolTable::check_concurrent_work() {
796   if (has_work()) {
797     return;
798   }
799   // We should clean/resize if we have
800   // more items than preferred load factor or
801   // more dead items than water mark.
802   if (has_items_to_clean() || (get_load_factor() > PREF_AVG_LIST_LEN)) {
803     log_debug(symboltable)("Concurrent work triggered, load factor: %f, items to clean: %s",
804                            get_load_factor(), has_items_to_clean() ? "true" : "false");
805     trigger_cleanup();
806   }
807 }
808 
809 bool SymbolTable::should_grow() {
810   return get_load_factor() > PREF_AVG_LIST_LEN && !_local_table->is_max_size_reached();
811 }
812 
813 void SymbolTable::do_concurrent_work(JavaThread* jt) {
814   // Rehash if needed.  Rehashing goes to a safepoint but the rest of this
815   // work is concurrent.
816   if (needs_rehashing() && maybe_rehash_table()) {
817     Atomic::release_store(&_has_work, false);
818     return; // done, else grow
819   }
820   log_debug(symboltable, perf)("Concurrent work, live factor: %g", get_load_factor());
821   // We prefer growing, since that also removes dead items
822   if (should_grow()) {
823     grow(jt);
824   } else {
825     clean_dead_entries(jt);
826   }
827   Atomic::release_store(&_has_work, false);
828 }
829 
// Called at VM_Operation safepoint
// Rebuilds the runtime table with the alternate, freshly seeded hash function:
// a new table of the same size is created, all nodes are moved into it, and
// the old table is deleted.
void SymbolTable::rehash_table() {
  assert(SafepointSynchronize::is_at_safepoint(), "must be called at safepoint");
  // The ServiceThread initiates the rehashing so it is not resizing.
  assert (_local_table->is_safepoint_safe(), "Should not be resizing now");

  _alt_hash_seed = AltHashing::compute_seed();

  // We use current size
  // (note: despite its name, new_size holds the log2 of the size)
  size_t new_size = _local_table->get_size_log2(Thread::current());
  SymbolTableHash* new_table = new SymbolTableHash(new_size, END_SIZE, REHASH_LEN, true);
  // Use alt hash from now on
  // This is set before the nodes are moved; SymbolTableConfig::get_hash()
  // reads _alt_hash when computing a symbol's hash.
  _alt_hash = true;
  _local_table->rehash_nodes_to(Thread::current(), new_table);

  // free old table
  delete _local_table;
  _local_table = new_table;

  _rehashed = true;
  _needs_rehashing = false;
}
852 
853 bool SymbolTable::maybe_rehash_table() {
854   log_debug(symboltable)("Table imbalanced, rehashing called.");
855 
856   // Grow instead of rehash.
857   if (should_grow()) {
858     log_debug(symboltable)("Choosing growing over rehashing.");
859     _needs_rehashing = false;
860     return false;
861   }
862 
863   // Already rehashed.
864   if (_rehashed) {
865     log_warning(symboltable)("Rehashing already done, still long lists.");
866     _needs_rehashing = false;
867     return false;
868   }
869 
870   VM_RehashSymbolTable op;
871   VMThread::execute(&op);
872   return true;
873 }
874 
875 //---------------------------------------------------------------------------
876 // Non-product code
877 
878 #ifndef PRODUCT
879 
880 class HistogramIterator : StackObj {
881 public:
882   static const size_t results_length = 100;
883   size_t counts[results_length];
884   size_t sizes[results_length];
885   size_t total_size;
886   size_t total_count;
887   size_t total_length;
888   size_t max_length;
889   size_t out_of_range_count;
890   size_t out_of_range_size;
891   HistogramIterator() : total_size(0), total_count(0), total_length(0),
892                         max_length(0), out_of_range_count(0), out_of_range_size(0) {
893     // initialize results to zero
894     for (size_t i = 0; i < results_length; i++) {
895       counts[i] = 0;
896       sizes[i] = 0;
897     }
898   }
899   bool operator()(Symbol* value) {
900     assert(value != nullptr, "expected valid value");
901     Symbol* sym = value;
902     size_t size = sym->size();
903     size_t len = sym->utf8_length();
904     if (len < results_length) {
905       counts[len]++;
906       sizes[len] += size;
907     } else {
908       out_of_range_count++;
909       out_of_range_size += size;
910     }
911     total_count++;
912     total_size += size;
913     total_length += len;
914     max_length = MAX2(max_length, len);
915 
916     return true;
917   };
918 };
919 
920 void SymbolTable::print_histogram() {
921   HistogramIterator hi;
922   _local_table->do_scan(Thread::current(), hi);
923   tty->print_cr("Symbol Table Histogram:");
924   tty->print_cr("  Total number of symbols  " SIZE_FORMAT_W(7), hi.total_count);
925   tty->print_cr("  Total size in memory     " SIZE_FORMAT_W(7) "K", (hi.total_size * wordSize) / K);
926   tty->print_cr("  Total counted            " SIZE_FORMAT_W(7), _symbols_counted);
927   tty->print_cr("  Total removed            " SIZE_FORMAT_W(7), _symbols_removed);
928   if (_symbols_counted > 0) {
929     tty->print_cr("  Percent removed          %3.2f",
930           ((double)_symbols_removed / (double)_symbols_counted) * 100);
931   }
932   tty->print_cr("  Reference counts         " SIZE_FORMAT_W(7), Symbol::_total_count);
933   tty->print_cr("  Symbol arena used        " SIZE_FORMAT_W(7) "K", arena()->used() / K);
934   tty->print_cr("  Symbol arena size        " SIZE_FORMAT_W(7) "K", arena()->size_in_bytes() / K);
935   tty->print_cr("  Total symbol length      " SIZE_FORMAT_W(7), hi.total_length);
936   tty->print_cr("  Maximum symbol length    " SIZE_FORMAT_W(7), hi.max_length);
937   tty->print_cr("  Average symbol length    %7.2f", ((double)hi.total_length / (double)hi.total_count));
938   tty->print_cr("  Symbol length histogram:");
939   tty->print_cr("    %6s %10s %10s", "Length", "#Symbols", "Size");
940   for (size_t i = 0; i < hi.results_length; i++) {
941     if (hi.counts[i] > 0) {
942       tty->print_cr("    " SIZE_FORMAT_W(6) " " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) "K",
943                     i, hi.counts[i], (hi.sizes[i] * wordSize) / K);
944     }
945   }
946   tty->print_cr("  >=" SIZE_FORMAT_W(6) " " SIZE_FORMAT_W(10) " " SIZE_FORMAT_W(10) "K\n",
947                 hi.results_length, hi.out_of_range_count, (hi.out_of_range_size*wordSize) / K);
948 }
949 #endif // PRODUCT
950 
951 // Utility for dumping symbols
952 SymboltableDCmd::SymboltableDCmd(outputStream* output, bool heap) :
953                                  DCmdWithParser(output, heap),
954   _verbose("-verbose", "Dump the content of each symbol in the table",
955            "BOOLEAN", false, "false") {
956   _dcmdparser.add_dcmd_option(&_verbose);
957 }
958 
959 void SymboltableDCmd::execute(DCmdSource source, TRAPS) {
960   VM_DumpHashtable dumper(output(), VM_DumpHashtable::DumpSymbols,
961                          _verbose.value());
962   VMThread::execute(&dumper);
963 }