1 /* 2 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef SHARE_CLASSFILE_COMPACTHASHTABLE_HPP 26 #define SHARE_CLASSFILE_COMPACTHASHTABLE_HPP 27 28 #include "cds/cds_globals.hpp" 29 #include "oops/array.hpp" 30 #include "oops/symbol.hpp" 31 #include "runtime/globals.hpp" 32 #include "utilities/growableArray.hpp" 33 34 35 template < 36 typename K, 37 typename V, 38 V (*DECODE)(address base_address, u4 offset), 39 bool (*EQUALS)(V value, K key, int len) 40 > 41 class CompactHashtable; 42 class NumberSeq; 43 class SimpleCompactHashtable; 44 class SerializeClosure; 45 46 // Stats for symbol tables in the CDS archive 47 class CompactHashtableStats { 48 public: 49 int hashentry_count; 50 int hashentry_bytes; 51 int bucket_count; 52 int bucket_bytes; 53 54 CompactHashtableStats() : 55 hashentry_count(0), hashentry_bytes(0), 56 bucket_count(0), bucket_bytes(0) {} 57 }; 58 59 #if INCLUDE_CDS 60 ///////////////////////////////////////////////////////////////////////// 61 // 62 // The compact hash table writer. Used at dump time for writing out 63 // the compact table to the shared archive. 64 // 65 // At dump time, the CompactHashtableWriter obtains all entries from the 66 // symbol/string table and adds them to a new temporary hash table. The hash 67 // table size (number of buckets) is calculated using 68 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket 69 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option. 70 // 4 is chosen because it produces smaller sized bucket on average for 71 // faster lookup. It also has relatively small number of empty buckets and 72 // good distribution of the entries. 73 // 74 // We use a simple hash function (hash % num_bucket) for the table. 75 // The new table is compacted when written out. Please see comments 76 // above the CompactHashtable class for the table layout detail. The bucket 77 // offsets are written to the archive as part of the compact table. The 78 // bucket offset is encoded in the low 30-bit (0-29) and the bucket type 79 // (regular or compact) are encoded in bit[31, 30]. For buckets with more 80 // than one entry, both hash and entry offset are written to the 81 // table. For buckets with only one entry, only the entry offset is written 82 // to the table and the buckets are tagged as compact in their type bits. 83 // Buckets without entry are skipped from the table. Their offsets are 84 // still written out for faster lookup. 85 // 86 class CompactHashtableWriter: public StackObj { 87 public: 88 class Entry { 89 unsigned int _hash; 90 u4 _value; 91 92 public: 93 Entry() {} 94 Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {} 95 96 u4 value() { 97 return _value; 98 } 99 unsigned int hash() { 100 return _hash; 101 } 102 103 bool operator==(const CompactHashtableWriter::Entry& other) { 104 return (_value == other._value && _hash == other._hash); 105 } 106 }; // class CompactHashtableWriter::Entry 107 108 private: 109 int _num_entries_written; 110 int _num_buckets; 111 int _num_empty_buckets; 112 int _num_value_only_buckets; 113 int _num_other_buckets; 114 GrowableArray<Entry>** _buckets; 115 CompactHashtableStats* _stats; 116 Array<u4>* _compact_buckets; 117 Array<u4>* _compact_entries; 118 119 public: 120 // This is called at dump-time only 121 CompactHashtableWriter(int num_entries, CompactHashtableStats* stats); 122 ~CompactHashtableWriter(); 123 124 void add(unsigned int hash, u4 value); 125 126 private: 127 void allocate_table(); 128 void dump_table(NumberSeq* summary); 129 static int calculate_num_buckets(int num_entries) { 130 int num_buckets = num_entries / SharedSymbolTableBucketSize; 131 // calculation of num_buckets can result in zero buckets, we need at least one 132 return (num_buckets < 1) ? 1 : num_buckets; 133 } 134 135 public: 136 void dump(SimpleCompactHashtable *cht, const char* table_name); 137 138 static size_t estimate_size(int num_entries); 139 }; 140 #endif // INCLUDE_CDS 141 142 #define REGULAR_BUCKET_TYPE 0 143 #define VALUE_ONLY_BUCKET_TYPE 1 144 #define TABLEEND_BUCKET_TYPE 3 145 #define BUCKET_OFFSET_MASK 0x3FFFFFFF 146 #define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK) 147 #define BUCKET_TYPE_SHIFT 30 148 #define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT) 149 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK)) 150 151 ///////////////////////////////////////////////////////////////////////////// 152 // 153 // CompactHashtable is used to store the CDS archive's symbol/string tables. 154 // 155 // Because these tables are read-only (no entries can be added/deleted) at run-time 156 // and tend to have large number of entries, we try to minimize the footprint 157 // cost per entry. 158 // 159 // The CompactHashtable is split into two arrays 160 // 161 // u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset 162 // u4 entries[<variable size>] 163 // 164 // The size of buckets[] is 'num_buckets + 1'. Each entry of 165 // buckets[] is a 32-bit encoding of the bucket type and bucket offset, 166 // with the type in the left-most 2-bit and offset in the remaining 30-bit. 167 // The last entry is a special type. It contains the end of the last 168 // bucket. 169 // 170 // There are two types of buckets, regular buckets and value_only buckets. The 171 // value_only buckets have '01' in their highest 2-bit, and regular buckets have 172 // '00' in their highest 2-bit. 173 // 174 // For normal buckets, each entry is 8 bytes in the entries[]: 175 // u4 hash; /* symbol/string hash */ 176 // union { 177 // u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */ 178 // narrowOop str; /* String narrowOop encoding */ 179 // } 180 // 181 // 182 // For value_only buckets, each entry has only the 4-byte 'offset' in the entries[]. 183 // 184 // Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code 185 // is skipped. 186 // buckets[0, 4, 5, ....] 187 // | | | 188 // | | +---+ 189 // | | | 190 // | +----+ | 191 // v v v 192 // entries[H,O,H,O,O,H,O,H,O.....] 193 // 194 // See CompactHashtable::lookup() for how the table is searched at runtime. 195 // See CompactHashtableWriter::dump() for how the table is written at CDS 196 // dump time. 197 // 198 class SimpleCompactHashtable { 199 protected: 200 address _base_address; 201 u4 _bucket_count; 202 u4 _entry_count; 203 u4* _buckets; 204 u4* _entries; 205 206 public: 207 SimpleCompactHashtable() : 208 _base_address(nullptr), 209 _bucket_count(0), 210 _entry_count(0), 211 _buckets(nullptr), 212 _entries(nullptr) 213 {} 214 215 void reset() { 216 _base_address = nullptr; 217 _bucket_count = 0; 218 _entry_count = 0; 219 _buckets = nullptr; 220 _entries = nullptr; 221 } 222 223 void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries); 224 225 // Read/Write the table's header from/to the CDS archive 226 void serialize_header(SerializeClosure* soc) NOT_CDS_RETURN; 227 228 inline bool empty() const { 229 return (_entry_count == 0); 230 } 231 232 inline size_t entry_count() const { 233 return _entry_count; 234 } 235 236 static size_t calculate_header_size(); 237 }; 238 239 template < 240 typename K, 241 typename V, 242 V (*DECODE)(address base_address, u4 offset), 243 bool (*EQUALS)(V value, K key, int len) 244 > 245 class CompactHashtable : public SimpleCompactHashtable { 246 friend class VMStructs; 247 248 V decode(u4 offset) const { 249 return DECODE(_base_address, offset); 250 } 251 252 public: 253 // Lookup a value V from the compact table using key K 254 inline V lookup(K key, unsigned int hash, int len) const { 255 if (_entry_count > 0) { 256 int index = hash % _bucket_count; 257 u4 bucket_info = _buckets[index]; 258 u4 bucket_offset = BUCKET_OFFSET(bucket_info); 259 int bucket_type = BUCKET_TYPE(bucket_info); 260 u4* entry = _entries + bucket_offset; 261 262 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { 263 V value = decode(entry[0]); 264 if (EQUALS(value, key, len)) { 265 return value; 266 } 267 } else { 268 // This is a regular bucket, which has more than one 269 // entries. Each entry is a pair of entry (hash, offset). 270 // Seek until the end of the bucket. 271 u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]); 272 while (entry < entry_max) { 273 unsigned int h = (unsigned int)(entry[0]); 274 if (h == hash) { 275 V value = decode(entry[1]); 276 if (EQUALS(value, key, len)) { 277 return value; 278 } 279 } 280 entry += 2; 281 } 282 } 283 } 284 return nullptr; 285 } 286 287 template <class ITER> 288 inline void iterate(ITER* iter) const { 289 for (u4 i = 0; i < _bucket_count; i++) { 290 u4 bucket_info = _buckets[i]; 291 u4 bucket_offset = BUCKET_OFFSET(bucket_info); 292 int bucket_type = BUCKET_TYPE(bucket_info); 293 u4* entry = _entries + bucket_offset; 294 295 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { 296 iter->do_value(decode(entry[0])); 297 } else { 298 u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]); 299 while (entry < entry_max) { 300 iter->do_value(decode(entry[1])); 301 entry += 2; 302 } 303 } 304 } 305 } 306 307 template<typename Function> 308 inline void iterate(Function function) const { // lambda enabled API 309 for (u4 i = 0; i < _bucket_count; i++) { 310 u4 bucket_info = _buckets[i]; 311 u4 bucket_offset = BUCKET_OFFSET(bucket_info); 312 int bucket_type = BUCKET_TYPE(bucket_info); 313 u4* entry = _entries + bucket_offset; 314 315 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { 316 function(decode(entry[0])); 317 } else { 318 u4* entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]); 319 while (entry < entry_max) { 320 function(decode(entry[1])); 321 entry += 2; 322 } 323 } 324 } 325 } 326 327 void print_table_statistics(outputStream* st, const char* name) { 328 st->print_cr("%s statistics:", name); 329 int total_entries = 0; 330 int max_bucket = 0; 331 for (u4 i = 0; i < _bucket_count; i++) { 332 u4 bucket_info = _buckets[i]; 333 int bucket_type = BUCKET_TYPE(bucket_info); 334 int bucket_size; 335 336 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { 337 bucket_size = 1; 338 } else { 339 bucket_size = (BUCKET_OFFSET(_buckets[i + 1]) - BUCKET_OFFSET(bucket_info)) / 2; 340 } 341 total_entries += bucket_size; 342 if (max_bucket < bucket_size) { 343 max_bucket = bucket_size; 344 } 345 } 346 st->print_cr("Number of buckets : %9d", _bucket_count); 347 st->print_cr("Number of entries : %9d", total_entries); 348 st->print_cr("Maximum bucket size : %9d", max_bucket); 349 } 350 }; 351 352 //////////////////////////////////////////////////////////////////////// 353 // 354 // OffsetCompactHashtable -- This is used to store many types of objects 355 // in the CDS archive. On 64-bit platforms, we save space by using a 32-bit 356 // offset from the CDS base address. 357 358 template <typename V> 359 inline V read_value_from_compact_hashtable(address base_address, u4 offset) { 360 return (V)(base_address + offset); 361 } 362 363 template < 364 typename K, 365 typename V, 366 bool (*EQUALS)(V value, K key, int len) 367 > 368 class OffsetCompactHashtable : public CompactHashtable< 369 K, V, read_value_from_compact_hashtable<V>, EQUALS> { 370 }; 371 372 373 //////////////////////////////////////////////////////////////////////// 374 // 375 // Read/Write the contents of a hashtable textual dump (created by 376 // SymbolTable::dump and StringTable::dump). 377 // Because the dump file may be big (hundred of MB in extreme cases), 378 // we use mmap for fast access when reading it. 379 // 380 class HashtableTextDump { 381 int _fd; 382 const char* _base; 383 const char* _p; 384 const char* _end; 385 const char* _filename; 386 size_t _size; 387 int _prefix_type; 388 int _line_no; 389 public: 390 HashtableTextDump(const char* filename); 391 ~HashtableTextDump(); 392 393 enum { 394 SymbolPrefix = 1 << 0, 395 StringPrefix = 1 << 1, 396 Unknown = 1 << 2 397 }; 398 399 void quit(const char* err, const char* msg); 400 401 inline int remain() { 402 return (int)(_end - _p); 403 } 404 int last_line_no() { 405 return _line_no - 1; 406 } 407 408 void corrupted(const char *p, const char *msg); 409 410 inline void corrupted_if(bool cond, const char *msg) { 411 if (cond) { 412 corrupted(_p, msg); 413 } 414 } 415 416 bool skip_newline(); 417 int skip(char must_be_char); 418 void skip_past(char c); 419 void check_version(const char* ver); 420 421 inline void get_num(char delim, int *num) { 422 const char* p = _p; 423 const char* end = _end; 424 u8 n = 0; 425 426 while (p < end) { 427 char c = *p++; 428 if ('0' <= c && c <= '9') { 429 n = n * 10 + (c - '0'); 430 if (n > (u8)INT_MAX) { 431 corrupted(_p, "Num overflow"); 432 } 433 } else if (c == delim) { 434 _p = p; 435 *num = (int)n; 436 return; 437 } else { 438 // Not [0-9], not 'delim' 439 corrupted(_p, "Unrecognized format");; 440 } 441 } 442 443 corrupted(_end, "Incorrect format"); 444 ShouldNotReachHere(); 445 } 446 447 void scan_prefix_type(); 448 int scan_prefix(int* utf8_length); 449 int scan_string_prefix(); 450 int scan_symbol_prefix(); 451 452 int unescape(const char* from, const char* end, int count); 453 void get_utf8(char* utf8_buffer, int utf8_length); 454 static void put_utf8(outputStream* st, const char* utf8_string, size_t utf8_length); 455 }; 456 457 #endif // SHARE_CLASSFILE_COMPACTHASHTABLE_HPP --- EOF ---