1 /* 2 * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef SHARE_CDS_ARCHIVEBUILDER_HPP 26 #define SHARE_CDS_ARCHIVEBUILDER_HPP 27 28 #include "cds/archiveUtils.hpp" 29 #include "cds/dumpAllocStats.hpp" 30 #include "memory/metaspace.hpp" 31 #include "memory/metaspaceClosure.hpp" 32 #include "memory/reservedSpace.hpp" 33 #include "memory/virtualspace.hpp" 34 #include "oops/array.hpp" 35 #include "oops/klass.hpp" 36 #include "runtime/os.hpp" 37 #include "utilities/bitMap.hpp" 38 #include "utilities/growableArray.hpp" 39 #include "utilities/hashTable.hpp" 40 #include "utilities/resizableHashTable.hpp" 41 42 class ArchiveHeapInfo; 43 class CHeapBitMap; 44 class FileMapInfo; 45 class Klass; 46 class MemRegion; 47 class Symbol; 48 49 // The minimum alignment for non-Klass objects inside the CDS archive. Klass objects need 50 // to follow CompressedKlassPointers::klass_alignment_in_bytes(). 
constexpr size_t SharedSpaceObjectAlignment = Metaspace::min_allocation_alignment_bytes;

// Overview of CDS archive creation (for both static and dynamic dump):
//
// [1] Load all classes (static dump: from the classlist, dynamic dump: as part of app execution)
// [2] Allocate "output buffer"
// [3] Copy contents of the 2 "core" regions (rw/ro) into the output buffer.
//       - allocate the cpp vtables in rw (static dump only)
//       - memcpy the MetaspaceObjs into rw/ro:
//         dump_rw_region();
//         dump_ro_region();
//       - fix all the pointers in the MetaspaceObjs to point to the copies
//         relocate_metaspaceobj_embedded_pointers()
// [4] Copy symbol table, dictionary, etc, into the ro region
// [5] Relocate all the pointers in rw/ro, so that the archive can be mapped to
//     the "requested" location without runtime relocation. See relocate_to_requested()
//
// "source" vs "buffered" vs "requested"
//
// The ArchiveBuilder deals with three types of addresses.
//
// "source":    These are the addresses of objects created in step [1] above. They are the actual
//              InstanceKlass*, Method*, etc, of the Java classes that are loaded for executing
//              Java bytecodes in the JVM process that's dumping the CDS archive.
//
//              It may be necessary to continue Java execution after ArchiveBuilder is finished.
//              Therefore, we don't modify any of the "source" objects.
//
// "buffered":  The "source" objects that are deemed archivable are copied into a temporary buffer.
//              Objects in the buffer are modified in steps [2, 3, 4] (e.g., unshareable info is
//              removed, pointers are relocated, etc) to prepare them to be loaded at runtime.
//
// "requested": These are the addresses where the "buffered" objects should be loaded at runtime.
84 // When the "buffered" objects are written into the archive file, their addresses 85 // are adjusted in step [5] such that the lowest of these objects would be mapped 86 // at SharedBaseAddress. 87 // 88 // Translation between "source" and "buffered" addresses is done with two hashtables: 89 // _src_obj_table : "source" -> "buffered" 90 // _buffered_to_src_table : "buffered" -> "source" 91 // 92 // Translation between "buffered" and "requested" addresses is done with a simple shift: 93 // buffered_address + _buffer_to_requested_delta == requested_address 94 // 95 class ArchiveBuilder : public StackObj { 96 friend class AOTMapLogger; 97 98 protected: 99 DumpRegion* _current_dump_region; 100 address _buffer_bottom; // for writing the contents of rw/ro regions 101 102 // These are the addresses where we will request the static and dynamic archives to be 103 // mapped at run time. If the request fails (due to ASLR), we will map the archives at 104 // os-selected addresses. 105 address _requested_static_archive_bottom; // This is determined solely by the value of 106 // SharedBaseAddress during -Xshare:dump. 107 address _requested_static_archive_top; 108 address _requested_dynamic_archive_bottom; // Used only during dynamic dump. It's placed 109 // immediately above _requested_static_archive_top. 110 address _requested_dynamic_archive_top; 111 112 // (Used only during dynamic dump) where the static archive is actually mapped. 
This 113 // may be different than _requested_static_archive_{bottom,top} due to ASLR 114 address _mapped_static_archive_bottom; 115 address _mapped_static_archive_top; 116 117 intx _buffer_to_requested_delta; 118 119 DumpRegion* current_dump_region() const { return _current_dump_region; } 120 121 public: 122 enum FollowMode { 123 make_a_copy, point_to_it, set_to_null 124 }; 125 126 private: 127 class SourceObjInfo { 128 uintx _ptrmap_start; // The bit-offset of the start of this object (inclusive) 129 uintx _ptrmap_end; // The bit-offset of the end of this object (exclusive) 130 bool _read_only; 131 bool _has_embedded_pointer; 132 FollowMode _follow_mode; 133 int _size_in_bytes; 134 int _id; // Each object has a unique serial ID, starting from zero. The ID is assigned 135 // when the object is added into _source_objs. 136 MetaspaceObj::Type _msotype; 137 address _source_addr; // The source object to be copied. 138 address _buffered_addr; // The copy of this object insider the buffer. 139 public: 140 SourceObjInfo(MetaspaceClosure::Ref* ref, bool read_only, FollowMode follow_mode) : 141 _ptrmap_start(0), _ptrmap_end(0), _read_only(read_only), _has_embedded_pointer(false), _follow_mode(follow_mode), 142 _size_in_bytes(ref->size() * BytesPerWord), _id(0), _msotype(ref->msotype()), 143 _source_addr(ref->obj()) { 144 if (follow_mode == point_to_it) { 145 _buffered_addr = ref->obj(); 146 } else { 147 _buffered_addr = nullptr; 148 } 149 } 150 SourceObjInfo(address src, address buf) { 151 _source_addr = src; 152 _buffered_addr = buf; 153 } 154 155 // This constructor is only used for regenerated objects (created by LambdaFormInvokers, etc). 156 // src = address of a Method or InstanceKlass that has been regenerated. 157 // renegerated_obj_info = info for the regenerated version of src. 
158 SourceObjInfo(address src, SourceObjInfo* renegerated_obj_info) : 159 _ptrmap_start(0), _ptrmap_end(0), _read_only(false), 160 _follow_mode(renegerated_obj_info->_follow_mode), 161 _size_in_bytes(0), _msotype(renegerated_obj_info->_msotype), 162 _source_addr(src), _buffered_addr(renegerated_obj_info->_buffered_addr) {} 163 164 bool should_copy() const { return _follow_mode == make_a_copy; } 165 void set_buffered_addr(address addr) { 166 assert(should_copy(), "must be"); 167 assert(_buffered_addr == nullptr, "cannot be copied twice"); 168 assert(addr != nullptr, "must be a valid copy"); 169 _buffered_addr = addr; 170 } 171 void set_ptrmap_start(uintx v) { _ptrmap_start = v; } 172 void set_ptrmap_end(uintx v) { _ptrmap_end = v; } 173 uintx ptrmap_start() const { return _ptrmap_start; } // inclusive 174 uintx ptrmap_end() const { return _ptrmap_end; } // exclusive 175 bool read_only() const { return _read_only; } 176 bool has_embedded_pointer() const { return _has_embedded_pointer; } 177 void set_has_embedded_pointer() { _has_embedded_pointer = true; } 178 int size_in_bytes() const { return _size_in_bytes; } 179 int id() const { return _id; } 180 void set_id(int i) { _id = i; } 181 address source_addr() const { return _source_addr; } 182 address buffered_addr() const { 183 if (_follow_mode != set_to_null) { 184 assert(_buffered_addr != nullptr, "must be initialized"); 185 } 186 return _buffered_addr; 187 } 188 MetaspaceObj::Type msotype() const { return _msotype; } 189 FollowMode follow_mode() const { return _follow_mode; } 190 }; 191 192 class SourceObjList { 193 uintx _total_bytes; 194 GrowableArray<SourceObjInfo*>* _objs; // Source objects to be archived 195 CHeapBitMap _ptrmap; // Marks the addresses of the pointer fields 196 // in the source objects 197 public: 198 SourceObjList(); 199 ~SourceObjList(); 200 201 GrowableArray<SourceObjInfo*>* objs() const { return _objs; } 202 203 void append(SourceObjInfo* src_info); 204 void 
remember_embedded_pointer(SourceObjInfo* pointing_obj, MetaspaceClosure::Ref* ref); 205 void relocate(int i, ArchiveBuilder* builder); 206 207 // convenience accessor 208 SourceObjInfo* at(int i) const { return objs()->at(i); } 209 }; 210 211 static const int INITIAL_TABLE_SIZE = 15889; 212 static const int MAX_TABLE_SIZE = 1000000; 213 214 ReservedSpace _shared_rs; 215 VirtualSpace _shared_vs; 216 217 // The "pz" region is used only during static dumps to reserve an unused space between SharedBaseAddress and 218 // the bottom of the rw region. During runtime, this space will be filled with a reserved area that disallows 219 // read/write/exec, so we can track for bad CompressedKlassPointers encoding. 220 // Note: this region does NOT exist in the cds archive. 221 DumpRegion _pz_region; 222 223 DumpRegion _rw_region; 224 DumpRegion _ro_region; 225 DumpRegion _ac_region; // AOT code 226 227 // Combined bitmap to track pointers in both RW and RO regions. This is updated 228 // as objects are copied into RW and RO. 229 CHeapBitMap _ptrmap; 230 231 // _ptrmap is split into these two bitmaps which are written into the archive. 
232 CHeapBitMap _rw_ptrmap; // marks pointers in the RW region 233 CHeapBitMap _ro_ptrmap; // marks pointers in the RO region 234 CHeapBitMap _ac_ptrmap; // marks pointers in the CC region 235 236 SourceObjList _rw_src_objs; // objs to put in rw region 237 SourceObjList _ro_src_objs; // objs to put in ro region 238 ResizeableHashTable<address, SourceObjInfo, AnyObj::C_HEAP, mtClassShared> _src_obj_table; 239 ResizeableHashTable<address, address, AnyObj::C_HEAP, mtClassShared> _buffered_to_src_table; 240 GrowableArray<Klass*>* _klasses; 241 GrowableArray<Symbol*>* _symbols; 242 unsigned int _entropy_seed; 243 244 // statistics 245 DumpAllocStats _alloc_stats; 246 size_t _total_heap_region_size; 247 struct { 248 size_t _num_ptrs; 249 size_t _num_tagged_ptrs; 250 size_t _num_nulled_ptrs; 251 } _relocated_ptr_info; 252 253 void print_region_stats(FileMapInfo *map_info, ArchiveHeapInfo* heap_info); 254 void print_bitmap_region_stats(size_t size, size_t total_size); 255 void print_heap_region_stats(ArchiveHeapInfo* heap_info, size_t total_size); 256 257 // For global access. 258 static ArchiveBuilder* _current; 259 260 public: 261 // Use this when you allocate space outside of ArchiveBuilder::dump_{rw,ro}_region. 262 // These are usually for misc tables that are allocated in the RO space. 
263 class OtherROAllocMark { 264 char* _oldtop; 265 public: 266 OtherROAllocMark() { 267 _oldtop = _current->_ro_region.top(); 268 } 269 ~OtherROAllocMark(); 270 }; 271 272 void count_relocated_pointer(bool tagged, bool nulled); 273 274 private: 275 FollowMode get_follow_mode(MetaspaceClosure::Ref *ref); 276 277 void iterate_sorted_roots(MetaspaceClosure* it); 278 void sort_klasses(); 279 static int compare_symbols_by_address(Symbol** a, Symbol** b); 280 static int compare_klass_by_name(Klass** a, Klass** b); 281 void update_hidden_class_loader_type(InstanceKlass* ik) NOT_CDS_JAVA_HEAP_RETURN; 282 283 void make_shallow_copies(DumpRegion *dump_region, const SourceObjList* src_objs); 284 void make_shallow_copy(DumpRegion *dump_region, SourceObjInfo* src_info); 285 286 void relocate_embedded_pointers(SourceObjList* src_objs); 287 288 bool is_excluded(Klass* k); 289 void clean_up_src_obj_table(); 290 291 protected: 292 virtual void iterate_roots(MetaspaceClosure* it) = 0; 293 void start_dump_region(DumpRegion* next); 294 295 public: 296 address reserve_buffer(); 297 298 address buffer_bottom() const { return _buffer_bottom; } 299 address buffer_top() const { return (address)current_dump_region()->top(); } 300 address requested_static_archive_bottom() const { return _requested_static_archive_bottom; } 301 address mapped_static_archive_bottom() const { return _mapped_static_archive_bottom; } 302 intx buffer_to_requested_delta() const { return _buffer_to_requested_delta; } 303 304 bool is_in_buffer_space(address p) const { 305 return (buffer_bottom() != nullptr && buffer_bottom() <= p && p < buffer_top()); 306 } 307 308 template <typename T> bool is_in_requested_static_archive(T p) const { 309 return _requested_static_archive_bottom <= (address)p && (address)p < _requested_static_archive_top; 310 } 311 312 template <typename T> bool is_in_mapped_static_archive(T p) const { 313 return _mapped_static_archive_bottom <= (address)p && (address)p < _mapped_static_archive_top; 
314 } 315 316 template <typename T> bool is_in_buffer_space(T obj) const { 317 return is_in_buffer_space(address(obj)); 318 } 319 320 template <typename T> T to_requested(T obj) const { 321 assert(is_in_buffer_space(obj), "must be"); 322 return (T)(address(obj) + _buffer_to_requested_delta); 323 } 324 325 template <typename T> T requested_to_buffered(T obj) const { 326 T b = (T)(address(obj) - _buffer_to_requested_delta); 327 assert(is_in_buffer_space(b), "must be"); 328 return b; 329 } 330 331 static intx get_buffer_to_requested_delta() { 332 return current()->buffer_to_requested_delta(); 333 } 334 335 inline static u4 to_offset_u4(uintx offset) { 336 guarantee(offset <= MAX_SHARED_DELTA, "must be 32-bit offset " INTPTR_FORMAT, offset); 337 return (u4)offset; 338 } 339 340 public: 341 static const uintx MAX_SHARED_DELTA = ArchiveUtils::MAX_SHARED_DELTA;; 342 343 // The address p points to an object inside the output buffer. When the archive is mapped 344 // at the requested address, what's the offset of this object from _requested_static_archive_bottom? 345 uintx buffer_to_offset(address p) const; 346 347 // Same as buffer_to_offset, except that the address p points to either (a) an object 348 // inside the output buffer, or (b), an object in the currently mapped static archive. 
349 uintx any_to_offset(address p) const; 350 351 // The reverse of buffer_to_offset() 352 address offset_to_buffered_address(u4 offset) const; 353 354 template <typename T> 355 u4 buffer_to_offset_u4(T p) const { 356 uintx offset = buffer_to_offset((address)p); 357 return to_offset_u4(offset); 358 } 359 360 template <typename T> 361 u4 any_to_offset_u4(T p) const { 362 assert(p != nullptr, "must not be null"); 363 uintx offset = any_to_offset((address)p); 364 return to_offset_u4(offset); 365 } 366 367 template <typename T> 368 u4 any_or_null_to_offset_u4(T p) const { 369 if (p == nullptr) { 370 return 0; 371 } else { 372 return any_to_offset_u4<T>(p); 373 } 374 } 375 376 template <typename T> 377 T offset_to_buffered(u4 offset) const { 378 return (T)offset_to_buffered_address(offset); 379 } 380 381 public: 382 ArchiveBuilder(); 383 ~ArchiveBuilder(); 384 385 int entropy(); 386 void gather_klasses_and_symbols(); 387 void gather_source_objs(); 388 bool gather_klass_and_symbol(MetaspaceClosure::Ref* ref, bool read_only); 389 bool gather_one_source_obj(MetaspaceClosure::Ref* ref, bool read_only); 390 void remember_embedded_pointer_in_enclosing_obj(MetaspaceClosure::Ref* ref); 391 static void serialize_dynamic_archivable_items(SerializeClosure* soc); 392 393 DumpRegion* pz_region() { return &_pz_region; } 394 DumpRegion* rw_region() { return &_rw_region; } 395 DumpRegion* ro_region() { return &_ro_region; } 396 DumpRegion* ac_region() { return &_ac_region; } 397 398 static char* rw_region_alloc(size_t num_bytes) { 399 return current()->rw_region()->allocate(num_bytes); 400 } 401 static char* ro_region_alloc(size_t num_bytes) { 402 return current()->ro_region()->allocate(num_bytes); 403 } 404 static char* ac_region_alloc(size_t num_bytes) { 405 return current()->ac_region()->allocate(num_bytes); 406 } 407 408 void start_ac_region(); 409 void end_ac_region(); 410 411 template <typename T> 412 static Array<T>* new_ro_array(int length) { 413 size_t byte_size = 
Array<T>::byte_sizeof(length, sizeof(T)); 414 Array<T>* array = (Array<T>*)ro_region_alloc(byte_size); 415 array->initialize(length); 416 return array; 417 } 418 419 template <typename T> 420 static Array<T>* new_rw_array(int length) { 421 size_t byte_size = Array<T>::byte_sizeof(length, sizeof(T)); 422 Array<T>* array = (Array<T>*)rw_region_alloc(byte_size); 423 array->initialize(length); 424 return array; 425 } 426 427 template <typename T> 428 static size_t ro_array_bytesize(int length) { 429 size_t byte_size = Array<T>::byte_sizeof(length, sizeof(T)); 430 return align_up(byte_size, SharedSpaceObjectAlignment); 431 } 432 433 char* ro_strdup(const char* s); 434 435 static int compare_src_objs(SourceObjInfo** a, SourceObjInfo** b); 436 void sort_metadata_objs(); 437 void dump_rw_metadata(); 438 void dump_ro_metadata(); 439 void relocate_metaspaceobj_embedded_pointers(); 440 void record_regenerated_object(address orig_src_obj, address regen_src_obj); 441 void make_klasses_shareable(); 442 void make_training_data_shareable(); 443 void relocate_to_requested(); 444 void write_archive(FileMapInfo* mapinfo, ArchiveHeapInfo* heap_info); 445 void write_region(FileMapInfo* mapinfo, int region_idx, DumpRegion* dump_region, 446 bool read_only, bool allow_exec); 447 448 void write_pointer_in_buffer(address* ptr_location, address src_addr); 449 template <typename T> void write_pointer_in_buffer(T* ptr_location, T src_addr) { 450 write_pointer_in_buffer((address*)ptr_location, (address)src_addr); 451 } 452 453 void mark_and_relocate_to_buffered_addr(address* ptr_location); 454 template <typename T> void mark_and_relocate_to_buffered_addr(T ptr_location) { 455 mark_and_relocate_to_buffered_addr((address*)ptr_location); 456 } 457 458 bool has_been_archived(address src_addr) const; 459 template <typename T> bool has_been_archived(T src_addr) const { 460 return has_been_archived((address)src_addr); 461 } 462 463 address get_buffered_addr(address src_addr) const; 464 template 
<typename T> T get_buffered_addr(T src_addr) const { 465 CDS_ONLY(return (T)get_buffered_addr((address)src_addr);) 466 NOT_CDS(return nullptr;) 467 } 468 469 address get_source_addr(address buffered_addr) const; 470 template <typename T> T get_source_addr(T buffered_addr) const { 471 return (T)get_source_addr((address)buffered_addr); 472 } 473 474 // All klasses and symbols that will be copied into the archive 475 GrowableArray<Klass*>* klasses() const { return _klasses; } 476 GrowableArray<Symbol*>* symbols() const { return _symbols; } 477 478 static bool is_active() { 479 CDS_ONLY(return (_current != nullptr)); 480 NOT_CDS(return false;) 481 } 482 483 static ArchiveBuilder* current() { 484 assert(_current != nullptr, "ArchiveBuilder must be active"); 485 return _current; 486 } 487 488 static DumpAllocStats* alloc_stats() { 489 return &(current()->_alloc_stats); 490 } 491 492 static CompactHashtableStats* symbol_stats() { 493 return alloc_stats()->symbol_stats(); 494 } 495 496 static CompactHashtableStats* string_stats() { 497 return alloc_stats()->string_stats(); 498 } 499 500 narrowKlass get_requested_narrow_klass(Klass* k); 501 502 static Klass* get_buffered_klass(Klass* src_klass) { 503 Klass* klass = (Klass*)current()->get_buffered_addr((address)src_klass); 504 assert(klass != nullptr && klass->is_klass(), "must be"); 505 return klass; 506 } 507 508 static Symbol* get_buffered_symbol(Symbol* src_symbol) { 509 return (Symbol*)current()->get_buffered_addr((address)src_symbol); 510 } 511 512 void print_stats(); 513 void report_out_of_space(const char* name, size_t needed_bytes); 514 515 #ifdef _LP64 516 // The CDS archive contains pre-computed narrow Klass IDs. It carries them in the headers of 517 // archived heap objects. With +UseCompactObjectHeaders, it also carries them in prototypes 518 // in Klass. 
519 // When generating the archive, these narrow Klass IDs are computed using the following scheme: 520 // 1) The future encoding base is assumed to point to the first address of the generated mapping. 521 // That means that at runtime, the narrow Klass encoding must be set up with base pointing to 522 // the start address of the mapped CDS metadata archive (wherever that may be). This precludes 523 // zero-based encoding. 524 // 2) The shift must be large enough to result in an encoding range that covers the future assumed 525 // runtime Klass range. That future Klass range will contain both the CDS metadata archive and 526 // the future runtime class space. Since we do not know the size of the future class space, we 527 // need to chose an encoding base/shift combination that will result in a "large enough" size. 528 // The details depend on whether we use compact object headers or legacy object headers. 529 // In Legacy Mode, a narrow Klass ID is 32 bit. This gives us an encoding range size of 4G even 530 // with shift = 0, which is all we need. Therefore, we use a shift=0 for pre-calculating the 531 // narrow Klass IDs. 532 // TinyClassPointer Mode: 533 // We use the highest possible shift value to maximize the encoding range size. 534 static int precomputed_narrow_klass_shift(); 535 #endif // _LP64 536 537 }; 538 539 #endif // SHARE_CDS_ARCHIVEBUILDER_HPP