/*
 * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_CDS_ARCHIVEBUILDER_HPP
#define SHARE_CDS_ARCHIVEBUILDER_HPP

#include "cds/archiveUtils.hpp"
#include "cds/dumpAllocStats.hpp"
#include "memory/metaspaceClosure.hpp"
#include "oops/array.hpp"
#include "oops/klass.hpp"
#include "runtime/os.hpp"
#include "utilities/bitMap.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/resizeableResourceHash.hpp"
#include "utilities/resourceHash.hpp"

class ArchiveHeapInfo;
class CHeapBitMap;
class FileMapInfo;
class Klass;
class MemRegion;
class Symbol;

// Metaspace::allocate() requires that all blocks must be aligned with KlassAlignmentInBytes.
// We enforce the same alignment rule in blocks allocated from the shared space.
const int SharedSpaceObjectAlignment = KlassAlignmentInBytes;

// Overview of CDS archive creation (for both static and dynamic dump):
//
// [1] Load all classes (static dump: from the classlist, dynamic dump: as part of app execution)
// [2] Allocate "output buffer"
// [3] Copy contents of the 2 "core" regions (rw/ro) into the output buffer.
//       - allocate the cpp vtables in rw (static dump only)
//       - memcpy the MetaspaceObjs into rw/ro:
//         dump_rw_region();
//         dump_ro_region();
//         NOTE(review): ArchiveBuilder declares dump_rw_metadata()/dump_ro_metadata() and no
//         dump_{rw,ro}_region(); the two names above look stale -- confirm and update.
//       - fix all the pointers in the MetaspaceObjs to point to the copies
//         relocate_metaspaceobj_embedded_pointers()
// [4] Copy symbol table, dictionary, etc, into the ro region
// [5] Relocate all the pointers in rw/ro, so that the archive can be mapped to
//     the "requested" location without runtime relocation. See relocate_to_requested()
//
// "source" vs "buffered" vs "requested"
//
// The ArchiveBuilder deals with three types of addresses.
//
// "source":    These are the addresses of objects created in step [1] above. They are the actual
//              InstanceKlass*, Method*, etc, of the Java classes that are loaded for executing
//              Java bytecodes in the JVM process that's dumping the CDS archive.
//
//              It may be necessary to continue Java execution after ArchiveBuilder is finished.
//              Therefore, we don't modify any of the "source" objects.
//
// "buffered":  The "source" objects that are deemed archivable are copied into a temporary buffer.
//              Objects in the buffer are modified in steps [2, 3, 4] (e.g., unshareable info is
//              removed, pointers are relocated, etc) to prepare them to be loaded at runtime.
//
// "requested": These are the addresses where the "buffered" objects should be loaded at runtime.
//              When the "buffered" objects are written into the archive file, their addresses
//              are adjusted in step [5] such that the lowest of these objects would be mapped
//              at SharedBaseAddress.
84 // 85 // Translation between "source" and "buffered" addresses is done with two hashtables: 86 // _src_obj_table : "source" -> "buffered" 87 // _buffered_to_src_table : "buffered" -> "source" 88 // 89 // Translation between "buffered" and "requested" addresses is done with a simple shift: 90 // buffered_address + _buffer_to_requested_delta == requested_address 91 // 92 class ArchiveBuilder : public StackObj { 93 protected: 94 DumpRegion* _current_dump_space; 95 address _buffer_bottom; // for writing the contents of rw/ro regions 96 address _last_verified_top; 97 int _num_dump_regions_used; 98 size_t _other_region_used_bytes; 99 100 // These are the addresses where we will request the static and dynamic archives to be 101 // mapped at run time. If the request fails (due to ASLR), we will map the archives at 102 // os-selected addresses. 103 address _requested_static_archive_bottom; // This is determined solely by the value of 104 // SharedBaseAddress during -Xshare:dump. 105 address _requested_static_archive_top; 106 address _requested_dynamic_archive_bottom; // Used only during dynamic dump. It's placed 107 // immediately above _requested_static_archive_top. 108 address _requested_dynamic_archive_top; 109 110 // (Used only during dynamic dump) where the static archive is actually mapped. 
This 111 // may be different than _requested_static_archive_{bottom,top} due to ASLR 112 address _mapped_static_archive_bottom; 113 address _mapped_static_archive_top; 114 115 intx _buffer_to_requested_delta; 116 117 DumpRegion* current_dump_space() const { return _current_dump_space; } 118 119 public: 120 enum FollowMode { 121 make_a_copy, point_to_it, set_to_null 122 }; 123 124 private: 125 class SourceObjInfo { 126 uintx _ptrmap_start; // The bit-offset of the start of this object (inclusive) 127 uintx _ptrmap_end; // The bit-offset of the end of this object (exclusive) 128 bool _read_only; 129 FollowMode _follow_mode; 130 int _size_in_bytes; 131 MetaspaceObj::Type _msotype; 132 address _source_addr; // The source object to be copied. 133 address _buffered_addr; // The copy of this object insider the buffer. 134 public: 135 SourceObjInfo(MetaspaceClosure::Ref* ref, bool read_only, FollowMode follow_mode) : 136 _ptrmap_start(0), _ptrmap_end(0), _read_only(read_only), _follow_mode(follow_mode), 137 _size_in_bytes(ref->size() * BytesPerWord), _msotype(ref->msotype()), 138 _source_addr(ref->obj()) { 139 if (follow_mode == point_to_it) { 140 _buffered_addr = ref->obj(); 141 } else { 142 _buffered_addr = nullptr; 143 } 144 } 145 146 // This constructor is only used for regenerated objects (created by LambdaFormInvokers, etc). 147 // src = address of a Method or InstanceKlass that has been regenerated. 148 // renegerated_obj_info = info for the regenerated version of src. 
149 SourceObjInfo(address src, SourceObjInfo* renegerated_obj_info) : 150 _ptrmap_start(0), _ptrmap_end(0), _read_only(false), 151 _follow_mode(renegerated_obj_info->_follow_mode), 152 _size_in_bytes(0), _msotype(renegerated_obj_info->_msotype), 153 _source_addr(src), _buffered_addr(renegerated_obj_info->_buffered_addr) {} 154 155 bool should_copy() const { return _follow_mode == make_a_copy; } 156 void set_buffered_addr(address addr) { 157 assert(should_copy(), "must be"); 158 assert(_buffered_addr == nullptr, "cannot be copied twice"); 159 assert(addr != nullptr, "must be a valid copy"); 160 _buffered_addr = addr; 161 } 162 void set_ptrmap_start(uintx v) { _ptrmap_start = v; } 163 void set_ptrmap_end(uintx v) { _ptrmap_end = v; } 164 uintx ptrmap_start() const { return _ptrmap_start; } // inclusive 165 uintx ptrmap_end() const { return _ptrmap_end; } // exclusive 166 bool read_only() const { return _read_only; } 167 int size_in_bytes() const { return _size_in_bytes; } 168 address source_addr() const { return _source_addr; } 169 address buffered_addr() const { 170 if (_follow_mode != set_to_null) { 171 assert(_buffered_addr != nullptr, "must be initialized"); 172 } 173 return _buffered_addr; 174 } 175 MetaspaceObj::Type msotype() const { return _msotype; } 176 }; 177 178 class SourceObjList { 179 uintx _total_bytes; 180 GrowableArray<SourceObjInfo*>* _objs; // Source objects to be archived 181 CHeapBitMap _ptrmap; // Marks the addresses of the pointer fields 182 // in the source objects 183 public: 184 SourceObjList(); 185 ~SourceObjList(); 186 187 GrowableArray<SourceObjInfo*>* objs() const { return _objs; } 188 189 void append(SourceObjInfo* src_info); 190 void remember_embedded_pointer(SourceObjInfo* pointing_obj, MetaspaceClosure::Ref* ref); 191 void relocate(int i, ArchiveBuilder* builder); 192 193 // convenience accessor 194 SourceObjInfo* at(int i) const { return objs()->at(i); } 195 }; 196 197 class CDSMapLogger; 198 199 static const int INITIAL_TABLE_SIZE 
= 15889; 200 static const int MAX_TABLE_SIZE = 1000000; 201 202 ReservedSpace _shared_rs; 203 VirtualSpace _shared_vs; 204 205 DumpRegion _rw_region; 206 DumpRegion _ro_region; 207 208 // Combined bitmap to track pointers in both RW and RO regions. This is updated 209 // as objects are copied into RW and RO. 210 CHeapBitMap _ptrmap; 211 212 // _ptrmap is split into these two bitmaps which are written into the archive. 213 CHeapBitMap _rw_ptrmap; // marks pointers in the RW region 214 CHeapBitMap _ro_ptrmap; // marks pointers in the RO region 215 216 SourceObjList _rw_src_objs; // objs to put in rw region 217 SourceObjList _ro_src_objs; // objs to put in ro region 218 ResizeableResourceHashtable<address, SourceObjInfo, AnyObj::C_HEAP, mtClassShared> _src_obj_table; 219 ResizeableResourceHashtable<address, address, AnyObj::C_HEAP, mtClassShared> _buffered_to_src_table; 220 GrowableArray<Klass*>* _klasses; 221 GrowableArray<Symbol*>* _symbols; 222 223 // statistics 224 DumpAllocStats _alloc_stats; 225 size_t _total_heap_region_size; 226 227 void print_region_stats(FileMapInfo *map_info, ArchiveHeapInfo* heap_info); 228 void print_bitmap_region_stats(size_t size, size_t total_size); 229 void print_heap_region_stats(ArchiveHeapInfo* heap_info, size_t total_size); 230 231 // For global access. 232 static ArchiveBuilder* _current; 233 234 public: 235 // Use this when you allocate space outside of ArchiveBuilder::dump_{rw,ro}_region. 236 // These are usually for misc tables that are allocated in the RO space. 
237 class OtherROAllocMark { 238 char* _oldtop; 239 public: 240 OtherROAllocMark() { 241 _oldtop = _current->_ro_region.top(); 242 } 243 ~OtherROAllocMark(); 244 }; 245 246 private: 247 FollowMode get_follow_mode(MetaspaceClosure::Ref *ref); 248 249 void iterate_sorted_roots(MetaspaceClosure* it); 250 void sort_klasses(); 251 static int compare_symbols_by_address(Symbol** a, Symbol** b); 252 static int compare_klass_by_name(Klass** a, Klass** b); 253 254 void make_shallow_copies(DumpRegion *dump_region, const SourceObjList* src_objs); 255 void make_shallow_copy(DumpRegion *dump_region, SourceObjInfo* src_info); 256 257 void relocate_embedded_pointers(SourceObjList* src_objs); 258 259 bool is_excluded(Klass* k); 260 void clean_up_src_obj_table(); 261 262 protected: 263 virtual void iterate_roots(MetaspaceClosure* it) = 0; 264 265 // Conservative estimate for number of bytes needed for: 266 size_t _estimated_metaspaceobj_bytes; // all archived MetaspaceObj's. 267 size_t _estimated_hashtable_bytes; // symbol table and dictionaries 268 269 static const int _total_dump_regions = 2; 270 271 size_t estimate_archive_size(); 272 273 void start_dump_space(DumpRegion* next); 274 void verify_estimate_size(size_t estimate, const char* which); 275 276 public: 277 address reserve_buffer(); 278 279 address buffer_bottom() const { return _buffer_bottom; } 280 address buffer_top() const { return (address)current_dump_space()->top(); } 281 address requested_static_archive_bottom() const { return _requested_static_archive_bottom; } 282 address mapped_static_archive_bottom() const { return _mapped_static_archive_bottom; } 283 intx buffer_to_requested_delta() const { return _buffer_to_requested_delta; } 284 285 bool is_in_buffer_space(address p) const { 286 return (buffer_bottom() <= p && p < buffer_top()); 287 } 288 289 template <typename T> bool is_in_requested_static_archive(T p) const { 290 return _requested_static_archive_bottom <= (address)p && (address)p < 
_requested_static_archive_top; 291 } 292 293 template <typename T> bool is_in_mapped_static_archive(T p) const { 294 return _mapped_static_archive_bottom <= (address)p && (address)p < _mapped_static_archive_top; 295 } 296 297 template <typename T> bool is_in_buffer_space(T obj) const { 298 return is_in_buffer_space(address(obj)); 299 } 300 301 template <typename T> T to_requested(T obj) const { 302 assert(is_in_buffer_space(obj), "must be"); 303 return (T)(address(obj) + _buffer_to_requested_delta); 304 } 305 306 static intx get_buffer_to_requested_delta() { 307 return current()->buffer_to_requested_delta(); 308 } 309 310 inline static u4 to_offset_u4(uintx offset) { 311 guarantee(offset <= MAX_SHARED_DELTA, "must be 32-bit offset " INTPTR_FORMAT, offset); 312 return (u4)offset; 313 } 314 315 public: 316 static const uintx MAX_SHARED_DELTA = 0x7FFFFFFF; 317 318 // The address p points to an object inside the output buffer. When the archive is mapped 319 // at the requested address, what's the offset of this object from _requested_static_archive_bottom? 320 uintx buffer_to_offset(address p) const; 321 322 // Same as buffer_to_offset, except that the address p points to either (a) an object 323 // inside the output buffer, or (b), an object in the currently mapped static archive. 
324 uintx any_to_offset(address p) const; 325 326 template <typename T> 327 u4 buffer_to_offset_u4(T p) const { 328 uintx offset = buffer_to_offset((address)p); 329 return to_offset_u4(offset); 330 } 331 332 template <typename T> 333 u4 any_to_offset_u4(T p) const { 334 uintx offset = any_to_offset((address)p); 335 return to_offset_u4(offset); 336 } 337 338 static void assert_is_vm_thread() PRODUCT_RETURN; 339 340 public: 341 ArchiveBuilder(); 342 ~ArchiveBuilder(); 343 344 void gather_klasses_and_symbols(); 345 void gather_source_objs(); 346 bool gather_klass_and_symbol(MetaspaceClosure::Ref* ref, bool read_only); 347 bool gather_one_source_obj(MetaspaceClosure::Ref* ref, bool read_only); 348 void remember_embedded_pointer_in_enclosing_obj(MetaspaceClosure::Ref* ref); 349 static void serialize_dynamic_archivable_items(SerializeClosure* soc); 350 351 DumpRegion* rw_region() { return &_rw_region; } 352 DumpRegion* ro_region() { return &_ro_region; } 353 354 static char* rw_region_alloc(size_t num_bytes) { 355 return current()->rw_region()->allocate(num_bytes); 356 } 357 static char* ro_region_alloc(size_t num_bytes) { 358 return current()->ro_region()->allocate(num_bytes); 359 } 360 361 template <typename T> 362 static Array<T>* new_ro_array(int length) { 363 size_t byte_size = Array<T>::byte_sizeof(length, sizeof(T)); 364 Array<T>* array = (Array<T>*)ro_region_alloc(byte_size); 365 array->initialize(length); 366 return array; 367 } 368 369 template <typename T> 370 static Array<T>* new_rw_array(int length) { 371 size_t byte_size = Array<T>::byte_sizeof(length, sizeof(T)); 372 Array<T>* array = (Array<T>*)rw_region_alloc(byte_size); 373 array->initialize(length); 374 return array; 375 } 376 377 template <typename T> 378 static size_t ro_array_bytesize(int length) { 379 size_t byte_size = Array<T>::byte_sizeof(length, sizeof(T)); 380 return align_up(byte_size, SharedSpaceObjectAlignment); 381 } 382 383 char* ro_strdup(const char* s); 384 385 void dump_rw_metadata(); 
386 void dump_ro_metadata(); 387 void relocate_metaspaceobj_embedded_pointers(); 388 void record_regenerated_object(address orig_src_obj, address regen_src_obj); 389 void make_klasses_shareable(); 390 void relocate_to_requested(); 391 void write_archive(FileMapInfo* mapinfo, ArchiveHeapInfo* heap_info); 392 void write_region(FileMapInfo* mapinfo, int region_idx, DumpRegion* dump_region, 393 bool read_only, bool allow_exec); 394 395 void write_pointer_in_buffer(address* ptr_location, address src_addr); 396 template <typename T> void write_pointer_in_buffer(T* ptr_location, T src_addr) { 397 write_pointer_in_buffer((address*)ptr_location, (address)src_addr); 398 } 399 400 address get_buffered_addr(address src_addr) const; 401 template <typename T> T get_buffered_addr(T src_addr) const { 402 return (T)get_buffered_addr((address)src_addr); 403 } 404 405 address get_source_addr(address buffered_addr) const; 406 template <typename T> T get_source_addr(T buffered_addr) const { 407 return (T)get_source_addr((address)buffered_addr); 408 } 409 410 // All klasses and symbols that will be copied into the archive 411 GrowableArray<Klass*>* klasses() const { return _klasses; } 412 GrowableArray<Symbol*>* symbols() const { return _symbols; } 413 414 static bool is_active() { 415 return (_current != nullptr); 416 } 417 418 static ArchiveBuilder* current() { 419 assert_is_vm_thread(); 420 assert(_current != nullptr, "ArchiveBuilder must be active"); 421 return _current; 422 } 423 424 static DumpAllocStats* alloc_stats() { 425 return &(current()->_alloc_stats); 426 } 427 428 static CompactHashtableStats* symbol_stats() { 429 return alloc_stats()->symbol_stats(); 430 } 431 432 static CompactHashtableStats* string_stats() { 433 return alloc_stats()->string_stats(); 434 } 435 436 narrowKlass get_requested_narrow_klass(Klass* k); 437 438 static Klass* get_buffered_klass(Klass* src_klass) { 439 Klass* klass = (Klass*)current()->get_buffered_addr((address)src_klass); 440 assert(klass != 
nullptr && klass->is_klass(), "must be"); 441 return klass; 442 } 443 444 static Symbol* get_buffered_symbol(Symbol* src_symbol) { 445 return (Symbol*)current()->get_buffered_addr((address)src_symbol); 446 } 447 448 void print_stats(); 449 void report_out_of_space(const char* name, size_t needed_bytes); 450 }; 451 452 #endif // SHARE_CDS_ARCHIVEBUILDER_HPP