1 /*
2 * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef SHARE_CDS_ARCHIVEBUILDER_HPP
26 #define SHARE_CDS_ARCHIVEBUILDER_HPP
27
28 #include "cds/archiveUtils.hpp"
29 #include "cds/dumpAllocStats.hpp"
30 #include "memory/metaspace.hpp"
31 #include "memory/metaspaceClosure.hpp"
32 #include "memory/reservedSpace.hpp"
33 #include "memory/virtualspace.hpp"
34 #include "oops/array.hpp"
35 #include "oops/klass.hpp"
36 #include "runtime/os.hpp"
37 #include "utilities/bitMap.hpp"
38 #include "utilities/growableArray.hpp"
39 #include "utilities/hashTable.hpp"
40 #include "utilities/resizableHashTable.hpp"
41
42 class ArchiveMappedHeapInfo;
43 class ArchiveStreamedHeapInfo;
44 class CHeapBitMap;
45 class FileMapInfo;
46 class Klass;
47 class MemRegion;
48 class Symbol;
49
50 // The minimum alignment for non-Klass objects inside the CDS archive. Klass objects need
51 // to follow CompressedKlassPointers::klass_alignment_in_bytes().
52 constexpr size_t SharedSpaceObjectAlignment = Metaspace::min_allocation_alignment_bytes;
53
54 // Overview of CDS archive creation (for both static and dynamic dump):
55 //
56 // [1] Load all classes (static dump: from the classlist, dynamic dump: as part of app execution)
57 // [2] Allocate "output buffer"
58 // [3] Copy contents of the 2 "core" regions (rw/ro) into the output buffer.
59 // - allocate the cpp vtables in rw (static dump only)
60 // - memcpy the MetaspaceObjs into rw/ro:
// dump_rw_metadata();
// dump_ro_metadata();
63 // - fix all the pointers in the MetaspaceObjs to point to the copies
64 // relocate_metaspaceobj_embedded_pointers()
65 // [4] Copy symbol table, dictionary, etc, into the ro region
66 // [5] Relocate all the pointers in rw/ro, so that the archive can be mapped to
67 // the "requested" location without runtime relocation. See relocate_to_requested()
68 //
69 // "source" vs "buffered" vs "requested"
70 //
71 // The ArchiveBuilder deals with three types of addresses.
72 //
73 // "source": These are the addresses of objects created in step [1] above. They are the actual
74 // InstanceKlass*, Method*, etc, of the Java classes that are loaded for executing
75 // Java bytecodes in the JVM process that's dumping the CDS archive.
76 //
// It may be necessary to continue Java execution after ArchiveBuilder is finished.
78 // Therefore, we don't modify any of the "source" objects.
79 //
80 // "buffered": The "source" objects that are deemed archivable are copied into a temporary buffer.
81 // Objects in the buffer are modified in steps [2, 3, 4] (e.g., unshareable info is
82 // removed, pointers are relocated, etc) to prepare them to be loaded at runtime.
83 //
// "requested": These are the addresses where the "buffered" objects should be loaded at runtime.
85 // When the "buffered" objects are written into the archive file, their addresses
86 // are adjusted in step [5] such that the lowest of these objects would be mapped
87 // at SharedBaseAddress.
88 //
89 // Translation between "source" and "buffered" addresses is done with two hashtables:
90 // _src_obj_table : "source" -> "buffered"
91 // _buffered_to_src_table : "buffered" -> "source"
92 //
93 // Translation between "buffered" and "requested" addresses is done with a simple shift:
94 // buffered_address + _buffer_to_requested_delta == requested_address
95 //
96 class ArchiveBuilder : public StackObj {
97 friend class AOTMapLogger;
98
99 protected:
100 DumpRegion* _current_dump_region;
101 address _buffer_bottom; // for writing the contents of rw/ro regions
102
103 // These are the addresses where we will request the static and dynamic archives to be
104 // mapped at run time. If the request fails (due to ASLR), we will map the archives at
105 // os-selected addresses.
106 address _requested_static_archive_bottom; // This is determined solely by the value of
107 // SharedBaseAddress during -Xshare:dump.
108 address _requested_static_archive_top;
109 address _requested_dynamic_archive_bottom; // Used only during dynamic dump. It's placed
110 // immediately above _requested_static_archive_top.
111 address _requested_dynamic_archive_top;
112
113 // (Used only during dynamic dump) where the static archive is actually mapped. This
114 // may be different than _requested_static_archive_{bottom,top} due to ASLR
115 address _mapped_static_archive_bottom;
116 address _mapped_static_archive_top;
117
118 intx _buffer_to_requested_delta;
119
120 DumpRegion* current_dump_region() const { return _current_dump_region; }
121
122 public:
123 enum FollowMode {
124 make_a_copy, point_to_it, set_to_null
125 };
126
127 private:
128 class SourceObjInfo {
129 uintx _ptrmap_start; // The bit-offset of the start of this object (inclusive)
130 uintx _ptrmap_end; // The bit-offset of the end of this object (exclusive)
131 bool _read_only;
132 bool _has_embedded_pointer;
133 FollowMode _follow_mode;
134 int _size_in_bytes;
135 int _id; // Each object has a unique serial ID, starting from zero. The ID is assigned
136 // when the object is added into _source_objs.
137 MetaspaceObj::Type _msotype;
138 address _source_addr; // The source object to be copied.
139 address _buffered_addr; // The copy of this object insider the buffer.
140 public:
141 SourceObjInfo(MetaspaceClosure::Ref* ref, bool read_only, FollowMode follow_mode) :
142 _ptrmap_start(0), _ptrmap_end(0), _read_only(read_only), _has_embedded_pointer(false), _follow_mode(follow_mode),
143 _size_in_bytes(ref->size() * BytesPerWord), _id(0), _msotype(ref->msotype()),
144 _source_addr(ref->obj()) {
145 if (follow_mode == point_to_it) {
146 _buffered_addr = ref->obj();
147 } else {
148 _buffered_addr = nullptr;
149 }
150 }
151 SourceObjInfo(address src, address buf) {
152 _source_addr = src;
153 _buffered_addr = buf;
154 }
155
156 // This constructor is only used for regenerated objects (created by LambdaFormInvokers, etc).
157 // src = address of a Method or InstanceKlass that has been regenerated.
158 // renegerated_obj_info = info for the regenerated version of src.
159 SourceObjInfo(address src, SourceObjInfo* renegerated_obj_info) :
160 _ptrmap_start(0), _ptrmap_end(0), _read_only(false),
161 _follow_mode(renegerated_obj_info->_follow_mode),
162 _size_in_bytes(0), _msotype(renegerated_obj_info->_msotype),
163 _source_addr(src), _buffered_addr(renegerated_obj_info->_buffered_addr) {}
164
165 bool should_copy() const { return _follow_mode == make_a_copy; }
166 void set_buffered_addr(address addr) {
167 assert(should_copy(), "must be");
168 assert(_buffered_addr == nullptr, "cannot be copied twice");
169 assert(addr != nullptr, "must be a valid copy");
170 _buffered_addr = addr;
171 }
172 void set_ptrmap_start(uintx v) { _ptrmap_start = v; }
173 void set_ptrmap_end(uintx v) { _ptrmap_end = v; }
174 uintx ptrmap_start() const { return _ptrmap_start; } // inclusive
175 uintx ptrmap_end() const { return _ptrmap_end; } // exclusive
176 bool read_only() const { return _read_only; }
177 bool has_embedded_pointer() const { return _has_embedded_pointer; }
178 void set_has_embedded_pointer() { _has_embedded_pointer = true; }
179 int size_in_bytes() const { return _size_in_bytes; }
180 int id() const { return _id; }
181 void set_id(int i) { _id = i; }
182 address source_addr() const { return _source_addr; }
183 address buffered_addr() const {
184 if (_follow_mode != set_to_null) {
185 assert(_buffered_addr != nullptr, "must be initialized");
186 }
187 return _buffered_addr;
188 }
189 MetaspaceObj::Type msotype() const { return _msotype; }
190 FollowMode follow_mode() const { return _follow_mode; }
191 };
192
193 class SourceObjList {
194 uintx _total_bytes;
195 GrowableArray<SourceObjInfo*>* _objs; // Source objects to be archived
196 CHeapBitMap _ptrmap; // Marks the addresses of the pointer fields
197 // in the source objects
198 public:
199 SourceObjList();
200 ~SourceObjList();
201
202 GrowableArray<SourceObjInfo*>* objs() const { return _objs; }
203
204 void append(SourceObjInfo* src_info);
205 void remember_embedded_pointer(SourceObjInfo* pointing_obj, MetaspaceClosure::Ref* ref);
206 void relocate(int i, ArchiveBuilder* builder);
207
208 // convenience accessor
209 SourceObjInfo* at(int i) const { return objs()->at(i); }
210 };
211
212 static const int INITIAL_TABLE_SIZE = 15889;
213 static const int MAX_TABLE_SIZE = 1000000;
214
215 ReservedSpace _shared_rs;
216 VirtualSpace _shared_vs;
217
218 // The "pz" region is used only during static dumps to reserve an unused space between SharedBaseAddress and
219 // the bottom of the rw region. During runtime, this space will be filled with a reserved area that disallows
220 // read/write/exec, so we can track for bad CompressedKlassPointers encoding.
221 // Note: this region does NOT exist in the cds archive.
222 DumpRegion _pz_region;
223
224 DumpRegion _rw_region;
225 DumpRegion _ro_region;
226 DumpRegion _ac_region; // AOT code
227
228 // Combined bitmap to track pointers in both RW and RO regions. This is updated
229 // as objects are copied into RW and RO.
230 CHeapBitMap _ptrmap;
231
232 // _ptrmap is split into these two bitmaps which are written into the archive.
233 CHeapBitMap _rw_ptrmap; // marks pointers in the RW region
234 CHeapBitMap _ro_ptrmap; // marks pointers in the RO region
235 CHeapBitMap _ac_ptrmap; // marks pointers in the CC region
236
237 SourceObjList _rw_src_objs; // objs to put in rw region
238 SourceObjList _ro_src_objs; // objs to put in ro region
239 ResizeableHashTable<address, SourceObjInfo, AnyObj::C_HEAP, mtClassShared> _src_obj_table;
240 ResizeableHashTable<address, address, AnyObj::C_HEAP, mtClassShared> _buffered_to_src_table;
241 GrowableArray<Klass*>* _klasses;
242 GrowableArray<Symbol*>* _symbols;
243 unsigned int _entropy_seed;
244
245 // statistics
246 DumpAllocStats _alloc_stats;
247 size_t _total_heap_region_size;
248 struct {
249 size_t _num_ptrs;
250 size_t _num_tagged_ptrs;
251 size_t _num_nulled_ptrs;
252 } _relocated_ptr_info;
253
254 void print_region_stats(FileMapInfo *map_info,
255 ArchiveMappedHeapInfo* mapped_heap_info,
256 ArchiveStreamedHeapInfo* streamed_heap_info);
257 void print_bitmap_region_stats(size_t size, size_t total_size);
258 void print_heap_region_stats(char* start, size_t size, size_t total_size);
259
260 // For global access.
261 static ArchiveBuilder* _current;
262
263 public:
264 // Use this when you allocate space outside of ArchiveBuilder::dump_{rw,ro}_region.
265 // These are usually for misc tables that are allocated in the RO space.
266 class OtherROAllocMark {
267 char* _oldtop;
268 public:
269 OtherROAllocMark() {
270 _oldtop = _current->_ro_region.top();
271 }
272 ~OtherROAllocMark();
273 };
274
275 void count_relocated_pointer(bool tagged, bool nulled);
276
277 private:
278 FollowMode get_follow_mode(MetaspaceClosure::Ref *ref);
279
280 void iterate_sorted_roots(MetaspaceClosure* it);
281 void sort_klasses();
282 static int compare_symbols_by_address(Symbol** a, Symbol** b);
283 static int compare_klass_by_name(Klass** a, Klass** b);
284 void update_hidden_class_loader_type(InstanceKlass* ik) NOT_CDS_JAVA_HEAP_RETURN;
285
286 void make_shallow_copies(DumpRegion *dump_region, const SourceObjList* src_objs);
287 void make_shallow_copy(DumpRegion *dump_region, SourceObjInfo* src_info);
288
289 void relocate_embedded_pointers(SourceObjList* src_objs);
290
291 bool is_excluded(Klass* k);
292 void clean_up_src_obj_table();
293
294 protected:
295 virtual void iterate_roots(MetaspaceClosure* it) = 0;
296 void start_dump_region(DumpRegion* next);
297
298 public:
299 address reserve_buffer();
300
301 address buffer_bottom() const { return _buffer_bottom; }
302 address buffer_top() const { return (address)current_dump_region()->top(); }
303 address requested_static_archive_bottom() const { return _requested_static_archive_bottom; }
304 address mapped_static_archive_bottom() const { return _mapped_static_archive_bottom; }
305 intx buffer_to_requested_delta() const { return _buffer_to_requested_delta; }
306
307 bool is_in_buffer_space(address p) const {
308 return (buffer_bottom() != nullptr && buffer_bottom() <= p && p < buffer_top());
309 }
310
311 template <typename T> bool is_in_requested_static_archive(T p) const {
312 return _requested_static_archive_bottom <= (address)p && (address)p < _requested_static_archive_top;
313 }
314
315 template <typename T> bool is_in_mapped_static_archive(T p) const {
316 return _mapped_static_archive_bottom <= (address)p && (address)p < _mapped_static_archive_top;
317 }
318
319 template <typename T> bool is_in_buffer_space(T obj) const {
320 return is_in_buffer_space(address(obj));
321 }
322
323 template <typename T> T to_requested(T obj) const {
324 assert(is_in_buffer_space(obj), "must be");
325 return (T)(address(obj) + _buffer_to_requested_delta);
326 }
327
328 template <typename T> T requested_to_buffered(T obj) const {
329 T b = (T)(address(obj) - _buffer_to_requested_delta);
330 assert(is_in_buffer_space(b), "must be");
331 return b;
332 }
333
334 static intx get_buffer_to_requested_delta() {
335 return current()->buffer_to_requested_delta();
336 }
337
338 inline static u4 to_offset_u4(uintx offset) {
339 guarantee(offset <= MAX_SHARED_DELTA, "must be 32-bit offset " INTPTR_FORMAT, offset);
340 return (u4)offset;
341 }
342
343 public:
344 static const uintx MAX_SHARED_DELTA = ArchiveUtils::MAX_SHARED_DELTA;;
345
346 // The address p points to an object inside the output buffer. When the archive is mapped
347 // at the requested address, what's the offset of this object from _requested_static_archive_bottom?
348 uintx buffer_to_offset(address p) const;
349
350 // Same as buffer_to_offset, except that the address p points to either (a) an object
351 // inside the output buffer, or (b), an object in the currently mapped static archive.
352 uintx any_to_offset(address p) const;
353
354 // The reverse of buffer_to_offset()
355 address offset_to_buffered_address(u4 offset) const;
356
357 template <typename T>
358 u4 buffer_to_offset_u4(T p) const {
359 uintx offset = buffer_to_offset((address)p);
360 return to_offset_u4(offset);
361 }
362
363 template <typename T>
364 u4 any_to_offset_u4(T p) const {
365 assert(p != nullptr, "must not be null");
366 uintx offset = any_to_offset((address)p);
367 return to_offset_u4(offset);
368 }
369
370 template <typename T>
371 u4 any_or_null_to_offset_u4(T p) const {
372 if (p == nullptr) {
373 return 0;
374 } else {
375 return any_to_offset_u4<T>(p);
376 }
377 }
378
379 template <typename T>
380 T offset_to_buffered(u4 offset) const {
381 return (T)offset_to_buffered_address(offset);
382 }
383
384 public:
385 ArchiveBuilder();
386 ~ArchiveBuilder();
387
388 int entropy();
389 void gather_klasses_and_symbols();
390 void gather_source_objs();
391 bool gather_klass_and_symbol(MetaspaceClosure::Ref* ref, bool read_only);
392 bool gather_one_source_obj(MetaspaceClosure::Ref* ref, bool read_only);
393 void remember_embedded_pointer_in_enclosing_obj(MetaspaceClosure::Ref* ref);
394
395 DumpRegion* pz_region() { return &_pz_region; }
396 DumpRegion* rw_region() { return &_rw_region; }
397 DumpRegion* ro_region() { return &_ro_region; }
398 DumpRegion* ac_region() { return &_ac_region; }
399
400 static char* rw_region_alloc(size_t num_bytes) {
401 return current()->rw_region()->allocate(num_bytes);
402 }
403 static char* ro_region_alloc(size_t num_bytes) {
404 return current()->ro_region()->allocate(num_bytes);
405 }
406 static char* ac_region_alloc(size_t num_bytes) {
407 return current()->ac_region()->allocate(num_bytes);
408 }
409
410 void start_ac_region();
411 void end_ac_region();
412
413 template <typename T>
414 static Array<T>* new_ro_array(int length) {
415 size_t byte_size = Array<T>::byte_sizeof(length, sizeof(T));
416 Array<T>* array = (Array<T>*)ro_region_alloc(byte_size);
417 array->initialize(length);
418 return array;
419 }
420
421 template <typename T>
422 static Array<T>* new_rw_array(int length) {
423 size_t byte_size = Array<T>::byte_sizeof(length, sizeof(T));
424 Array<T>* array = (Array<T>*)rw_region_alloc(byte_size);
425 array->initialize(length);
426 return array;
427 }
428
429 template <typename T>
430 static size_t ro_array_bytesize(int length) {
431 size_t byte_size = Array<T>::byte_sizeof(length, sizeof(T));
432 return align_up(byte_size, SharedSpaceObjectAlignment);
433 }
434
435 char* ro_strdup(const char* s);
436
437 static int compare_src_objs(SourceObjInfo** a, SourceObjInfo** b);
438 void sort_metadata_objs();
439 void dump_rw_metadata();
440 void dump_ro_metadata();
441 void relocate_metaspaceobj_embedded_pointers();
442 void record_regenerated_object(address orig_src_obj, address regen_src_obj);
443 void make_klasses_shareable();
444 void make_training_data_shareable();
445 void relocate_to_requested();
446 void write_archive(FileMapInfo* mapinfo,
447 ArchiveMappedHeapInfo* mapped_heap_info,
448 ArchiveStreamedHeapInfo* streamed_heap_info);
449 void write_region(FileMapInfo* mapinfo, int region_idx, DumpRegion* dump_region,
450 bool read_only, bool allow_exec);
451
452 void write_pointer_in_buffer(address* ptr_location, address src_addr);
453 template <typename T> void write_pointer_in_buffer(T* ptr_location, T src_addr) {
454 write_pointer_in_buffer((address*)ptr_location, (address)src_addr);
455 }
456
457 void mark_and_relocate_to_buffered_addr(address* ptr_location);
458 template <typename T> void mark_and_relocate_to_buffered_addr(T ptr_location) {
459 mark_and_relocate_to_buffered_addr((address*)ptr_location);
460 }
461
462 bool has_been_archived(address src_addr) const;
463 template <typename T> bool has_been_archived(T src_addr) const {
464 return has_been_archived((address)src_addr);
465 }
466
467 address get_buffered_addr(address src_addr) const;
468 template <typename T> T get_buffered_addr(T src_addr) const {
469 CDS_ONLY(return (T)get_buffered_addr((address)src_addr);)
470 NOT_CDS(return nullptr;)
471 }
472
473 address get_source_addr(address buffered_addr) const;
474 template <typename T> T get_source_addr(T buffered_addr) const {
475 return (T)get_source_addr((address)buffered_addr);
476 }
477
478 // All klasses and symbols that will be copied into the archive
479 GrowableArray<Klass*>* klasses() const { return _klasses; }
480 GrowableArray<Symbol*>* symbols() const { return _symbols; }
481
482 static bool is_active() {
483 CDS_ONLY(return (_current != nullptr));
484 NOT_CDS(return false;)
485 }
486
487 static ArchiveBuilder* current() {
488 assert(_current != nullptr, "ArchiveBuilder must be active");
489 return _current;
490 }
491
492 static DumpAllocStats* alloc_stats() {
493 return &(current()->_alloc_stats);
494 }
495
496 static CompactHashtableStats* symbol_stats() {
497 return alloc_stats()->symbol_stats();
498 }
499
500 static CompactHashtableStats* string_stats() {
501 return alloc_stats()->string_stats();
502 }
503
504 narrowKlass get_requested_narrow_klass(Klass* k);
505
506 static Klass* get_buffered_klass(Klass* src_klass) {
507 Klass* klass = (Klass*)current()->get_buffered_addr((address)src_klass);
508 assert(klass != nullptr && klass->is_klass(), "must be");
509 return klass;
510 }
511
512 static Symbol* get_buffered_symbol(Symbol* src_symbol) {
513 return (Symbol*)current()->get_buffered_addr((address)src_symbol);
514 }
515
516 static void log_as_hex(address base, address top, address requested_base, bool is_heap = false);
517 void print_stats();
518 void report_out_of_space(const char* name, size_t needed_bytes);
519
520 #ifdef _LP64
521 // The CDS archive contains pre-computed narrow Klass IDs. It carries them in the headers of
522 // archived heap objects. With +UseCompactObjectHeaders, it also carries them in prototypes
523 // in Klass.
524 // When generating the archive, these narrow Klass IDs are computed using the following scheme:
525 // 1) The future encoding base is assumed to point to the first address of the generated mapping.
526 // That means that at runtime, the narrow Klass encoding must be set up with base pointing to
527 // the start address of the mapped CDS metadata archive (wherever that may be). This precludes
528 // zero-based encoding.
529 // 2) The shift must be large enough to result in an encoding range that covers the future assumed
530 // runtime Klass range. That future Klass range will contain both the CDS metadata archive and
531 // the future runtime class space. Since we do not know the size of the future class space, we
532 // need to chose an encoding base/shift combination that will result in a "large enough" size.
533 // The details depend on whether we use compact object headers or legacy object headers.
534 // In Legacy Mode, a narrow Klass ID is 32 bit. This gives us an encoding range size of 4G even
535 // with shift = 0, which is all we need. Therefore, we use a shift=0 for pre-calculating the
536 // narrow Klass IDs.
537 // TinyClassPointer Mode:
538 // We use the highest possible shift value to maximize the encoding range size.
539 static int precomputed_narrow_klass_shift();
540 #endif // _LP64
541
542 };
543
544 #endif // SHARE_CDS_ARCHIVEBUILDER_HPP