1 /*
2 * Copyright (c) 2005, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP
26 #define SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP
27
28 #include "gc/parallel/mutableSpace.hpp"
29 #include "gc/parallel/objectStartArray.hpp"
30 #include "gc/parallel/parallelScavengeHeap.hpp"
31 #include "gc/parallel/parMarkBitMap.hpp"
32 #include "gc/shared/collectedHeap.hpp"
33 #include "gc/shared/collectorCounters.hpp"
34 #include "gc/shared/referenceProcessor.hpp"
35 #include "gc/shared/taskTerminator.hpp"
36 #include "oops/oop.hpp"
37 #include "runtime/atomic.hpp"
38 #include "runtime/orderAccess.hpp"
39
40 class ParallelScavengeHeap;
41 class PSAdaptiveSizePolicy;
42 class PSYoungGen;
43 class PSOldGen;
44 class ParCompactionManager;
45 class PSParallelCompact;
46 class MoveAndUpdateClosure;
47 class ParallelOldTracer;
48 class STWGCTimer;
49
50 // The SplitInfo class holds the information needed to 'split' a source region
51 // so that the live data can be copied to two destination *spaces*. Normally,
52 // all the live data in a region is copied to a single destination space (e.g.,
53 // everything live in a region in eden is copied entirely into the old gen).
54 // However, when the heap is nearly full, all the live data in eden may not fit
55 // into the old gen. Copying only some of the regions from eden to old gen
56 // requires finding a region that does not contain a partial object (i.e., no
57 // live object crosses the region boundary) somewhere near the last object that
58 // does fit into the old gen. Since it's not always possible to find such a
59 // region, splitting is necessary for predictable behavior.
60 //
61 // A region is always split at the end of the partial object. This avoids
62 // additional tests when calculating the new location of a pointer, which is a
63 // very hot code path. The partial object and everything to its left will be
64 // copied to another space (call it dest_space_1). The live data to the right
65 // of the partial object will be copied either within the space itself, or to a
66 // different destination space (distinct from dest_space_1).
67 //
68 // Split points are identified during the summary phase, when region
69 // destinations are computed: data about the split, including the
70 // partial_object_size, is recorded in a SplitInfo record and the
71 // partial_object_size field in the summary data is set to zero. The zeroing is
72 // possible (and necessary) since the partial object will move to a different
73 // destination space than anything to its right, thus the partial object should
74 // not affect the locations of any objects to its right.
75 //
76 // The recorded data is used during the compaction phase, but only rarely: when
77 // the partial object on the split region will be copied across a destination
78 // region boundary. This test is made once each time a region is filled, and is
79 // a simple address comparison, so the overhead is negligible (see
80 // PSParallelCompact::first_src_addr()).
81 //
82 // Notes:
83 //
84 // Only regions with partial objects are split; a region without a partial
85 // object does not need any extra bookkeeping.
86 //
87 // At most one region is split per space, so the amount of data required is
88 // constant.
89 //
90 // A region is split only when the destination space would overflow. Once that
91 // happens, the destination space is abandoned and no other data (even from
92 // other source spaces) is targeted to that destination space. Abandoning the
93 // destination space may leave a somewhat large unused area at the end, if a
94 // large object caused the overflow.
95 //
96 // Future work:
97 //
98 // More bookkeeping would be required to continue to use the destination space.
99 // The most general solution would allow data from regions in two different
100 // source spaces to be "joined" in a single destination region. At the very
101 // least, additional code would be required in next_src_region() to detect the
102 // join and skip to an out-of-order source region. If the join region was also
103 // the last destination region to which a split region was copied (the most
104 // likely case), then additional work would be needed to get fill_region() to
105 // stop iteration and switch to a new source region at the right point. Basic
106 // idea would be to use a fake value for the top of the source space. It is
107 // doable, if a bit tricky.
108 //
109 // A simpler (but less general) solution would fill the remainder of the
110 // destination region with a dummy object and continue filling the next
111 // destination region.
112
113 class SplitInfo
114 {
115 public:
116 // Return true if this split info is valid (i.e., if a split has been
117 // recorded). The very first region cannot have a partial object and thus is
118 // never split, so 0 is the 'invalid' value.
119 bool is_valid() const { return _split_region_idx > 0; }
120
121 // Return true if this split holds data for the specified source region.
122 inline bool is_split(size_t region_idx) const;
123
124 // Obj at the split point doesn't fit the previous space and will be relocated to the next space.
125 HeapWord* split_point() const { return _split_point; }
126
127 // Number of live words before the split point on this region.
128 size_t preceding_live_words() const { return _preceding_live_words; }
129
130 // A split region has two "destinations", living in two spaces. This method
131 // returns the first one -- destination for the first live word on
132 // this split region.
133 HeapWord* preceding_destination() const {
134 assert(_preceding_destination != nullptr, "inv");
135 return _preceding_destination;
136 }
137
138 // Number of regions the preceding live words are relocated into.
139 uint preceding_destination_count() const { return _preceding_destination_count; }
140
141 void record(size_t split_region_idx, HeapWord* split_point, size_t preceding_live_words);
142
143 void clear();
144
145 DEBUG_ONLY(void verify_clear();)
146
147 private:
148 size_t _split_region_idx;
149 HeapWord* _split_point;
150 size_t _preceding_live_words;
151 HeapWord* _preceding_destination;
152 uint _preceding_destination_count;
153 };
154
155 inline bool SplitInfo::is_split(size_t region_idx) const
156 {
157 return _split_region_idx == region_idx && is_valid();
158 }
159
160 class SpaceInfo
161 {
162 public:
163 MutableSpace* space() const { return _space; }
164
165 // Where the free space will start after the collection. Valid only after the
166 // summary phase completes.
167 HeapWord* new_top() const { return _new_top; }
168
169 // Allows new_top to be set.
170 HeapWord** new_top_addr() { return &_new_top; }
171
172 // Where the dense prefix ends, or the compacted region begins.
173 HeapWord* dense_prefix() const { return _dense_prefix; }
174
175 // The start array for the (generation containing the) space, or null if there
176 // is no start array.
177 ObjectStartArray* start_array() const { return _start_array; }
178
179 SplitInfo& split_info() { return _split_info; }
180
181 void set_space(MutableSpace* s) { _space = s; }
182 void set_new_top(HeapWord* addr) { _new_top = addr; }
183 void set_dense_prefix(HeapWord* addr) { _dense_prefix = addr; }
184 void set_start_array(ObjectStartArray* s) { _start_array = s; }
185
186 private:
187 MutableSpace* _space;
188 HeapWord* _new_top;
189 HeapWord* _dense_prefix;
190 ObjectStartArray* _start_array;
191 SplitInfo _split_info;
192 };
193
194 class ParallelCompactData
195 {
196 public:
197 // Sizes are in HeapWords, unless indicated otherwise.
198 static const size_t Log2RegionSize;
199 static const size_t RegionSize;
200 static const size_t RegionSizeBytes;
201
202 // Mask for the bits in a size_t to get an offset within a region.
203 static const size_t RegionSizeOffsetMask;
204 // Mask for the bits in a pointer to get an offset within a region.
205 static const size_t RegionAddrOffsetMask;
206 // Mask for the bits in a pointer to get the address of the start of a region.
207 static const size_t RegionAddrMask;
208
209 class RegionData
210 {
211 public:
212 // Destination for the first live word in this region.
213 // Therefore, the new addr for every live obj on this region can be calculated as:
214 //
215 // new_addr := _destination + live_words_offset(old_addr);
216 //
217 // where, live_words_offset is the number of live words accumulated from
218 // region-start to old_addr.
219 HeapWord* destination() const { return _destination; }
220
221 // A destination region can have multiple source regions; only the first
222 // one is recorded. Since all live objs are slided down, subsequent source
223 // regions can be found via plain heap-region iteration.
224 size_t source_region() const { return _source_region; }
225
226 // Reuse _source_region to store the corresponding shadow region index
227 size_t shadow_region() const { return _source_region; }
228
229 // The starting address of the partial object extending onto the region.
230 HeapWord* partial_obj_addr() const { return _partial_obj_addr; }
231
232 // Size of the partial object extending onto the region (words).
233 size_t partial_obj_size() const { return _partial_obj_size; }
234
235 // Size of live data that lies within this region due to objects that start
236 // in this region (words). This does not include the partial object
237 // extending onto the region (if any), or the part of an object that extends
238 // onto the next region (if any).
239 size_t live_obj_size() const { return dc_and_los() & los_mask; }
240
241 // Total live data that lies within the region (words).
242 size_t data_size() const { return partial_obj_size() + live_obj_size(); }
243
244 // The destination_count is the number of other regions to which data from
245 // this region will be copied. At the end of the summary phase, the valid
246 // values of destination_count are
247 //
248 // 0 - data from the region will be compacted completely into itself, or the
249 // region is empty. The region can be claimed and then filled.
250 // 1 - data from the region will be compacted into 1 other region; some
251 // data from the region may also be compacted into the region itself.
252 // 2 - data from the region will be copied to 2 other regions.
253 //
254 // During compaction as regions are emptied, the destination_count is
255 // decremented (atomically) and when it reaches 0, it can be claimed and
256 // then filled.
257 //
258 // A region is claimed for processing by atomically changing the
259 // destination_count to the claimed value (dc_claimed). After a region has
260 // been filled, the destination_count should be set to the completed value
261 // (dc_completed).
262 inline uint destination_count() const;
263 inline uint destination_count_raw() const;
264
265 // Whether this region is available to be claimed, has been claimed, or has
266 // been completed.
267 //
268 // Minor subtlety: claimed() returns true if the region is marked
269 // completed(), which is desirable since a region must be claimed before it
270 // can be completed.
271 bool available() const { return dc_and_los() < dc_one; }
272 bool claimed() const { return dc_and_los() >= dc_claimed; }
273 bool completed() const { return dc_and_los() >= dc_completed; }
274
275 // These are not atomic.
276 void set_destination(HeapWord* addr) { _destination = addr; }
277 void set_source_region(size_t region) { _source_region = region; }
278 void set_shadow_region(size_t region) { _source_region = region; }
279 void set_partial_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
280 void set_partial_obj_size(size_t words) {
281 _partial_obj_size = (region_sz_t) words;
282 }
283
284 inline void set_destination_count(uint count);
285 inline void set_live_obj_size(size_t words);
286
287 inline void set_completed();
288 inline bool claim_unsafe();
289
290 // These are atomic.
291 inline void add_live_obj(size_t words);
292 inline void decrement_destination_count();
293 inline bool claim();
294
295 // Possible values of _shadow_state, and transition is as follows
296 // Normal Path:
297 // UnusedRegion -> mark_normal() -> NormalRegion
298 // Shadow Path:
299 // UnusedRegion -> mark_shadow() -> ShadowRegion ->
300 // mark_filled() -> FilledShadow -> mark_copied() -> CopiedShadow
301 static const int UnusedRegion = 0; // The region is not collected yet
302 static const int ShadowRegion = 1; // Stolen by an idle thread, and a shadow region is created for it
303 static const int FilledShadow = 2; // Its shadow region has been filled and ready to be copied back
304 static const int CopiedShadow = 3; // The data of the shadow region has been copied back
305 static const int NormalRegion = 4; // The region will be collected by the original parallel algorithm
306
307 // Mark the current region as normal or shadow to enter different processing paths
308 inline bool mark_normal();
309 inline bool mark_shadow();
310 // Mark the shadow region as filled and ready to be copied back
311 inline void mark_filled();
312 // Mark the shadow region as copied back to avoid double copying.
313 inline bool mark_copied();
314 // Special case: see the comment in PSParallelCompact::fill_and_update_shadow_region.
315 // Return to the normal path here
316 inline void shadow_to_normal();
317
318 int shadow_state() { return _shadow_state.load_relaxed(); }
319
320 bool is_clear();
321
322 void verify_clear() NOT_DEBUG_RETURN;
323
324 private:
325 // The type used to represent object sizes within a region.
326 typedef uint region_sz_t;
327
328 // Constants for manipulating the _dc_and_los field, which holds both the
329 // destination count and live obj size. The live obj size lives at the
330 // least significant end so no masking is necessary when adding.
331 static const region_sz_t dc_shift; // Shift amount.
332 static const region_sz_t dc_mask; // Mask for destination count.
333 static const region_sz_t dc_one; // 1, shifted appropriately.
334 static const region_sz_t dc_claimed; // Region has been claimed.
335 static const region_sz_t dc_completed; // Region has been completed.
336 static const region_sz_t los_mask; // Mask for live obj size.
337
338 HeapWord* _destination;
339 size_t _source_region;
340 HeapWord* _partial_obj_addr;
341 region_sz_t _partial_obj_size;
342 Atomic<region_sz_t> _dc_and_los;
343 Atomic<int> _shadow_state;
344
345 region_sz_t dc_and_los() const { return _dc_and_los.load_relaxed(); }
346 #ifdef ASSERT
347 public:
348 uint _pushed; // 0 until region is pushed onto a stack
349 private:
350 #endif
351 };
352
353 public:
354 ParallelCompactData();
355 bool initialize(MemRegion reserved_heap);
356
357 size_t region_count() const { return _region_count; }
358 size_t reserved_byte_size() const { return _reserved_byte_size; }
359
360 // Convert region indices to/from RegionData pointers.
361 inline RegionData* region(size_t region_idx) const;
362 inline size_t region(const RegionData* const region_ptr) const;
363
364 HeapWord* summarize_split_space(size_t src_region, SplitInfo& split_info,
365 HeapWord* destination, HeapWord* target_end,
366 HeapWord** target_next);
367
368 size_t live_words_in_space(const MutableSpace* space,
369 HeapWord** full_region_prefix_end = nullptr);
370
371 bool summarize(SplitInfo& split_info,
372 HeapWord* source_beg, HeapWord* source_end,
373 HeapWord** source_next,
374 HeapWord* target_beg, HeapWord* target_end,
375 HeapWord** target_next);
376
377 void clear_range(size_t beg_region, size_t end_region);
378
379 // Return the number of words between addr and the start of the region
380 // containing addr.
381 inline size_t region_offset(const HeapWord* addr) const;
382
383 // Convert addresses to/from a region index or region pointer.
384 inline size_t addr_to_region_idx(const HeapWord* addr) const;
385 inline RegionData* addr_to_region_ptr(const HeapWord* addr) const;
386 inline HeapWord* region_to_addr(size_t region) const;
387 inline HeapWord* region_to_addr(const RegionData* region) const;
388
389 inline HeapWord* region_align_down(HeapWord* addr) const;
390 inline HeapWord* region_align_up(HeapWord* addr) const;
391 inline bool is_region_aligned(HeapWord* addr) const;
392
393 #ifdef ASSERT
394 void verify_clear();
395 #endif // #ifdef ASSERT
396
397 private:
398 HeapWord* _heap_start;
399 #ifdef ASSERT
400 HeapWord* _heap_end;
401 #endif // #ifdef ASSERT
402
403 PSVirtualSpace* _region_vspace;
404 size_t _reserved_byte_size;
405 RegionData* _region_data;
406 size_t _region_count;
407 };
408
409 inline uint
410 ParallelCompactData::RegionData::destination_count_raw() const
411 {
412 return dc_and_los() & dc_mask;
413 }
414
415 inline uint
416 ParallelCompactData::RegionData::destination_count() const
417 {
418 return destination_count_raw() >> dc_shift;
419 }
420
421 inline void
422 ParallelCompactData::RegionData::set_destination_count(uint count)
423 {
424 assert(count <= (dc_completed >> dc_shift), "count too large");
425 const region_sz_t live_sz = (region_sz_t) live_obj_size();
426 _dc_and_los.store_relaxed((count << dc_shift) | live_sz);
427 }
428
429 inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words)
430 {
431 assert(words <= los_mask, "would overflow");
432 _dc_and_los.store_relaxed(destination_count_raw() | (region_sz_t)words);
433 }
434
435 inline void ParallelCompactData::RegionData::decrement_destination_count()
436 {
437 assert(dc_and_los() < dc_claimed, "already claimed");
438 assert(dc_and_los() >= dc_one, "count would go negative");
439 _dc_and_los.add_then_fetch(dc_mask);
440 }
441
442 inline void ParallelCompactData::RegionData::set_completed()
443 {
444 assert(claimed(), "must be claimed first");
445 _dc_and_los.store_relaxed(dc_completed | (region_sz_t) live_obj_size());
446 }
447
448 // MT-unsafe claiming of a region. Should only be used during single threaded
449 // execution.
450 inline bool ParallelCompactData::RegionData::claim_unsafe()
451 {
452 if (available()) {
453 _dc_and_los.store_relaxed(dc_and_los() | dc_claimed);
454 return true;
455 }
456 return false;
457 }
458
459 inline void ParallelCompactData::RegionData::add_live_obj(size_t words)
460 {
461 assert(words <= (size_t)los_mask - live_obj_size(), "overflow");
462 _dc_and_los.add_then_fetch(static_cast<region_sz_t>(words));
463 }
464
465 inline bool ParallelCompactData::RegionData::claim()
466 {
467 const region_sz_t los = static_cast<region_sz_t>(live_obj_size());
468 return _dc_and_los.compare_set(los, dc_claimed | los);
469 }
470
471 inline bool ParallelCompactData::RegionData::mark_normal() {
472 return _shadow_state.compare_set(UnusedRegion, NormalRegion);
473 }
474
475 inline bool ParallelCompactData::RegionData::mark_shadow() {
476 if (shadow_state() != UnusedRegion) return false;
477 return _shadow_state.compare_set(UnusedRegion, ShadowRegion);
478 }
479
480 inline void ParallelCompactData::RegionData::mark_filled() {
481 int old = _shadow_state.compare_exchange(ShadowRegion, FilledShadow);
482 assert(old == ShadowRegion, "Fail to mark the region as filled");
483 }
484
485 inline bool ParallelCompactData::RegionData::mark_copied() {
486 return _shadow_state.compare_set(FilledShadow, CopiedShadow);
487 }
488
489 void ParallelCompactData::RegionData::shadow_to_normal() {
490 int old = _shadow_state.compare_exchange(ShadowRegion, NormalRegion);
491 assert(old == ShadowRegion, "Fail to mark the region as finish");
492 }
493
494 inline ParallelCompactData::RegionData*
495 ParallelCompactData::region(size_t region_idx) const
496 {
497 assert(region_idx <= region_count(), "bad arg");
498 return _region_data + region_idx;
499 }
500
501 inline size_t
502 ParallelCompactData::region(const RegionData* const region_ptr) const
503 {
504 assert(region_ptr >= _region_data, "bad arg");
505 assert(region_ptr <= _region_data + region_count(), "bad arg");
506 return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
507 }
508
509 inline size_t
510 ParallelCompactData::region_offset(const HeapWord* addr) const
511 {
512 assert(addr >= _heap_start, "bad addr");
513 // This method would mistakenly return 0 for _heap_end; hence exclusive.
514 assert(addr < _heap_end, "bad addr");
515 return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize;
516 }
517
518 inline size_t
519 ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const
520 {
521 assert(addr >= _heap_start, "bad addr " PTR_FORMAT " _heap_start " PTR_FORMAT, p2i(addr), p2i(_heap_start));
522 assert(addr <= _heap_end, "bad addr " PTR_FORMAT " _heap_end " PTR_FORMAT, p2i(addr), p2i(_heap_end));
523 return pointer_delta(addr, _heap_start) >> Log2RegionSize;
524 }
525
526 inline ParallelCompactData::RegionData*
527 ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const
528 {
529 return region(addr_to_region_idx(addr));
530 }
531
532 inline HeapWord*
533 ParallelCompactData::region_to_addr(size_t region) const
534 {
535 assert(region <= _region_count, "region out of range");
536 return _heap_start + (region << Log2RegionSize);
537 }
538
539 inline HeapWord*
540 ParallelCompactData::region_to_addr(const RegionData* region) const
541 {
542 return region_to_addr(pointer_delta(region, _region_data,
543 sizeof(RegionData)));
544 }
545
546 inline HeapWord*
547 ParallelCompactData::region_align_down(HeapWord* addr) const
548 {
549 assert(addr >= _heap_start, "bad addr");
550 assert(addr < _heap_end + RegionSize, "bad addr");
551 return (HeapWord*)(size_t(addr) & RegionAddrMask);
552 }
553
554 inline HeapWord*
555 ParallelCompactData::region_align_up(HeapWord* addr) const
556 {
557 assert(addr >= _heap_start, "bad addr");
558 assert(addr <= _heap_end, "bad addr");
559 return region_align_down(addr + RegionSizeOffsetMask);
560 }
561
562 inline bool
563 ParallelCompactData::is_region_aligned(HeapWord* addr) const
564 {
565 return (size_t(addr) & RegionAddrOffsetMask) == 0;
566 }
567
568 // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
569 // do_addr() method.
570 //
571 // The closure is initialized with the number of heap words to process
572 // (words_remaining()), and becomes 'full' when it reaches 0. The do_addr()
573 // methods in subclasses should update the total as words are processed. Since
574 // only one subclass actually uses this mechanism to terminate iteration, the
575 // default initial value is > 0. The implementation is here and not in the
576 // single subclass that uses it to avoid making is_full() virtual, and thus
577 // adding a virtual call per live object.
578
579
580 // The Parallel collector is a stop-the-world garbage collector that
581 // does parts of the collection using parallel threads. The collection includes
582 // the tenured generation and the young generation.
583 //
584 // A collection consists of the following phases.
585 //
586 // - marking phase
587 // - summary phase (single-threaded)
588 // - forward (to new address) phase
589 // - adjust pointers phase
590 // - compacting phase
591 // - clean up phase
592 //
593 // Roughly speaking these phases correspond, respectively, to
594 //
595 // - mark all the live objects
596 // - calculating destination-region for each region for better parallellism in following phases
597 // - calculate the destination of each object at the end of the collection
598 // - adjust pointers to reflect new destination of objects
599 // - move the objects to their destination
600 // - update some references and reinitialize some variables
601 //
602 // A space that is being collected is divided into regions and with each region
603 // is associated an object of type ParallelCompactData. Each region is of a
604 // fixed size and typically will contain more than 1 object and may have parts
605 // of objects at the front and back of the region.
606 //
607 // region -----+---------------------+----------
608 // objects covered [ AAA )[ BBB )[ CCC )[ DDD )
609 //
610 // The marking phase does a complete marking of all live objects in the heap.
611 // The marking also compiles the size of the data for all live objects covered
612 // by the region. This size includes the part of any live object spanning onto
613 // the region (part of AAA if it is live) from the front, all live objects
614 // contained in the region (BBB and/or CCC if they are live), and the part of
615 // any live objects covered by the region that extends off the region (part of
616 // DDD if it is live). The marking phase uses multiple GC threads and marking
617 // is done in a bit array of type ParMarkBitMap. The marking of the bit map is
618 // done atomically as is the accumulation of the size of the live objects
619 // covered by a region.
620 //
621 // The summary phase calculates the total live data to the left of each region
622 // XXX. Based on that total and the bottom of the space, it can calculate the
623 // starting location of the live data in XXX. The summary phase calculates for
624 // each region XXX quantities such as
625 //
626 // - the amount of live data at the beginning of a region from an object
627 // entering the region.
628 // - the location of the first live data on the region
629 // - a count of the number of regions receiving live data from XXX.
630 //
631 // See ParallelCompactData for precise details. The summary phase also
632 // calculates the dense prefix for the compaction. The dense prefix is a
633 // portion at the beginning of the space that is not moved. The objects in the
634 // dense prefix do need to have their object references updated. See method
635 // summarize_dense_prefix().
636 //
637 // The forward (to new address) phase calculates the new address of each
638 // objects and records old-addr-to-new-addr asssociation.
639 //
640 // The adjust pointers phase remap all pointers to reflect the new address of each object.
641 //
642 // The compaction phase moves objects to their new location.
643 //
644 // Compaction is done on a region basis. A region that is ready to be filled is
645 // put on a ready list and GC threads take region off the list and fill them. A
646 // region is ready to be filled if it empty of live objects. Such a region may
647 // have been initially empty (only contained dead objects) or may have had all
648 // its live objects copied out already. A region that compacts into itself is
649 // also ready for filling. The ready list is initially filled with empty
650 // regions and regions compacting into themselves. There is always at least 1
651 // region that can be put on the ready list. The regions are atomically added
652 // and removed from the ready list.
653 //
654 // During compaction, there is a natural task dependency among regions because
655 // destination regions may also be source regions themselves. Consequently, the
656 // destination regions are not available for processing until all live objects
657 // within them are evacuated to their destinations. These dependencies lead to
658 // limited thread utilization as threads spin waiting on regions to be ready.
659 // Shadow regions are utilized to address these region dependencies. The basic
660 // idea is that, if a region is unavailable because it still contains live
661 // objects and thus cannot serve as a destination momentarily, the GC thread
662 // may allocate a shadow region as a substitute destination and directly copy
663 // live objects into this shadow region. Live objects in the shadow region will
664 // be copied into the target destination region when it becomes available.
665 //
666 // For more details on shadow regions, please refer to ยง4.2 of the VEE'19 paper:
667 // Haoyu Li, Mingyu Wu, Binyu Zang, and Haibo Chen. 2019. ScissorGC: scalable
668 // and efficient compaction for Java full garbage collection. In Proceedings of
669 // the 15th ACM SIGPLAN/SIGOPS International Conference on Virtual Execution
670 // Environments (VEE 2019). ACM, New York, NY, USA, 108-121. DOI:
671 // https://doi.org/10.1145/3313808.3313820
672
673 class PSParallelCompact : AllStatic {
674 public:
675 // Convenient access to type names.
676 typedef ParallelCompactData::RegionData RegionData;
677
678 // By the end of full-gc, all live objs are compacted into the first three spaces, old, eden, and from.
679 typedef enum {
680 old_space_id,
681 eden_space_id,
682 from_space_id,
683 to_space_id,
684 last_space_id
685 } SpaceId;
686
687 // Inline closure decls
688 //
689 class IsAliveClosure: public BoolObjectClosure {
690 public:
691 virtual bool do_object_b(oop p);
692 };
693
694 private:
695 static STWGCTimer _gc_timer;
696 static ParallelOldTracer _gc_tracer;
697 static elapsedTimer _accumulated_time;
698 static unsigned int _maximum_compaction_gc_num;
699 static CollectorCounters* _counters;
700 static ParMarkBitMap _mark_bitmap;
701 static ParallelCompactData _summary_data;
702 static IsAliveClosure _is_alive_closure;
703 static SpaceInfo _space_info[last_space_id];
704
705 // Reference processing (used in ...follow_contents)
706 static SpanSubjectToDiscoveryClosure _span_based_discoverer;
707 static ReferenceProcessor* _ref_processor;
708
709 public:
710 static ParallelOldTracer* gc_tracer() { return &_gc_tracer; }
711
712 private:
713
714 static void initialize_space_info();
715
716 // Clear the marking bitmap and summary data that cover the specified space.
717 static void clear_data_covering_space(SpaceId id);
718
719 static void pre_compact();
720 static void post_compact();
721
722 static bool check_maximum_compaction(bool should_do_max_compaction,
723 size_t total_live_words,
724 MutableSpace* const old_space,
725 HeapWord* full_region_prefix_end);
726
727 // Mark live objects
728 static void marking_phase(ParallelOldTracer *gc_tracer);
729
730 // Identify the dense-fix in the old-space to avoid moving much memory with little reclaimed.
731 static HeapWord* compute_dense_prefix_for_old_space(MutableSpace* old_space,
732 HeapWord* full_region_prefix_end);
733
734 // Create a filler obj (if needed) right before the dense-prefix-boundary to
735 // make the heap parsable.
736 static void fill_dense_prefix_end(SpaceId id);
737
738 static void summary_phase(bool should_do_max_compaction);
739
740 static void adjust_pointers();
741 static void forward_to_new_addr();
742
743 static void verify_forward() NOT_DEBUG_RETURN;
744 static void verify_filler_in_dense_prefix() NOT_DEBUG_RETURN;
745
746 // Move objects to new locations.
747 static void compact();
748
749 static void report_object_count_after_gc();
750 // Add available regions to the stack and draining tasks to the task queue.
751 static void prepare_region_draining_tasks(uint parallel_gc_threads);
752
753 static void fill_range_in_dense_prefix(HeapWord* start, HeapWord* end);
754
755 public:
756 static void fill_dead_objs_in_dense_prefix();
757
758 // This method invokes a full collection.
759 // clear_all_soft_refs controls whether soft-refs should be cleared or not.
760 // should_do_max_compaction controls whether all spaces for dead objs should be reclaimed.
761 static bool invoke(bool clear_all_soft_refs, bool should_do_max_compaction);
762
763 template<typename Func>
764 static void adjust_in_space_helper(SpaceId id, Atomic<uint>* claim_counter, Func&& on_stripe);
765
766 static void adjust_in_old_space(Atomic<uint>* claim_counter);
767
768 static void adjust_in_young_space(SpaceId id, Atomic<uint>* claim_counter);
769
770 static void adjust_pointers_in_spaces(uint worker_id, Atomic<uint>* claim_counter);
771
772 static void post_initialize();
773 // Perform initialization for PSParallelCompact that requires
774 // allocations. This should be called during the VM initialization
775 // at a pointer where it would be appropriate to return a JNI_ENOMEM
776 // in the event of a failure.
777 static bool initialize_aux_data();
778
779 // Closure accessors
780 static BoolObjectClosure* is_alive_closure() { return &_is_alive_closure; }
781
782 // Public accessors
783 static elapsedTimer* accumulated_time() { return &_accumulated_time; }
784
785 static CollectorCounters* counters() { return _counters; }
786
787 static inline bool is_marked(oop obj);
788
789 template <class T> static inline void adjust_pointer(T* p);
790
791 // Convenience wrappers for per-space data kept in _space_info.
792 static inline MutableSpace* space(SpaceId space_id);
793 static inline HeapWord* new_top(SpaceId space_id);
794 static inline HeapWord* dense_prefix(SpaceId space_id);
795 static inline ObjectStartArray* start_array(SpaceId space_id);
796
797 // Return the address of the count + 1st live word in the range [beg, end).
798 static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count);
799
800 // Return the address of the word to be copied to dest_addr, which must be
801 // aligned to a region boundary.
802 static HeapWord* first_src_addr(HeapWord* const dest_addr,
803 SpaceId src_space_id,
804 size_t src_region_idx);
805
806 // Determine the next source region, set closure.source() to the start of the
807 // new region return the region index. Parameter end_addr is the address one
808 // beyond the end of source range just processed. If necessary, switch to a
809 // new source space and set src_space_id (in-out parameter) and src_space_top
810 // (out parameter) accordingly.
811 static size_t next_src_region(MoveAndUpdateClosure& closure,
812 SpaceId& src_space_id,
813 HeapWord*& src_space_top,
814 HeapWord* end_addr);
815
816 // Decrement the destination count for each non-empty source region in the
817 // range [beg_region, region(region_align_up(end_addr))). If the destination
818 // count for a region goes to 0 and it needs to be filled, enqueue it.
819 static void decrement_destination_counts(ParCompactionManager* cm,
820 SpaceId src_space_id,
821 size_t beg_region,
822 HeapWord* end_addr);
823
824 static HeapWord* partial_obj_end(HeapWord* region_start_addr);
825
826 static void fill_region(ParCompactionManager* cm, MoveAndUpdateClosure& closure, size_t region);
827 static void fill_and_update_region(ParCompactionManager* cm, size_t region);
828
829 static bool steal_unavailable_region(ParCompactionManager* cm, size_t& region_idx);
830 static void fill_and_update_shadow_region(ParCompactionManager* cm, size_t region);
831 // Copy the content of a shadow region back to its corresponding heap region
832 static void copy_back(HeapWord* shadow_addr, HeapWord* region_addr);
833 // Collect empty regions as shadow regions and initialize the
834 // _next_shadow_region filed for each compact manager
835 static void initialize_shadow_regions(uint parallel_gc_threads);
836
837 static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; }
838 static ParallelCompactData& summary_data() { return _summary_data; }
839
840 // Reference Processing
841 static ReferenceProcessor* ref_processor() { return _ref_processor; }
842
843 static STWGCTimer* gc_timer() { return &_gc_timer; }
844
845 // Return the SpaceId for the given address.
846 static SpaceId space_id(HeapWord* addr);
847
848 static void print_on(outputStream* st);
849
850 #ifdef ASSERT
851 // Sanity check the new location of a word in the heap.
852 static inline void check_new_location(HeapWord* old_addr, HeapWord* new_addr);
853 // Verify that all the regions have been emptied.
854 static void verify_complete(SpaceId space_id);
855 #endif // #ifdef ASSERT
856 };
857
858 class MoveAndUpdateClosure: public StackObj {
859 private:
860 ParMarkBitMap* const _bitmap;
861 size_t _words_remaining; // Words left to copy.
862 static inline size_t calculate_words_remaining(size_t region);
863
864 protected:
865 HeapWord* _source; // Next addr that would be read.
866 HeapWord* _destination; // Next addr to be written.
867 ObjectStartArray* const _start_array;
868 size_t _offset;
869
870 inline void decrement_words_remaining(size_t words);
871 // Update variables to indicate that word_count words were processed.
872 inline void update_state(size_t words);
873
874 public:
875 ParMarkBitMap* bitmap() const { return _bitmap; }
876
877 size_t words_remaining() const { return _words_remaining; }
878 bool is_full() const { return _words_remaining == 0; }
879 HeapWord* source() const { return _source; }
880 void set_source(HeapWord* addr) {
881 assert(addr != nullptr, "precondition");
882 _source = addr;
883 }
884
885 // If the object will fit (size <= words_remaining()), copy it to the current
886 // destination, update the interior oops and the start array.
887 void do_addr(HeapWord* addr, size_t words, markWord mark);
888
889 inline MoveAndUpdateClosure(ParMarkBitMap* bitmap, size_t region);
890
891 // Accessors.
892 HeapWord* destination() const { return _destination; }
893 HeapWord* copy_destination() const { return _destination + _offset; }
894
895 // Copy enough words to fill this closure or to the end of an object,
896 // whichever is smaller, starting at source(). The start array is not
897 // updated.
898 void copy_partial_obj(size_t partial_obj_size);
899
900 virtual void complete_region(HeapWord* dest_addr, PSParallelCompact::RegionData* region_ptr);
901 };
902
903 inline void MoveAndUpdateClosure::decrement_words_remaining(size_t words) {
904 assert(_words_remaining >= words, "processed too many words");
905 _words_remaining -= words;
906 }
907
908 inline size_t MoveAndUpdateClosure::calculate_words_remaining(size_t region) {
909 HeapWord* dest_addr = PSParallelCompact::summary_data().region_to_addr(region);
910 PSParallelCompact::SpaceId dest_space_id = PSParallelCompact::space_id(dest_addr);
911 HeapWord* new_top = PSParallelCompact::new_top(dest_space_id);
912 return MIN2(pointer_delta(new_top, dest_addr),
913 ParallelCompactData::RegionSize);
914 }
915
916 inline
917 MoveAndUpdateClosure::MoveAndUpdateClosure(ParMarkBitMap* bitmap, size_t region_idx) :
918 _bitmap(bitmap),
919 _words_remaining(calculate_words_remaining(region_idx)),
920 _source(nullptr),
921 _destination(PSParallelCompact::summary_data().region_to_addr(region_idx)),
922 _start_array(PSParallelCompact::start_array(PSParallelCompact::space_id(_destination))),
923 _offset(0) {}
924
925 inline void MoveAndUpdateClosure::update_state(size_t words)
926 {
927 decrement_words_remaining(words);
928 _source += words;
929 _destination += words;
930 }
931
932 class MoveAndUpdateShadowClosure: public MoveAndUpdateClosure {
933 inline size_t calculate_shadow_offset(size_t region_idx, size_t shadow_idx);
934 public:
935 inline MoveAndUpdateShadowClosure(ParMarkBitMap* bitmap, size_t region, size_t shadow);
936
937 virtual void complete_region(HeapWord* dest_addr, PSParallelCompact::RegionData* region_ptr);
938
939 private:
940 size_t _shadow;
941 };
942
943 inline size_t MoveAndUpdateShadowClosure::calculate_shadow_offset(size_t region_idx, size_t shadow_idx) {
944 ParallelCompactData& sd = PSParallelCompact::summary_data();
945 HeapWord* dest_addr = sd.region_to_addr(region_idx);
946 HeapWord* shadow_addr = sd.region_to_addr(shadow_idx);
947 return pointer_delta(shadow_addr, dest_addr);
948 }
949
950 inline
951 MoveAndUpdateShadowClosure::MoveAndUpdateShadowClosure(ParMarkBitMap* bitmap, size_t region, size_t shadow) :
952 MoveAndUpdateClosure(bitmap, region),
953 _shadow(shadow) {
954 _offset = calculate_shadow_offset(region, shadow);
955 }
956
957 void steal_marking_work(TaskTerminator& terminator, uint worker_id);
958
959 #endif // SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP