/*
 * Copyright (c) 2005, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP
#define SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP

#include "gc/parallel/mutableSpace.hpp"
#include "gc/parallel/objectStartArray.hpp"
#include "gc/parallel/parallelScavengeHeap.hpp"
#include "gc/parallel/parMarkBitMap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/collectorCounters.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/taskTerminator.hpp"
#include "oops/oop.hpp"
#include "runtime/atomicAccess.hpp"
#include "runtime/orderAccess.hpp"

class ParallelScavengeHeap;
class PSAdaptiveSizePolicy;
class PSYoungGen;
class PSOldGen;
class ParCompactionManager;
class PSParallelCompact;
class MoveAndUpdateClosure;
class ParallelOldTracer;
class STWGCTimer;

// The SplitInfo class holds the information needed to 'split' a source region
// so that the live data can be copied to two destination *spaces*.  Normally,
// all the live data in a region is copied to a single destination space (e.g.,
// everything live in a region in eden is copied entirely into the old gen).
// However, when the heap is nearly full, all the live data in eden may not fit
// into the old gen.  Copying only some of the regions from eden to old gen
// requires finding a region that does not contain a partial object (i.e., no
// live object crosses the region boundary) somewhere near the last object that
// does fit into the old gen.  Since it's not always possible to find such a
// region, splitting is necessary for predictable behavior.
//
// A region is always split at the end of the partial object.  This avoids
// additional tests when calculating the new location of a pointer, which is a
// very hot code path.  The partial object and everything to its left will be
// copied to another space (call it dest_space_1).  The live data to the right
// of the partial object will be copied either within the space itself, or to a
// different destination space (distinct from dest_space_1).
//
// Split points are identified during the summary phase, when region
// destinations are computed:  data about the split, including the
// partial_object_size, is recorded in a SplitInfo record and the
// partial_object_size field in the summary data is set to zero.  The zeroing is
// possible (and necessary) since the partial object will move to a different
// destination space than anything to its right, thus the partial object should
// not affect the locations of any objects to its right.
//
// The recorded data is used during the compaction phase, but only rarely:  when
// the partial object on the split region will be copied across a destination
// region boundary.  This test is made once each time a region is filled, and is
// a simple address comparison, so the overhead is negligible (see
// PSParallelCompact::first_src_addr()).
//
// Notes:
//
// Only regions with partial objects are split; a region without a partial
// object does not need any extra bookkeeping.
//
// At most one region is split per space, so the amount of data required is
// constant.
//
// A region is split only when the destination space would overflow.  Once that
// happens, the destination space is abandoned and no other data (even from
// other source spaces) is targeted to that destination space.  Abandoning the
// destination space may leave a somewhat large unused area at the end, if a
// large object caused the overflow.
//
// Future work:
//
// More bookkeeping would be required to continue to use the destination space.
// The most general solution would allow data from regions in two different
// source spaces to be "joined" in a single destination region.  At the very
// least, additional code would be required in next_src_region() to detect the
// join and skip to an out-of-order source region.  If the join region was also
// the last destination region to which a split region was copied (the most
// likely case), then additional work would be needed to get fill_region() to
// stop iteration and switch to a new source region at the right point.  Basic
// idea would be to use a fake value for the top of the source space.  It is
// doable, if a bit tricky.
//
// A simpler (but less general) solution would fill the remainder of the
// destination region with a dummy object and continue filling the next
// destination region.

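// Illustrative sketch (not normative; see the accessors below for the exact
// contract):  suppose source region R holds, in address order, the tail of a
// partial object P and then objects A, B and C, and the space currently being
// filled only has room for P, A and B.  The split is recorded with
// split_point() == &C, preceding_live_words() covering P, A and B, and
// preceding_destination() giving the address to which the first live word of R
// is copied.  C and everything after it on R are summarized into the next
// destination space.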
class SplitInfo
{
public:
  // Return true if this split info is valid (i.e., if a split has been
  // recorded).  The very first region cannot have a partial object and thus is
  // never split, so 0 is the 'invalid' value.
  bool is_valid() const { return _split_region_idx > 0; }

  // Return true if this split holds data for the specified source region.
  inline bool is_split(size_t region_idx) const;

  // The object at the split point does not fit in the preceding space and will
  // be relocated to the next space.
  HeapWord* split_point() const { return _split_point; }

  // Number of live words before the split point on this region.
  size_t preceding_live_words() const { return _preceding_live_words; }

  // A split region has two "destinations", living in two spaces. This method
  // returns the first one -- the destination for the first live word on
  // this split region.
  HeapWord* preceding_destination() const {
    assert(_preceding_destination != nullptr, "inv");
    return _preceding_destination;
  }

  // Number of regions the preceding live words are relocated into.
  uint preceding_destination_count() const { return _preceding_destination_count; }

  void record(size_t split_region_idx, HeapWord* split_point, size_t preceding_live_words);

  void clear();

  DEBUG_ONLY(void verify_clear();)

private:
  size_t       _split_region_idx;
  HeapWord*    _split_point;
  size_t       _preceding_live_words;
  HeapWord*    _preceding_destination;
  uint         _preceding_destination_count;
};

inline bool SplitInfo::is_split(size_t region_idx) const
{
  return _split_region_idx == region_idx && is_valid();
}

class SpaceInfo
{
public:
  MutableSpace* space() const { return _space; }

  // Where the free space will start after the collection.  Valid only after the
  // summary phase completes.
  HeapWord* new_top() const { return _new_top; }

  // Allows new_top to be set.
  HeapWord** new_top_addr() { return &_new_top; }

  // Where the dense prefix ends, or the compacted region begins.
  HeapWord* dense_prefix() const { return _dense_prefix; }

  // The start array for the (generation containing the) space, or null if there
  // is no start array.
  ObjectStartArray* start_array() const { return _start_array; }

  SplitInfo& split_info() { return _split_info; }

  void set_space(MutableSpace* s)           { _space = s; }
  void set_new_top(HeapWord* addr)          { _new_top = addr; }
  void set_dense_prefix(HeapWord* addr)     { _dense_prefix = addr; }
  void set_start_array(ObjectStartArray* s) { _start_array = s; }

private:
  MutableSpace*     _space;
  HeapWord*         _new_top;
  HeapWord*         _dense_prefix;
  ObjectStartArray* _start_array;
  SplitInfo         _split_info;
};

class ParallelCompactData
{
public:
  // Sizes are in HeapWords, unless indicated otherwise.
  static const size_t Log2RegionSize;
  static const size_t RegionSize;
  static const size_t RegionSizeBytes;

  // Mask for the bits in a size_t to get an offset within a region.
  static const size_t RegionSizeOffsetMask;
  // Mask for the bits in a pointer to get an offset within a region.
  static const size_t RegionAddrOffsetMask;
  // Mask for the bits in a pointer to get the address of the start of a region.
  static const size_t RegionAddrMask;

  class RegionData
  {
  public:
    // Destination for the first live word in this region.
    // Therefore, the new addr for every live obj on this region can be calculated as:
    //
    // new_addr := _destination + live_words_offset(old_addr);
    //
    // where live_words_offset is the number of live words accumulated from
    // region-start to old_addr.
    HeapWord* destination() const { return _destination; }

    // A destination region can have multiple source regions; only the first
    // one is recorded. Since all live objs are slid down, subsequent source
    // regions can be found via plain heap-region iteration.
    size_t source_region() const { return _source_region; }

    // Reuse _source_region to store the corresponding shadow region index.
    size_t shadow_region() const { return _source_region; }

    // The starting address of the partial object extending onto the region.
    HeapWord* partial_obj_addr() const { return _partial_obj_addr; }

    // Size of the partial object extending onto the region (words).
    size_t partial_obj_size() const { return _partial_obj_size; }

    // Size of live data that lies within this region due to objects that start
    // in this region (words).  This does not include the partial object
    // extending onto the region (if any), or the part of an object that extends
    // onto the next region (if any).
    size_t live_obj_size() const { return _dc_and_los & los_mask; }

    // Total live data that lies within the region (words).
    size_t data_size() const { return partial_obj_size() + live_obj_size(); }

    // The destination_count is the number of other regions to which data from
    // this region will be copied.  At the end of the summary phase, the valid
    // values of destination_count are
    //
    // 0 - data from the region will be compacted completely into itself, or the
    //     region is empty.  The region can be claimed and then filled.
    // 1 - data from the region will be compacted into 1 other region; some
    //     data from the region may also be compacted into the region itself.
    // 2 - data from the region will be copied to 2 other regions.
    //
    // During compaction as regions are emptied, the destination_count is
    // decremented (atomically) and when it reaches 0, it can be claimed and
    // then filled.
    //
    // A region is claimed for processing by atomically changing the
    // destination_count to the claimed value (dc_claimed).  After a region has
    // been filled, the destination_count should be set to the completed value
    // (dc_completed).
    inline uint destination_count() const;
    inline uint destination_count_raw() const;

    // Whether this region is available to be claimed, has been claimed, or has
    // been completed.
    //
    // Minor subtlety:  claimed() returns true if the region is marked
    // completed(), which is desirable since a region must be claimed before it
    // can be completed.
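    // The simple threshold checks below rely on the ordering of the encoding
    // constants (defined in psParallelCompact.cpp): dc_one < dc_claimed <=
    // dc_completed.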
    bool available() const { return _dc_and_los < dc_one; }
    bool claimed()   const { return _dc_and_los >= dc_claimed; }
    bool completed() const { return _dc_and_los >= dc_completed; }

    // These are not atomic.
    void set_destination(HeapWord* addr)       { _destination = addr; }
    void set_source_region(size_t region)      { _source_region = region; }
    void set_shadow_region(size_t region)      { _source_region = region; }
    void set_partial_obj_addr(HeapWord* addr)  { _partial_obj_addr = addr; }
    void set_partial_obj_size(size_t words)    {
      _partial_obj_size = (region_sz_t) words;
    }

    inline void set_destination_count(uint count);
    inline void set_live_obj_size(size_t words);

    inline void set_completed();
    inline bool claim_unsafe();

    // These are atomic.
    inline void add_live_obj(size_t words);
    inline void decrement_destination_count();
    inline bool claim();

    // Possible values of _shadow_state; the transitions are as follows:
    // Normal Path:
    // UnusedRegion -> mark_normal() -> NormalRegion
    // Shadow Path:
    // UnusedRegion -> mark_shadow() -> ShadowRegion ->
    // mark_filled() -> FilledShadow -> mark_copied() -> CopiedShadow
    static const int UnusedRegion = 0; // The region is not collected yet
    static const int ShadowRegion = 1; // Stolen by an idle thread, and a shadow region is created for it
    static const int FilledShadow = 2; // Its shadow region has been filled and is ready to be copied back
    static const int CopiedShadow = 3; // The data of the shadow region has been copied back
    static const int NormalRegion = 4; // The region will be collected by the original parallel algorithm

    // Mark the current region as normal or shadow to enter different processing paths
    inline bool mark_normal();
    inline bool mark_shadow();
    // Mark the shadow region as filled and ready to be copied back
    inline void mark_filled();
    // Mark the shadow region as copied back to avoid double copying.
    inline bool mark_copied();
    // Special case: see the comment in PSParallelCompact::fill_and_update_shadow_region.
    // Return to the normal path here.
    inline void shadow_to_normal();

    int shadow_state() { return _shadow_state; }

    bool is_clear();

    void verify_clear() NOT_DEBUG_RETURN;

  private:
    // The type used to represent object sizes within a region.
    typedef uint region_sz_t;

    // Constants for manipulating the _dc_and_los field, which holds both the
    // destination count and live obj size.  The live obj size lives at the
    // least significant end so no masking is necessary when adding.
    static const region_sz_t dc_shift;           // Shift amount.
    static const region_sz_t dc_mask;            // Mask for destination count.
    static const region_sz_t dc_one;             // 1, shifted appropriately.
    static const region_sz_t dc_claimed;         // Region has been claimed.
    static const region_sz_t dc_completed;       // Region has been completed.
    static const region_sz_t los_mask;           // Mask for live obj size.
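
    // Illustrative layout of _dc_and_los (the exact shift and mask values are
    // defined in psParallelCompact.cpp):
    //
    //   bits [31 .. dc_shift]    destination count / claim state (dc_*)
    //   bits [dc_shift-1 .. 0]   live obj size in words (los_mask)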

    HeapWord*            _destination;
    size_t               _source_region;
    HeapWord*            _partial_obj_addr;
    region_sz_t          _partial_obj_size;
    region_sz_t volatile _dc_and_los;
    int         volatile _shadow_state;

#ifdef ASSERT
   public:
    uint                 _pushed;   // 0 until region is pushed onto a stack
   private:
#endif
  };

public:
  ParallelCompactData();
  bool initialize(MemRegion reserved_heap);

  size_t region_count() const { return _region_count; }
  size_t reserved_byte_size() const { return _reserved_byte_size; }

  // Convert region indices to/from RegionData pointers.
  inline RegionData* region(size_t region_idx) const;
  inline size_t     region(const RegionData* const region_ptr) const;

  HeapWord* summarize_split_space(size_t src_region, SplitInfo& split_info,
                                  HeapWord* destination, HeapWord* target_end,
                                  HeapWord** target_next);

  size_t live_words_in_space(const MutableSpace* space,
                             HeapWord** full_region_prefix_end = nullptr);

  bool summarize(SplitInfo& split_info,
                 HeapWord* source_beg, HeapWord* source_end,
                 HeapWord** source_next,
                 HeapWord* target_beg, HeapWord* target_end,
                 HeapWord** target_next);

  void clear_range(size_t beg_region, size_t end_region);

  // Return the number of words between addr and the start of the region
  // containing addr.
  inline size_t     region_offset(const HeapWord* addr) const;

  // Convert addresses to/from a region index or region pointer.
  inline size_t     addr_to_region_idx(const HeapWord* addr) const;
  inline RegionData* addr_to_region_ptr(const HeapWord* addr) const;
  inline HeapWord*  region_to_addr(size_t region) const;
  inline HeapWord*  region_to_addr(const RegionData* region) const;

  inline HeapWord*  region_align_down(HeapWord* addr) const;
  inline HeapWord*  region_align_up(HeapWord* addr) const;
  inline bool       is_region_aligned(HeapWord* addr) const;

#ifdef  ASSERT
  void verify_clear();
#endif  // #ifdef ASSERT

private:
  bool initialize_region_data(size_t heap_size);
  PSVirtualSpace* create_vspace(size_t count, size_t element_size);

  HeapWord*       _heap_start;
#ifdef  ASSERT
  HeapWord*       _heap_end;
#endif  // #ifdef ASSERT

  PSVirtualSpace* _region_vspace;
  size_t          _reserved_byte_size;
  RegionData*     _region_data;
  size_t          _region_count;
};

inline uint
ParallelCompactData::RegionData::destination_count_raw() const
{
  return _dc_and_los & dc_mask;
}

inline uint
ParallelCompactData::RegionData::destination_count() const
{
  return destination_count_raw() >> dc_shift;
}

inline void
ParallelCompactData::RegionData::set_destination_count(uint count)
{
  assert(count <= (dc_completed >> dc_shift), "count too large");
  const region_sz_t live_sz = (region_sz_t) live_obj_size();
  _dc_and_los = (count << dc_shift) | live_sz;
}

inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words)
{
  assert(words <= los_mask, "would overflow");
  _dc_and_los = destination_count_raw() | (region_sz_t)words;
}

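// Decrement the destination count by adding dc_mask.  The count occupies the
// topmost bits of _dc_and_los (see the dc_* constants), so adding dc_mask (all
// count bits set) is the same as subtracting dc_one, with the carry discarded
// off the top of the word; e.g., (2 * dc_one + los) + dc_mask wraps to
// (dc_one + los).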
inline void ParallelCompactData::RegionData::decrement_destination_count()
{
  assert(_dc_and_los < dc_claimed, "already claimed");
  assert(_dc_and_los >= dc_one, "count would go negative");
  AtomicAccess::add(&_dc_and_los, dc_mask);
}

inline void ParallelCompactData::RegionData::set_completed()
{
  assert(claimed(), "must be claimed first");
  _dc_and_los = dc_completed | (region_sz_t) live_obj_size();
}

// MT-unsafe claiming of a region.  Should only be used during single threaded
// execution.
inline bool ParallelCompactData::RegionData::claim_unsafe()
{
  if (available()) {
    _dc_and_los |= dc_claimed;
    return true;
  }
  return false;
}

inline void ParallelCompactData::RegionData::add_live_obj(size_t words)
{
  assert(words <= (size_t)los_mask - live_obj_size(), "overflow");
  AtomicAccess::add(&_dc_and_los, static_cast<region_sz_t>(words));
}

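// The cmpxchg succeeds only if _dc_and_los holds just the live obj size, i.e.
// the destination count has already dropped to zero and no other thread has
// claimed the region; exactly one thread can therefore claim a given region.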
inline bool ParallelCompactData::RegionData::claim()
{
  const region_sz_t los = static_cast<region_sz_t>(live_obj_size());
  const region_sz_t old = AtomicAccess::cmpxchg(&_dc_and_los, los, dc_claimed | los);
  return old == los;
}

inline bool ParallelCompactData::RegionData::mark_normal() {
  return AtomicAccess::cmpxchg(&_shadow_state, UnusedRegion, NormalRegion) == UnusedRegion;
}

inline bool ParallelCompactData::RegionData::mark_shadow() {
  if (_shadow_state != UnusedRegion) return false;
  return AtomicAccess::cmpxchg(&_shadow_state, UnusedRegion, ShadowRegion) == UnusedRegion;
}

inline void ParallelCompactData::RegionData::mark_filled() {
  int old = AtomicAccess::cmpxchg(&_shadow_state, ShadowRegion, FilledShadow);
  assert(old == ShadowRegion, "Failed to mark the region as filled");
}

inline bool ParallelCompactData::RegionData::mark_copied() {
  return AtomicAccess::cmpxchg(&_shadow_state, FilledShadow, CopiedShadow) == FilledShadow;
}

inline void ParallelCompactData::RegionData::shadow_to_normal() {
  int old = AtomicAccess::cmpxchg(&_shadow_state, ShadowRegion, NormalRegion);
  assert(old == ShadowRegion, "Failed to mark the region as normal");
}

inline ParallelCompactData::RegionData*
ParallelCompactData::region(size_t region_idx) const
{
  assert(region_idx <= region_count(), "bad arg");
  return _region_data + region_idx;
}

inline size_t
ParallelCompactData::region(const RegionData* const region_ptr) const
{
  assert(region_ptr >= _region_data, "bad arg");
  assert(region_ptr <= _region_data + region_count(), "bad arg");
  return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
}

inline size_t
ParallelCompactData::region_offset(const HeapWord* addr) const
{
  assert(addr >= _heap_start, "bad addr");
  // This method would mistakenly return 0 for _heap_end; hence exclusive.
  assert(addr < _heap_end, "bad addr");
  return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize;
}

inline size_t
ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const
{
  assert(addr >= _heap_start, "bad addr " PTR_FORMAT " _heap_start " PTR_FORMAT, p2i(addr), p2i(_heap_start));
  assert(addr <= _heap_end, "bad addr " PTR_FORMAT " _heap_end " PTR_FORMAT, p2i(addr), p2i(_heap_end));
  return pointer_delta(addr, _heap_start) >> Log2RegionSize;
}

inline ParallelCompactData::RegionData*
ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const
{
  return region(addr_to_region_idx(addr));
}

inline HeapWord*
ParallelCompactData::region_to_addr(size_t region) const
{
  assert(region <= _region_count, "region out of range");
  return _heap_start + (region << Log2RegionSize);
}

inline HeapWord*
ParallelCompactData::region_to_addr(const RegionData* region) const
{
  return region_to_addr(pointer_delta(region, _region_data,
                                      sizeof(RegionData)));
}

inline HeapWord*
ParallelCompactData::region_align_down(HeapWord* addr) const
{
  assert(addr >= _heap_start, "bad addr");
  assert(addr < _heap_end + RegionSize, "bad addr");
  return (HeapWord*)(size_t(addr) & RegionAddrMask);
}

inline HeapWord*
ParallelCompactData::region_align_up(HeapWord* addr) const
{
  assert(addr >= _heap_start, "bad addr");
  assert(addr <= _heap_end, "bad addr");
  return region_align_down(addr + RegionSizeOffsetMask);
}

inline bool
ParallelCompactData::is_region_aligned(HeapWord* addr) const
{
  return (size_t(addr) & RegionAddrOffsetMask) == 0;
}
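
// The helpers above are plain mask/shift arithmetic on region-aligned
// addresses.  Illustrative relations (assuming addr lies inside the covered
// heap and is not region-aligned):
//
//   region_align_down(addr) == start of the region containing addr
//   region_align_up(addr)   == start of the next region
//   region_offset(addr)     == pointer_delta(addr, region_align_down(addr))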

// Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
// do_addr() method (see MoveAndUpdateClosure below).
//
// The closure is initialized with the number of heap words to process
// (words_remaining()), and becomes 'full' when it reaches 0.  The do_addr()
// methods in subclasses should update the total as words are processed.  Since
// only one subclass actually uses this mechanism to terminate iteration, the
// default initial value is > 0.  The implementation is here and not in the
// single subclass that uses it to avoid making is_full() virtual, and thus
// adding a virtual call per live object.


// The Parallel collector is a stop-the-world garbage collector that
// does parts of the collection using parallel threads.  The collection includes
// the tenured generation and the young generation.
//
// A collection consists of the following phases.
//
//      - marking phase
//      - summary phase (single-threaded)
//      - forward (to new address) phase
//      - adjust pointers phase
//      - compacting phase
//      - clean up phase
//
// Roughly speaking these phases correspond, respectively, to
//
//      - mark all the live objects
//      - calculate the destination region of each region, for better parallelism in the following phases
//      - calculate the destination of each object at the end of the collection
//      - adjust pointers to reflect the new destination of objects
//      - move the objects to their destination
//      - update some references and reinitialize some variables
//
// A space that is being collected is divided into regions and with each region
// is associated an object of type ParallelCompactData::RegionData.  Each region
// is of a fixed size and typically will contain more than 1 object and may have
// parts of objects at the front and back of the region.
//
// region            -----+---------------------+----------
// objects covered   [ AAA  )[ BBB )[ CCC   )[ DDD     )
//
// The marking phase does a complete marking of all live objects in the heap.
// The marking also compiles the size of the data for all live objects covered
// by the region.  This size includes the part of any live object spanning onto
// the region (part of AAA if it is live) from the front, all live objects
// contained in the region (BBB and/or CCC if they are live), and the part of
// any live objects covered by the region that extends off the region (part of
// DDD if it is live).  The marking phase uses multiple GC threads and marking
// is done in a bit array of type ParMarkBitMap.  The marking of the bit map is
// done atomically as is the accumulation of the size of the live objects
// covered by a region.
//
// The summary phase calculates the total live data to the left of each region
// XXX.  Based on that total and the bottom of the space, it can calculate the
// starting location of the live data in XXX.  The summary phase calculates for
// each region XXX quantities such as
//
//      - the amount of live data at the beginning of a region from an object
//        entering the region.
//      - the location of the first live data on the region
//      - a count of the number of regions receiving live data from XXX.
//
// See ParallelCompactData for precise details.  The summary phase also
// calculates the dense prefix for the compaction.  The dense prefix is a
// portion at the beginning of the space that is not moved.  The objects in the
// dense prefix are not moved, but they do need to have their object references
// updated.  See compute_dense_prefix_for_old_space().
//
// The forward (to new address) phase calculates the new address of each
// object and records the old-addr-to-new-addr association.
//
// The adjust pointers phase remaps all pointers to reflect the new address of each object.
//
// The compaction phase moves objects to their new location.
//
// Compaction is done on a region basis.  A region that is ready to be filled is
// put on a ready list and GC threads take regions off the list and fill them.
// A region is ready to be filled if it is empty of live objects.  Such a region
// may have been initially empty (only contained dead objects) or may have had
// all its live objects copied out already.  A region that compacts into itself
// is also ready for filling.  The ready list is initially filled with empty
// regions and regions compacting into themselves.  There is always at least 1
// region that can be put on the ready list.  The regions are atomically added
// to and removed from the ready list.
//
// During compaction, there is a natural task dependency among regions because
// destination regions may also be source regions themselves.  Consequently, the
// destination regions are not available for processing until all live objects
// within them are evacuated to their destinations.  These dependencies lead to
// limited thread utilization as threads spin waiting on regions to be ready.
// Shadow regions are utilized to address these region dependencies.  The basic
// idea is that, if a region is unavailable because it still contains live
// objects and thus cannot serve as a destination momentarily, the GC thread
// may allocate a shadow region as a substitute destination and directly copy
// live objects into this shadow region.  Live objects in the shadow region will
// be copied into the target destination region when it becomes available.
//
// For more details on shadow regions, please refer to §4.2 of the VEE'19 paper:
// Haoyu Li, Mingyu Wu, Binyu Zang, and Haibo Chen.  2019.  ScissorGC: scalable
// and efficient compaction for Java full garbage collection.  In Proceedings of
// the 15th ACM SIGPLAN/SIGOPS International Conference on Virtual Execution
// Environments (VEE 2019).  ACM, New York, NY, USA, 108-121.  DOI:
// https://doi.org/10.1145/3313808.3313820
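//
// In this class, the phases above map onto the static methods declared below:
// marking_phase(), summary_phase(), forward_to_new_addr(), adjust_pointers()
// and compact(); invoke() is the entry point for a full collection.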

class PSParallelCompact : AllStatic {
public:
  // Convenient access to type names.
  typedef ParallelCompactData::RegionData RegionData;

  // By the end of a full GC, all live objs are compacted into the first three spaces: old, eden, and from.
  typedef enum {
    old_space_id,
    eden_space_id,
    from_space_id,
    to_space_id,
    last_space_id
  } SpaceId;
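
  // to_space is left empty by a full GC (all live objects end up in the first
  // three spaces), so it is never a compaction destination.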

  // Inline closure decls
  //
  class IsAliveClosure: public BoolObjectClosure {
   public:
    virtual bool do_object_b(oop p);
  };

private:
  static STWGCTimer           _gc_timer;
  static ParallelOldTracer    _gc_tracer;
  static elapsedTimer         _accumulated_time;
  static unsigned int         _maximum_compaction_gc_num;
  static CollectorCounters*   _counters;
  static ParMarkBitMap        _mark_bitmap;
  static ParallelCompactData  _summary_data;
  static IsAliveClosure       _is_alive_closure;
  static SpaceInfo            _space_info[last_space_id];

  // Reference processing (used in ...follow_contents)
  static SpanSubjectToDiscoveryClosure  _span_based_discoverer;
  static ReferenceProcessor*  _ref_processor;

public:
  static ParallelOldTracer* gc_tracer() { return &_gc_tracer; }

private:

  static void initialize_space_info();

  // Clear the marking bitmap and summary data that cover the specified space.
  static void clear_data_covering_space(SpaceId id);

  static void pre_compact();
  static void post_compact();

  static bool check_maximum_compaction(bool should_do_max_compaction,
                                       size_t total_live_words,
                                       MutableSpace* const old_space,
                                       HeapWord* full_region_prefix_end);

  // Mark live objects
  static void marking_phase(ParallelOldTracer *gc_tracer);

  // Identify the dense prefix in the old-space to avoid moving a large amount
  // of memory for little reclaimed space.
  static HeapWord* compute_dense_prefix_for_old_space(MutableSpace* old_space,
                                                      HeapWord* full_region_prefix_end);

  // Create a filler obj (if needed) right before the dense-prefix-boundary to
  // make the heap parsable.
  static void fill_dense_prefix_end(SpaceId id);

  static void summary_phase(bool should_do_max_compaction);

  static void adjust_pointers();
  static void forward_to_new_addr();

  static void verify_forward() NOT_DEBUG_RETURN;
  static void verify_filler_in_dense_prefix() NOT_DEBUG_RETURN;

  // Move objects to new locations.
  static void compact();

  // Add available regions to the stack and draining tasks to the task queue.
  static void prepare_region_draining_tasks(uint parallel_gc_threads);

  static void fill_range_in_dense_prefix(HeapWord* start, HeapWord* end);

public:
  static void fill_dead_objs_in_dense_prefix();

  // This method invokes a full collection.
  // clear_all_soft_refs controls whether soft-refs should be cleared or not.
  // should_do_max_compaction controls whether maximal compaction is performed,
  // i.e., whether the space used by all dead objects is reclaimed.
  static bool invoke(bool clear_all_soft_refs, bool should_do_max_compaction);

  template<typename Func>
  static void adjust_in_space_helper(SpaceId id, volatile uint* claim_counter, Func&& on_stripe);

  static void adjust_in_old_space(volatile uint* claim_counter);

  static void adjust_in_young_space(SpaceId id, volatile uint* claim_counter);

  static void adjust_pointers_in_spaces(uint worker_id, volatile uint* claim_counter);

  static void post_initialize();
  // Perform initialization for PSParallelCompact that requires
  // allocations.  This should be called during the VM initialization
  // at a point where it would be appropriate to return a JNI_ENOMEM
  // in the event of a failure.
  static bool initialize_aux_data();

  // Closure accessors
  static BoolObjectClosure* is_alive_closure()     { return &_is_alive_closure; }

  // Public accessors
  static elapsedTimer* accumulated_time() { return &_accumulated_time; }

  static CollectorCounters* counters()    { return _counters; }

  static inline bool is_marked(oop obj);

  template <class T> static inline void adjust_pointer(T* p);

  // Convenience wrappers for per-space data kept in _space_info.
  static inline MutableSpace*     space(SpaceId space_id);
  static inline HeapWord*         new_top(SpaceId space_id);
  static inline HeapWord*         dense_prefix(SpaceId space_id);
  static inline ObjectStartArray* start_array(SpaceId space_id);

  // Return the address of the (count + 1)st live word in the range [beg, end).
  static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count);

  // Return the address of the word to be copied to dest_addr, which must be
  // aligned to a region boundary.
  static HeapWord* first_src_addr(HeapWord* const dest_addr,
                                  SpaceId src_space_id,
                                  size_t src_region_idx);

  // Determine the next source region, set closure.source() to the start of the
  // new region and return the region index.  Parameter end_addr is the address
  // one beyond the end of the source range just processed.  If necessary,
  // switch to a new source space and set src_space_id (in-out parameter) and
  // src_space_top (out parameter) accordingly.
  static size_t next_src_region(MoveAndUpdateClosure& closure,
                                SpaceId& src_space_id,
                                HeapWord*& src_space_top,
                                HeapWord* end_addr);

  // Decrement the destination count for each non-empty source region in the
  // range [beg_region, region(region_align_up(end_addr))).  If the destination
  // count for a region goes to 0 and it needs to be filled, enqueue it.
  static void decrement_destination_counts(ParCompactionManager* cm,
                                           SpaceId src_space_id,
                                           size_t beg_region,
                                           HeapWord* end_addr);

  static HeapWord* partial_obj_end(HeapWord* region_start_addr);

  static void fill_region(ParCompactionManager* cm, MoveAndUpdateClosure& closure, size_t region);
  static void fill_and_update_region(ParCompactionManager* cm, size_t region);

  static bool steal_unavailable_region(ParCompactionManager* cm, size_t& region_idx);
  static void fill_and_update_shadow_region(ParCompactionManager* cm, size_t region);
  // Copy the content of a shadow region back to its corresponding heap region
  static void copy_back(HeapWord* shadow_addr, HeapWord* region_addr);
  // Collect empty regions as shadow regions and initialize the
  // _next_shadow_region field for each compaction manager
  static void initialize_shadow_regions(uint parallel_gc_threads);

  static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; }
  static ParallelCompactData& summary_data() { return _summary_data; }

  // Reference Processing
  static ReferenceProcessor* ref_processor() { return _ref_processor; }

  static STWGCTimer* gc_timer() { return &_gc_timer; }

  // Return the SpaceId for the given address.
  static SpaceId space_id(HeapWord* addr);

  static void print_on(outputStream* st);

#ifdef  ASSERT
  // Sanity check the new location of a word in the heap.
  static inline void check_new_location(HeapWord* old_addr, HeapWord* new_addr);
  // Verify that all the regions have been emptied.
  static void verify_complete(SpaceId space_id);
#endif  // #ifdef ASSERT
};

class MoveAndUpdateClosure: public StackObj {
private:
  ParMarkBitMap* const        _bitmap;
  size_t                      _words_remaining; // Words left to copy.
  static inline size_t calculate_words_remaining(size_t region);

protected:
  HeapWord*               _source;          // Next addr that would be read.
  HeapWord*               _destination;     // Next addr to be written.
  ObjectStartArray* const _start_array;
  size_t                  _offset;

  inline void decrement_words_remaining(size_t words);
  // Update variables to indicate that 'words' words were processed.
  inline void update_state(size_t words);

public:
  ParMarkBitMap*        bitmap() const { return _bitmap; }

  size_t    words_remaining()    const { return _words_remaining; }
  bool      is_full()            const { return _words_remaining == 0; }
  HeapWord* source()             const { return _source; }
  void      set_source(HeapWord* addr) {
    assert(addr != nullptr, "precondition");
    _source = addr;
  }

  // If the object will fit (size <= words_remaining()), copy it to the current
  // destination, update the interior oops and the start array.
  void do_addr(HeapWord* addr, size_t words, markWord mark);

  inline MoveAndUpdateClosure(ParMarkBitMap* bitmap, size_t region);

  // Accessors.
  HeapWord* destination() const         { return _destination; }
  HeapWord* copy_destination() const    { return _destination + _offset; }

  // Copy enough words to fill this closure or to the end of an object,
  // whichever is smaller, starting at source(). The start array is not
  // updated.
  void copy_partial_obj(size_t partial_obj_size);

  virtual void complete_region(HeapWord* dest_addr, PSParallelCompact::RegionData* region_ptr);
};
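
// A rough usage sketch (illustrative; the exact protocol lives in
// PSParallelCompact::fill_region() in psParallelCompact.cpp):
//
//   MoveAndUpdateClosure cl(PSParallelCompact::mark_bitmap(), region_idx);
//   // fill_region() repeatedly sets cl.source(), then copies partial objects
//   // via cl.copy_partial_obj() and whole objects via cl.do_addr(), until
//   // cl.is_full(); it then calls cl.complete_region() for the region.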

inline void MoveAndUpdateClosure::decrement_words_remaining(size_t words) {
  assert(_words_remaining >= words, "processed too many words");
  _words_remaining -= words;
}

inline size_t MoveAndUpdateClosure::calculate_words_remaining(size_t region) {
  HeapWord* dest_addr = PSParallelCompact::summary_data().region_to_addr(region);
  PSParallelCompact::SpaceId dest_space_id = PSParallelCompact::space_id(dest_addr);
  HeapWord* new_top = PSParallelCompact::new_top(dest_space_id);
  return MIN2(pointer_delta(new_top, dest_addr),
              ParallelCompactData::RegionSize);
}

inline
MoveAndUpdateClosure::MoveAndUpdateClosure(ParMarkBitMap* bitmap, size_t region_idx) :
  _bitmap(bitmap),
  _words_remaining(calculate_words_remaining(region_idx)),
  _source(nullptr),
  _destination(PSParallelCompact::summary_data().region_to_addr(region_idx)),
  _start_array(PSParallelCompact::start_array(PSParallelCompact::space_id(_destination))),
  _offset(0) {}

inline void MoveAndUpdateClosure::update_state(size_t words)
{
  decrement_words_remaining(words);
  _source += words;
  _destination += words;
}

class MoveAndUpdateShadowClosure: public MoveAndUpdateClosure {
  inline size_t calculate_shadow_offset(size_t region_idx, size_t shadow_idx);
public:
  inline MoveAndUpdateShadowClosure(ParMarkBitMap* bitmap, size_t region, size_t shadow);

  virtual void complete_region(HeapWord* dest_addr, PSParallelCompact::RegionData* region_ptr);

private:
  size_t _shadow;
};

inline size_t MoveAndUpdateShadowClosure::calculate_shadow_offset(size_t region_idx, size_t shadow_idx) {
  ParallelCompactData& sd = PSParallelCompact::summary_data();
  HeapWord* dest_addr = sd.region_to_addr(region_idx);
  HeapWord* shadow_addr = sd.region_to_addr(shadow_idx);
  return pointer_delta(shadow_addr, dest_addr);
}

inline
MoveAndUpdateShadowClosure::MoveAndUpdateShadowClosure(ParMarkBitMap* bitmap, size_t region, size_t shadow) :
  MoveAndUpdateClosure(bitmap, region),
  _shadow(shadow) {
  _offset = calculate_shadow_offset(region, shadow);
}
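
// For the shadow variant, _offset shifts copy_destination() from the real
// destination region into its shadow region, so objects are first copied into
// the shadow region; the contents are copied back to the real region once it
// becomes available (see PSParallelCompact::copy_back() and the shadow-state
// transitions in ParallelCompactData::RegionData).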

void steal_marking_work(TaskTerminator& terminator, uint worker_id);

#endif // SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP