/*
 * Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_PARALLEL_PSPARALLELCOMPACTNEW_HPP
#define SHARE_GC_PARALLEL_PSPARALLELCOMPACTNEW_HPP

#include "gc/parallel/mutableSpace.hpp"
#include "gc/parallel/objectStartArray.hpp"
#include "gc/parallel/parallelScavengeHeap.hpp"
#include "gc/parallel/parMarkBitMap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/collectorCounters.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/taskTerminator.hpp"
#include "oops/oop.hpp"
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"

class ParallelScavengeHeap;
class PSAdaptiveSizePolicy;
class PSYoungGen;
class PSOldGen;
class ParCompactionManagerNew;
class PSParallelCompactNew;
class ParallelOldTracer;
class STWGCTimer;
class SpaceInfoNew
{
public:
  MutableSpace* space() const { return _space; }

  // The start array for the (generation containing the) space, or null if there
  // is no start array.
  ObjectStartArray* start_array() const { return _start_array; }

  void set_space(MutableSpace* s) { _space = s; }
  void set_start_array(ObjectStartArray* s) { _start_array = s; }

private:
  MutableSpace* _space;
  ObjectStartArray* _start_array;
};

// The Parallel compaction collector is a stop-the-world garbage collector that
// does parts of the collection using parallel threads. The collection includes
// the tenured generation and the young generation.
//
// A collection consists of the following phases.
//
// - marking phase
// - summary phase (single-threaded)
// - forward (to new address) phase
// - adjust pointers phase
// - compacting phase
// - clean up phase
//
// Roughly speaking these phases correspond, respectively, to
//
// - mark all the live objects
// - set up temporary regions to enable parallelism in the following phases
// - calculate the destination of each object at the end of the collection
// - adjust pointers to reflect the new destination of objects
// - move the objects to their destination
// - update some references and reinitialize some variables
//
// A space that is being collected is divided into regions, and each region has
// an associated object of type PCRegionData. Regions are targeted to be of a
// mostly uniform size, but if an object would cross a region boundary, then the
// boundary is adjusted to be after the end of that object.
//
// The marking phase does a complete marking of all live objects in the heap.
// The marking phase uses multiple GC threads, and marking is recorded in a bit
// array of type ParMarkBitMap. The marking of the bit map is done atomically,
// as sketched below.
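//
// As an illustration only (MarkingStack, try_mark() and push_fields() below are
// placeholder names, not the actual marking API), a marking worker conceptually
// runs the following loop:
//
//   void do_marking(MarkingStack* stack) {
//     while (!stack->is_empty()) {
//       oop obj = stack->pop();
//       push_fields(obj, stack);           // push every reference held by obj
//     }
//   }
//
//   void maybe_push(oop obj, MarkingStack* stack) {
//     // The bit-map mark is an atomic test-and-set; only the thread that wins
//     // the race pushes the object, so each object is scanned exactly once.
//     if (obj != nullptr && mark_bitmap()->try_mark(obj)) {
//       stack->push(obj);
//     }
//   }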
//
// The summary phase sets up the regions such that each region covers a roughly
// uniform amount of memory (currently the same size as SpaceAlignment). However,
// if that would result in an object crossing a region boundary, then the upper
// bound is adjusted such that the region ends after that object. This way we
// can ensure that a GC worker thread can fully 'own' a region during the
// forwarding, adjustment and compaction phases, without worrying about other
// threads modifying parts of an object. The summary phase also sets up an
// alternative set of regions, where each region covers a single space. This is
// used for a serial compaction mode which achieves maximum compaction at the
// expense of parallelism during the forwarding and compaction phases.
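//
// A minimal sketch of the parallel region set-up (region_words and
// adjust_past_crossing_object() are placeholder names; the real code lives in
// setup_regions_parallel()):
//
//   size_t region_words = SpaceAlignment / HeapWordSize;
//   HeapWord* cur = space->bottom();
//   size_t idx = 0;
//   while (cur < space->top()) {
//     HeapWord* region_end = MIN2(cur + region_words, space->top());
//     // If an object starts below region_end but extends beyond it, push the
//     // boundary to the end of that object so the region fully contains it.
//     region_end = adjust_past_crossing_object(region_end);
//     new (&_region_data_array[idx]) PCRegionData(idx, cur, region_end, region_end);
//     idx++;
//     cur = region_end;
//   }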
//
// The forwarding phase calculates the new address of each live object and
// records the old-addr-to-new-addr association. It does this using multiple GC
// threads. Each thread 'claims' a source region and appends it to a local
// work-list. The region is also set as the current compaction region for that
// thread. All live objects in the region are then visited, and their new
// locations are calculated by tracking the compaction point in the compaction
// region. Once the source region is exhausted, the next source region is
// claimed from the global pool and appended to the end of the local work-list.
// Once the compaction region is exhausted, the new top of that compaction
// region is recorded, and the next compaction region is fetched from the front
// of the local work-list (which is guaranteed to have already been fully
// processed, or is the same as the source region). This way, each worker forms
// a local list of regions in which it can compact as if it were a serial
// compaction.
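//
// A sketch of one worker's forwarding loop (claim_next_region(),
// append_to_local_list(), next_from_local_list(), first_live_in(), next_live()
// and record_forwardee() are placeholder names for the real claiming, bit-map
// iteration and forwarding helpers):
//
//   PCRegionData* compaction_region = nullptr;
//   HeapWord* compaction_point = nullptr;
//   for (PCRegionData* source = claim_next_region(); source != nullptr;
//        source = claim_next_region()) {
//     append_to_local_list(source);
//     if (compaction_region == nullptr) {
//       compaction_region = source;               // first claimed region
//       compaction_point  = source->bottom();
//     }
//     for (oop obj = first_live_in(source); obj != nullptr; obj = next_live(source, obj)) {
//       size_t size = obj->size();
//       if (compaction_point + size > compaction_region->end()) {
//         // Compaction region exhausted: record its new top and move on to the
//         // next region of the local list (already fully processed, or the
//         // source region itself).
//         compaction_region->set_new_top(compaction_point);
//         compaction_region = next_from_local_list();
//         compaction_point  = compaction_region->bottom();
//       }
//       record_forwardee(obj, compaction_point);  // old-addr -> new-addr
//       compaction_point += size;
//     }
//   }
//   if (compaction_region != nullptr) {
//     compaction_region->set_new_top(compaction_point);
//   }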
//
// The adjust pointers phase remaps all pointers to reflect the new address of
// each object. Again, this uses multiple GC worker threads. Each thread claims
// a region and processes all references in all live objects of that region.
// The thread then proceeds to claim the next region from the global pool,
// until all regions have been processed.
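//
// A sketch of that claiming loop (first_live_in(), next_live() and
// adjust_pointers_in() are placeholder names; the claiming itself uses
// PCRegionData::claim()):
//
//   for (size_t i = 0; i < get_num_regions(); i++) {
//     PCRegionData* region = get_region_data_array() + i;
//     if (!region->claim()) {
//       continue;                 // another worker owns this region
//     }
//     for (oop obj = first_live_in(region); obj != nullptr; obj = next_live(region, obj)) {
//       adjust_pointers_in(obj);  // rewrite each reference to its forwardee
//     }
//   }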
//
// The compaction phase moves objects to their new locations. Again, this uses
// multiple GC worker threads. Each worker takes the local work-list that has
// been set up during the forwarding phase and processes it from bottom to top,
// copying each live object to its new location (which is guaranteed to be
// lower in that thread's part of the heap, and thus never overwrites other
// objects).
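//
// A sketch of one worker's compaction loop over its local work-list
// (local_list_head is the first region appended during forwarding;
// first_live_in(), next_live() and forwardee_of() are placeholder names):
//
//   for (PCRegionData* region = local_list_head; region != nullptr;
//        region = region->local_next()) {
//     for (oop obj = first_live_in(region); obj != nullptr; obj = next_live(region, obj)) {
//       size_t size = obj->size();
//       HeapWord* destination = forwardee_of(obj);
//       // Within a worker's own list of regions the destination is never above
//       // the source, so the copy cannot overwrite not-yet-moved objects.
//       Copy::aligned_conjoint_words(cast_from_oop<HeapWord*>(obj), destination, size);
//     }
//   }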
//
// This algorithm will usually leave gaps of unused memory at the end of
// regions (memory that could not be filled by forwarded objects), and
// potentially whole empty regions at the end of compaction. The
// post-compaction phase fills those gaps with filler objects to ensure that
// the heap remains parsable, as sketched below.
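//
// Roughly, for each region whose compacted contents end below its upper bound
// (CollectedHeap::fill_with_objects() is HotSpot's standard filler helper;
// fill_limit is a placeholder for wherever parsable memory must resume):
//
//   HeapWord* gap_start = region->new_top();
//   size_t    gap_words = pointer_delta(fill_limit, gap_start);
//   if (gap_words > 0) {
//     CollectedHeap::fill_with_objects(gap_start, gap_words);
//   }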
//
// In some situations, this inefficiency of leaving gaps can lead to a
// situation where, after a full GC, it is still not possible to satisfy an
// allocation, even though there should be enough memory available. When that
// happens, the collector switches to a serial mode, where we only have 4
// regions which correspond exactly to the 4 spaces, and the forwarding and
// compaction phases are executed using only a single thread. This achieves
// maximum compaction. This serial mode is also invoked when System.gc() is
// called *and* UseMaximumCompactionOnSystemGC is set to true (which is the
// default), or when the number of full GCs exceeds HeapMaximumCompactionInterval.
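//
// A sketch of the trigger condition only (is_system_gc and total_full_gcs are
// illustrative names; the real check lives in check_maximum_compaction()):
//
//   bool maximum_compaction =
//       (is_system_gc && UseMaximumCompactionOnSystemGC) ||
//       (total_full_gcs - _maximum_compaction_gc_num >= HeapMaximumCompactionInterval);
//   // When maximum_compaction is true, or when the preceding parallel
//   // compaction could not make room for the pending allocation, the
//   // collection is repeated with the serial, per-space region set-up.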
//
// Possible improvements to the algorithm include:
// - Identify and ignore a 'dense prefix'. This requires that we collect
//   liveness data during marking, or that we scan the prefix object-by-object
//   during the summary phase.
// - When an object does not fit into a remaining gap of a region, and the
//   object is rather large, we could attempt to forward/compact subsequent
//   objects 'around' that large object in an attempt to minimize the
//   resulting gap. This could be achieved by reconfiguring the regions
//   to exclude the large object.
// - Instead of finding out *after* the whole compaction that an allocation
//   can still not be satisfied, and then re-running the whole compaction
//   serially, we could determine that after the forwarding phase, and
//   re-do only forwarding serially, thus avoiding running marking,
//   adjusting references and compaction twice.
class PCRegionData /*: public CHeapObj<mtGC> */ {
  // A region index
  size_t const _idx;

  // The start of the region
  HeapWord* const _bottom;
  // The top of the region (first word after the last live object in the containing space)
  HeapWord* const _top;
  // The end of the region (first word after the last word of the region)
  HeapWord* const _end;

  // The next compaction address
  HeapWord* _new_top;

  // Points to the next region in the GC-worker-local work-list
  PCRegionData* _local_next;

  // Parallel workers claiming protocol, used during the adjust-references phase.
  volatile bool _claimed;

public:

  PCRegionData(size_t idx, HeapWord* bottom, HeapWord* top, HeapWord* end) :
    _idx(idx), _bottom(bottom), _top(top), _end(end), _new_top(bottom),
    _local_next(nullptr), _claimed(false) {}

  size_t idx() const { return _idx; }

  HeapWord* bottom() const { return _bottom; }
  HeapWord* top() const { return _top; }
  HeapWord* end() const { return _end; }

  PCRegionData* local_next() const { return _local_next; }
  PCRegionData** local_next_addr() { return &_local_next; }

  HeapWord* new_top() const {
    return _new_top;
  }
  void set_new_top(HeapWord* new_top) {
    _new_top = new_top;
  }

  bool contains(oop obj) {
    auto* obj_start = cast_from_oop<HeapWord*>(obj);
    HeapWord* obj_end = obj_start + obj->size();
    return _bottom <= obj_start && obj_start < _end && _bottom < obj_end && obj_end <= _end;
  }

  bool claim() {
    // Fast path: if the region has already been claimed, avoid the atomic operation.
    bool claimed = _claimed;
    if (claimed) {
      return false;
    }
    // cmpxchg returns the previous value; the claim succeeds only for the single
    // thread that observes 'false' and installs 'true'.
    return !Atomic::cmpxchg(&_claimed, false, true);
  }
};

class PSParallelCompactNew : AllStatic {
public:
  typedef enum {
    old_space_id, eden_space_id,
    from_space_id, to_space_id, last_space_id
  } SpaceId;

public:
  class IsAliveClosure: public BoolObjectClosure {
  public:
    bool do_object_b(oop p) final;
  };

private:
  static STWGCTimer _gc_timer;
  static ParallelOldTracer _gc_tracer;
  static elapsedTimer _accumulated_time;
  static unsigned int _maximum_compaction_gc_num;
  static CollectorCounters* _counters;
  static ParMarkBitMap _mark_bitmap;
  static IsAliveClosure _is_alive_closure;
  static SpaceInfoNew _space_info[last_space_id];

  // The global array of region data and the number of regions it contains.
  static size_t _num_regions;
  static PCRegionData* _region_data_array;
  static PCRegionData** _per_worker_region_data;

  // The alternative, serial set of regions (one region per space).
  static size_t _num_regions_serial;
  static PCRegionData* _region_data_array_serial;
  static bool _serial;

  // Reference processing (used in ...follow_contents)
  static SpanSubjectToDiscoveryClosure _span_based_discoverer;
  static ReferenceProcessor* _ref_processor;

  static uint get_num_workers() { return _serial ? 1 : ParallelScavengeHeap::heap()->workers().active_workers(); }
  static size_t get_num_regions() { return _serial ? _num_regions_serial : _num_regions; }
  static PCRegionData* get_region_data_array() { return _serial ? _region_data_array_serial : _region_data_array; }

public:
  static ParallelOldTracer* gc_tracer() { return &_gc_tracer; }

private:

  static void initialize_space_info();

  // Clear the marking bitmap and summary data that cover the specified space.
  static void clear_data_covering_space(SpaceId id);

  static void pre_compact();

  static void post_compact();

  static bool check_maximum_compaction();

  // Mark live objects
  static void marking_phase(ParallelOldTracer *gc_tracer);

  static void summary_phase();
  static void setup_regions_parallel();
  static void setup_regions_serial();

  static void adjust_pointers();
  static void forward_to_new_addr();

  // Move objects to new locations.
  static void compact();

public:
  static bool invoke(bool maximum_heap_compaction, bool serial);
  static bool invoke_no_policy(bool maximum_heap_compaction, bool serial);

  static void adjust_pointers_in_spaces(uint worker_id);

  static void post_initialize();
  // Perform initialization for PSParallelCompactNew that requires
  // allocations. This should be called during the VM initialization
  // at a point where it would be appropriate to return a JNI_ENOMEM
  // in the event of a failure.
  static bool initialize_aux_data();

  // Closure accessors
  static BoolObjectClosure* is_alive_closure() { return &_is_alive_closure; }

  // Public accessors
  static elapsedTimer* accumulated_time() { return &_accumulated_time; }

  static CollectorCounters* counters() { return _counters; }

  static inline bool is_marked(oop obj);

  template <class T> static inline void adjust_pointer(T* p);

  // Convenience wrappers for per-space data kept in _space_info.
  static inline MutableSpace* space(SpaceId space_id);
  static inline ObjectStartArray* start_array(SpaceId space_id);

  static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; }

  // Reference Processing
  static ReferenceProcessor* ref_processor() { return _ref_processor; }

  static STWGCTimer* gc_timer() { return &_gc_timer; }

  // Return the SpaceId for the given address.
  static SpaceId space_id(HeapWord* addr);

  static void print_on(outputStream* st);
};

void steal_marking_work_new(TaskTerminator& terminator, uint worker_id);

#endif // SHARE_GC_PARALLEL_PSPARALLELCOMPACTNEW_HPP