1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef SHARE_GC_G1_G1CONCURRENTMARK_HPP
26 #define SHARE_GC_G1_G1CONCURRENTMARK_HPP
27
28 #include "gc/g1/g1ConcurrentMarkBitMap.hpp"
29 #include "gc/g1/g1HeapRegionSet.hpp"
30 #include "gc/g1/g1HeapVerifier.hpp"
31 #include "gc/g1/g1RegionMarkStatsCache.hpp"
32 #include "gc/shared/gcCause.hpp"
33 #include "gc/shared/partialArraySplitter.hpp"
34 #include "gc/shared/partialArrayState.hpp"
35 #include "gc/shared/partialArrayTaskStats.hpp"
36 #include "gc/shared/taskqueue.hpp"
37 #include "gc/shared/taskTerminator.hpp"
38 #include "gc/shared/verifyOption.hpp"
39 #include "gc/shared/workerThread.hpp"
40 #include "gc/shared/workerUtils.hpp"
41 #include "memory/allocation.hpp"
42 #include "runtime/atomic.hpp"
43 #include "utilities/compilerWarnings.hpp"
44 #include "utilities/numberSeq.hpp"
45
46 class ConcurrentGCTimer;
47 class G1CollectedHeap;
48 class G1CSetCandidateGroup;
49 class G1CSetCandidateGroupList;
50 class G1ConcurrentMark;
51 class G1ConcurrentMarkThread;
52 class G1CMOopClosure;
53 class G1CMTask;
54 class G1OldTracer;
55 class G1RegionToSpaceMapper;
56 class G1SurvivorRegions;
57 class ThreadClosure;
58
59 typedef ScannerTask G1TaskQueueEntry;
60 typedef GenericTaskQueue<G1TaskQueueEntry, mtGC> G1CMTaskQueue;
61 typedef GenericTaskQueueSet<G1CMTaskQueue, mtGC> G1CMTaskQueueSet;
62
63 // Closure used by CM during concurrent reference discovery
64 // and reference processing (during remarking) to determine
65 // if a particular object is alive. It is primarily used
66 // to determine if referents of discovered reference objects
67 // are alive. An instance is also embedded into the
68 // reference processor as the _is_alive_non_header field
69 class G1CMIsAliveClosure : public BoolObjectClosure {
70 G1ConcurrentMark* _cm;
71
72 public:
73 G1CMIsAliveClosure();
74 G1CMIsAliveClosure(G1ConcurrentMark* cm);
75 void initialize(G1ConcurrentMark* cm);
76
77 bool do_object_b(oop obj);
78 };
79
80 class G1CMSubjectToDiscoveryClosure : public BoolObjectClosure {
81 G1CollectedHeap* _g1h;
82 public:
83 G1CMSubjectToDiscoveryClosure(G1CollectedHeap* g1h) : _g1h(g1h) { }
84 bool do_object_b(oop obj);
85 };
86
87 // Represents the overflow mark stack used by concurrent marking.
88 //
89 // Stores oops in a huge buffer in virtual memory that is always fully committed.
90 // Resizing may only happen during a STW pause when the stack is empty.
91 //
92 // Memory is allocated on a "chunk" basis, i.e. a set of oops. For this, the mark
93 // stack memory is split into evenly sized chunks of oops. Users can only
94 // add or remove entries on that basis.
95 // Chunks are filled in increasing address order. Not completely filled chunks
96 // have a null element as a terminating element.
97 //
98 // Every chunk has a header containing a single pointer element used for memory
99 // management. This wastes some space, but is negligible (< .1% with current sizing).
100 //
101 // Memory management is done using a mix of tracking a high water-mark indicating
102 // that all chunks at a lower address are valid chunks, and a singly linked free
103 // list connecting all empty chunks.
104 class G1CMMarkStack {
105 public:
106 // Number of TaskQueueEntries that can fit in a single chunk.
107 static const size_t EntriesPerChunk = 1024 - 1 /* One reference for the next pointer */;
108 private:
109 struct TaskQueueEntryChunk {
110 TaskQueueEntryChunk* next;
111 G1TaskQueueEntry data[EntriesPerChunk];
112 };
113
114 class ChunkAllocator {
115 // The chunk allocator relies on a growable array data structure that allows resizing without the
116 // need to copy existing items. The basic approach involves organizing the array into chunks,
117 // essentially creating an "array of arrays"; referred to as buckets in this implementation. To
118 // facilitate efficient indexing, the size of the first bucket is set to a power of 2. This choice
119 // allows for quick conversion of an array index into a bucket index and the corresponding offset
120 // within the bucket. Additionally, each new bucket added to the growable array doubles the capacity of
121 // the growable array.
122 //
123 // Illustration of the growable array data structure.
124 //
125 // +----+ +----+----+
126 // | |------->| | |
127 // | | +----+----+
128 // +----+ +----+----+
129 // | |------->| | |
130 // | | +----+----+
131 // +----+ +-----+-----+-----+-----+
132 // | |------->| | | | |
133 // | | +-----+-----+-----+-----+
134 // +----+ +-----+-----+-----+-----+-----+-----+-----+----+
135 // | |------->| | | | | | | | |
136 // | | +-----+-----+-----+-----+-----+-----+-----+----+
137 // +----+
138 //
139 size_t _min_capacity;
140 size_t _max_capacity;
141 size_t _capacity;
142 size_t _num_buckets;
143 bool _should_grow;
144 Atomic<TaskQueueEntryChunk*>* _buckets;
145 char _pad0[DEFAULT_PADDING_SIZE];
146 Atomic<size_t> _size;
147 char _pad4[DEFAULT_PADDING_SIZE - sizeof(size_t)];
148
149 size_t bucket_size(size_t bucket) {
150 return (bucket == 0) ?
151 _min_capacity :
152 _min_capacity * ( 1ULL << (bucket - 1));
153 }
154
155 static unsigned int find_highest_bit(uintptr_t mask) {
156 return count_leading_zeros(mask) ^ (BitsPerWord - 1U);
157 }
158
159 size_t get_bucket(size_t array_idx) {
160 if (array_idx < _min_capacity) {
161 return 0;
162 }
163
164 return find_highest_bit(array_idx) - find_highest_bit(_min_capacity) + 1;
165 }
166
167 size_t get_bucket_index(size_t array_idx) {
168 if (array_idx < _min_capacity) {
169 return array_idx;
170 }
171 return array_idx - (1ULL << find_highest_bit(array_idx));
172 }
173
174 bool reserve(size_t new_capacity);
175
176 public:
177 ChunkAllocator();
178
179 ~ChunkAllocator();
180
181 bool initialize(size_t initial_capacity, size_t max_capacity);
182
183 void reset() {
184 _size.store_relaxed(0);
185 _should_grow = false;
186 }
187
188 // During G1CMConcurrentMarkingTask or finalize_marking phases, we prefer to restart the marking when
189 // the G1CMMarkStack overflows. Attempts to expand the G1CMMarkStack should be followed with a restart
190 // of the marking. On failure to allocate a new chuck, the caller just returns and forces a restart.
191 // This approach offers better memory utilization for the G1CMMarkStack, as each iteration of the
192 // marking potentially involves traversing fewer unmarked nodes in the graph.
193
194 // However, during the reference processing phase, instead of restarting the marking process, the
195 // G1CMMarkStack is expanded upon failure to allocate a new chunk. The decision between these two
196 // modes of expansion is determined by the _should_grow parameter.
197 void set_should_grow() {
198 _should_grow = true;
199 }
200
201 size_t capacity() const { return _capacity; }
202
203 // Expand the mark stack doubling its size.
204 bool try_expand();
205 bool try_expand_to(size_t desired_capacity);
206
207 TaskQueueEntryChunk* allocate_new_chunk();
208 };
209
210 ChunkAllocator _chunk_allocator;
211
212 char _pad0[DEFAULT_PADDING_SIZE];
213 Atomic<TaskQueueEntryChunk*> _free_list; // Linked list of free chunks that can be allocated by users.
214 char _pad1[DEFAULT_PADDING_SIZE - sizeof(TaskQueueEntryChunk*)];
215 Atomic<TaskQueueEntryChunk*> _chunk_list; // List of chunks currently containing data.
216 volatile size_t _chunks_in_chunk_list;
217 char _pad2[DEFAULT_PADDING_SIZE - sizeof(TaskQueueEntryChunk*) - sizeof(size_t)];
218
219 // Atomically add the given chunk to the list.
220 void add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem);
221 // Atomically remove and return a chunk from the given list. Returns null if the
222 // list is empty.
223 TaskQueueEntryChunk* remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list);
224
225 void add_chunk_to_chunk_list(TaskQueueEntryChunk* elem);
226 void add_chunk_to_free_list(TaskQueueEntryChunk* elem);
227
228 TaskQueueEntryChunk* remove_chunk_from_chunk_list();
229 TaskQueueEntryChunk* remove_chunk_from_free_list();
230
231 public:
232 G1CMMarkStack();
233 ~G1CMMarkStack() = default;
234
235 // Alignment and minimum capacity of this mark stack in number of oops.
236 static size_t capacity_alignment();
237
238 // Allocate and initialize the mark stack.
239 bool initialize();
240
241 // Pushes the given buffer containing at most EntriesPerChunk elements on the mark
242 // stack. If less than EntriesPerChunk elements are to be pushed, the array must
243 // be terminated with a null.
244 // Returns whether the buffer contents were successfully pushed to the global mark
245 // stack.
246 bool par_push_chunk(G1TaskQueueEntry* buffer);
247
248 // Pops a chunk from this mark stack, copying them into the given buffer. This
249 // chunk may contain up to EntriesPerChunk elements. If there are less, the last
250 // element in the array is a null pointer.
251 bool par_pop_chunk(G1TaskQueueEntry* buffer);
252
253 // Return whether the chunk list is empty. Racy due to unsynchronized access to
254 // _chunk_list.
255 bool is_empty() const { return _chunk_list.load_relaxed() == nullptr; }
256
257 size_t capacity() const { return _chunk_allocator.capacity(); }
258
259 void set_should_grow() {
260 _chunk_allocator.set_should_grow();
261 }
262
263 // Expand the stack, typically in response to an overflow condition
264 void expand();
265
266 // Return the approximate number of oops on this mark stack. Racy due to
267 // unsynchronized access to _chunks_in_chunk_list.
268 size_t size() const { return _chunks_in_chunk_list * EntriesPerChunk; }
269
270 void set_empty();
271
272 // Apply Fn to every oop on the mark stack. The mark stack must not
273 // be modified while iterating.
274 template<typename Fn> void iterate(Fn fn) const PRODUCT_RETURN;
275 };
276
277 // Root MemRegions are memory areas that contain objects which references are
278 // roots wrt to the marking. They must be scanned before marking to maintain the
279 // SATB invariant.
280 // Typically they contain the areas from TAMS to top of the regions.
281 // We could scan and mark through these objects during the concurrent start pause,
282 // but for pause time reasons we move this work to the concurrent phase.
283 // Garbage collections that evacuate must either complete or abort this procedure
284 // before they can move objects because evacuation might determine that some of these
285 // "root objects" are dead, potentially dropping some references.
286 // Root MemRegions comprise of the contents of survivor regions at the end
287 // of the GC, and any objects copied into the old gen during GC.
288 class G1CMRootMemRegions {
289 // The set of root MemRegions.
290 MemRegion* _root_regions;
291 uint const _max_regions;
292
293 Atomic<uint> _num_regions; // Actual number of root regions.
294 Atomic<uint> _num_claimed_regions; // Number of root regions currently claimed.
295
296 uint num_regions() const { return _num_regions.load_relaxed(); }
297 uint num_claimed_regions() const { return _num_claimed_regions.load_relaxed(); }
298
299 public:
300 G1CMRootMemRegions(uint const max_regions);
301 ~G1CMRootMemRegions();
302
303 void add(HeapWord* start, HeapWord* end);
304
305 // Reset data structure to initial state.
306 void reset();
307
308 // Claim the next root MemRegion to scan atomically, or return null if
309 // all have been claimed.
310 const MemRegion* claim_next();
311
312 // Number of root regions to still process.
313 uint num_remaining_regions() const;
314
315 // Returns whether all root regions have been processed or the processing been aborted.
316 bool work_completed() const;
317
318 // Is the given memregion contained in the root regions; the MemRegion must
319 // match exactly.
320 bool contains(const MemRegion mr) const;
321 };
322
323 // This class manages data structures and methods for doing liveness analysis in
324 // G1's concurrent cycle.
325 class G1ConcurrentMark : public CHeapObj<mtGC> {
326 friend class G1ClearBitMapTask;
327 friend class G1CMBitMapClosure;
328 friend class G1CMConcurrentMarkingTask;
329 friend class G1CMDrainMarkingStackClosure;
330 friend class G1CMKeepAliveAndDrainClosure;
331 friend class G1CMRefProcProxyTask;
332 friend class G1CMRemarkTask;
333 friend class G1CMRootRegionScanTask;
334 friend class G1CMTask;
335 friend class G1CollectorState;
336 friend class G1ConcurrentMarkThread;
337
338 G1ConcurrentMarkThread* _cm_thread; // The thread doing the work
339 G1CollectedHeap* _g1h; // The heap
340
341 // Concurrent marking support structures
342 G1CMBitMap _mark_bitmap;
343
344 // Heap bounds
345 MemRegion const _heap;
346
347 // Root region tracking and claiming
348 G1CMRootMemRegions _root_regions;
349 Atomic<bool> _root_region_scan_aborted;
350
351 // For grey objects
352 G1CMMarkStack _global_mark_stack; // Grey objects behind global finger
353 Atomic<HeapWord*> _finger; // The global finger, region aligned,
354 // always pointing to the end of the
355 // last claimed region
356
357 uint _worker_id_offset;
358 uint _max_num_tasks; // Maximum number of marking tasks
359 uint _num_active_tasks; // Number of tasks currently active
360 G1CMTask** _tasks; // Task queue array (max_worker_id length)
361
362 G1CMTaskQueueSet* _task_queues; // Task queue set
363 TaskTerminator _terminator; // For termination
364
365 PartialArrayStateManager* _partial_array_state_manager;
366
367 // Two sync barriers that are used to synchronize tasks when an
368 // overflow occurs. The algorithm is the following. All tasks enter
369 // the first one to ensure that they have all stopped manipulating
370 // the global data structures. After they exit it, they re-initialize
371 // their data structures and task 0 re-initializes the global data
372 // structures. Then, they enter the second sync barrier. This
373 // ensure, that no task starts doing work before all data
374 // structures (local and global) have been re-initialized. When they
375 // exit it, they are free to start working again.
376 WorkerThreadsBarrierSync _first_overflow_barrier_sync;
377 WorkerThreadsBarrierSync _second_overflow_barrier_sync;
378
379 // Number of completed mark cycles.
380 Atomic<uint> _completed_mark_cycles;
381
382 // This is set by any task, when an overflow on the global data
383 // structures is detected
384 Atomic<bool> _has_overflown;
385 // True: marking is concurrent, false: we're in remark
386 Atomic<bool> _concurrent;
387 // Set at the end of a Full GC so that marking aborts
388 Atomic<bool> _has_aborted;
389
390 // Used when remark aborts due to an overflow to indicate that
391 // another concurrent marking phase should start
392 Atomic<bool> _restart_for_overflow;
393
394 ConcurrentGCTimer* _gc_timer_cm;
395
396 G1OldTracer* _gc_tracer_cm;
397
398 // Timing statistics. All of them are in ms
399 NumberSeq _remark_times;
400 NumberSeq _remark_mark_times;
401 NumberSeq _remark_weak_ref_times;
402 NumberSeq _cleanup_times;
403
404 WorkerThreads* _concurrent_workers;
405 uint _num_concurrent_workers; // The number of marking worker threads we're using
406 uint _max_concurrent_workers; // Maximum number of marking worker threads
407
408 enum class VerifyLocation {
409 RemarkBefore,
410 RemarkAfter,
411 RemarkOverflow,
412 CleanupBefore,
413 CleanupAfter
414 };
415 static const char* verify_location_string(VerifyLocation location);
416 void verify_during_pause(G1HeapVerifier::G1VerifyType type,
417 VerifyLocation location);
418
419 void finalize_marking();
420
421 void weak_refs_work();
422
423 // After reclaiming empty regions, update heap sizes.
424 void compute_new_sizes();
425
426 // Resets all the marking data structures. Called when we have to restart
427 // marking or when marking completes (via set_non_marking_state below).
428 void reset_marking_for_restart();
429
430 // We do this after we're done with marking so that the marking data
431 // structures are initialized to a sensible and predictable state.
432 void reset_at_marking_complete();
433
434 // Called to indicate how many threads are currently active.
435 void set_concurrency(uint active_tasks);
436
437 // Should be called to indicate which phase we're in (concurrent
438 // mark or remark) and how many threads are currently active.
439 void set_concurrency_and_phase(uint active_tasks, bool concurrent);
440
441 // Prints all gathered CM-related statistics
442 void print_stats();
443
444 void print_and_reset_taskqueue_stats();
445
446 HeapWord* finger() { return _finger.load_relaxed(); }
447 bool concurrent() { return _concurrent.load_relaxed(); }
448 uint active_tasks() { return _num_active_tasks; }
449 TaskTerminator* terminator() { return &_terminator; }
450
451 // Claims the next available region to be scanned by a marking
452 // task/thread. It might return null if the next region is empty or
453 // we have run out of regions. In the latter case, out_of_regions()
454 // determines whether we've really run out of regions or the task
455 // should call claim_region() again. This might seem a bit
456 // awkward. Originally, the code was written so that claim_region()
457 // either successfully returned with a non-empty region or there
458 // were no more regions to be claimed. The problem with this was
459 // that, in certain circumstances, it iterated over large chunks of
460 // the heap finding only empty regions and, while it was working, it
461 // was preventing the calling task to call its regular clock
462 // method. So, this way, each task will spend very little time in
463 // claim_region() and is allowed to call the regular clock method
464 // frequently.
465 G1HeapRegion* claim_region(uint worker_id);
466
467 // Determines whether we've run out of regions to scan. Note that
468 // the finger can point past the heap end in case the heap was expanded
469 // to satisfy an allocation without doing a GC. This is fine, because all
470 // objects in those regions will be considered live anyway because of
471 // SATB guarantees (i.e. their TAMS will be equal to bottom).
472 bool out_of_regions() { return finger() >= _heap.end(); }
473
474 // Returns the task with the given id
475 G1CMTask* task(uint id) {
476 // During concurrent start we use the parallel gc threads to do some work, so
477 // we can only compare against _max_num_tasks.
478 assert(id < _max_num_tasks, "Task id %u not within bounds up to %u", id, _max_num_tasks);
479 return _tasks[id];
480 }
481
482 // Access / manipulation of the overflow flag which is set to
483 // indicate that the global stack has overflown
484 bool has_overflown() { return _has_overflown.load_relaxed(); }
485 void set_has_overflown() { _has_overflown.store_relaxed(true); }
486 void clear_has_overflown() { _has_overflown.store_relaxed(false); }
487 bool restart_for_overflow() { return _restart_for_overflow.load_relaxed(); }
488
489 // Methods to enter the two overflow sync barriers
490 void enter_first_sync_barrier(uint worker_id);
491 void enter_second_sync_barrier(uint worker_id);
492
493 // Clear the next marking bitmap in parallel using the given WorkerThreads. If may_yield is
494 // true, periodically insert checks to see if this method should exit prematurely.
495 void clear_bitmap(WorkerThreads* workers, bool may_yield);
496
497 // Records whether the region mark stats cache may contain entries due to marking activity,
498 // and the cache for freed regions needs to be cleared for those.
499 bool _is_region_mark_stats_cache_in_use;
500 // Region statistics gathered during marking.
501 G1RegionMarkStats* _region_mark_stats;
502 // Top pointer for each region at the start of marking. Must be valid, i.e. be within
503 // [bottom, end] of a region for all committed regions.
504 Atomic<HeapWord*>* _top_at_mark_starts;
505 // Top pointer for each region at the start of the rebuild remembered set process
506 // for regions which remembered sets need to be rebuilt. A null for a given region
507 // means that this region does not need to be scanned during the remembered set rebuild
508 // phase at all.
509 Atomic<HeapWord*>* _top_at_rebuild_starts;
510 // True when Remark pause selected regions for rebuilding.
511 bool _needs_remembered_set_rebuild;
512
513 G1ConcurrentMarkThread* cm_thread() const;
514
515 // Concurrent cycle state queries.
516 bool is_in_concurrent_cycle() const;
517 bool is_in_marking() const;
518 bool is_in_marking_or_rebuild() const;
519 bool is_in_reset_for_next_cycle() const;
520
521 void assert_fully_initialized() const { assert(is_fully_initialized(), "must be"); }
522 // The TAMS may be read and returns useful values related to the current concurrent marking.
523 // This is the case only during the concurrent cycle or the Concurrent Start pause.
524 inline bool tams_may_be_read() const;
525 // Update the TAMS for the given region to the current top.
526 inline void update_top_at_mark_start(G1HeapRegion* r);
527 // Reset the TAMS for the given region to bottom.
528 inline void set_top_at_mark_start_to_bottom(G1HeapRegion* r);
529
530 public:
531 // To be called when an object is marked the first time, e.g. after a successful
532 // mark_in_bitmap call. Updates various statistics data.
533 void add_to_liveness(uint worker_id, oop const obj, size_t size);
534 // Did the last marking find a live object between bottom and TAMS?
535 bool contains_live_object(uint region) const;
536 // Live bytes in the given region as determined by concurrent marking, i.e. the amount of
537 // live bytes between bottom and TAMS.
538 size_t live_bytes(uint region) const;
539 // Set live bytes for concurrent marking.
540 void set_live_bytes(uint region, size_t live_bytes);
541 // Approximate number of incoming references found during marking.
542 size_t incoming_refs(uint region) const;
543
544 void note_start_of_mark_for_region(G1HeapRegion* r);
545 inline void assert_top_at_mark_start_is_bottom(G1HeapRegion* r);
546
547 // Returns the TAMS for the given region; outside of the concurrent cycle or Concurrent Start
548 // pause, always returns r->bottom().
549 // Intended to be used for queries that are not allowed to fail at any time, but give a
550 // reasonable value, e.g. for logging to avoid having to do lots of check at every call site.
551 // Do not use for logic.
552 inline HeapWord* top_at_mark_start_or_bottom(const G1HeapRegion* r) const;
553 // Special method to return TAMS for verification purposes. During verification, if Full GC
554 // aborted a concurrent cycle, we need to use the TAMS data because the bitmap < TAMS may
555 // legitimately contain marks, however since we are in a Full GC tams_may_be_read() returns
556 // false. The other methods would return bottom(), which is wrong for verification.
557 inline HeapWord* top_at_mark_start_for_verification(const G1HeapRegion* r,
558 bool concurrent_cycle_aborted) const;
559
560 inline HeapWord* top_at_mark_start(const G1HeapRegion* r) const;
561 inline HeapWord* top_at_mark_start(uint region) const;
562 // Returns whether the given object been allocated since marking start (i.e. >= TAMS in that region).
563 inline bool obj_allocated_since_mark_start(oop obj) const;
564
565 // Sets the internal top_at_region_start for the given region to current top of the region.
566 inline void update_top_at_rebuild_start(G1HeapRegion* r);
567 // TARS for the given region during remembered set rebuilding.
568 inline HeapWord* top_at_rebuild_start(G1HeapRegion* r) const;
569
570 uint worker_id_offset() const { return _worker_id_offset; }
571
572 void fully_initialize();
573 bool is_fully_initialized() const { return _cm_thread != nullptr; }
574
575 uint max_num_tasks() const {return _max_num_tasks; }
576
577 void assert_statistics_clear(G1HeapRegion* r);
578
579 // Notification for marking that a new region has been added to the heap. Updates the TAMS and
580 // live bytes for this region during a Concurrent Start pause.
581 void notify_new_region(G1HeapRegion* r, size_t marked_live_bytes_below_tams = 0);
582
583 // Resets region marking state for the given region, i.e. TAMS, statistics, task metadata,
584 // etc. to initial state.
585 void reset_region_marking_state(G1HeapRegion* r);
586 // Notification for eagerly reclaimed regions to do extra clean up.
587 void humongous_object_eagerly_reclaimed(G1HeapRegion* r);
588 // Manipulation of the global mark stack.
589 // The push and pop operations are used by tasks for transfers
590 // between task-local queues and the global mark stack.
591 bool mark_stack_push(G1TaskQueueEntry* arr) {
592 if (!_global_mark_stack.par_push_chunk(arr)) {
593 set_has_overflown();
594 return false;
595 }
596 return true;
597 }
598 bool mark_stack_pop(G1TaskQueueEntry* arr) {
599 return _global_mark_stack.par_pop_chunk(arr);
600 }
601 size_t mark_stack_size() const { return _global_mark_stack.size(); }
602 size_t partial_mark_stack_size_target() const { return _global_mark_stack.capacity() / 3; }
603 bool mark_stack_empty() const { return _global_mark_stack.is_empty(); }
604
605 void concurrent_cycle_start();
606 // Abandon current marking iteration due to a Full GC.
607 bool concurrent_cycle_abort();
608 void concurrent_cycle_end(bool mark_cycle_completed);
609
610 // Notifies marking threads to abort. This is a best-effort notification. Does not
611 // guarantee or update any state after the call. Root region scan must not be
612 // running or being aborted.
613 void abort_marking_threads();
614
615 // Total cpu time spent in mark worker threads in seconds.
616 double worker_threads_cpu_time_s();
617
618 // Attempts to steal an object from the task queues of other tasks
619 bool try_stealing(uint worker_id, G1TaskQueueEntry& task_entry);
620
621 G1ConcurrentMark(G1CollectedHeap* g1h,
622 G1RegionToSpaceMapper* bitmap_storage);
623 ~G1ConcurrentMark();
624
625 G1CMBitMap* mark_bitmap() const { return (G1CMBitMap*)&_mark_bitmap; }
626
627 // Calculates the number of concurrent GC threads to be used in the marking phase.
628 uint calc_active_marking_workers();
629
630 PartialArrayStateManager* partial_array_state_manager() const;
631
632 // Resets the global marking data structures, as well as the
633 // task local ones; should be called during concurrent start.
634 void reset();
635
636 // Moves all per-task cached data into global state.
637 void flush_all_task_caches(bool ends_use_of_mark_cache = true);
638 // Prepare internal data structures for the next mark cycle. This includes clearing
639 // the next mark bitmap and some internal data structures. This method is intended
640 // to be called concurrently to the mutator. It will yield to safepoint requests.
641 void cleanup_for_next_mark();
642
643 // Recycle the memory that has been requested by allocators associated with
644 // this manager.
645 void reset_partial_array_state_manager();
646
647 // Clear the next marking bitmap during safepoint.
648 void clear_bitmap(WorkerThreads* workers);
649
650 // These two methods do the work that needs to be done at the start and end of the
651 // concurrent start pause.
652 void pre_concurrent_start(GCCause::Cause cause);
653
654 // Start the particular type of concurrent cycle. After this call threads may be running.
655 void start_full_concurrent_cycle();
656 void start_undo_concurrent_cycle();
657
658 void notify_concurrent_cycle_completed();
659
660 // Stop active components/the concurrent mark thread.
661 void stop();
662
663 void add_root_region(G1HeapRegion* r);
664 void add_root_region_set_bottom(G1HeapRegion* r);
665 bool is_root_region(G1HeapRegion* r);
666
667 // Scan all the root regions concurrently and mark everything reachable from
668 // them.
669 void scan_root_regions_concurrently();
670 // Complete root region scan work in the safepoint, return if we did some work.
671 bool complete_root_regions_scan_in_safepoint();
672
673 // Abort an active concurrent root region scan outside safepoint.
674 void abort_root_region_scan();
675
676 bool has_root_region_scan_aborted() const;
677
678 private:
679 // Abort an active concurrent root region scan during safepoint.
680 void abort_root_region_scan_at_safepoint();
681
682 void assert_root_region_scan_completed_or_aborted() PRODUCT_RETURN;
683 G1CMRootMemRegions* root_regions() { return &_root_regions; }
684
685 // Perform root region scan until all root regions have been processed, or
686 // the process has been aborted. Returns true if we did some work.
687 bool scan_root_regions(WorkerThreads* workers, bool concurrent);
688 // Scan a single root MemRegion to mark everything reachable from it.
689 void scan_root_region(const MemRegion* region, uint worker_id);
690
691 public:
692
693 // Do concurrent phase of marking, to a tentative transitive closure.
694 void mark_from_roots();
695
696 // Do concurrent preclean work.
697 void preclean();
698
699 // Executes the Remark pause.
700 void remark();
701
702 // Executes the Cleanup pause.
703 void cleanup();
704
705 // Mark in the marking bitmap. Used during evacuation failure to
706 // remember what objects need handling. Not for use during marking.
707 inline void raw_mark_in_bitmap(oop obj);
708
709 // Clears marks for all objects in the given region in the marking
710 // bitmap. This should only be used to clean the bitmap during a
711 // safepoint.
712 void clear_bitmap_for_region(G1HeapRegion* hr);
713
714 // Verify that there are no collection set oops on the stacks (taskqueues /
715 // global mark stack) and fingers (global / per-task).
716 // If marking is not in progress, it's a no-op.
717 void verify_no_collection_set_oops() PRODUCT_RETURN;
718
719 inline bool do_yield_check();
720
721 uint completed_mark_cycles() const;
722
723 bool has_aborted() { return _has_aborted.load_relaxed(); }
724
725 void print_summary_info();
726
727 void threads_do(ThreadClosure* tc) const;
728
729 void print_on(outputStream* st) const;
730
731 // Mark the given object on the marking bitmap if it is below TAMS.
732 inline bool mark_in_bitmap(uint worker_id, oop const obj);
733
734 inline bool is_marked_in_bitmap(oop p) const;
735
736 ConcurrentGCTimer* gc_timer_cm() const { return _gc_timer_cm; }
737
738 G1OldTracer* gc_tracer_cm() const { return _gc_tracer_cm; }
739
740 private:
741 // Rebuilds the remembered sets for chosen regions in parallel and concurrently
742 // to the application. Also scrubs dead objects to ensure region is parsable.
743 void rebuild_and_scrub();
744
745 uint needs_remembered_set_rebuild() const { return _needs_remembered_set_rebuild; }
746 };
747
748 // A class representing a marking task.
749 class G1CMTask : public TerminatorTerminator {
750 private:
751 enum PrivateConstants {
752 // The regular clock call is called once the scanned words reaches
753 // this limit
754 words_scanned_period = 12*1024,
755 // The regular clock call is called once the number of visited
756 // references reaches this limit
757 refs_reached_period = 1024,
758 };
759
760 uint _worker_id;
761 G1CollectedHeap* _g1h;
762 G1ConcurrentMark* _cm;
763 G1CMBitMap* _mark_bitmap;
764 // the task queue of this task
765 G1CMTaskQueue* _task_queue;
766 PartialArraySplitter _partial_array_splitter;
767
768 G1RegionMarkStatsCache _mark_stats_cache;
769 // Number of calls to this task
770 uint _calls;
771
772 // When the virtual timer reaches this time, the marking step should exit
773 double _time_target_ms;
774 // Start cpu time of the current marking step
775 jlong _start_cpu_time_ns;
776
777 // Oop closure used for iterations over oops
778 G1CMOopClosure* _cm_oop_closure;
779
780 // Region this task is scanning, null if we're not scanning any
781 G1HeapRegion* _curr_region;
782 // Local finger of this task, null if we're not scanning a region
783 HeapWord* _finger;
784 // Limit of the region this task is scanning, null if we're not scanning one
785 HeapWord* _region_limit;
786
787 // Number of words this task has scanned
788 size_t _words_scanned;
789 // When _words_scanned reaches this limit, the regular clock is
790 // called. Notice that this might be decreased under certain
791 // circumstances (i.e. when we believe that we did an expensive
792 // operation).
793 size_t _words_scanned_limit;
794 // Initial value of _words_scanned_limit (i.e. what it was
795 // before it was decreased).
796 size_t _real_words_scanned_limit;
797
798 // Number of references this task has visited
799 size_t _refs_reached;
800 // When _refs_reached reaches this limit, the regular clock is
801 // called. Notice this this might be decreased under certain
802 // circumstances (i.e. when we believe that we did an expensive
803 // operation).
804 size_t _refs_reached_limit;
805 // Initial value of _refs_reached_limit (i.e. what it was before
806 // it was decreased).
807 size_t _real_refs_reached_limit;
808
809 // If true, then the task has aborted for some reason
810 bool _has_aborted;
811 // Set when the task aborts because it has met its time quota
812 bool _has_timed_out;
813 // True when we're draining SATB buffers; this avoids the task
814 // aborting due to SATB buffers being available (as we're already
815 // dealing with them)
816 bool _draining_satb_buffers;
817
818 // Number sequence of past step times
819 NumberSeq _step_times_ms;
820 // Elapsed time of this task
821 double _elapsed_time_ms;
822 // Termination time of this task
823 double _termination_time_ms;
824
825 TruncatedSeq _marking_step_diff_ms;
826
827 // Updates the local fields after this task has claimed
828 // a new region to scan
829 void setup_for_region(G1HeapRegion* hr);
830 // Makes the limit of the region up-to-date
831 void update_region_limit();
832
833 // Handles the processing of the current region.
834 void process_current_region(G1CMBitMapClosure& bitmap_closure);
835
836 // Claims a new region if available.
837 void claim_new_region();
838
839 // Attempts to steal work from other tasks.
840 void attempt_stealing();
841
842 // Handles the termination protocol.
843 void attempt_termination(bool is_serial);
844
845 // Handles the has_aborted scenario.
846 void handle_abort(bool is_serial, double elapsed_time_ms);
847
848 // Called when either the words scanned or the refs visited limit
849 // has been reached
850 void reached_limit();
851 // Recalculates the words scanned and refs visited limits
852 void recalculate_limits();
853 // Decreases the words scanned and refs visited limits when we reach
854 // an expensive operation
855 void decrease_limits();
856 // Checks whether the words scanned or refs visited reached their
857 // respective limit and calls reached_limit() if they have
858 void check_limits() {
859 if (_words_scanned >= _words_scanned_limit ||
860 _refs_reached >= _refs_reached_limit) {
861 reached_limit();
862 }
863 }
864 // Supposed to be called regularly during a marking step as
865 // it checks a bunch of conditions that might cause the marking step
866 // to abort
867 // Return true if the marking step should continue. Otherwise, return false to abort
868 bool regular_clock_call();
869
870 // Set abort flag if regular_clock_call() check fails
871 inline void abort_marking_if_regular_check_fail();
872
873 // Test whether obj might have already been passed over by the
874 // mark bitmap scan, and so needs to be pushed onto the mark stack.
875 bool is_below_finger(oop obj, HeapWord* global_finger) const;
876
877 static bool should_be_sliced(oop obj);
878 // Start processing the given objArrayOop by first pushing its continuations and
879 // then scanning the first chunk including the header.
880 size_t start_partial_array_processing(objArrayOop obj);
881 // Process the given continuation. Returns the number of words scanned.
882 size_t process_partial_array(const G1TaskQueueEntry& task, bool stolen);
883 // Apply the closure to the given range of elements in the objArray.
884 inline void process_array_chunk(objArrayOop obj, size_t start, size_t end);
885 public:
886 // Resets the task completely for a new marking; should be called right at the beginning of a marking phase.
887 void reset(G1CMBitMap* mark_bitmap);
888 // Minimal reset of the task, making it ready for continuing to mark.
889 void reset_for_restart();
890 // Register/unregister Partial Array Splitter Allocator with the PartialArrayStateManager.
891 // This allows us to discard memory arenas used for partial object array states at the end
892 // of a concurrent mark cycle.
893 void register_partial_array_splitter();
894 void unregister_partial_array_splitter();
895 // Clears all the fields that correspond to a claimed region.
896 void clear_region_fields();
897
898 // The main method of this class which performs a marking step
899 // trying not to exceed the given duration. However, it might exit
900 // prematurely, according to some conditions (i.e. SATB buffers are
901 // available for processing).
902 void do_marking_step(double target_ms,
903 bool do_termination,
904 bool is_serial);
905
906 // These two calls start and stop the timer
907 void record_start_time() {
908 _elapsed_time_ms = os::elapsedTime() * 1000.0;
909 }
910 void record_end_time() {
911 _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
912 }
913
914 // Returns the worker ID associated with this task.
915 uint worker_id() { return _worker_id; }
916
917 // From TerminatorTerminator. It determines whether this task should
918 // exit the termination protocol after it's entered it.
919 virtual bool should_exit_termination();
920
921 // Resets the local region fields after a task has finished scanning a
922 // region; or when they have become stale as a result of the region
923 // being evacuated.
924 void giveup_current_region();
925
926 HeapWord* finger() { return _finger; }
927
928 bool has_aborted() { return _has_aborted; }
929 void set_has_aborted() { _has_aborted = true; }
930 void clear_has_aborted() { _has_aborted = false; }
931
932 void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
933
934 // Increment the number of references this task has visited.
935 void increment_refs_reached() { ++_refs_reached; }
936
937 // Grey the object by marking it. If not already marked, push it on
938 // the local queue if below the finger. obj is required to be below its region's TAMS.
939 // Returns whether there has been a mark to the bitmap.
940 inline bool make_reference_grey(oop obj);
941
942 // Grey the object (by calling make_grey_reference) if required,
943 // e.g. obj is below its containing region's TAMS.
944 // Precondition: obj is a valid heap object.
945 // Returns true if the reference caused a mark to be set in the marking bitmap.
946 template <class T>
947 inline bool deal_with_reference(T* p);
948
949 // Scan the klass and visit its children.
950 inline void process_klass(Klass* klass);
951
952 // Scans an object and visits its children.
953 inline void process_entry(G1TaskQueueEntry task_entry, bool stolen);
954
955 // Pushes an object on the local queue.
956 inline void push(G1TaskQueueEntry task_entry);
957
958 // Move entries to the global stack.
959 void move_entries_to_global_stack();
960 // Move entries from the global stack, return true if we were successful to do so.
961 bool get_entries_from_global_stack();
962
963 // Pops and scans objects from the local queue. If partially is
964 // true, then it stops when the queue size is of a given limit. If
965 // partially is false, then it stops when the queue is empty.
966 void drain_local_queue(bool partially);
967 // Moves entries from the global stack to the local queue and
968 // drains the local queue. If partially is true, then it stops when
969 // both the global stack and the local queue reach a given size. If
970 // partially if false, it tries to empty them totally.
971 void drain_global_stack(bool partially);
972 // Keeps picking SATB buffers and processing them until no SATB
973 // buffers are available.
974 void drain_satb_buffers();
975
976 // Moves the local finger to a new location
977 inline void move_finger_to(HeapWord* new_finger) {
978 assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
979 _finger = new_finger;
980 }
981
982 G1CMTask(uint worker_id,
983 G1ConcurrentMark *cm,
984 G1CMTaskQueue* task_queue,
985 G1RegionMarkStats* mark_stats);
986
987 inline void update_liveness(oop const obj, size_t const obj_size);
988
989 inline void inc_incoming_refs(oop const obj);
990
991 void verify_no_mark_stats_for(uint region_idx) PRODUCT_RETURN;
992 // Clear (without flushing) the mark cache entry for the given region.
993 void clear_mark_stats_cache(uint region_idx);
994 // Evict the whole statistics cache into the global statistics. Returns the
995 // number of cache hits and misses so far.
996 Pair<size_t, size_t> flush_mark_stats_cache();
997 // Prints statistics associated with this task
998 void print_stats();
999 #if TASKQUEUE_STATS
1000 PartialArrayTaskStats* partial_array_task_stats() {
1001 return _partial_array_splitter.stats();
1002 }
1003 #endif
1004 };
1005
1006 // Class that's used to to print out per-region liveness
1007 // information. It's currently used at the end of marking and also
1008 // after we sort the old regions at the end of the cleanup operation.
1009 class G1PrintRegionLivenessInfoClosure : public G1HeapRegionClosure {
1010 // Accumulators for these values.
1011 size_t _total_used_bytes;
1012 size_t _total_capacity_bytes;
1013 size_t _total_live_bytes;
1014
1015 // Accumulator for the remembered set size
1016 size_t _total_remset_bytes;
1017
1018 // Accumulator for code roots memory size
1019 size_t _total_code_roots_bytes;
1020
1021 static double bytes_to_mb(size_t val) {
1022 return (double) val / (double) M;
1023 }
1024
1025 void log_cset_candidate_group_add_total(G1CSetCandidateGroup* gr, const char* type);
1026 void log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type);
1027 void log_cset_candidate_groups();
1028
1029 public:
1030 // The header and footer are printed in the constructor and
1031 // destructor respectively.
1032 G1PrintRegionLivenessInfoClosure(const char* phase_name);
1033 virtual bool do_heap_region(G1HeapRegion* r);
1034 ~G1PrintRegionLivenessInfoClosure();
1035 };
1036 #endif // SHARE_GC_G1_G1CONCURRENTMARK_HPP