1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectionSetChooser.hpp"
34 #include "gc/g1/g1CollectorState.hpp"
35 #include "gc/g1/g1ConcurrentMark.inline.hpp"
36 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
37 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
38 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
39 #include "gc/g1/g1ConcurrentRefine.hpp"
40 #include "gc/g1/g1HeapRegion.inline.hpp"
41 #include "gc/g1/g1HeapRegionManager.hpp"
42 #include "gc/g1/g1HeapRegionPrinter.hpp"
43 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
44 #include "gc/g1/g1HeapRegionSet.inline.hpp"
45 #include "gc/g1/g1HeapVerifier.hpp"
46 #include "gc/g1/g1OopClosures.inline.hpp"
47 #include "gc/g1/g1Policy.hpp"
48 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
49 #include "gc/g1/g1ThreadLocalData.hpp"
50 #include "gc/g1/g1Trace.hpp"
51 #include "gc/shared/gcId.hpp"
52 #include "gc/shared/gcTimer.hpp"
53 #include "gc/shared/gcTraceTime.inline.hpp"
54 #include "gc/shared/gcVMOperations.hpp"
55 #include "gc/shared/partialArraySplitter.inline.hpp"
56 #include "gc/shared/partialArrayState.hpp"
57 #include "gc/shared/partialArrayTaskStats.hpp"
58 #include "gc/shared/referencePolicy.hpp"
59 #include "gc/shared/suspendibleThreadSet.hpp"
60 #include "gc/shared/taskqueue.inline.hpp"
61 #include "gc/shared/taskTerminator.hpp"
62 #include "gc/shared/weakProcessor.inline.hpp"
63 #include "gc/shared/workerPolicy.hpp"
64 #include "jvm.h"
65 #include "logging/log.hpp"
66 #include "memory/allocation.hpp"
67 #include "memory/iterator.hpp"
68 #include "memory/metaspaceUtils.hpp"
69 #include "memory/resourceArea.hpp"
70 #include "memory/universe.hpp"
71 #include "nmt/memTracker.hpp"
72 #include "oops/access.inline.hpp"
73 #include "oops/oop.inline.hpp"
74 #include "oops/oopCast.inline.hpp"
75 #include "runtime/globals_extension.hpp"
76 #include "runtime/handles.inline.hpp"
77 #include "runtime/java.hpp"
78 #include "runtime/orderAccess.hpp"
79 #include "runtime/os.hpp"
80 #include "runtime/prefetch.inline.hpp"
81 #include "runtime/threads.hpp"
82 #include "utilities/align.hpp"
83 #include "utilities/checkedCast.hpp"
84 #include "utilities/formatBuffer.hpp"
85 #include "utilities/growableArray.hpp"
86 #include "utilities/powerOfTwo.hpp"
87
// Default-construct in an uninitialized state; initialize() must be called
// before the closure is used.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }

// Construct fully initialized with the given (non-null) concurrent mark instance.
G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}

// One-time late initialization for default-constructed instances.
// Asserts against double initialization.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
99
// Called for each marked address found while iterating the mark bitmap.
// Processes the object at addr as a marking entry and partially drains the
// task's local queue and the global mark stack. Returns false (stopping the
// bitmap iteration) once the owning task has aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  // addr must lie between the task's local finger and the global finger.
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
116
// Construct an empty mark stack; backing memory is set up later via initialize().
G1CMMarkStack::G1CMMarkStack() :
  _chunk_allocator() {
  set_empty();
}

// Alignment, in number of G1TaskQueueEntry elements, to which mark stack
// capacities are rounded: the least common multiple of the OS allocation
// granularity and the chunk size, converted from bytes to entries.
size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}
125
// Set up the global mark stack from the MarkStackSize/MarkStackSizeMax flags.
// Converts the flag values (entries) into chunk counts, rounds the initial
// chunk count up to a power of two (required by the chunk allocator), clamps
// the resulting capacities, and writes the adjusted values back to the flags
// ergonomically. Returns false if the initial backing memory reservation fails.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  // Number of G1TaskQueueEntry elements per chunk.
  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  // Rounding up may have pushed the initial size past the configured maximum.
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Clamp capacities (in entries) so they stay representable in the flags.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  // Publish the possibly-adjusted sizes back to the flags.
  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
154
// Allocate a fresh chunk from the backing memory. A chunk index is claimed
// with an atomic fetch-add; if the bucket holding that index has not been
// reserved yet, the backing storage is grown under a lock (double-checked).
// Returns null when the maximum capacity is reached or growing is
// disallowed/fails — callers then treat the mark stack as overflown.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  // Cheap, racy early-out; the fetch_then_add below re-checks authoritatively.
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  // Claim the next chunk index.
  size_t cur_idx = _size.fetch_then_add(1u);

  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    // Double-checked locking: only one thread expands to cover this bucket.
    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  size_t bucket_idx = get_bucket_index(cur_idx);
  // Placement-new the chunk into its slot within the bucket's storage.
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
187
// Construct an empty allocator; storage is set up in initialize().
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }

// Create the bucket array and reserve the first bucket's backing memory.
// Capacities are in chunks; initial_capacity must be a power of two.
// Returns false if the initial reservation fails.
bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");

  _min_capacity = initial_capacity;
  _max_capacity = max_capacity;
  // One bucket per power-of-two size class up to and including _max_capacity.
  _num_buckets = get_bucket(_max_capacity) + 1;

  _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);

  for (size_t i = 0; i < _num_buckets; i++) {
    _buckets[i].store_relaxed(nullptr);
  }

  size_t new_capacity = bucket_size(0);

  if (!reserve(new_capacity)) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  return true;
}
219
// Try to grow the reserved backing memory to desired_capacity chunks,
// clamped to _max_capacity. Returns false when already at maximum or when
// the reservation fails; failures are logged, not fatal.
bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
  if (_capacity == _max_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
    return false;
  }

  size_t old_capacity = _capacity;
  desired_capacity = MIN2(desired_capacity, _max_capacity);

  if (reserve(desired_capacity)) {
    log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
                  old_capacity, desired_capacity);
    return true;
  }
  return false;
}
236
237 bool G1CMMarkStack::ChunkAllocator::try_expand() {
238 size_t new_capacity = _capacity * 2;
239 return try_expand_to(new_capacity);
240 }
241
// Release every reserved bucket and then the bucket array itself.
// Safe to run on a never-initialized allocator (_buckets == nullptr).
G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
  if (_buckets == nullptr) {
    return;
  }

  for (size_t i = 0; i < _num_buckets; i++) {
    if (_buckets[i].load_relaxed() != nullptr) {
      MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
      _buckets[i].store_relaxed(nullptr);
    }
  }

  FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
}
256
// Reserve backing memory so that at least new_capacity chunks are available,
// allocating every not-yet-reserved bucket between the current capacity and
// new_capacity. Buckets are published with a release store so that readers
// using load_acquire observe fully-initialized storage. Returns false if any
// bucket allocation fails (partial progress is kept and reflected in _capacity).
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    _capacity += bucket_capacity;
    // Publish the bucket only after its memory is fully set up.
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
291
// Grow the mark stack's backing storage (best effort — allocator failures
// are only logged).
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}

// Push elem at the head of the given singly-linked chunk list. Relaxed
// atomics only: callers serialize via the matching list lock (see
// add_chunk_to_chunk_list / add_chunk_to_free_list).
void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
  elem->next = list->load_relaxed();
  list->store_relaxed(elem);
}
300
// Publish a filled chunk on the chunk list, maintaining the element count.
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}

// Return an emptied chunk to the free list for reuse.
void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}
311
// Pop the head of the given singly-linked chunk list, or null if empty.
// Relaxed atomics only: callers serialize via the matching list lock.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
  TaskQueueEntryChunk* result = list->load_relaxed();
  if (result != nullptr) {
    list->store_relaxed(list->load_relaxed()->next);
  }
  return result;
}

// Take a filled chunk off the chunk list (null if none), keeping the count
// in sync.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != nullptr) {
    _chunks_in_chunk_list--;
  }
  return result;
}

// Take a reusable chunk off the free list (null if none).
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
333
// Copy EntriesPerChunk entries from ptr_arr into a newly obtained chunk and
// publish it on the chunk list. Returns false when no chunk can be obtained,
// i.e. the global mark stack has overflown.
bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == nullptr) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = _chunk_allocator.allocate_new_chunk();

    if (new_chunk == nullptr) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}
353
// Pop one chunk's worth of entries (EntriesPerChunk) into ptr_arr and recycle
// the chunk onto the free list. Returns false if the chunk list is empty.
bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == nullptr) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}
366
// Drop all content and reset the allocator. Uses relaxed stores without
// locking — presumably only called when no concurrent push/pop activity is
// possible (construction and marking reset); verify against callers.
void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _chunk_list.store_relaxed(nullptr);
  _free_list.store_relaxed(nullptr);
  _chunk_allocator.reset();
}
373
// Fixed-size array of root MemRegions to be scanned before marking; sized for
// the maximum number of regions.
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
  _root_regions(MemRegion::create_array(max_regions, mtGC)),
  _max_regions(max_regions),
  _num_root_regions(0),
  _claimed_root_regions(0),
  _scan_in_progress(false),
  _should_abort(false) { }

G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}

// Forget all registered root regions.
void G1CMRootMemRegions::reset() {
  _num_root_regions.store_relaxed(0);
}
389
// Register [start, end) as a root region to scan. Only called at a safepoint;
// the slot index is still claimed atomically since multiple workers may add
// concurrently within the safepoint operation.
void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
  assert_at_safepoint();
  size_t idx = _num_root_regions.fetch_then_add(1u);
  // NOTE(review): %zu assumes _max_regions is size_t; the constructor takes a
  // uint — confirm the member's declared type matches the format specifier.
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %zu", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
  _root_regions[idx].set_start(start);
  _root_regions[idx].set_end(end);
}
399
// Arm the root region scan: scanning is "in progress" only if there is at
// least one registered region; claims and the abort flag start cleared.
void G1CMRootMemRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  _scan_in_progress.store_relaxed(num_root_regions() > 0);

  _claimed_root_regions.store_relaxed(0);
  _should_abort.store_relaxed(false);
}
408
// Claim the next unscanned root region for the calling worker, or null when
// all regions are claimed or an abort was requested.
const MemRegion* G1CMRootMemRegions::claim_next() {
  if (_should_abort.load_relaxed()) {
    // If someone has set the should_abort flag, we return null to
    // force the caller to bail out of their loop.
    return nullptr;
  }

  // Cheap, racy early-out; the fetch_then_add below is the real claim.
  uint local_num_root_regions = num_root_regions();
  if (_claimed_root_regions.load_relaxed() >= local_num_root_regions) {
    return nullptr;
  }

  size_t claimed_index = _claimed_root_regions.fetch_then_add(1u);
  if (claimed_index < local_num_root_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}
427
// Number of currently registered root regions.
uint G1CMRootMemRegions::num_root_regions() const {
  return (uint)_num_root_regions.load_relaxed();
}
431
432 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
433 uint local_num_root_regions = num_root_regions();
434 for (uint i = 0; i < local_num_root_regions; i++) {
435 if (_root_regions[i].equals(mr)) {
436 return true;
437 }
438 }
439 return false;
440 }
441
// Mark the scan as done and wake any threads blocked in
// wait_until_scan_finished().
void G1CMRootMemRegions::notify_scan_done() {
  MutexLocker x(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress.store_relaxed(false);
  G1RootRegionScan_lock->notify_all();
}

// Abandon the scan: simply signals completion; claim_next() stops handing
// out regions via the _should_abort flag set elsewhere.
void G1CMRootMemRegions::cancel_scan() {
  notify_scan_done();
}

// Called when the scan completed normally; unless aborted, all regions must
// have been claimed.
void G1CMRootMemRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  if (!_should_abort.load_relaxed()) {
    assert(_claimed_root_regions.load_relaxed() >= num_root_regions(),
           "we should have claimed all root regions, claimed %zu, length = %u",
           _claimed_root_regions.load_relaxed(), num_root_regions());
  }

  notify_scan_done();
}
463
// Block until the root region scan finishes. Returns true if we actually had
// to wait, false if no scan was in progress to begin with.
bool G1CMRootMemRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    MonitorLocker ml(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Re-check under the lock; notify_scan_done() clears the flag and
    // notifies while holding the same lock.
    while (scan_in_progress()) {
      ml.wait();
    }
  }
  return true;
}
477
// Construct the concurrent mark subsystem. Only cheap state and C-heap
// arrays are set up here; thread creation, worker setup and per-region
// initialization happen later in fully_initialize().
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  // Per-region arrays sized for the maximum region count; element
  // construction is done in fully_initialize().
  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
532
// Complete initialization: create the marking thread and concurrent workers,
// the global mark stack, the per-worker tasks/queues, and construct the
// per-region statistics elements. Idempotent — returns early if already done.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  // One task and one queue per potential worker.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Placement-construct the per-region elements of the raw C-heap arrays
  // allocated in the constructor.
  for (uint i = 0; i < _g1h->max_num_regions(); i++) {
    ::new (&_region_mark_stats[i]) G1RegionMarkStats{};
    ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>{};
    ::new (&_top_at_rebuild_starts[i]) Atomic<HeapWord*>{};
  }

  reset_at_marking_complete();
}
577
578 bool G1ConcurrentMark::in_progress() const {
579 return is_fully_initialized() ? _cm_thread->in_progress() : false;
580 }
581
// Accessor for the shared partial-array state manager used by the tasks.
PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
  return _partial_array_state_manager;
}
585
// Reset all marking state for a new marking cycle: clears the abort flag,
// restores restart state, and resets every task, per-region statistic and
// the root regions.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  uint max_num_regions = _g1h->max_num_regions();
  for (uint i = 0; i < max_num_regions; i++) {
    _top_at_rebuild_starts[i].store_relaxed(nullptr);
    _region_mark_stats[i].clear();
  }

  _root_regions.reset();
}
605
// Drop all marking statistics gathered for region r: every task's cached
// stats entry for the region, its top-at-rebuild-start, and its mark stats.
void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
  uint region_idx = r->hrm_index();
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
  _region_mark_stats[region_idx].clear();
}
614
// Called (at a safepoint) when a humongous object starting in r has been
// eagerly reclaimed: clear its mark bit and, if marking/rebuild is running,
// drop the statistics for all regions the object spanned.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
632
// Prepare marking data structures for a restart of the marking phase
// (after overflow) or for the start/end of a cycle: empty the global stack,
// grow it if it overflowed, clear per-region overflow state, and reset the
// global finger and all tasks.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Restart global scanning from the bottom of the heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
653
// Set the number of tasks active in the upcoming phase and size the
// termination protocol and both overflow barriers accordingly.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}

// As set_concurrency(), additionally recording whether the phase runs
// concurrently or inside a safepoint (remark).
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
678
#if TASKQUEUE_STATS
// Log the task queue and partial-array task statistics for all tasks, then
// reset them for the next cycle. Only compiled in when TASKQUEUE_STATS is on.
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("G1ConcurrentMark Oop Queue");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "G1ConcurrentMark Partial Array Task Stats");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
#endif
696
// Return the marking state to its idle (not-marking) configuration.
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}
704
// Destructor exists only to satisfy the compiler; the instance lives for the
// duration of the VM, so actually reaching this is a bug.
G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
712
// Worker task that clears the mark bitmap in parallel, region by region,
// in fixed-size chunks. When run suspendibly (concurrently), workers join
// the SuspendibleThreadSet and yield/abort between chunks.
class G1ClearBitMapTask : public WorkerTask {
public:
  // Work unit: number of bitmap bytes cleared between yield checks.
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
  private:
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->in_undo_mark();
    }

    // In the suspendible case, yield if requested and report whether the
    // concurrent mark has been aborted; never aborts otherwise.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clear the bitmap for one region in chunk_size() pieces, checking for
    // abort between pieces. Returns true to stop the region iteration early.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->in_progress(), "invariant");
        assert(!suspendible() || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }

  // Whether the closure iterated over all regions without aborting.
  bool is_complete() {
    return _cl.is_complete();
  }
};
823
// Clear the mark bitmap using the given workers. With may_yield the clearing
// runs suspendibly (and may be incomplete on abort); otherwise it must run at
// a safepoint and is guaranteed complete. Worker count is capped by the number
// of chunk-sized work units.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}
838
839 void G1ConcurrentMark::cleanup_for_next_mark() {
840 // Make sure that the concurrent mark thread looks to still be in
841 // the current cycle.
842 guarantee(is_fully_initialized(), "should be initializd");
843 guarantee(in_progress(), "invariant");
844
845 // We are finishing up the current cycle by clearing the next
846 // marking bitmap and getting it ready for the next cycle. During
847 // this time no other cycle can start. So, let's make sure that this
848 // is the case.
849 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
850
851 clear_bitmap(_concurrent_workers, true);
852
853 reset_partial_array_state_manager();
854
855 // Repeat the asserts from above.
856 guarantee(is_fully_initialized(), "should be initializd");
857 guarantee(in_progress(), "invariant");
858 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
859 }
860
// Reset the shared partial-array state manager. Ordering matters: all tasks
// must unregister their splitters before the manager is reset, then re-register
// afterwards.
void G1ConcurrentMark::reset_partial_array_state_manager() {
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->unregister_partial_array_splitter();
  }

  partial_array_state_manager()->reset();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->register_partial_array_splitter();
  }
}
872
// Non-yielding bitmap clear for full collections; must run on the VM thread
// at a safepoint.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
  assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible the number of active workers are temporarily
  // increased to include all currently created workers.
  WithActiveWorkers update(workers, workers->created_workers());
  clear_bitmap(workers, false);
}
882
// Batched task run at the start of a concurrent cycle: one serial subtask to
// reset the marking state and one parallel subtask to note the start of
// marking per region.
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};

// Serial subtask: resets all of G1ConcurrentMark's marking state.
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};

// Parallel subtask: records top-at-mark-start for every region.
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
  G1HeapRegionClaimer _claimer;
public:
  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _claimer(0) { }

  double worker_cost() const override {
    // The work done per region is very small, therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
    const double regions_per_thread = 1000;
    return _claimer.n_regions() / regions_per_thread;
  }

  void set_max_workers(uint max_workers) override;
  void do_work(uint worker_id) override;
};
917
// Serial: runs on a single worker; worker_id is unused.
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
922
923 class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
924 G1ConcurrentMark* _cm;
925
926 public:
927 NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
928
929 bool do_heap_region(G1HeapRegion* r) override {
930 if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
931 _cm->update_top_at_mark_start(r);
932 } else {
933 _cm->reset_top_at_mark_start(r);
934 }
935 return false;
936 }
937 };
938
// Apply NoteStartOfMarkHRClosure to the heap regions claimed by this worker.
void G1PreConcurrentStartTask::NoteStartOfMarkTask::do_work(uint worker_id) {
  NoteStartOfMarkHRClosure start_cl;
  G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&start_cl, &_claimer, worker_id);
}
943
// Size the region claimer for the number of workers that will participate.
void G1PreConcurrentStartTask::NoteStartOfMarkTask::set_max_workers(uint max_workers) {
  _claimer.set_n_workers(max_workers);
}
947
948 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
949 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
950 add_serial_task(new ResetMarkingStateTask(cm));
951 add_parallel_task(new NoteStartOfMarkTask());
952 };
953
// Safepoint work preceding a concurrent start pause: starts the code cache
// marking cycle, verifies CLD claim marks are clear, and runs the
// G1PreConcurrentStartTask batch.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  // Record the cause that triggered this marking cycle for tracing.
  _gc_tracer_cm->set_gc_cause(cause);
}
966
967
// Work following the concurrent start pause: begin concurrent reference
// discovery, activate SATB queues, and prepare root region scanning.
void G1ConcurrentMark::post_concurrent_mark_start() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  rp->start_discovery(false /* always_clear */);

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // concurrent start pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
986
// An undo of a concurrent start: root region scanning is not needed, cancel it.
void G1ConcurrentMark::post_concurrent_undo_start() {
  root_regions()->cancel_scan();
}
990
991 /*
992 * Notice that in the next two methods, we actually leave the STS
993 * during the barrier sync and join it immediately afterwards. If we
994 * do not do this, the following deadlock can occur: one thread could
995 * be in the barrier sync code, waiting for the other thread to also
996 * sync up, whereas another one could be trying to yield, while also
997 * waiting for the other threads to sync up too.
998 *
999 * Note, however, that this code is also used during remark and in
1000 * this case we should not attempt to leave / enter the STS, otherwise
1001 * we'll either hit an assert (debug / fastdebug) or deadlock
1002 * (product). So we should only leave / enter the STS if we are
1003 * operating concurrently.
1004 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause could occur while
 * it is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
1010 */
1011
// Rendezvous at the first overflow barrier. The STS is left only while
// waiting (and only when operating concurrently) — see the comment block
// above for why. worker_id is currently unused.
void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}
1028
// Rendezvous at the second overflow barrier, again leaving the STS while
// waiting when concurrent. worker_id is currently unused.
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1035
// Worker task driving the concurrent marking phase: each worker repeatedly
// executes bounded marking steps until marking finishes or is aborted.
class G1CMConcurrentMarkingTask : public WorkerTask {
  G1ConcurrentMark* _cm;

public:
  void work(uint worker_id) {
    ResourceMark rm;

    // Join the suspendible thread set so this worker yields for safepoints.
    SuspendibleThreadSetJoiner sts_join;

    assert(worker_id < _cm->active_tasks(), "invariant");

    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        // Time-bounded marking step; a step that runs out of time sets the
        // task's aborted flag and we simply loop to continue.
        task->do_marking_step(G1ConcMarkStepDurationMillis,
                              true /* do_termination */,
                              false /* is_serial*/);

        _cm->do_yield_check();
      } while (!_cm->has_aborted() && task->has_aborted());
    }
    task->record_end_time();
    // A task may only remain aborted if the whole marking was aborted.
    guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
    WorkerTask("Concurrent Mark"), _cm(cm) { }

  ~G1CMConcurrentMarkingTask() { }
};
1067
1068 uint G1ConcurrentMark::calc_active_marking_workers() {
1069 uint result = 0;
1070 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1071 result = _max_concurrent_workers;
1072 } else {
1073 result =
1074 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1075 1, /* Minimum workers */
1076 _num_concurrent_workers,
1077 Threads::number_of_non_daemon_threads());
1078 // Don't scale the result down by scale_concurrent_workers() because
1079 // that scaling has already gone into "_max_concurrent_workers".
1080 }
1081 assert(result > 0 && result <= _max_concurrent_workers,
1082 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1083 _max_concurrent_workers, result);
1084 return result;
1085 }
1086
// Scan one root region's MemRegion [TAMS, top) by iterating its objects with
// a G1RootRegionScanClosure.
void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
#ifdef ASSERT
  HeapWord* last = region->last();
  G1HeapRegion* hr = _g1h->heap_region_containing(last);
  assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
         "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
  assert(top_at_mark_start(hr) == region->start(),
         "MemRegion start should be equal to TAMS");
#endif

  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = region->start();
  const HeapWord* end = region->end();
  // Walk the objects one by one, prefetching ahead of the scan cursor.
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = cast_to_oop(curr);
    size_t size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}
1110
1111 class G1CMRootRegionScanTask : public WorkerTask {
1112 G1ConcurrentMark* _cm;
1113 public:
1114 G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1115 WorkerTask("G1 Root Region Scan"), _cm(cm) { }
1116
1117 void work(uint worker_id) {
1118 G1CMRootMemRegions* root_regions = _cm->root_regions();
1119 const MemRegion* region = root_regions->claim_next();
1120 while (region != nullptr) {
1121 _cm->scan_root_region(region, worker_id);
1122 region = root_regions->claim_next();
1123 }
1124 }
1125 };
1126
// Run the concurrent root region scan using one worker per root region,
// capped at the maximum number of concurrent workers.
void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    // Assign one worker to each root-region but subject to the max constraint.
    const uint num_workers = MIN2(root_regions()->num_root_regions(),
                                  _max_concurrent_workers);

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), num_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, num_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}
1149
// Block until the root region scan has completed; forwards the result.
bool G1ConcurrentMark::wait_until_root_region_scan_finished() {
  return root_regions()->wait_until_scan_finished();
}
1153
// Register the [TAMS, top) part of the given region as a root region to scan.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1157
// Whether the given region's [TAMS, top) range is registered as a root region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1161
// Abort the root region scan and wait until the workers have drained out.
void G1ConcurrentMark::root_region_scan_abort_and_wait() {
  root_regions()->abort();
  root_regions()->wait_until_scan_finished();
}
1166
// Bookkeeping at the start of a concurrent marking cycle: start the timer,
// the tracer and record the pre-GC heap state.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1174
// Number of fully completed marking cycles so far (relaxed read).
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1178
// Bookkeeping at the end of a concurrent marking cycle: update the completed
// cycle counter, report aborts, and finish timing/tracing.
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->collector_state()->set_clear_bitmap_in_progress(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    // Relaxed is sufficient; readers use load_relaxed() as well.
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1197
// Run the concurrent marking phase proper with a freshly calculated number
// of workers.
void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow.store_relaxed(false);

  uint active_workers = calc_active_marking_workers();

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->set_active_workers(active_workers);
  log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());

  _num_concurrent_workers = active_workers;

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}
1218
// Map a VerifyLocation to its printable name; the table is indexed by the
// enum's underlying value, so its order must match the enumerator order.
const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
  static const char* location_strings[] = { "Remark Before",
                                            "Remark After",
                                            "Remark Overflow",
                                            "Cleanup Before",
                                            "Cleanup After" };
  return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
}
1227
// Optional heap verification during a marking-related pause, identified by
// the verification type and the location within the pause.
void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
                                           VerifyLocation location) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  const char* caller = verify_location_string(location);

  if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(VerifyOption::G1UseConcMarking, buffer);

    // Only check bitmap in Remark, and not at After-Verification because the regions
    // already have their TAMS'es reset.
    if (location != VerifyLocation::RemarkAfter) {
      verifier->verify_bitmap_clear(true /* above_tams_only */);
    }
  }
}
1252
1253 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1254 G1CollectedHeap* _g1h;
1255 public:
1256 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1257
1258 bool do_object_b(oop obj) {
1259 return !_g1h->is_obj_dead(obj);
1260 }
1261 };
1262
// The Remark pause: finalize marking, and — if marking completed without
// mark stack overflow — process weak references, unload classes/code,
// deactivate SATB queues, select regions for remembered set rebuild, and
// finish the marking cycle. On overflow, marking is restarted instead.
void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work();

    // Unload Klasses, String, Code Cache, etc.
    if (ClassUnloadingWithConcurrentMark) {
      G1CMIsAliveClosure is_alive(this);
      _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
    }

    SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                      true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
      flush_all_task_caches();
    }

    // All marking completed. Check bitmap now as we will start to reset TAMSes
    // in parallel below so that we can not do this in the After-Remark verification.
    _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);

    {
      GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);

      G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
      uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
                                    _g1h->workers()->active_workers());
      log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
      _g1h->workers()->run_task(&cl, num_workers);

      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_committed_regions(), cl.total_selected_for_rebuild());

      // A remembered set rebuild (and the Cleanup pause work that depends on
      // it) is only needed if any region was actually selected.
      _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);

      if (_needs_remembered_set_rebuild) {
        // Prune rebuild candidates based on G1HeapWastePercent.
        // Improves rebuild time in addition to remembered set memory usage.
        G1CollectionSetChooser::build(_g1h->workers(), _g1h->num_committed_regions(), _g1h->policy()->candidates());
      }
    }

    if (log_is_enabled(Trace, gc, liveness)) {
      G1PrintRegionLivenessInfoClosure cl("Post-Marking");
      _g1h->heap_region_iterate(&cl);
    }

    // Potentially, some empty-regions have been reclaimed; make this a
    // "collection" so that pending allocation can retry before attempting a
    // GC pause.
    _g1h->increment_total_collections();

    // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
    // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
    // triggered, it likely means there are very few regular GCs, making resizing
    // based on gc heuristics less effective.
    if (_g1h->last_gc_was_periodic()) {
      _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state (except bitmaps) since marking completed.
    reset_at_marking_complete();

    G1CollectedHeap::finish_codecache_marking_cycle();

    {
      GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
      G1ObjectCountIsAliveClosure is_alive(_g1h);
      _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
    }
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow.store_relaxed(true);

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  _g1h->update_perf_counter_cpu_time();

  policy->record_concurrent_mark_remark_end();
}
1387
// Recompute metaspace and heap monitoring sizes after marking has reclaimed
// regions.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1399
// Per-region closure run in the Cleanup pause after a remembered set rebuild
// to finalize each region's remembered set tracking state.
class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
  G1CollectedHeap* _g1h;

public:
  G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  bool do_heap_region(G1HeapRegion* r) override {
    // Update the remset tracking state from updating to complete
    // if remembered sets have been rebuilt.
    _g1h->policy()->remset_tracker()->update_after_rebuild(r);
    // Never abort the iteration.
    return false;
  }
};
1413
// The Cleanup pause: finalize remembered set tracking after the concurrent
// rebuild (if one was needed) and record statistics.
void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);

  if (needs_remembered_set_rebuild()) {
    // Update the remset tracking information as well as marking all regions
    // as fully parsable.
    GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
    G1UpdateRegionsAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  } else {
    log_debug(gc, phases)("No Remembered Sets to update after rebuild");
  }

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);

  // Local statistics
  _cleanup_times.add((os::elapsedTime() - start) * 1000.0);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
    policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
  }
}
1449
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1451 // Uses the G1CMTask associated with a worker thread (for serial reference
1452 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1453 // trace referent objects.
1454 //
1455 // Using the G1CMTask and embedded local queues avoids having the worker
1456 // threads operating on the global mark stack. This reduces the risk
1457 // of overflowing the stack - which we would rather avoid at this late
1458 // state. Also using the tasks' local queues removes the potential
1459 // of the workers interfering with each other that could occur if
1460 // operating on the global stack.
1461
// See the comment block above: keeps referents alive via the per-worker
// G1CMTask and periodically drains the task's local queues.
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  uint              _ref_counter_limit;  // Drain after this many marked refs.
  uint              _ref_counter;        // Countdown to the next drain.
  bool              _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    // Once the global mark stack overflowed there is no point in continuing.
    if (_cm->has_overflown()) {
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1516
1517 // 'Drain' oop closure used by both serial and parallel reference processing.
1518 // Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
1520 // do_marking_step routine, with an unbelievably large timeout value,
1521 // to drain the marking data structures of the remaining entries
1522 // added by the 'keep alive' oop closure above.
1523
// See the comment block above: fully drains the marking stacks via
// do_marking_step() with an effectively unbounded time budget.
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  bool              _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1559
// Proxy task that adapts concurrent-mark reference processing to the
// RefProcProxyTask framework, wiring up the is-alive, keep-alive, enqueue
// and drain closures for each worker.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h;
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In single-threaded mode everything runs on the task for worker 0.
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1588
// Remark-time weak reference and weak-oops processing. A mark stack overflow
// during reference processing is fatal because liveness information can no
// longer be trusted.
void G1ConcurrentMark::weak_refs_work() {
  ResourceMark rm;

  {
    GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Prefer to grow the stack until the max capacity.
    _global_mark_stack.set_should_grow();

    // Parallel processing task executor.
    G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());

    // Process the weak references.
    const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
    _gc_tracer_cm->report_gc_reference_stats(stats);
    pt.print_all_references();

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(has_overflown() || _global_mark_stack.is_empty(),
           "Mark stack should be empty (unless it has overflown)");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive and the contents of the heap if the marking stack
    // overflowed while processing references. Exit the VM.
    fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
          "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
          "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
          _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
    return;
  }

  assert(_global_mark_stack.is_empty(), "Marking should have completed");

  {
    GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
    G1CMIsAliveClosure is_alive(this);
    WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
  }
}
1640
// Yield closure used during precleaning: tells the reference processor to
// return early once concurrent marking has been aborted, performing yield
// checks at the fine-grained level.
class G1PrecleanYieldClosure : public YieldClosure {
  G1ConcurrentMark* _cm;

public:
  G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }

  virtual bool should_return() {
    return _cm->has_aborted();
  }

  virtual bool should_return_fine_grain() {
    // Give safepoints a chance before checking for abort.
    _cm->do_yield_check();
    return _cm->has_aborted();
  }
};
1656
// Concurrent precleaning of discovered references, run single-threaded
// while joined to the suspendible thread set.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  set_concurrency_and_phase(1, true);

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1676
// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  G1CollectedHeap* _g1h;  // Stored but not used by the visible entry processing.

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    oop const obj = cast_to_oop(entry);
    _task->make_reference_grey(obj);
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  // Process every entry in the given buffer.
  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};
1702
// Thread closure run during Remark: flushes each thread's partially filled
// SATB queue into the global queue set so it is processed with the completed
// buffers. The g1h and task constructor arguments are currently unused.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1SATBMarkQueueSet& _qset;

public:
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _qset(G1BarrierSet::satb_mark_queue_set()) {}

  void do_thread(Thread* thread) {
    // Transfer any partial buffer to the qset for completed buffer processing.
    _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
  }
};
1715
// Worker task for the parallel part of Remark: flushes per-thread SATB
// buffers, then runs do_marking_step() until done or overflown.
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    do {
      // Unbounded time budget; we are inside the Remark pause.
      task->do_marking_step(1000000000.0 /* something very large */,
                            true /* do_termination */,
                            false /* is_serial */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1746
// Finish marking during the Remark pause: drain remaining SATB buffers and
// marking stacks with all active workers.
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // Unless we overflowed, all completed SATB buffers must have been consumed.
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1777
1778 void G1ConcurrentMark::flush_all_task_caches() {
1779 size_t hits = 0;
1780 size_t misses = 0;
1781 for (uint i = 0; i < _max_num_tasks; i++) {
1782 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1783 hits += stats.first;
1784 misses += stats.second;
1785 }
1786 size_t sum = hits + misses;
1787 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1788 hits, misses, percent_of(hits, sum));
1789 }
1790
// Clear the mark bitmap for the whole of the given region. Must be
// called at a safepoint since the bitmap clear is not synchronized with
// concurrent marking.
void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
  assert_at_safepoint();
  _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
}
1795
// Claim the next region to scan by atomically advancing the global
// finger past it with a CAS. Returns the claimed region, or null if
// either the CAS lost a race, the claimed region has nothing to scan
// (TAMS == bottom), or the heap has been exhausted. On a null return
// with regions remaining the caller should simply retry.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // Read the finger again.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  return nullptr;
}
1841
1842 #ifndef PRODUCT
// Debug-only closure applied to mark stack / task queue entries. It
// verifies that partial-array sources lie within the heap and that any
// plain oop entry is a valid oop outside the (optional) collection set.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;   // Description of where the entries came from (for messages).
  int _info;            // Optional extra id, e.g. the queue index; -1 if unused.

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // Partial-array entries only need their source object to be in the heap.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1870
// Debug-only verification that neither the global mark stack, the task
// queues, nor any of the fingers reference the (optional) collection
// set while marking or remembered set rebuild is in progress.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      // NOTE(review): this guarantee only fails when the region is in the
      // collection set AND in the optional cset (!a || !b == !(a && b)),
      // which is weaker than the entry check above — confirm this
      // asymmetry is intended.
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1913 #endif // PRODUCT
1914
1915 void G1ConcurrentMark::rebuild_and_scrub() {
1916 if (!needs_remembered_set_rebuild()) {
1917 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1918 }
1919
1920 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1921 }
1922
1923 void G1ConcurrentMark::print_stats() {
1924 if (!log_is_enabled(Debug, gc, stats)) {
1925 return;
1926 }
1927 log_debug(gc, stats)("---------------------------------------------------------------------");
1928 for (size_t i = 0; i < _num_active_tasks; ++i) {
1929 _tasks[i]->print_stats();
1930 log_debug(gc, stats)("---------------------------------------------------------------------");
1931 }
1932 }
1933
// Abort an in-progress concurrent marking cycle, e.g. because a Full GC
// is about to run. Returns true if marking state actually had to be
// torn down, false when no concurrent cycle was in progress.
bool G1ConcurrentMark::concurrent_cycle_abort() {
  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. So let's wait until
  // they are done. By telling them to abort, they should complete
  // early.
  root_region_scan_abort_and_wait();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->in_progress() && !_g1h->concurrent_mark_is_terminating())) {
    return false;
  }

  reset_marking_for_restart();

  abort_marking_threads();

  // Discard any SATB data gathered so far and deactivate the SATB queues.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1968
// Signal the marking threads to abort, and break them out of the two
// overflow barrier syncs they might currently be waiting on.
void G1ConcurrentMark::abort_marking_threads() {
  assert(!_root_regions.scan_in_progress(), "still doing root region scan");
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
1975
1976 double G1ConcurrentMark::worker_threads_cpu_time_s() {
1977 class CountCpuTimeThreadClosure : public ThreadClosure {
1978 public:
1979 jlong _total_cpu_time;
1980
1981 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
1982
1983 void do_thread(Thread* t) {
1984 _total_cpu_time += os::thread_cpu_time(t);
1985 }
1986 } cl;
1987
1988 threads_do(&cl);
1989
1990 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
1991 }
1992
1993 static void print_ms_time_info(const char* prefix, const char* name,
1994 NumberSeq& ns) {
1995 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1996 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
1997 if (ns.num() > 0) {
1998 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
1999 prefix, ns.sd(), ns.maximum());
2000 }
2001 }
2002
// Log a trace-level summary of all concurrent-marking timing data
// gathered so far (remark, cleanup and concurrent phases).
void G1ConcurrentMark::print_summary_info() {
  Log(gc, marking) log;
  if (!log.is_trace()) {
    return;
  }

  log.trace(" Concurrent marking:");
  if (!is_fully_initialized()) {
    // The timing series only exist once marking has been set up.
    log.trace(" has not been initialized yet");
    return;
  }
  print_ms_time_info(" ", "remarks", _remark_times);
  {
    // Breakdown of the remark times printed above.
    print_ms_time_info(" ", "final marks", _remark_mark_times);
    print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info(" ", "cleanups", _cleanup_times);
  log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
            _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
  log.trace(" Total stop_world time = %8.2f s.",
            (_remark_times.sum() + _cleanup_times.sum())/1000.0);
  log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
            cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
}
2028
// Apply tc to the concurrent mark thread and all concurrent workers.
// A no-op before those threads exist.
void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
  if (is_fully_initialized()) { // they are initialized late
    tc->do_thread(_cm_thread);
    _concurrent_workers->threads_do(tc);
  }
}
2035
// Print the mark bitmap's address and contents, for debugging.
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2040
2041 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2042 ReferenceProcessor* result = g1h->ref_processor_cm();
2043 assert(result != nullptr, "CM reference processor should not be null");
2044 return result;
2045 }
2046
// Oop closure used while marking: claims class metadata strongly and
// discovers references via the CM reference processor.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2052
// Begin scanning the given (non-null) region: record it as current,
// place the task finger at its bottom and compute the scan limit.
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
         "claim_region() should have filtered out null regions");
  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}
2060
// Recompute _region_limit (= TAMS of the current region), handling the
// cases where an evacuation pause has emptied or refilled the region
// underneath us since we claimed it.
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    // Limit unchanged or grew; keep scanning from the current finger.
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2092
// Abandon the currently held region; scanning will continue with a
// freshly claimed region.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2097
// Reset the per-region scanning state of this task.
void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = nullptr;
  _finger = nullptr;
  _region_limit = nullptr;
}
2105
2106 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2107 if (cm_oop_closure == nullptr) {
2108 assert(_cm_oop_closure != nullptr, "invariant");
2109 } else {
2110 assert(_cm_oop_closure == nullptr, "invariant");
2111 }
2112 _cm_oop_closure = cm_oop_closure;
2113 }
2114
// Prepare this task for a new marking cycle using the given bitmap:
// clear region state, timing counters and the mark stats cache.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap = mark_bitmap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;

  _mark_stats_cache.reset();
}
2126
// Reset this task's queue and region state so that marking can be
// restarted, e.g. after a mark stack overflow.
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2133
// (Re-)construct the partial array splitter in place via placement new,
// hooking it up to the current partial array state manager. Paired with
// unregister_partial_array_splitter(), which runs the destructor.
void G1CMTask::register_partial_array_splitter() {

  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks(),
                                                        ObjArrayMarkingStride);
}
2140
// Explicitly destroy the in-place constructed splitter; paired with
// register_partial_array_splitter().
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2144
2145 bool G1CMTask::should_exit_termination() {
2146 if (!regular_clock_call()) {
2147 return true;
2148 }
2149
2150 // This is called when we are in the termination protocol. We should
2151 // quit if, for some reason, this task wants to abort or the global
2152 // stack is not empty (this means that we can get work from it).
2153 return !_cm->mark_stack_empty() || has_aborted();
2154 }
2155
// Invoked when one of the scanning limits has been hit; runs the regular
// clock so we can react (yield, process SATB buffers, time out, ...).
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2162
// The "regular clock", called periodically during marking. Returns
// false if this task should abort its current marking step (overflow,
// Full GC abort, pending yield, time quota exceeded, or SATB buffers
// waiting), true if it may continue.
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (3) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (4) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2216
2217 void G1CMTask::recalculate_limits() {
2218 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2219 _words_scanned_limit = _real_words_scanned_limit;
2220
2221 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2222 _refs_reached_limit = _real_refs_reached_limit;
2223 }
2224
// Tighten the current scanning limits to three quarters of a period
// earlier, so the regular clock fires sooner.
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2234
// Move up to one chunk (EntriesPerChunk) of entries from the local task
// queue to the global mark stack. Sets the abort flag if the global
// push fails (stack overflow).
void G1CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  size_t n = 0;
  G1TaskQueueEntry task_entry;
  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
    buffer[n] = task_entry;
    ++n;
  }
  if (n < G1CMMarkStack::EntriesPerChunk) {
    // Terminate a partially filled chunk with a null entry.
    buffer[n] = G1TaskQueueEntry();
  }

  if (n > 0) {
    if (!_cm->mark_stack_push(buffer)) {
      set_has_aborted();
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}
2259
// Pop one chunk of entries from the global mark stack and push them
// onto the local task queue. Returns false if the global stack was
// empty, true otherwise.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    if (task_entry.is_null()) {
      // A null entry terminates a partially filled chunk.
      break;
    }
    assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}
2286
2287 void G1CMTask::drain_local_queue(bool partially) {
2288 if (has_aborted()) {
2289 return;
2290 }
2291
2292 // Decide what the target size is, depending whether we're going to
2293 // drain it partially (so that other tasks can steal if they run out
2294 // of things to do) or totally (at the very end).
2295 uint target_size;
2296 if (partially) {
2297 target_size = GCDrainStackTargetSize;
2298 } else {
2299 target_size = 0;
2300 }
2301
2302 if (_task_queue->size() > target_size) {
2303 G1TaskQueueEntry entry;
2304 bool ret = _task_queue->pop_local(entry);
2305 while (ret) {
2306 process_entry(entry, false /* stolen */);
2307 if (_task_queue->size() <= target_size || has_aborted()) {
2308 ret = false;
2309 } else {
2310 ret = _task_queue->pop_local(entry);
2311 }
2312 }
2313 }
2314 }
2315
// Begin sliced processing of a large object array: register the array
// with the splitter (which enqueues partial-array states for the
// remainder), mark its klass metadata, and scan the initial chunk.
// Returns the number of words considered scanned for this call: the
// object header plus the initial chunk.
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = oop_cast<objArrayOop>(obj);
  size_t array_length = obj_array->length();

  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());

    // Flat arrays additionally reference their element klass.
    if (obj_array->is_flatArray()) {
      FlatArrayKlass* faklass = FlatArrayKlass::cast(obj_array->klass());
      _cm_oop_closure->do_klass(faklass->element_klass());
    }
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  if (obj_array->is_refArray()) {
    return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
  } else {
    // Flat array: element size depends on the layout helper.
    FlatArrayKlass* faKlass = FlatArrayKlass::cast(obj_array->klass());
    return flatArrayOopDesc::object_size(faKlass->layout_helper(), checked_cast<int>(initial_chunk_size));
  }
}
2344
// Process one partial-array task: claim a chunk of the source array from
// the shared state (possibly enqueuing further partial states) and scan
// it. Returns the number of words scanned for the claimed chunk.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = oop_cast<objArrayOop>(state->source());

  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);

  if (obj->is_refArray()) {
    // Reference array: elements are heap oops of heapOopSize bytes.
    return heap_word_size((claim._end - claim._start) * heapOopSize);
  } else {
    assert(obj->is_flatArray(), "Must be!");
    // Flat array: per-element size comes from the klass.
    size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
    size_t nof_elements = claim._end - claim._start;
    return heap_word_size(nof_elements * element_byte_size);
  }
}
2364
// Move entries from the global mark stack into the local queue and
// process them. When partially, stop once the global stack drops below
// the partial target size; otherwise keep going until it is empty.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2395
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
// Claim and process completed SATB buffers until none remain or the
// marking step needs to abort.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    abort_marking_if_regular_check_fail();
  }

  // Can't assert qset is empty here, even if not aborted. If concurrent,
  // some other thread might be adding to the queue. If not concurrent,
  // some other thread might have won the race for the last buffer, but
  // has not yet decremented the count.

  _draining_satb_buffers = false;

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
2432
// Drop any cached mark statistics for the given region from this task's
// mark stats cache.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2436
// Evict all entries of this task's mark stats cache into the global
// statistics; returns the cache's (hits, misses) pair.
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2440
2441 void G1CMTask::print_stats() {
2442 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2443 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2444 _elapsed_time_ms, _termination_time_ms);
2445 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2446 _step_times_ms.num(),
2447 _step_times_ms.avg(),
2448 _step_times_ms.sd(),
2449 _step_times_ms.maximum(),
2450 _step_times_ms.sum());
2451 size_t const hits = _mark_stats_cache.hits();
2452 size_t const misses = _mark_stats_cache.misses();
2453 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2454 hits, misses, percent_of(hits, hits + misses));
2455 }
2456
// Try to steal an entry from another task's queue on behalf of
// worker_id; returns true and fills task_entry on success.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2460
// Scan the bitmap of the currently held region from the task finger up
// to the (re-read) region limit, handling the empty and humongous
// special cases. The scan may abort part-way through; in that case
// _finger is left positioned so a later restart does not rescan the
// last object.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      // Stepping past the last object finished the region after all.
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2533
// Repeatedly try to claim a new region to scan until we succeed, run
// out of regions, or the task aborts.
void G1CMTask::claim_new_region() {
  // Read the note on the claim_region() method on why it might
  // return null with potentially more regions available for
  // claiming and why we have to check out_of_regions() to determine
  // whether we're done or not.
  while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
    // We are going to try to claim a new region. We should have
    // given up on the previous one.
    // Separated the asserts so that we know which one fires.
    assert(_curr_region == nullptr, "invariant");
    assert(_finger == nullptr, "invariant");
    assert(_region_limit == nullptr, "invariant");
    G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
    if (claimed_region != nullptr) {
      // Yes, we managed to claim one
      setup_for_region(claimed_region);
      assert(_curr_region == claimed_region, "invariant");
    }
    // It is important to call the regular clock here. It might take
    // a while to claim a region if, for example, we hit a large
    // block of empty regions. So we need to call the regular clock
    // method once round the loop to make sure it's called
    // frequently enough.
    abort_marking_if_regular_check_fail();
  }
}
2560
2561 void G1CMTask::attempt_stealing() {
2562 // We cannot check whether the global stack is empty, since other
2563 // tasks might be pushing objects to it concurrently.
2564 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2565 "only way to reach here");
2566 while (!has_aborted()) {
2567 G1TaskQueueEntry entry;
2568 if (_cm->try_stealing(_worker_id, entry)) {
2569 process_entry(entry, true /* stolen */);
2570
2571 // And since we're towards the end, let's totally drain the
2572 // local queue and global stack.
2573 drain_local_queue(false);
2574 drain_global_stack(false);
2575 } else {
2576 break;
2577 }
2578 }
2579 }
2580
// Enter the termination protocol. If it completes, marking is finished
// and the global invariants are checked; otherwise more work showed up
// and the task aborts so the caller can restart it.
void G1CMTask::attempt_termination(bool is_serial) {
  // We cannot check whether the global stack is empty, since other
  // tasks might be concurrently pushing objects on it.
  // Separated the asserts so that we know which one fires.
  assert(_cm->out_of_regions(), "only way to reach here");
  assert(_task_queue->size() == 0, "only way to reach here");
  double termination_start_time_ms = os::elapsedTime() * 1000.0;

  // The G1CMTask class also extends the TerminatorTerminator class,
  // hence its should_exit_termination() method will also decide
  // whether to exit the termination protocol or not.
  bool finished = (is_serial ||
                   _cm->terminator()->offer_termination(this));
  _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);

  if (finished) {
    // We're all done.

    // We can now guarantee that the global stack is empty, since
    // all other tasks have finished. We separated the guarantees so
    // that, if a condition is false, we can immediately find out
    // which one.
    guarantee(_cm->out_of_regions(), "only way to reach here");
    guarantee(_cm->mark_stack_empty(), "only way to reach here");
    guarantee(_task_queue->size() == 0, "only way to reach here");
    guarantee(!_cm->has_overflown(), "only way to reach here");
    guarantee(!has_aborted(), "should never happen if termination has completed");
  } else {
    // Apparently there's more work to do. Let's abort this task. We
    // will restart it and hopefully we can find more things to do.
    set_has_aborted();
  }
}
2614
// Post-processing after a marking step aborted. Records timing statistics
// when the abort was caused by a timeout, and — when the abort was caused
// by a global mark stack overflow — coordinates all tasks through a
// two-barrier protocol so that marking data structures can be safely
// re-initialized before the marking phase restarts.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  // If there was no overflow, a timeout (handled above) or external abort
  // is all there is to do; nothing needs re-initialization.
  if (!_cm->has_overflown()) {
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  // Per-task cleanup: happens between the two barriers so that no task is
  // still marking while another resets, and resets are done before anyone
  // proceeds past the second barrier.
  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
      // method to abort the pause and restart concurrent marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2668
2669 /*****************************************************************************
2670
2671 The do_marking_step(time_target_ms, ...) method is the building
2672 block of the parallel marking framework. It can be called in parallel
2673 with other invocations of do_marking_step() on different tasks
2674 (but only one per task, obviously) and concurrently with the
2675 mutator threads, or during remark, hence it eliminates the need
2676 for two versions of the code. When called during remark, it will
2677 pick up from where the task left off during the concurrent marking
2678 phase. Interestingly, tasks are also claimable during evacuation
2679 pauses too, since do_marking_step() ensures that it aborts before
2680 it needs to yield.
2681
2682 The data structures that it uses to do marking work are the
2683 following:
2684
2685 (1) Marking Bitmap. If there are grey objects that appear only
2686 on the bitmap (this happens either when dealing with an overflow
2687 or when the concurrent start pause has simply marked the roots
2688 and didn't push them on the stack), then tasks claim heap
2689 regions whose bitmap they then scan to find grey objects. A
2690 global finger indicates where the end of the last claimed region
2691 is. A local finger indicates how far into the region a task has
2692 scanned. The two fingers are used to determine how to grey an
2693 object (i.e. whether simply marking it is OK, as it will be
2694 visited by a task in the future, or whether it needs to be also
2695 pushed on a stack).
2696
2697 (2) Local Queue. The local queue of the task which is accessed
2698 reasonably efficiently by the task. Other tasks can steal from
2699 it when they run out of work. Throughout the marking phase, a
2700 task attempts to keep its local queue short but not totally
2701 empty, so that entries are available for stealing by other
2702 tasks. Only when there is no more work, a task will totally
2703 drain its local queue.
2704
2705 (3) Global Mark Stack. This handles local queue overflow. During
2706 marking only sets of entries are moved between it and the local
2707 queues, as access to it requires a mutex and more fine-grain
2708 interaction with it which might cause contention. If it
2709 overflows, then the marking phase should restart and iterate
2710 over the bitmap to identify grey objects. Throughout the marking
2711 phase, tasks attempt to keep the global mark stack at a small
2712 length but not totally empty, so that entries are available for
2713 popping by other tasks. Only when there is no more work, tasks
2714 will totally drain the global mark stack.
2715
2716 (4) SATB Buffer Queue. This is where completed SATB buffers are
2717 made available. Buffers are regularly removed from this queue
2718 and scanned for roots, so that the queue doesn't get too
2719 long. During remark, all completed buffers are processed, as
2720 well as the filled in parts of any uncompleted buffers.
2721
2722 The do_marking_step() method tries to abort when the time target
2723 has been reached. There are a few other cases when the
2724 do_marking_step() method also aborts:
2725
2726 (1) When the marking phase has been aborted (after a Full GC).
2727
2728 (2) When a global overflow (on the global stack) has been
2729 triggered. Before the task aborts, it will actually sync up with
2730 the other tasks to ensure that all the marking data structures
2731 (local queues, stacks, fingers etc.) are re-initialized so that
2732 when do_marking_step() completes, the marking phase can
2733 immediately restart.
2734
2735 (3) When enough completed SATB buffers are available. The
2736 do_marking_step() method only tries to drain SATB buffers right
2737 at the beginning. So, if enough buffers are available, the
2738 marking step aborts and the SATB buffers are processed at
2739 the beginning of the next invocation.
2740
(4) To yield. When we have to yield, we abort and yield
right at the end of do_marking_step(). This saves us a lot
of hassle, as by yielding we might allow a Full GC. If this
happens, objects will be compacted underneath our feet, the
heap might shrink, etc. We avoid checking for this by just
aborting and doing the yield right at the end.
2747
2748 From the above it follows that the do_marking_step() method should
2749 be called in a loop (or, otherwise, regularly) until it completes.
2750
2751 If a marking step completes without its has_aborted() flag being
2752 true, it means it has completed the current marking phase (and
2753 also all other marking tasks have done so and have all synced up).
2754
2755 A method called regular_clock_call() is invoked "regularly" (in
2756 sub ms intervals) throughout marking. It is this clock method that
2757 checks all the abort conditions which were mentioned above and
2758 decides when the task should abort. A work-based scheme is used to
2759 trigger this clock method: when the number of object words the
2760 marking phase has scanned or the number of references the marking
phase has visited reach a given limit. Additional invocations to
the clock method have been planted in a few other strategic places
too. The initial reason for the clock method was to avoid gathering
CPU time too regularly, as it is quite expensive. So, once it was
in place, it was natural to piggy-back all the other conditions on
it too and not constantly check them throughout the code.
2767
2768 If do_termination is true then do_marking_step will enter its
2769 termination protocol.
2770
2771 The value of is_serial must be true when do_marking_step is being
2772 called serially (i.e. by the VMThread) and do_marking_step should
2773 skip any synchronization in the termination and overflow code.
2774 Examples include the serial remark code and the serial reference
2775 processing closures.
2776
2777 The value of is_serial must be false when do_marking_step is
2778 being called by any of the worker threads.
2779 Examples include the concurrent marking code (CMMarkingTask),
2780 the MT remark code, and the MT reference processing closures.
2781
2782 *****************************************************************************/
2783
// The central building block of parallel marking (see the large comment
// above). Performs up to time_target_ms worth of marking work: drains SATB
// buffers, scans claimed regions via the bitmap, drains local/global stacks,
// optionally steals from other tasks and enters the termination protocol.
// Aborts (setting has_aborted()) on timeout, overflow, yield requests, etc.;
// callers are expected to invoke it in a loop until it completes.
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the predicted overshoot of previous steps so
  // that on average we hit the requested target.
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main marking loop: scan the current region, keep the queues short,
  // then claim the next region, until there are no regions left or we
  // have been asked to abort.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2893
// Construct a marking task for the given worker. The task_queue is owned
// by the caller (G1ConcurrentMark) and must be non-null; mark_stats backs
// the per-region liveness statistics cache.
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the overshoot predictor with a small initial value so that the
  // very first marking step already shrinks its time target slightly.
  _marking_step_diff_ms.add(0.5);
}
2930
// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

// Address ranges need twice the pointer width, so the header width
// differs between 64-bit and 32-bit builds.
#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
// #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            " %-4s"
#define G1PPRL_TYPE_H_FORMAT          " %4s"
#define G1PPRL_STATE_FORMAT           " %-5s"
#define G1PPRL_STATE_H_FORMAT         " %5s"
#define G1PPRL_BYTE_FORMAT            " %9zu"
#define G1PPRL_BYTE_H_FORMAT          " %9s"
#define G1PPRL_DOUBLE_FORMAT          "%14.1f"
#define G1PPRL_GCEFF_H_FORMAT         " %14s"
// GID columns describe collection set candidate groups.
#define G1PPRL_GID_H_FORMAT           " %9s"
#define G1PPRL_GID_FORMAT             " " UINT32_FORMAT_W(9)
#define G1PPRL_LEN_FORMAT             " " UINT32_FORMAT_W(14)
#define G1PPRL_LEN_H_FORMAT           " %14s"
#define G1PPRL_GID_GCEFF_FORMAT       " %14.1f"
#define G1PPRL_GID_LIVENESS_FORMAT    " %9.2f"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2970
// Prints the table header for the per-region liveness dump (enabled with
// gc+liveness=trace). phase_name identifies the marking phase this dump
// belongs to. Totals are accumulated in do_heap_region() and the summary
// footer is printed by the destructor.
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  // All output in this closure is guarded by the same log level check,
  // so nothing at all is printed when tracing is disabled.
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column titles...
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // ...and their units on a second header line.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
3018
3019 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3020 if (!log_is_enabled(Trace, gc, liveness)) {
3021 return false;
3022 }
3023
3024 const char* type = r->get_type_str();
3025 HeapWord* bottom = r->bottom();
3026 HeapWord* end = r->end();
3027 size_t capacity_bytes = r->capacity();
3028 size_t used_bytes = r->used();
3029 size_t live_bytes = r->live_bytes();
3030 size_t remset_bytes = r->rem_set()->mem_size();
3031 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3032 const char* remset_type = r->rem_set()->get_short_state_str();
3033 uint cset_group_id = r->rem_set()->has_cset_group()
3034 ? r->rem_set()->cset_group_id()
3035 : G1CSetCandidateGroup::NoRemSetId;
3036
3037 _total_used_bytes += used_bytes;
3038 _total_capacity_bytes += capacity_bytes;
3039 _total_live_bytes += live_bytes;
3040 _total_remset_bytes += remset_bytes;
3041 _total_code_roots_bytes += code_roots_bytes;
3042
3043 // Print a line for this particular region.
3044 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3045 G1PPRL_TYPE_FORMAT
3046 G1PPRL_ADDR_BASE_FORMAT
3047 G1PPRL_BYTE_FORMAT
3048 G1PPRL_BYTE_FORMAT
3049 G1PPRL_STATE_FORMAT
3050 G1PPRL_BYTE_FORMAT
3051 G1PPRL_GID_FORMAT,
3052 type, p2i(bottom), p2i(end),
3053 used_bytes, live_bytes,
3054 remset_type, code_roots_bytes,
3055 cset_group_id);
3056
3057 return false;
3058 }
3059
// Prints the candidate group table and the summary footer with the totals
// accumulated during region iteration, after folding in the remembered set
// memory that is not attributed to individual regions.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  // Account for the free-list pool backing the per-region card sets.
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // add static memory usages to remembered set sizes
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  // This also adds each group's card set memory to _total_remset_bytes,
  // so it must run before the summary line below is printed.
  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          percent_of(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_live_bytes),
                          percent_of(_total_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_code_roots_bytes));
}
3089
3090 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3091 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3092 G1PPRL_GID_FORMAT
3093 G1PPRL_LEN_FORMAT
3094 G1PPRL_GID_GCEFF_FORMAT
3095 G1PPRL_GID_LIVENESS_FORMAT
3096 G1PPRL_BYTE_FORMAT
3097 G1PPRL_TYPE_H_FORMAT,
3098 group->group_id(),
3099 group->length(),
3100 group->length() > 0 ? group->gc_efficiency() : 0.0,
3101 group->length() > 0 ? group->liveness_percent() : 0.0,
3102 group->card_set()->mem_size(),
3103 type);
3104 _total_remset_bytes += group->card_set()->mem_size();
3105 }
3106
3107 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3108 for (G1CSetCandidateGroup* group : gl) {
3109 log_cset_candidate_group_add_total(group, type);
3110 }
3111 }
3112
3113 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3114 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3115 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3116 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3117 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3118 G1PPRL_GID_H_FORMAT
3119 G1PPRL_LEN_H_FORMAT
3120 G1PPRL_GCEFF_H_FORMAT
3121 G1PPRL_BYTE_H_FORMAT
3122 G1PPRL_BYTE_H_FORMAT
3123 G1PPRL_TYPE_H_FORMAT,
3124 "groud-id", "num-regions",
3125 "gc-eff", "liveness",
3126 "remset", "type");
3127
3128 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3129 G1PPRL_GID_H_FORMAT
3130 G1PPRL_LEN_H_FORMAT
3131 G1PPRL_GCEFF_H_FORMAT
3132 G1PPRL_BYTE_H_FORMAT
3133 G1PPRL_BYTE_H_FORMAT
3134 G1PPRL_TYPE_H_FORMAT,
3135 "", "",
3136 "(bytes/ms)", "%",
3137 "(bytes)", "");
3138
3139 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3140
3141 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3142
3143 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3144 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3145 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3146 }