1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectionSetChooser.hpp"
34 #include "gc/g1/g1CollectorState.hpp"
35 #include "gc/g1/g1ConcurrentMark.inline.hpp"
36 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
37 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
38 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
39 #include "gc/g1/g1ConcurrentRefine.hpp"
40 #include "gc/g1/g1HeapRegion.inline.hpp"
41 #include "gc/g1/g1HeapRegionManager.hpp"
42 #include "gc/g1/g1HeapRegionPrinter.hpp"
43 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
44 #include "gc/g1/g1HeapRegionSet.inline.hpp"
45 #include "gc/g1/g1HeapVerifier.hpp"
46 #include "gc/g1/g1OopClosures.inline.hpp"
47 #include "gc/g1/g1Policy.hpp"
48 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
49 #include "gc/g1/g1ThreadLocalData.hpp"
50 #include "gc/g1/g1Trace.hpp"
51 #include "gc/shared/gcId.hpp"
52 #include "gc/shared/gcTimer.hpp"
53 #include "gc/shared/gcTraceTime.inline.hpp"
54 #include "gc/shared/gcVMOperations.hpp"
55 #include "gc/shared/partialArraySplitter.inline.hpp"
56 #include "gc/shared/partialArrayState.hpp"
57 #include "gc/shared/partialArrayTaskStats.hpp"
58 #include "gc/shared/referencePolicy.hpp"
59 #include "gc/shared/suspendibleThreadSet.hpp"
60 #include "gc/shared/taskqueue.inline.hpp"
61 #include "gc/shared/taskTerminator.hpp"
62 #include "gc/shared/weakProcessor.inline.hpp"
63 #include "gc/shared/workerPolicy.hpp"
64 #include "jvm.h"
65 #include "logging/log.hpp"
66 #include "memory/allocation.hpp"
67 #include "memory/iterator.hpp"
68 #include "memory/metaspaceUtils.hpp"
69 #include "memory/resourceArea.hpp"
70 #include "memory/universe.hpp"
71 #include "nmt/memTracker.hpp"
72 #include "oops/access.inline.hpp"
73 #include "oops/oop.inline.hpp"
74 #include "oops/oopCast.inline.hpp"
75 #include "runtime/globals_extension.hpp"
76 #include "runtime/handles.inline.hpp"
77 #include "runtime/java.hpp"
78 #include "runtime/orderAccess.hpp"
79 #include "runtime/os.hpp"
80 #include "runtime/prefetch.inline.hpp"
81 #include "runtime/threads.hpp"
82 #include "utilities/align.hpp"
83 #include "utilities/checkedCast.hpp"
84 #include "utilities/formatBuffer.hpp"
85 #include "utilities/growableArray.hpp"
86 #include "utilities/powerOfTwo.hpp"
87
// Default-construct in an unbound state; initialize() must be called before use.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
89
// Construct a closure bound to the given (non-null) concurrent mark instance.
G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}
93
// Late binding of the concurrent mark instance for default-constructed
// closures. May be called at most once.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
99
// Process one marked address found while iterating the mark bitmap.
// Returns false to abort the iteration if the owning task has aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  // The address must lie between the task's local finger and the global finger.
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
116
G1CMMarkStack::G1CMMarkStack() :
  _chunk_allocator() {
  // Start with empty chunk and free lists; backing memory is reserved
  // later in initialize().
  set_empty();
}
121
122 size_t G1CMMarkStack::capacity_alignment() {
123 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
124 }
125
// Derive the mark stack sizing from the MarkStackSize/MarkStackSizeMax
// flags (rounding both to whole chunks), publish the rounded values back
// into the flags, and initialize the chunk allocator. Returns false if the
// initial backing memory could not be reserved.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  // Number of G1TaskQueueEntry elements per chunk.
  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  // Convert the entry capacities into whole numbers of chunks.
  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  // The allocator requires a power-of-2 initial chunk count; the maximum
  // must be at least as large as the rounded-up initial count.
  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Cap the resulting entry counts so they stay below INT_MAX.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  // Publish the rounded values so the flags reflect the actual sizing.
  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
154
// Hand out the next chunk from the backing memory, growing the bucket
// array on demand when growing is allowed. Returns null when the stack is
// at maximum capacity, growing is disabled, or expansion fails.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  // Cheap racy pre-check; the fetch_then_add below is the authoritative claim.
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  size_t cur_idx = _size.fetch_then_add(1u);

  // Another thread may have claimed the last chunk between the pre-check
  // and our increment.
  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    // Double-checked locking: re-test under the lock so that only one
    // thread performs the expansion for this bucket.
    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  size_t bucket_idx = get_bucket_index(cur_idx);
  // Placement-new the chunk into its slot within the bucket's backing array.
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
187
// All-zero construction; real setup happens in initialize().
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }
197
198 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
199 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
200
201 _min_capacity = initial_capacity;
202 _max_capacity = max_capacity;
203 _num_buckets = get_bucket(_max_capacity) + 1;
204
205 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
206
207 for (size_t i = 0; i < _num_buckets; i++) {
208 _buckets[i].store_relaxed(nullptr);
209 }
210
211 size_t new_capacity = bucket_size(0);
212
213 if (!reserve(new_capacity)) {
214 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
215 return false;
216 }
217 return true;
218 }
219
220 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
221 if (_capacity == _max_capacity) {
222 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
223 return false;
224 }
225
226 size_t old_capacity = _capacity;
227 desired_capacity = MIN2(desired_capacity, _max_capacity);
228
229 if (reserve(desired_capacity)) {
230 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
231 old_capacity, desired_capacity);
232 return true;
233 }
234 return false;
235 }
236
237 bool G1CMMarkStack::ChunkAllocator::try_expand() {
238 size_t new_capacity = _capacity * 2;
239 return try_expand_to(new_capacity);
240 }
241
242 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
243 if (_buckets == nullptr) {
244 return;
245 }
246
247 for (size_t i = 0; i < _num_buckets; i++) {
248 if (_buckets[i].load_relaxed() != nullptr) {
249 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
250 _buckets[i].store_relaxed(nullptr);
251 }
252 }
253
254 FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
255 }
256
// Reserve backing memory so the allocator can hold new_capacity chunks,
// allocating every not-yet-present bucket up to the one covering the last
// chunk index. Returns false if any bucket allocation fails (the buckets
// allocated so far are kept).
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    _capacity += bucket_capacity;
    // Release-publish the bucket so racing readers (load_acquire) observe
    // fully allocated backing memory.
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
291
// Best-effort capacity doubling; expansion failure is tolerated (the
// allocator logs the condition).
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}
295
// Push elem onto the given singly-linked list. The callers in this file
// (add_chunk_to_chunk_list/add_chunk_to_free_list) hold the corresponding
// list lock, hence the relaxed accesses.
void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
  elem->next = list->load_relaxed();
  list->store_relaxed(elem);
}
300
// Add a filled chunk to the list of chunks holding live mark stack data,
// keeping the chunk count in sync.
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}
306
// Return an empty chunk to the free list for later reuse.
void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}
311
312 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
313 TaskQueueEntryChunk* result = list->load_relaxed();
314 if (result != nullptr) {
315 list->store_relaxed(list->load_relaxed()->next);
316 }
317 return result;
318 }
319
// Pop the most recently added data chunk, keeping the chunk count in sync.
// Returns null if no data chunks remain.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != nullptr) {
    _chunks_in_chunk_list--;
  }
  return result;
}
328
// Take a reusable chunk off the free list, or null if none is available.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
333
334 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
335 // Get a new chunk.
336 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
337
338 if (new_chunk == nullptr) {
339 // Did not get a chunk from the free list. Allocate from backing memory.
340 new_chunk = _chunk_allocator.allocate_new_chunk();
341
342 if (new_chunk == nullptr) {
343 return false;
344 }
345 }
346
347 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
348
349 add_chunk_to_chunk_list(new_chunk);
350
351 return true;
352 }
353
354 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
355 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
356
357 if (cur == nullptr) {
358 return false;
359 }
360
361 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
362
363 add_chunk_to_free_list(cur);
364 return true;
365 }
366
// Drop all chunks and reset the allocator's fill level. Uses relaxed
// stores, so callers must ensure no concurrent pushes/pops are in flight.
void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _chunk_list.store_relaxed(nullptr);
  _free_list.store_relaxed(nullptr);
  _chunk_allocator.reset();
}
373
// Preallocate space for up to max_regions root MemRegions; entries are
// recorded via add() and handed out for scanning via claim_next().
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
  _root_regions(MemRegion::create_array(max_regions, mtGC)),
  _max_regions(max_regions),
  _num_root_regions(0),
  _claimed_root_regions(0),
  _scan_in_progress(false),
  _should_abort(false) { }
381
// Release the backing MemRegion array.
G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}
385
// Forget all recorded root regions.
void G1CMRootMemRegions::reset() {
  _num_root_regions.store_relaxed(0);
}
389
// Record [start, end) as a root region to scan. Only called at a
// safepoint; the slot is claimed with an atomic bump so multiple threads
// may add concurrently.
void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
  assert_at_safepoint();
  size_t idx = _num_root_regions.fetch_then_add(1u);
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %zu", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
  _root_regions[idx].set_start(start);
  _root_regions[idx].set_end(end);
}
399
// Arm the claiming machinery for a new scan round. The scan only counts
// as "in progress" when there is at least one region to scan.
void G1CMRootMemRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  _scan_in_progress.store_relaxed(num_root_regions() > 0);

  _claimed_root_regions.store_relaxed(0);
  _should_abort.store_relaxed(false);
}
408
// Claim the next unclaimed root region, or return null when everything is
// claimed or the scan is being aborted. Safe to call concurrently from
// multiple worker threads.
const MemRegion* G1CMRootMemRegions::claim_next() {
  if (_should_abort.load_relaxed()) {
    // If someone has set the should_abort flag, we return null to
    // force the caller to bail out of their loop.
    return nullptr;
  }

  uint local_num_root_regions = num_root_regions();
  if (_claimed_root_regions.load_relaxed() >= local_num_root_regions) {
    return nullptr;
  }

  // The atomic bump may race past the end; only indices inside the
  // recorded range constitute a successful claim.
  size_t claimed_index = _claimed_root_regions.fetch_then_add(1u);
  if (claimed_index < local_num_root_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}
427
// Number of root regions recorded so far.
uint G1CMRootMemRegions::num_root_regions() const {
  return (uint)_num_root_regions.load_relaxed();
}
431
432 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
433 uint local_num_root_regions = num_root_regions();
434 for (uint i = 0; i < local_num_root_regions; i++) {
435 if (_root_regions[i].equals(mr)) {
436 return true;
437 }
438 }
439 return false;
440 }
441
// Mark the scan as finished and wake any threads blocked in
// wait_until_scan_finished().
void G1CMRootMemRegions::notify_scan_done() {
  MutexLocker x(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress.store_relaxed(false);
  G1RootRegionScan_lock->notify_all();
}
447
// Abort an in-progress scan; waiters are released as if it had completed.
void G1CMRootMemRegions::cancel_scan() {
  notify_scan_done();
}
451
// Called when root region scanning completed. Unless aborting, all
// regions must have been claimed by now.
void G1CMRootMemRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  if (!_should_abort.load_relaxed()) {
    assert(_claimed_root_regions.load_relaxed() >= num_root_regions(),
           "we should have claimed all root regions, claimed %zu, length = %u",
           _claimed_root_regions.load_relaxed(), num_root_regions());
  }

  notify_scan_done();
}
463
// Block until root region scanning is done. Returns true if this call had
// to wait, false if no scan was in progress.
bool G1CMRootMemRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    // Standard monitor wait loop; notify_scan_done() signals completion.
    MonitorLocker ml(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      ml.wait();
    }
  }
  return true;
}
477
// Construct the global concurrent marking state. Heavy-weight pieces
// (marking thread, worker gang, per-task structures) are created later in
// fully_initialize(); here we size data structures off the maximum region
// count and hook the mark bitmap up to its backing storage.
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  // Raw arrays; the elements are value-initialized in fully_initialize().
  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
532
// Complete the construction started in the constructor: create and start
// the marking thread, the concurrent worker gang, the global mark stack,
// the per-worker tasks and queues, and value-initialize the per-region
// statistics arrays. Does nothing if already fully initialized.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Value-initialize (zero/null) the raw arrays allocated in the constructor.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
  ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};

  reset_at_marking_complete();
}
576
577 bool G1ConcurrentMark::in_progress() const {
578 return is_fully_initialized() ? _cm_thread->in_progress() : false;
579 }
580
// Accessor for the shared partial-array state manager used by the tasks.
PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
  return _partial_array_state_manager;
}
584
// The concurrent mark thread; only valid after fully_initialize().
G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
  assert(is_fully_initialized(), "must be");
  return _cm_thread;
}
589
// Reset the global marking state in preparation for a new marking cycle.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  // Re-initialize the top-at-rebuild-start entries and clear the
  // per-region mark statistics.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
  for (uint i = 0; i < max_num_regions; i++) {
    _region_mark_stats[i].clear();
  }

  _root_regions.reset();
}
609
// Clear all marking statistics associated with the given region: the
// per-task caches, the top-at-rebuild-start entry, and the global
// per-region statistics.
void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
  uint region_idx = r->hrm_index();
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
  _region_mark_stats[region_idx].clear();
}
618
// A humongous object has been eagerly reclaimed: clear its mark bit and,
// if marking or rebuild is running, drop the statistics gathered for all
// regions the object spanned.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  // Statistics are only maintained while a mark or rebuild is in progress.
  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
636
// Prepare the marking state for (re-)starting marking work, either for a
// fresh cycle or after a mark stack overflow forced a restart.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    // Per-region statistics accumulated so far are unreliable after an
    // overflow; discard them.
    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Restart the global finger at the bottom of the heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
657
// Record the number of tasks active in the upcoming phase and size the
// terminator and the two overflow barriers accordingly.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}
668
// Set the active task count and whether the upcoming phase runs
// concurrently. STW phases additionally verify that marking has consumed
// all regions.
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
682
683 #if TASKQUEUE_STATS
// Log and then reset the task queue and partial-array statistics
// (compiled only when TASKQUEUE_STATS is enabled).
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("G1ConcurrentMark Oop Queue");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "G1ConcurrentMark Partial Array Task Stats");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
699 #endif
700
// Return the global marking state to its idle defaults once a marking
// cycle is complete (or was never started).
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}
708
// Never expected to run: the G1ConcurrentMark instance lives for the
// lifetime of the VM.
G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
716
// Worker task that clears the concurrent marking bitmap in chunks of
// chunk_size() bytes per step, optionally yielding to safepoints between
// steps (suspendible mode).
class G1ClearBitMapTask : public WorkerTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->in_undo_mark();
    }

    // Performs a yield check (suspendible mode only) and reports whether
    // marking has been aborted in the meantime.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    // Upper bound of the bitmap range that actually needs clearing for
    // region r; r->end() in the general case.
    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clear the region's bitmap range in chunk-sized steps, yielding and
    // checking for abort between steps in suspendible mode. Returning
    // true terminates the region iteration early.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->in_progress(), "invariant");
        assert(!suspendible() || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }
};
822
823 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
824 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
825
826 size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
827 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
828
829 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
830
831 G1ClearBitMapTask cl(this, num_workers, may_yield);
832
833 log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
834 workers->run_task(&cl, num_workers);
835 }
836
837 void G1ConcurrentMark::cleanup_for_next_mark() {
838 // Make sure that the concurrent mark thread looks to still be in
839 // the current cycle.
840 guarantee(is_fully_initialized(), "should be initializd");
841 guarantee(in_progress(), "invariant");
842
843 // We are finishing up the current cycle by clearing the next
844 // marking bitmap and getting it ready for the next cycle. During
845 // this time no other cycle can start. So, let's make sure that this
846 // is the case.
847 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
848
849 clear_bitmap(_concurrent_workers, true);
850
851 reset_partial_array_state_manager();
852
853 // Repeat the asserts from above.
854 guarantee(is_fully_initialized(), "should be initializd");
855 guarantee(in_progress(), "invariant");
856 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
857 }
858
// Recycle the partial-array scanning state: every task must unregister
// its splitter before the manager is reset, then re-register afterwards.
void G1ConcurrentMark::reset_partial_array_state_manager() {
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->unregister_partial_array_splitter();
  }

  partial_array_state_manager()->reset();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->register_partial_array_splitter();
  }
}
870
// STW variant of bitmap clearing, used on the VM thread (e.g. by full
// collections); temporarily activates all created workers for speed.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
  assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible the number of active workers are temporarily
  // increased to include all currently created workers.
  WithActiveWorkers update(workers, workers->created_workers());
  clear_bitmap(workers, false);
}
880
// Batched STW task run before a concurrent start: resets the global
// marking state (serially) and notes the start of marking per region
// (in parallel).
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
890
// Serial sub-task that resets the global concurrent marking state.
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  // Serial task: always a single worker's worth of work.
  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};
899
// Parallel sub-task that records, per region, the top-at-mark-start value
// used by the upcoming marking cycle.
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {

  class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;

  public:
    NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }

    bool do_heap_region(G1HeapRegion* r) override {
      // Only old/humongous regions outside the collection set (and not
      // candidates for it) keep a real top-at-mark-start; all others are reset.
      if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
        _cm->update_top_at_mark_start(r);
      } else {
        _cm->reset_top_at_mark_start(r);
      }
      return false;
    }
  } _region_cl;

  G1HeapRegionClaimer _claimer;
public:
  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }

  double worker_cost() const override {
    // The work done per region is very small, therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
    const double regions_per_thread = 1000;
    return _claimer.n_regions() / regions_per_thread;
  }

  void set_max_workers(uint max_workers) override {
    _claimer.set_n_workers(max_workers);
  }

  void do_work(uint worker_id) override {
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
  }
};
937
// Serial reset of the global marking state before the cycle starts.
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
942
943 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
944 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
945 add_serial_task(new ResetMarkingStateTask(cm));
946 add_parallel_task(new NoteStartOfMarkTask());
947 };
948
// Safepoint work executed just before a concurrent mark cycle starts:
// begins the code cache marking cycle, checks that CLD claim marks are
// clear, and runs the G1PreConcurrentStartTask batch.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  _gc_tracer_cm->set_gc_cause(cause);
}
961
// Starts a full concurrent marking cycle: enables concurrent reference
// discovery, activates all SATB queues, prepares root region scanning and
// finally signals the concurrent mark thread to begin.
void G1ConcurrentMark::start_full_concurrent_cycle() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  rp->start_discovery(false /* always_clear */);

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // concurrent start pause to update the heap end, if the heap expands
  // during it. No need to call it here.

  // Signal the thread to start work.
  cm_thread()->start_full_mark();
}
983
// Starts an undo cycle: cancels root region scanning and signals the
// concurrent mark thread to undo the effects of the concurrent start pause.
void G1ConcurrentMark::start_undo_concurrent_cycle() {
  root_regions()->cancel_scan();

  // Signal the thread to start work.
  cm_thread()->start_undo_mark();
}
990
// Called when the concurrent cycle has completed; returns the concurrent
// mark thread to its idle state.
void G1ConcurrentMark::notify_concurrent_cycle_completed() {
  cm_thread()->set_idle();
}
994
// Stops the concurrent mark thread; only if initialization fully completed,
// otherwise there is no running thread to stop.
void G1ConcurrentMark::stop() {
  if (is_fully_initialized()) {
    cm_thread()->stop();
  }
}
1000
1001 /*
1002 * Notice that in the next two methods, we actually leave the STS
1003 * during the barrier sync and join it immediately afterwards. If we
1004 * do not do this, the following deadlock can occur: one thread could
1005 * be in the barrier sync code, waiting for the other thread to also
1006 * sync up, whereas another one could be trying to yield, while also
1007 * waiting for the other threads to sync up too.
1008 *
1009 * Note, however, that this code is also used during remark and in
1010 * this case we should not attempt to leave / enter the STS, otherwise
1011 * we'll either hit an assert (debug / fastdebug) or deadlock
1012 * (product). So we should only leave / enter the STS if we are
1013 * operating concurrently.
1014 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
1020 */
1021
// First rendezvous of all marking workers after a mark stack overflow,
// before the marking state is reset for restart. See the comment block
// above about STS handling around the barrier.
void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    // Leave the STS only while blocked in the barrier; during remark
    // concurrent() is false and we stay joined.
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}
1038
// Second rendezvous of all marking workers, entered after the marking state
// has been reset for restart. See the comment block above about STS handling.
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1045
1046 class G1CMConcurrentMarkingTask : public WorkerTask {
1047 G1ConcurrentMark* _cm;
1048
1049 public:
1050 void work(uint worker_id) {
1051 ResourceMark rm;
1052
1053 SuspendibleThreadSetJoiner sts_join;
1054
1055 assert(worker_id < _cm->active_tasks(), "invariant");
1056
1057 G1CMTask* task = _cm->task(worker_id);
1058 task->record_start_time();
1059 if (!_cm->has_aborted()) {
1060 do {
1061 task->do_marking_step(G1ConcMarkStepDurationMillis,
1062 true /* do_termination */,
1063 false /* is_serial*/);
1064
1065 _cm->do_yield_check();
1066 } while (!_cm->has_aborted() && task->has_aborted());
1067 }
1068 task->record_end_time();
1069 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1070 }
1071
1072 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1073 WorkerTask("Concurrent Mark"), _cm(cm) { }
1074
1075 ~G1CMConcurrentMarkingTask() { }
1076 };
1077
1078 uint G1ConcurrentMark::calc_active_marking_workers() {
1079 uint result = 0;
1080 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1081 result = _max_concurrent_workers;
1082 } else {
1083 result =
1084 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1085 1, /* Minimum workers */
1086 _num_concurrent_workers,
1087 Threads::number_of_non_daemon_threads());
1088 // Don't scale the result down by scale_concurrent_workers() because
1089 // that scaling has already gone into "_max_concurrent_workers".
1090 }
1091 assert(result > 0 && result <= _max_concurrent_workers,
1092 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1093 _max_concurrent_workers, result);
1094 return result;
1095 }
1096
// Scans all objects in the given root MemRegion, which starts at the owning
// region's TAMS, applying the root region scan closure to each.
void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
#ifdef ASSERT
  HeapWord* last = region->last();
  G1HeapRegion* hr = _g1h->heap_region_containing(last);
  assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
         "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
  assert(top_at_mark_start(hr) == region->start(),
         "MemRegion start should be equal to TAMS");
#endif

  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = region->start();
  const HeapWord* end = region->end();
  // Walk the objects linearly, prefetching ahead to hide memory latency.
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = cast_to_oop(curr);
    size_t size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}
1120
1121 class G1CMRootRegionScanTask : public WorkerTask {
1122 G1ConcurrentMark* _cm;
1123 public:
1124 G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1125 WorkerTask("G1 Root Region Scan"), _cm(cm) { }
1126
1127 void work(uint worker_id) {
1128 G1CMRootMemRegions* root_regions = _cm->root_regions();
1129 const MemRegion* region = root_regions->claim_next();
1130 while (region != nullptr) {
1131 _cm->scan_root_region(region, worker_id);
1132 region = root_regions->claim_next();
1133 }
1134 }
1135 };
1136
// Concurrently scans all registered root regions, using at most one worker
// per root region. No-op if there is nothing to scan.
void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    // Assign one worker to each root-region but subject to the max constraint.
    const uint num_workers = MIN2(root_regions()->num_root_regions(),
                                  _max_concurrent_workers);

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), num_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, num_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}
1159
// Blocks until root region scanning has finished; forwards to the root
// region set.
bool G1ConcurrentMark::wait_until_root_region_scan_finished() {
  return root_regions()->wait_until_scan_finished();
}
1163
// Registers the [TAMS, top) part of the given region as a root region to be
// scanned before marking proper starts.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1167
// Returns whether the [TAMS, top) part of the given region is currently
// registered as a root region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1171
// Aborts root region scanning and waits for any in-flight scanning to drain.
void G1ConcurrentMark::root_region_scan_abort_and_wait() {
  root_regions()->abort();
  root_regions()->wait_until_scan_finished();
}
1176
// Reports the start of the concurrent mark cycle to timing and tracing.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1184
// Number of completed mark cycles so far; read with relaxed ordering.
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1188
// Reports the end of the concurrent cycle to timing/tracing and updates
// cycle bookkeeping; mark_cycle_completed indicates whether marking itself
// completed (and the completed-cycles counter should advance).
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->collector_state()->set_clear_bitmap_in_progress(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    // Marking was aborted, e.g. by a full collection.
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1207
// Performs the concurrent marking phase proper, running the marking task on
// the calculated number of concurrent workers.
void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow.store_relaxed(false);

  uint active_workers = calc_active_marking_workers();

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->set_active_workers(active_workers);
  log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());

  _num_concurrent_workers = active_workers;

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}
1228
// Maps a VerifyLocation to a human-readable string. The array entries must
// be kept in sync with the declaration order of the VerifyLocation enum.
const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
  static const char* location_strings[] = { "Remark Before",
                                            "Remark After",
                                            "Remark Overflow",
                                            "Cleanup Before",
                                            "Cleanup After" };
  return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
}
1237
// Runs optional heap verification during marking pauses, tagging the output
// with the given verification location.
void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
                                           VerifyLocation location) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  const char* caller = verify_location_string(location);

  if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(VerifyOption::G1UseConcMarking, buffer);

    // Only check bitmap in Remark, and not at After-Verification because the regions
    // already have their TAMS'es reset.
    if (location != VerifyLocation::RemarkAfter) {
      verifier->verify_bitmap_clear(true /* above_tams_only */);
    }
  }
}
1262
1263 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1264 G1CollectedHeap* _g1h;
1265 public:
1266 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1267
1268 bool do_object_b(oop obj) {
1269 return !_g1h->is_obj_dead(obj);
1270 }
1271 };
1272
// The Remark pause. If marking finished (no mark stack overflow): processes
// weak references, unloads classes/code, selects regions for remembered set
// rebuild and reclaims empty regions. On overflow it instead resets state so
// that concurrent marking restarts.
void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);

  {
    // Finish marking: drain remaining SATB buffers and marking stacks.
    GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work();

    // Unload Klasses, String, Code Cache, etc.
    if (ClassUnloadingWithConcurrentMark) {
      G1CMIsAliveClosure is_alive(this);
      _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
    }

    SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
      flush_all_task_caches();
    }

    // All marking completed. Check bitmap now as we will start to reset TAMSes
    // in parallel below so that we can not do this in the After-Remark verification.
    _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);

    {
      GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);

      G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
      uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
                                    _g1h->workers()->active_workers());
      log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
      _g1h->workers()->run_task(&cl, num_workers);

      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_committed_regions(), cl.total_selected_for_rebuild());

      // Remember whether the concurrent rebuild phase has any work to do.
      _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);

      if (_needs_remembered_set_rebuild) {
        // Prune rebuild candidates based on G1HeapWastePercent.
        // Improves rebuild time in addition to remembered set memory usage.
        G1CollectionSetChooser::build(_g1h->workers(), _g1h->num_committed_regions(), _g1h->policy()->candidates());
      }
    }

    if (log_is_enabled(Trace, gc, liveness)) {
      G1PrintRegionLivenessInfoClosure cl("Post-Marking");
      _g1h->heap_region_iterate(&cl);
    }

    // Potentially, some empty-regions have been reclaimed; make this a
    // "collection" so that pending allocation can retry before attempting a
    // GC pause.
    _g1h->increment_total_collections();

    // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
    // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
    // triggered, it likely means there are very few regular GCs, making resizing
    // based on gc heuristics less effective.
    if (_g1h->last_gc_was_periodic()) {
      _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state (except bitmaps) since marking completed.
    reset_at_marking_complete();

    G1CollectedHeap::finish_codecache_marking_cycle();

    {
      GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
      G1ObjectCountIsAliveClosure is_alive(_g1h);
      _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
    }
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow.store_relaxed(true);

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  _g1h->update_perf_counter_cpu_time();

  policy->record_concurrent_mark_remark_end();
}
1397
// Recomputes metaspace and heap monitoring sizes after regions may have
// been reclaimed during the Remark pause.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1409
// Region closure transitioning remembered set tracking state after the
// remembered set rebuild phase has finished.
class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
  G1CollectedHeap* _g1h;

public:
  G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  bool do_heap_region(G1HeapRegion* r) override {
    // Update the remset tracking state from updating to complete
    // if remembered sets have been rebuilt.
    _g1h->policy()->remset_tracker()->update_after_rebuild(r);
    return false; // Never abort the iteration.
  }
};
1423
// The Cleanup pause: finalizes remembered set tracking after the rebuild
// phase (if one was needed) and performs verification plus policy
// bookkeeping at the end of the marking cycle.
void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);

  if (needs_remembered_set_rebuild()) {
    // Update the remset tracking information as well as marking all regions
    // as fully parsable.
    GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
    G1UpdateRegionsAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  } else {
    log_debug(gc, phases)("No Remembered Sets to update after rebuild");
  }

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);

  // Local statistics
  _cleanup_times.add((os::elapsedTime() - start) * 1000.0);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
    policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
  }
}
1459
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1461 // Uses the G1CMTask associated with a worker thread (for serial reference
1462 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1463 // trace referent objects.
1464 //
1465 // Using the G1CMTask and embedded local queues avoids having the worker
1466 // threads operating on the global mark stack. This reduces the risk
1467 // of overflowing the stack - which we would rather avoid at this late
1468 // state. Also using the tasks' local queues removes the potential
1469 // of the workers interfering with each other that could occur if
1470 // operating on the global stack.
1471
// Keeps referents alive by marking them through the worker's G1CMTask,
// periodically draining the task's local structures. See the comment block
// above for the rationale.
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  uint _ref_counter_limit; // Drain interval, from G1RefProcDrainInterval.
  uint _ref_counter;       // Countdown until the next drain.
  bool _is_serial;         // True when used by single-threaded reference processing.
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop( oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    // On overflow the whole marking will be redone; no point in more work.
    if (_cm->has_overflown()) {
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1526
1527 // 'Drain' oop closure used by both serial and parallel reference processing.
1528 // Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
1530 // do_marking_step routine, with an unbelievably large timeout value,
1531 // to drain the marking data structures of the remaining entries
1532 // added by the 'keep alive' oop closure above.
1533
// Completely drains the worker's marking data structures; see the comment
// block above.
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial; // True when used by single-threaded reference processing.
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1569
// Proxy task adapting the G1 concurrent mark closures to the shared
// reference processing framework, for both single- and multi-threaded
// execution models.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h;
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In the single-threaded model everything runs on the task for worker 0.
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1598
1599 void G1ConcurrentMark::weak_refs_work() {
1600 ResourceMark rm;
1601
1602 {
1603 GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1604
1605 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1606
1607 // See the comment in G1CollectedHeap::ref_processing_init()
1608 // about how reference processing currently works in G1.
1609
1610 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1611
1612 // Prefer to grow the stack until the max capacity.
1613 _global_mark_stack.set_should_grow();
1614
1615 // Parallel processing task executor.
1616 G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1617 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1618
1619 // Process the weak references.
1620 const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1621 _gc_tracer_cm->report_gc_reference_stats(stats);
1622 pt.print_all_references();
1623
1624 // The do_oop work routines of the keep_alive and drain_marking_stack
1625 // oop closures will set the has_overflown flag if we overflow the
1626 // global marking stack.
1627
1628 assert(has_overflown() || _global_mark_stack.is_empty(),
1629 "Mark stack should be empty (unless it has overflown)");
1630 }
1631
1632 if (has_overflown()) {
1633 // We can not trust g1_is_alive and the contents of the heap if the marking stack
1634 // overflowed while processing references. Exit the VM.
1635 fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1636 "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1637 "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1638 _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1639 return;
1640 }
1641
1642 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1643
1644 {
1645 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1646 G1CMIsAliveClosure is_alive(this);
1647 WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1648 }
1649 }
1650
1651 class G1PrecleanYieldClosure : public YieldClosure {
1652 G1ConcurrentMark* _cm;
1653
1654 public:
1655 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1656
1657 virtual bool should_return() {
1658 return _cm->has_aborted();
1659 }
1660
1661 virtual bool should_return_fine_grain() {
1662 _cm->do_yield_check();
1663 return _cm->has_aborted();
1664 }
1665 };
1666
// Concurrently precleans discovered references to reduce the reference
// processing work left for the Remark pause. Runs single-threaded inside
// the suspendible thread set.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  // Single-threaded, concurrent phase.
  set_concurrency_and_phase(1, true);

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1686
1687 // Closure for marking entries in SATB buffers.
1688 class G1CMSATBBufferClosure : public SATBBufferClosure {
1689 private:
1690 G1CMTask* _task;
1691 G1CollectedHeap* _g1h;
1692
1693 // This is very similar to G1CMTask::deal_with_reference, but with
1694 // more relaxed requirements for the argument, so this must be more
1695 // circumspect about treating the argument as an object.
1696 void do_entry(void* entry) const {
1697 _task->increment_refs_reached();
1698 oop const obj = cast_to_oop(entry);
1699 _task->make_reference_grey(obj);
1700 }
1701
1702 public:
1703 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1704 : _task(task), _g1h(g1h) { }
1705
1706 virtual void do_buffer(void** buffer, size_t size) {
1707 for (size_t i = 0; i < size; ++i) {
1708 do_entry(buffer[i]);
1709 }
1710 }
1711 };
1712
1713 class G1RemarkThreadsClosure : public ThreadClosure {
1714 G1SATBMarkQueueSet& _qset;
1715
1716 public:
1717 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1718 _qset(G1BarrierSet::satb_mark_queue_set()) {}
1719
1720 void do_thread(Thread* thread) {
1721 // Transfer any partial buffer to the qset for completed buffer processing.
1722 _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1723 }
1724 };
1725
// Parallel remark task: first flushes all threads' partial SATB buffers,
// then finishes marking by draining SATB queues and marking stacks.
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      // Flush each thread's partial SATB buffer for processing below.
      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    do {
      // Unbounded time target: run until completely done or overflow.
      task->do_marking_step(1000000000.0 /* something very large */,
                            true /* do_termination */,
                            false /* is_serial */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1756
// Finishes marking during the Remark pause: processes remaining SATB
// buffers and drains the marking stacks using all active workers.
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // Unless marking overflowed, all completed SATB buffers must be processed.
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1787
1788 void G1ConcurrentMark::flush_all_task_caches() {
1789 size_t hits = 0;
1790 size_t misses = 0;
1791 for (uint i = 0; i < _max_num_tasks; i++) {
1792 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1793 hits += stats.first;
1794 misses += stats.second;
1795 }
1796 size_t sum = hits + misses;
1797 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1798 hits, misses, percent_of(hits, sum));
1799 }
1800
// Clears the mark bitmap bits covering the whole of the given region.
// Must run inside a safepoint, as asserted, since the bitmap is cleared
// non-atomically with respect to concurrent markers.
void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
  assert_at_safepoint();
  _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
}
1805
// Claims the next region to scan by atomically advancing the global finger
// one region at a time. Returns the claimed region, or null either because
// the finger reached the end of the heap or because the claimed region has
// nothing to scan (TAMS == bottom) — in the latter case the caller should
// just call claim_region() again.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        // There are objects below TAMS to scan in this region.
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // CAS lost (or the slot had no region): re-read the finger and retry.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  return nullptr;
}
1851
1852 #ifndef PRODUCT
// Debug-only closure applied to mark stack / task queue entries verifying
// that no entry refers to an object in the (optional) collection set.
// _phase/_info identify the data structure being verified for error output.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;
  int _info;

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // Partial array states are only checked to be within the heap.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    // Neither the regular nor the optional collection set may contain
    // objects referenced from marking data structures.
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1880
// Debug-only verification that no marking data structure (global mark
// stack, task queues, global and per-task fingers) refers into the
// (optional) collection set. Only meaningful while marking or remembered
// set rebuilding is in progress.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1923 #endif // PRODUCT
1924
1925 void G1ConcurrentMark::rebuild_and_scrub() {
1926 if (!needs_remembered_set_rebuild()) {
1927 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1928 }
1929
1930 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1931 }
1932
1933 void G1ConcurrentMark::print_stats() {
1934 if (!log_is_enabled(Debug, gc, stats)) {
1935 return;
1936 }
1937 log_debug(gc, stats)("---------------------------------------------------------------------");
1938 for (size_t i = 0; i < _num_active_tasks; ++i) {
1939 _tasks[i]->print_stats();
1940 log_debug(gc, stats)("---------------------------------------------------------------------");
1941 }
1942 }
1943
// Aborts an in-progress concurrent marking cycle (e.g. because a Full GC
// is about to run). Returns true if there was marking state to abort,
// false if no cycle was in progress.
bool G1ConcurrentMark::concurrent_cycle_abort() {
  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. So let's wait until
  // they are done. By telling them to abort, they should complete
  // early.
  root_region_scan_abort_and_wait();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->in_progress() && !cm_thread()->should_terminate())) {
    return false;
  }

  reset_marking_for_restart();

  abort_marking_threads();

  // Discard all SATB data gathered so far and deactivate the SATB queues.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1978
// Signals all marking threads to abort: sets the abort flag and breaks
// both overflow synchronization barriers so no thread stays blocked there.
void G1ConcurrentMark::abort_marking_threads() {
  assert(!_root_regions.scan_in_progress(), "still doing root region scan");
  // Set the flag before aborting the barriers so waking threads observe it.
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
1985
1986 double G1ConcurrentMark::worker_threads_cpu_time_s() {
1987 class CountCpuTimeThreadClosure : public ThreadClosure {
1988 public:
1989 jlong _total_cpu_time;
1990
1991 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
1992
1993 void do_thread(Thread* t) {
1994 _total_cpu_time += os::thread_cpu_time(t);
1995 }
1996 } cl;
1997
1998 threads_do(&cl);
1999
2000 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2001 }
2002
2003 static void print_ms_time_info(const char* prefix, const char* name,
2004 NumberSeq& ns) {
2005 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2006 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2007 if (ns.num() > 0) {
2008 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2009 prefix, ns.sd(), ns.maximum());
2010 }
2011 }
2012
2013 void G1ConcurrentMark::print_summary_info() {
2014 Log(gc, marking) log;
2015 if (!log.is_trace()) {
2016 return;
2017 }
2018
2019 log.trace(" Concurrent marking:");
2020 if (!is_fully_initialized()) {
2021 log.trace(" has not been initialized yet");
2022 return;
2023 }
2024 print_ms_time_info(" ", "remarks", _remark_times);
2025 {
2026 print_ms_time_info(" ", "final marks", _remark_mark_times);
2027 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2028
2029 }
2030 print_ms_time_info(" ", "cleanups", _cleanup_times);
2031 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2032 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2033 log.trace(" Total stop_world time = %8.2f s.",
2034 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2035 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2036 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2037 }
2038
// Applies tc to the concurrent mark thread and all concurrent worker
// threads. A no-op until marking is fully initialized.
void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
  if (is_fully_initialized()) { // they are initialized late
    tc->do_thread(_cm_thread);
    _concurrent_workers->threads_do(tc);
  }
}
2045
// Prints the address and contents summary of the mark bitmap (hs_err /
// debugging output).
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2050
2051 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2052 ReferenceProcessor* result = g1h->ref_processor_cm();
2053 assert(result != nullptr, "CM reference processor should not be null");
2054 return result;
2055 }
2056
// Oop closure used by marking; claims metadata strongly and discovers
// references via the CM reference processor.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2062
// Makes hr the task's current region: positions the local finger at the
// region's bottom and computes the scan limit (TAMS).
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
         "claim_region() should have filtered out null regions");
  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}
2070
// Re-reads the current region's TAMS and updates _region_limit (and
// possibly _finger) accordingly. Needed because an evacuation pause may
// have changed the region underneath us since the region was claimed.
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2102
// Releases the task's current region (after finishing or aborting its
// scan) by clearing all region-tracking fields.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2107
void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = nullptr;
  _finger = nullptr;
  _region_limit = nullptr;
}
2115
2116 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2117 if (cm_oop_closure == nullptr) {
2118 assert(_cm_oop_closure != nullptr, "invariant");
2119 } else {
2120 assert(_cm_oop_closure == nullptr, "invariant");
2121 }
2122 _cm_oop_closure = cm_oop_closure;
2123 }
2124
// Resets the task for a new marking cycle: installs the bitmap to use,
// clears region-tracking fields, timing counters and the stats cache.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap = mark_bitmap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;

  _mark_stats_cache.reset();
}
2136
// Prepares the task for a marking restart (after overflow): drops the
// current region, empties the local queue and resets queue statistics.
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2143
// (Re)constructs the task's partial array splitter in place via placement
// new; paired with unregister_partial_array_splitter() which runs the
// destructor explicitly.
void G1CMTask::register_partial_array_splitter() {

  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks(),
                                                        ObjArrayMarkingStride);
}
2150
// Explicitly destroys the splitter constructed by
// register_partial_array_splitter().
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2154
2155 bool G1CMTask::should_exit_termination() {
2156 if (!regular_clock_call()) {
2157 return true;
2158 }
2159
2160 // This is called when we are in the termination protocol. We should
2161 // quit if, for some reason, this task wants to abort or the global
2162 // stack is not empty (this means that we can get work from it).
2163 return !_cm->mark_stack_empty() || has_aborted();
2164 }
2165
// Called when the words-scanned or refs-reached limit is hit; runs the
// regular clock and aborts marking if the clock says so.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2172
// Periodic check run during marking. Returns false when the marking step
// should be aborted (overflow, full-gc abort, yield request, time quota
// exceeded, or pending SATB buffers), true to keep going.
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (4) We check whether we should yield. If we have to, then we abort.
  // (Note: there is intentionally no step (3) — the numbering is historical.)
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2226
2227 void G1CMTask::recalculate_limits() {
2228 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2229 _words_scanned_limit = _real_words_scanned_limit;
2230
2231 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2232 _refs_reached_limit = _real_refs_reached_limit;
2233 }
2234
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  // Pull both working limits back by 3/4 of a period, so only 1/4 of a
  // period remains before the next regular clock call.
  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2244
2245 void G1CMTask::move_entries_to_global_stack() {
2246 // Local array where we'll store the entries that will be popped
2247 // from the local queue.
2248 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2249
2250 size_t n = 0;
2251 G1TaskQueueEntry task_entry;
2252 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2253 buffer[n] = task_entry;
2254 ++n;
2255 }
2256 if (n < G1CMMarkStack::EntriesPerChunk) {
2257 buffer[n] = G1TaskQueueEntry();
2258 }
2259
2260 if (n > 0) {
2261 if (!_cm->mark_stack_push(buffer)) {
2262 set_has_aborted();
2263 }
2264 }
2265
2266 // This operation was quite expensive, so decrease the limits.
2267 decrease_limits();
2268 }
2269
// Pops one chunk from the global mark stack and pushes its (possibly
// null-terminated) entries onto the local task queue. Returns false if
// the global stack was empty.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    if (task_entry.is_null()) {
      // A null entry marks the end of a partially filled chunk.
      break;
    }
    assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}
2296
2297 void G1CMTask::drain_local_queue(bool partially) {
2298 if (has_aborted()) {
2299 return;
2300 }
2301
2302 // Decide what the target size is, depending whether we're going to
2303 // drain it partially (so that other tasks can steal if they run out
2304 // of things to do) or totally (at the very end).
2305 uint target_size;
2306 if (partially) {
2307 target_size = GCDrainStackTargetSize;
2308 } else {
2309 target_size = 0;
2310 }
2311
2312 if (_task_queue->size() > target_size) {
2313 G1TaskQueueEntry entry;
2314 bool ret = _task_queue->pop_local(entry);
2315 while (ret) {
2316 process_entry(entry, false /* stolen */);
2317 if (_task_queue->size() <= target_size || has_aborted()) {
2318 ret = false;
2319 } else {
2320 ret = _task_queue->pop_local(entry);
2321 }
2322 }
2323 }
2324 }
2325
// Begins sliced processing of a large object array: registers it with the
// partial array splitter, marks its klass metadata, and processes the
// initial chunk. Returns the number of words accounted to this step
// (chunk elements plus the object header).
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = oop_cast<objArrayOop>(obj);
  size_t array_length = obj_array->length();

  // The splitter enqueues partial array states for the rest of the array
  // and tells us how much to process right away.
  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  // NOTE(review): the non-refArray path assumes the array is a flat array
  // (FlatArrayKlass::cast would fail otherwise) — presumably refArray and
  // flatArray are the only sliced array kinds here; confirm against
  // should_be_sliced().
  if (obj_array->is_refArray()) {
    return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
  } else {
    FlatArrayKlass* fak = FlatArrayKlass::cast(obj_array->klass());
    return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
  }
}
2349
// Processes one claimed chunk of a partially processed object array.
// Returns the number of words covered by the claimed element range.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = oop_cast<objArrayOop>(state->source());

  // claim() returns the [start, end) element range this task should scan
  // and may enqueue further partial states.
  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);

  if (obj->is_refArray()) {
    // Reference arrays: each element is one heap oop.
    return heap_word_size((claim._end - claim._start) * heapOopSize);
  } else {
    assert(obj->is_flatArray(), "Must be!");
    // Flat (inline-type) arrays: element size comes from the klass.
    size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
    size_t nof_elements = claim._end - claim._start;
    return heap_word_size(nof_elements * element_byte_size);
  }
}
2369
// Moves entries from the global mark stack into the local queue and
// processes them. Partial draining stops near a target size so other
// tasks can still find work; total draining runs until the global stack
// is empty.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2400
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. this is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
// Claims completed SATB buffers from the global qset and applies the
// marking closure to their entries until none are left or the task aborts.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    abort_marking_if_regular_check_fail();
  }

  // Can't assert qset is empty here, even if not aborted. If concurrent,
  // some other thread might be adding to the queue. If not concurrent,
  // some other thread might have won the race for the last buffer, but
  // has not yet decremented the count.

  _draining_satb_buffers = false;

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
2437
// Clears the cached mark stats entry for the given region.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2441
// Evicts all cached mark stats entries; returns (hits, misses) counters.
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2445
2446 void G1CMTask::print_stats() {
2447 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2448 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2449 _elapsed_time_ms, _termination_time_ms);
2450 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2451 _step_times_ms.num(),
2452 _step_times_ms.avg(),
2453 _step_times_ms.sd(),
2454 _step_times_ms.maximum(),
2455 _step_times_ms.sum());
2456 size_t const hits = _mark_stats_cache.hits();
2457 size_t const misses = _mark_stats_cache.misses();
2458 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2459 hits, misses, percent_of(hits, hits + misses));
2460 }
2461
// Attempts to steal one entry from some other task's queue into task_entry.
// Returns true on success.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2465
// Scans (part of) the task's current region by iterating the mark bitmap
// from the local finger to the region limit. On completion, or for the
// special cases (empty range, humongous region), the region is given up;
// if the iteration aborts mid-way the finger is left (advanced past the
// current object) so a restart continues where we stopped.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      // Stepping past the last object finished the region after all.
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2538
// Repeatedly tries to claim a new region for this task until one is
// claimed, the task aborts, or the concurrent mark runs out of regions.
void G1CMTask::claim_new_region() {
  // Read the note on the claim_region() method on why it might
  // return null with potentially more regions available for
  // claiming and why we have to check out_of_regions() to determine
  // whether we're done or not.
  while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
    // We are going to try to claim a new region. We should have
    // given up on the previous one.
    // Separated the asserts so that we know which one fires.
    assert(_curr_region == nullptr, "invariant");
    assert(_finger == nullptr, "invariant");
    assert(_region_limit == nullptr, "invariant");
    G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
    if (claimed_region != nullptr) {
      // Yes, we managed to claim one
      setup_for_region(claimed_region);
      assert(_curr_region == claimed_region, "invariant");
    }
    // It is important to call the regular clock here. It might take
    // a while to claim a region if, for example, we hit a large
    // block of empty regions. So we need to call the regular clock
    // method once round the loop to make sure it's called
    // frequently enough.
    abort_marking_if_regular_check_fail();
  }
}
2565
2566 void G1CMTask::attempt_stealing() {
2567 // We cannot check whether the global stack is empty, since other
2568 // tasks might be pushing objects to it concurrently.
2569 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2570 "only way to reach here");
2571 while (!has_aborted()) {
2572 G1TaskQueueEntry entry;
2573 if (_cm->try_stealing(_worker_id, entry)) {
2574 process_entry(entry, true /* stolen */);
2575
2576 // And since we're towards the end, let's totally drain the
2577 // local queue and global stack.
2578 drain_local_queue(false);
2579 drain_global_stack(false);
2580 } else {
2581 break;
2582 }
2583 }
2584 }
2585
// Enters the termination protocol. If termination succeeds, all marking
// work is globally done (checked via guarantees); otherwise the task is
// aborted so do_marking_step() restarts it to look for more work.
void G1CMTask::attempt_termination(bool is_serial) {
  // We cannot check whether the global stack is empty, since other
  // tasks might be concurrently pushing objects on it.
  // Separated the asserts so that we know which one fires.
  assert(_cm->out_of_regions(), "only way to reach here");
  assert(_task_queue->size() == 0, "only way to reach here");
  double termination_start_time_ms = os::elapsedTime() * 1000.0;

  // The G1CMTask class also extends the TerminatorTerminator class,
  // hence its should_exit_termination() method will also decide
  // whether to exit the termination protocol or not.
  bool finished = (is_serial ||
                   _cm->terminator()->offer_termination(this));
  _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);

  if (finished) {
    // We're all done.

    // We can now guarantee that the global stack is empty, since
    // all other tasks have finished. We separated the guarantees so
    // that, if a condition is false, we can immediately find out
    // which one.
    guarantee(_cm->out_of_regions(), "only way to reach here");
    guarantee(_cm->mark_stack_empty(), "only way to reach here");
    guarantee(_task_queue->size() == 0, "only way to reach here");
    guarantee(!_cm->has_overflown(), "only way to reach here");
    guarantee(!has_aborted(), "should never happen if termination has completed");
  } else {
    // Apparently there's more work to do. Let's abort this task. We
    // will restart it and hopefully we can find more things to do.
    set_has_aborted();
  }
}
2619
// Post-abort processing for do_marking_step(): record timing statistics
// if we aborted due to a timeout, and if a global mark-stack overflow
// was raised, synchronize with the other tasks (via two barriers in the
// parallel case) so all marking state can be safely re-initialized
// before marking restarts.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  // Aborts not caused by an overflow need no global coordination.
  if (!_cm->has_overflown()) {
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true
      // when we exit this method to abort the pause and restart concurrent marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2673
2674 /*****************************************************************************
2675
2676 The do_marking_step(time_target_ms, ...) method is the building
2677 block of the parallel marking framework. It can be called in parallel
2678 with other invocations of do_marking_step() on different tasks
2679 (but only one per task, obviously) and concurrently with the
2680 mutator threads, or during remark, hence it eliminates the need
2681 for two versions of the code. When called during remark, it will
2682 pick up from where the task left off during the concurrent marking
2683 phase. Interestingly, tasks are also claimable during evacuation
2684 pauses too, since do_marking_step() ensures that it aborts before
2685 it needs to yield.
2686
2687 The data structures that it uses to do marking work are the
2688 following:
2689
2690 (1) Marking Bitmap. If there are grey objects that appear only
2691 on the bitmap (this happens either when dealing with an overflow
2692 or when the concurrent start pause has simply marked the roots
2693 and didn't push them on the stack), then tasks claim heap
2694 regions whose bitmap they then scan to find grey objects. A
2695 global finger indicates where the end of the last claimed region
2696 is. A local finger indicates how far into the region a task has
2697 scanned. The two fingers are used to determine how to grey an
2698 object (i.e. whether simply marking it is OK, as it will be
2699 visited by a task in the future, or whether it needs to be also
2700 pushed on a stack).
2701
2702 (2) Local Queue. The local queue of the task which is accessed
2703 reasonably efficiently by the task. Other tasks can steal from
2704 it when they run out of work. Throughout the marking phase, a
2705 task attempts to keep its local queue short but not totally
2706 empty, so that entries are available for stealing by other
2707 tasks. Only when there is no more work, a task will totally
2708 drain its local queue.
2709
2710 (3) Global Mark Stack. This handles local queue overflow. During
2711 marking only sets of entries are moved between it and the local
2712 queues, as access to it requires a mutex and more fine-grain
2713 interaction with it which might cause contention. If it
2714 overflows, then the marking phase should restart and iterate
2715 over the bitmap to identify grey objects. Throughout the marking
2716 phase, tasks attempt to keep the global mark stack at a small
2717 length but not totally empty, so that entries are available for
2718 popping by other tasks. Only when there is no more work, tasks
2719 will totally drain the global mark stack.
2720
2721 (4) SATB Buffer Queue. This is where completed SATB buffers are
2722 made available. Buffers are regularly removed from this queue
2723 and scanned for roots, so that the queue doesn't get too
2724 long. During remark, all completed buffers are processed, as
2725 well as the filled in parts of any uncompleted buffers.
2726
2727 The do_marking_step() method tries to abort when the time target
2728 has been reached. There are a few other cases when the
2729 do_marking_step() method also aborts:
2730
2731 (1) When the marking phase has been aborted (after a Full GC).
2732
2733 (2) When a global overflow (on the global stack) has been
2734 triggered. Before the task aborts, it will actually sync up with
2735 the other tasks to ensure that all the marking data structures
2736 (local queues, stacks, fingers etc.) are re-initialized so that
2737 when do_marking_step() completes, the marking phase can
2738 immediately restart.
2739
2740 (3) When enough completed SATB buffers are available. The
2741 do_marking_step() method only tries to drain SATB buffers right
2742 at the beginning. So, if enough buffers are available, the
2743 marking step aborts and the SATB buffers are processed at
2744 the beginning of the next invocation.
2745
    (4) To yield. When we have to yield, we abort and yield
2747 right at the end of do_marking_step(). This saves us from a lot
2748 of hassle as, by yielding we might allow a Full GC. If this
2749 happens then objects will be compacted underneath our feet, the
2750 heap might shrink, etc. We save checking for this by just
2751 aborting and doing the yield right at the end.
2752
2753 From the above it follows that the do_marking_step() method should
2754 be called in a loop (or, otherwise, regularly) until it completes.
2755
2756 If a marking step completes without its has_aborted() flag being
2757 true, it means it has completed the current marking phase (and
2758 also all other marking tasks have done so and have all synced up).
2759
2760 A method called regular_clock_call() is invoked "regularly" (in
2761 sub ms intervals) throughout marking. It is this clock method that
2762 checks all the abort conditions which were mentioned above and
2763 decides when the task should abort. A work-based scheme is used to
2764 trigger this clock method: when the number of object words the
2765 marking phase has scanned or the number of references the marking
2766 phase has visited reach a given limit. Additional invocations to
    the clock method have been planted in a few other strategic places
2768 too. The initial reason for the clock method was to avoid calling
2769 cpu time gathering too regularly, as it is quite expensive. So,
2770 once it was in place, it was natural to piggy-back all the other
2771 conditions on it too and not constantly check them throughout the code.
2772
2773 If do_termination is true then do_marking_step will enter its
2774 termination protocol.
2775
2776 The value of is_serial must be true when do_marking_step is being
2777 called serially (i.e. by the VMThread) and do_marking_step should
2778 skip any synchronization in the termination and overflow code.
2779 Examples include the serial remark code and the serial reference
2780 processing closures.
2781
2782 The value of is_serial must be false when do_marking_step is
2783 being called by any of the worker threads.
2784 Examples include the concurrent marking code (CMMarkingTask),
2785 the MT remark code, and the MT reference processing closures.
2786
2787 *****************************************************************************/
2788
// The core marking step; see the large comment block above for the full
// protocol description. Drains SATB buffers, scans claimed regions,
// drains the local queue and global stack, optionally steals from other
// tasks and enters the termination protocol. Aborts (setting
// has_aborted()) on timeout, overflow, yield requests or marking abort;
// callers invoke it in a loop until it completes without aborting.
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the predicted overshoot of previous
  // steps so that on average we hit the requested target.
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: scan the current region, keep the queues short,
  // then claim the next region, until none remain or we abort.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2898
// Constructs a marking task for the given worker. All per-step state
// (current region, fingers, scan/ref counters, flags) starts cleared;
// it is (re)initialized by do_marking_step().
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the step-overshoot statistics so the very first prediction in
  // do_marking_step() is non-zero (0.5 ms of slack).
  _marking_step_diff_ms.add(0.5);
}
2935
// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

// An address range, e.g. "0x...-0x...".
#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
// Header width must match the width of the rendered address range,
// which depends on pointer size.
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT             " %-4s"
#define G1PPRL_TYPE_H_FORMAT           " %4s"
#define G1PPRL_STATE_FORMAT            " %-5s"
#define G1PPRL_STATE_H_FORMAT          " %5s"
#define G1PPRL_BYTE_FORMAT             " %9zu"
#define G1PPRL_BYTE_H_FORMAT           " %9s"
#define G1PPRL_DOUBLE_FORMAT           "%14.1f"
#define G1PPRL_GCEFF_H_FORMAT          " %14s"
#define G1PPRL_GID_H_FORMAT            " %9s"
#define G1PPRL_GID_FORMAT              " " UINT32_FORMAT_W(9)
#define G1PPRL_LEN_FORMAT              " " UINT32_FORMAT_W(14)
#define G1PPRL_LEN_H_FORMAT            " %14s"
#define G1PPRL_GID_GCEFF_FORMAT        " %14.1f"
#define G1PPRL_GID_LIVENESS_FORMAT     " %9.2f"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2975
// Prints the header of the per-region liveness report (phase name, heap
// address range, region size, and the per-region column headers). Does
// nothing unless gc+liveness trace logging is enabled; the totals are
// accumulated by do_heap_region() and printed by the destructor.
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column titles...
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // ...and their units on a second line.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
3023
3024 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3025 if (!log_is_enabled(Trace, gc, liveness)) {
3026 return false;
3027 }
3028
3029 const char* type = r->get_type_str();
3030 HeapWord* bottom = r->bottom();
3031 HeapWord* end = r->end();
3032 size_t capacity_bytes = r->capacity();
3033 size_t used_bytes = r->used();
3034 size_t live_bytes = r->live_bytes();
3035 size_t remset_bytes = r->rem_set()->mem_size();
3036 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3037 const char* remset_type = r->rem_set()->get_short_state_str();
3038 uint cset_group_id = r->rem_set()->has_cset_group()
3039 ? r->rem_set()->cset_group_id()
3040 : G1CSetCandidateGroup::NoRemSetId;
3041
3042 _total_used_bytes += used_bytes;
3043 _total_capacity_bytes += capacity_bytes;
3044 _total_live_bytes += live_bytes;
3045 _total_remset_bytes += remset_bytes;
3046 _total_code_roots_bytes += code_roots_bytes;
3047
3048 // Print a line for this particular region.
3049 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3050 G1PPRL_TYPE_FORMAT
3051 G1PPRL_ADDR_BASE_FORMAT
3052 G1PPRL_BYTE_FORMAT
3053 G1PPRL_BYTE_FORMAT
3054 G1PPRL_STATE_FORMAT
3055 G1PPRL_BYTE_FORMAT
3056 G1PPRL_GID_FORMAT,
3057 type, p2i(bottom), p2i(end),
3058 used_bytes, live_bytes,
3059 remset_type, code_roots_bytes,
3060 cset_group_id);
3061
3062 return false;
3063 }
3064
// Prints the candidate group table and the summary footer (capacity,
// used, live, remset and code-root totals accumulated during the heap
// iteration). Does nothing unless gc+liveness trace logging is enabled.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // add static memory usages to remembered set sizes
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  // Note: this also adds the groups' card set sizes to _total_remset_bytes,
  // so it must run before the summary below is printed.
  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          percent_of(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_live_bytes),
                          percent_of(_total_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_code_roots_bytes));
}
3094
3095 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3096 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3097 G1PPRL_GID_FORMAT
3098 G1PPRL_LEN_FORMAT
3099 G1PPRL_GID_GCEFF_FORMAT
3100 G1PPRL_GID_LIVENESS_FORMAT
3101 G1PPRL_BYTE_FORMAT
3102 G1PPRL_TYPE_H_FORMAT,
3103 group->group_id(),
3104 group->length(),
3105 group->length() > 0 ? group->gc_efficiency() : 0.0,
3106 group->length() > 0 ? group->liveness_percent() : 0.0,
3107 group->card_set()->mem_size(),
3108 type);
3109 _total_remset_bytes += group->card_set()->mem_size();
3110 }
3111
3112 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3113 for (G1CSetCandidateGroup* group : gl) {
3114 log_cset_candidate_group_add_total(group, type);
3115 }
3116 }
3117
3118 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3119 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3120 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3121 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3122 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3123 G1PPRL_GID_H_FORMAT
3124 G1PPRL_LEN_H_FORMAT
3125 G1PPRL_GCEFF_H_FORMAT
3126 G1PPRL_BYTE_H_FORMAT
3127 G1PPRL_BYTE_H_FORMAT
3128 G1PPRL_TYPE_H_FORMAT,
3129 "groud-id", "num-regions",
3130 "gc-eff", "liveness",
3131 "remset", "type");
3132
3133 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3134 G1PPRL_GID_H_FORMAT
3135 G1PPRL_LEN_H_FORMAT
3136 G1PPRL_GCEFF_H_FORMAT
3137 G1PPRL_BYTE_H_FORMAT
3138 G1PPRL_BYTE_H_FORMAT
3139 G1PPRL_TYPE_H_FORMAT,
3140 "", "",
3141 "(bytes/ms)", "%",
3142 "(bytes)", "");
3143
3144 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3145
3146 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3147
3148 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3149 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3150 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3151 }