1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectionSetChooser.hpp"
34 #include "gc/g1/g1CollectorState.hpp"
35 #include "gc/g1/g1ConcurrentMark.inline.hpp"
36 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
37 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
38 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
39 #include "gc/g1/g1ConcurrentRefine.hpp"
40 #include "gc/g1/g1HeapRegion.inline.hpp"
41 #include "gc/g1/g1HeapRegionManager.hpp"
42 #include "gc/g1/g1HeapRegionPrinter.hpp"
43 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
44 #include "gc/g1/g1HeapRegionSet.inline.hpp"
45 #include "gc/g1/g1HeapVerifier.hpp"
46 #include "gc/g1/g1OopClosures.inline.hpp"
47 #include "gc/g1/g1Policy.hpp"
48 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
49 #include "gc/g1/g1ThreadLocalData.hpp"
50 #include "gc/g1/g1Trace.hpp"
51 #include "gc/shared/gcId.hpp"
52 #include "gc/shared/gcTimer.hpp"
53 #include "gc/shared/gcTraceTime.inline.hpp"
54 #include "gc/shared/gcVMOperations.hpp"
55 #include "gc/shared/partialArraySplitter.inline.hpp"
56 #include "gc/shared/partialArrayState.hpp"
57 #include "gc/shared/partialArrayTaskStats.hpp"
58 #include "gc/shared/referencePolicy.hpp"
59 #include "gc/shared/suspendibleThreadSet.hpp"
60 #include "gc/shared/taskqueue.inline.hpp"
61 #include "gc/shared/taskTerminator.hpp"
62 #include "gc/shared/weakProcessor.inline.hpp"
63 #include "gc/shared/workerPolicy.hpp"
64 #include "jvm.h"
65 #include "logging/log.hpp"
66 #include "memory/allocation.hpp"
67 #include "memory/iterator.hpp"
68 #include "memory/metaspaceUtils.hpp"
69 #include "memory/resourceArea.hpp"
70 #include "memory/universe.hpp"
71 #include "nmt/memTracker.hpp"
72 #include "oops/access.inline.hpp"
73 #include "oops/oop.inline.hpp"
74 #include "runtime/globals_extension.hpp"
75 #include "runtime/handles.inline.hpp"
76 #include "runtime/java.hpp"
77 #include "runtime/orderAccess.hpp"
78 #include "runtime/os.hpp"
79 #include "runtime/prefetch.inline.hpp"
80 #include "runtime/threads.hpp"
81 #include "utilities/align.hpp"
82 #include "utilities/checkedCast.hpp"
83 #include "utilities/formatBuffer.hpp"
84 #include "utilities/growableArray.hpp"
85 #include "utilities/powerOfTwo.hpp"
86
// Default constructor; leaves _cm unset until initialize() is called.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
88
// Constructs the closure immediately bound to the given (non-null)
// concurrent mark instance.
G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}
92
// Late-binds the closure to the concurrent mark instance. May only be called
// once, and only on a default-constructed closure.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
98
// Bitmap-iteration callback invoked for each marked address between the
// task's local finger and the global finger. Processes the object at addr
// and partially drains the task's queues. Returns false to abort the bitmap
// iteration once the owning task has aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
115
// Constructs an empty mark stack; backing memory is reserved later in
// initialize().
G1CMMarkStack::G1CMMarkStack() :
  _chunk_allocator() {
  set_empty();
}
120
// Alignment (in number of G1TaskQueueEntry elements) that mark stack
// capacities are rounded to, derived from the OS allocation granularity and
// the chunk size.
size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}
124
// Sizes the global mark stack from the MarkStackSize/MarkStackSizeMax flags,
// rounding both to whole chunks, and writes the actual values back to the
// flags ergonomically. Returns false if the initial backing memory could not
// be reserved.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  // Number of entries per chunk.
  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  // The chunk allocator requires a power-of-2 initial size; the maximum can
  // never be below the initial size.
  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Clamp the entry counts so they stay within an int-sized range.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
153
// Allocates a new chunk from the backing memory by atomically bumping _size.
// If the claimed slot falls into a bucket that has no backing memory yet,
// the allocator expands under G1MarkStackChunkList_lock — but only when
// growing is enabled. Returns null when at maximum capacity, when growing is
// disabled, or when expansion fails.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  // Cheap pre-check to avoid bumping _size when obviously full.
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  size_t cur_idx = _size.fetch_then_add(1u);

  // Re-check: another thread may have raced us past the check above.
  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    // Double-checked expansion under the lock.
    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  // Placement-new the chunk in its reserved slot within the bucket.
  size_t bucket_idx = get_bucket_index(cur_idx);
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
186
// Default-constructs an empty allocator; initialize() sets up the bucket
// array and reserves the initial backing memory.
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }
196
// Sets up the bucket pointer array sized for max_capacity chunks and
// reserves backing memory for the first bucket. initial_capacity must be a
// power of two. Returns false if the initial reservation fails.
bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");

  _min_capacity = initial_capacity;
  _max_capacity = max_capacity;
  _num_buckets = get_bucket(_max_capacity) + 1;

  _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);

  for (size_t i = 0; i < _num_buckets; i++) {
    _buckets[i].store_relaxed(nullptr);
  }

  // Only the first bucket is backed by memory up front.
  size_t new_capacity = bucket_size(0);

  if (!reserve(new_capacity)) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  return true;
}
218
219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
220 if (_capacity == _max_capacity) {
221 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
222 return false;
223 }
224
225 size_t old_capacity = _capacity;
226 desired_capacity = MIN2(desired_capacity, _max_capacity);
227
228 if (reserve(desired_capacity)) {
229 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
230 old_capacity, desired_capacity);
231 return true;
232 }
233 return false;
234 }
235
236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
237 size_t new_capacity = _capacity * 2;
238 return try_expand_to(new_capacity);
239 }
240
241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
242 if (_buckets == nullptr) {
243 return;
244 }
245
246 for (size_t i = 0; i < _num_buckets; i++) {
247 if (_buckets[i].load_relaxed() != nullptr) {
248 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
249 _buckets[i].store_relaxed(nullptr);
250 }
251 }
252
253 FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
254 }
255
// Reserves backing memory so at least new_capacity chunks are available,
// allocating every bucket between the current capacity and new_capacity.
// Updates _capacity as buckets are committed. Returns false if any bucket
// allocation fails (earlier buckets stay allocated).
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    // Publish the bucket with release semantics so racing readers that see a
    // non-null bucket pointer also see initialized memory.
    _capacity += bucket_capacity;
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
290
// Attempts to double the backing capacity of the overflow mark stack.
// Failure is tolerated; the result of try_expand() is deliberately ignored.
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}
294
// Pushes elem onto the head of the given singly-linked chunk list. Not
// thread-safe on its own; callers hold the lock protecting that list (see
// add_chunk_to_chunk_list / add_chunk_to_free_list).
void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
  elem->next = list->load_relaxed();
  list->store_relaxed(elem);
}
299
// Pushes a filled chunk onto the global chunk list, keeping the chunk count
// in sync. Protected by G1MarkStackChunkList_lock.
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}
305
// Returns an emptied chunk to the free list for reuse. Protected by
// G1MarkStackFreeList_lock.
void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}
310
311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
312 TaskQueueEntryChunk* result = list->load_relaxed();
313 if (result != nullptr) {
314 list->store_relaxed(list->load_relaxed()->next);
315 }
316 return result;
317 }
318
319 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
320 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
321 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
322 if (result != nullptr) {
323 _chunks_in_chunk_list--;
324 }
325 return result;
326 }
327
// Pops a reusable chunk from the free list, or null if none is available.
// Protected by G1MarkStackFreeList_lock.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
332
333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
334 // Get a new chunk.
335 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
336
337 if (new_chunk == nullptr) {
338 // Did not get a chunk from the free list. Allocate from backing memory.
339 new_chunk = _chunk_allocator.allocate_new_chunk();
340
341 if (new_chunk == nullptr) {
342 return false;
343 }
344 }
345
346 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
347
348 add_chunk_to_chunk_list(new_chunk);
349
350 return true;
351 }
352
353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
354 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
355
356 if (cur == nullptr) {
357 return false;
358 }
359
360 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
361
362 add_chunk_to_free_list(cur);
363 return true;
364 }
365
// Resets the mark stack to the empty state, dropping all chunk lists and
// resetting the allocator's bump pointer. Uses relaxed stores; callers
// ensure no concurrent access.
void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _chunk_list.store_relaxed(nullptr);
  _free_list.store_relaxed(nullptr);
  _chunk_allocator.reset();
}
372
// Creates storage for at most max_regions root MemRegions to be scanned
// before marking proper starts.
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
  _root_regions(MemRegion::create_array(max_regions, mtGC)),
  _max_regions(max_regions),
  _num_root_regions(0),
  _claimed_root_regions(0),
  _scan_in_progress(false),
  _should_abort(false) { }
380
// Releases the root region array.
G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}
384
// Forgets all previously added root regions.
void G1CMRootMemRegions::reset() {
  _num_root_regions.store_relaxed(0);
}
388
// Adds [start, end) as a root region to scan. Only called at a safepoint;
// the slot is claimed with an atomic bump, so multiple threads may add
// concurrently.
void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
  assert_at_safepoint();
  size_t idx = _num_root_regions.fetch_then_add(1u);
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %zu", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
  _root_regions[idx].set_start(start);
  _root_regions[idx].set_end(end);
}
398
// Arms the root region scan: marks a scan as in progress iff there are any
// regions to scan, and clears the claim counter and abort flag.
void G1CMRootMemRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  _scan_in_progress.store_relaxed(num_root_regions() > 0);

  _claimed_root_regions.store_relaxed(0);
  _should_abort.store_relaxed(false);
}
407
// Claims the next unscanned root region via an atomic bump of the claim
// counter. Returns null when all regions are claimed or an abort has been
// requested.
const MemRegion* G1CMRootMemRegions::claim_next() {
  if (_should_abort.load_relaxed()) {
    // If someone has set the should_abort flag, we return null to
    // force the caller to bail out of their loop.
    return nullptr;
  }

  // Cheap pre-check before bumping the claim counter.
  uint local_num_root_regions = num_root_regions();
  if (_claimed_root_regions.load_relaxed() >= local_num_root_regions) {
    return nullptr;
  }

  // Several threads may race past the check above; only in-range claims win.
  size_t claimed_index = _claimed_root_regions.fetch_then_add(1u);
  if (claimed_index < local_num_root_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}
426
// Number of root regions added so far.
uint G1CMRootMemRegions::num_root_regions() const {
  return (uint)_num_root_regions.load_relaxed();
}
430
431 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
432 uint local_num_root_regions = num_root_regions();
433 for (uint i = 0; i < local_num_root_regions; i++) {
434 if (_root_regions[i].equals(mr)) {
435 return true;
436 }
437 }
438 return false;
439 }
440
// Marks the scan as finished and wakes all threads blocked in
// wait_until_scan_finished().
void G1CMRootMemRegions::notify_scan_done() {
  MutexLocker x(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress.store_relaxed(false);
  G1RootRegionScan_lock->notify_all();
}
446
// Cancelling the scan is signalled the same way as completing it.
void G1CMRootMemRegions::cancel_scan() {
  notify_scan_done();
}
450
// Called when root region scanning completes; verifies (unless aborting)
// that every region was claimed, then signals waiters.
void G1CMRootMemRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  if (!_should_abort.load_relaxed()) {
    assert(_claimed_root_regions.load_relaxed() >= num_root_regions(),
           "we should have claimed all root regions, claimed %zu, length = %u",
           _claimed_root_regions.load_relaxed(), num_root_regions());
  }

  notify_scan_done();
}
462
// Returns false immediately if no scan is in progress; otherwise blocks on
// G1RootRegionScan_lock until the scan completes and returns true.
bool G1CMRootMemRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    MonitorLocker ml(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Guard against spurious wakeups by re-checking the flag.
    while (scan_in_progress()) {
      ml.wait();
    }
  }
  return true;
}
476
// Constructs the concurrent marking subsystem. Heavy-weight setup (the
// marking thread, worker threads, tasks and the global mark stack backing
// memory) is deferred to fully_initialize().
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  // Per-region arrays sized for the maximum possible number of regions;
  // their elements are constructed in fully_initialize().
  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
531
// One-time creation of the marking thread, the concurrent worker threads,
// the global mark stack backing memory, the per-worker tasks/queues and the
// per-region statistics arrays. Idempotent: returns immediately if already
// fully initialized. Exits the VM if thread or mark stack creation fails.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Default-construct the per-region statistics elements allocated raw in
  // the constructor.
  for (uint i = 0; i < _g1h->max_num_regions(); i++) {
    ::new (&_region_mark_stats[i]) G1RegionMarkStats{};
    ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>{};
    ::new (&_top_at_rebuild_starts[i]) Atomic<HeapWord*>{};
  }

  reset_at_marking_complete();
}
576
577 bool G1ConcurrentMark::in_progress() const {
578 return is_fully_initialized() ? _cm_thread->in_progress() : false;
579 }
580
// Accessor for the partial array state manager shared by all marking tasks.
PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
  return _partial_array_state_manager;
}
584
// Resets all global and per-task/per-region marking state in preparation
// for a new marking cycle.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  uint max_num_regions = _g1h->max_num_regions();
  for (uint i = 0; i < max_num_regions; i++) {
    _top_at_rebuild_starts[i].store_relaxed(nullptr);
    _region_mark_stats[i].clear();
  }

  _root_regions.reset();
}
604
// Clears all marking statistics for the given region: every task's mark
// stats cache entry, the top-at-rebuild-start value, and the region's
// liveness stats.
void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
  uint region_idx = r->hrm_index();
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
  _region_mark_stats[region_idx].clear();
}
613
// Called at a safepoint when a humongous object is eagerly reclaimed:
// clears its mark bit and, if a mark or rebuild is in progress, the marking
// statistics of every region the object spans.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
631
// Prepares the marking state so marking can be (re)started — used both for
// a fresh cycle and for a restart after mark stack overflow.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Restart the global finger at the bottom of the heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
652
// Records how many tasks will be active in the next marking phase and
// resizes the terminator and the two overflow barriers to match.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}
663
// Sets the active task count and records whether the upcoming phase runs
// concurrently (marking) or STW (remark); the STW case additionally checks
// that marking has consumed the whole heap.
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
677
#if TASKQUEUE_STATS
// Logs and then resets the task queue statistics and the per-task partial
// array task statistics. Only compiled in when TASKQUEUE_STATS is enabled.
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("G1ConcurrentMark Oop Queue");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "G1ConcurrentMark Partial Array Task Stats");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
#endif
695
// Returns the global marking state to its idle (not-marking) configuration
// after a cycle completes.
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}
703
// The G1ConcurrentMark instance lives for the lifetime of the VM; this
// destructor must never actually run.
G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
711
// Worker task that clears the concurrent mark bitmap in parallel,
// region by region and in fixed-size chunks, optionally yielding to
// safepoints while doing so.
class G1ClearBitMapTask : public WorkerTask {
public:
  // Number of bitmap bytes processed per clearing step before re-checking
  // for yield/abort.
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
  private:
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    // Whether this clear runs concurrently as part of an undo mark cycle.
    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->in_undo_mark();
    }

    // Performs a yield check (when suspendible) and reports whether marking
    // has been aborted.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    // Upper bound of the bitmap range that actually needs clearing for r.
    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clears the bitmap for r in chunk_size() steps, checking for abort
    // between chunks. Returns true to terminate the region iteration early.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->in_progress(), "invariant");
        assert(!suspendible() || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }

  // Whether the closure iterated over all regions without aborting.
  bool is_complete() {
    return _cl.is_complete();
  }
};
822
// Clears the mark bitmap in parallel with the given workers. When may_yield
// is true the task joins the suspendible thread set and may terminate early
// on abort; otherwise it must run to completion at a safepoint.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  // Size the worker count by the amount of bitmap memory to clear.
  size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
  guarantee(may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
}
837
838 void G1ConcurrentMark::cleanup_for_next_mark() {
839 // Make sure that the concurrent mark thread looks to still be in
840 // the current cycle.
841 guarantee(is_fully_initialized(), "should be initializd");
842 guarantee(in_progress(), "invariant");
843
844 // We are finishing up the current cycle by clearing the next
845 // marking bitmap and getting it ready for the next cycle. During
846 // this time no other cycle can start. So, let's make sure that this
847 // is the case.
848 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
849
850 clear_bitmap(_concurrent_workers, true);
851
852 reset_partial_array_state_manager();
853
854 // Repeat the asserts from above.
855 guarantee(is_fully_initialized(), "should be initializd");
856 guarantee(in_progress(), "invariant");
857 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
858 }
859
// Recycles the partial array state manager: every task's splitter must be
// unregistered before the manager is reset, and re-registered afterwards.
void G1ConcurrentMark::reset_partial_array_state_manager() {
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->unregister_partial_array_splitter();
  }

  partial_array_state_manager()->reset();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->register_partial_array_splitter();
  }
}
871
// Non-yielding bitmap clear used at a safepoint (e.g. by full collections),
// temporarily raising the active worker count to all created workers.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
  assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible the number of active workers are temporarily
  // increased to include all currently created workers.
  WithActiveWorkers update(workers, workers->created_workers());
  clear_bitmap(workers, false);
}
881
// Batched task performing the STW work needed right before a concurrent
// start: resetting the marking state and noting the start of marking for
// each region.
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
891
// Serial subtask that resets the global marking state.
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  // Serial task: always worth exactly one worker.
  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};
900
// Parallel subtask that records the start of marking (TAMS) for every heap
// region.
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
  G1HeapRegionClaimer _claimer;
public:
  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _claimer(0) { }

  double worker_cost() const override {
    // The work done per region is very small, therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
    const double regions_per_thread = 1000;
    return _claimer.n_regions() / regions_per_thread;
  }

  void set_max_workers(uint max_workers) override;
  void do_work(uint worker_id) override;
};
916
// Delegates to G1ConcurrentMark::reset() on the (single) worker running
// this serial subtask.
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
921
// Per-region closure that records the start of marking: regions that will be
// marked through (old/humongous regions not in or selected for the collection
// set) get their TAMS moved up; all other regions get their TAMS reset.
class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
  G1ConcurrentMark* _cm;

public:
  NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }

  bool do_heap_region(G1HeapRegion* r) override {
    if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
      _cm->update_top_at_mark_start(r);
    } else {
      _cm->reset_top_at_mark_start(r);
    }
    // Never abort the iteration.
    return false;
  }
};
937
// Applies NoteStartOfMarkHRClosure to the regions claimed by this worker.
void G1PreConcurrentStartTask::NoteStartOfMarkTask::do_work(uint worker_id) {
  NoteStartOfMarkHRClosure start_cl;
  G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&start_cl, &_claimer, worker_id);
}
942
// Configures the region claimer for the number of workers that will run.
void G1PreConcurrentStartTask::NoteStartOfMarkTask::set_max_workers(uint max_workers) {
  _claimer.set_n_workers(max_workers);
}
946
947 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
948 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
949 add_serial_task(new ResetMarkingStateTask(cm));
950 add_parallel_task(new NoteStartOfMarkTask());
951 };
952
// Work done in the concurrent start pause before concurrent marking begins:
// starts the code cache marking cycle, verifies CLD claim marks are clear,
// runs the batched reset/note-start task, and records the GC cause.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  _gc_tracer_cm->set_gc_cause(cause);
}
965
966
// Work done after the concurrent start pause has decided to start marking:
// begins weak-reference discovery, activates the SATB barrier on all threads,
// and prepares the root regions for concurrent scanning.
void G1ConcurrentMark::post_concurrent_mark_start() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  rp->start_discovery(false /* always_clear */);

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle, we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // concurrent start pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
985
// An undo cycle does not mark; cancel the root region scan set up earlier.
void G1ConcurrentMark::post_concurrent_undo_start() {
  root_regions()->cancel_scan();
}
989
990 /*
991 * Notice that in the next two methods, we actually leave the STS
992 * during the barrier sync and join it immediately afterwards. If we
993 * do not do this, the following deadlock can occur: one thread could
994 * be in the barrier sync code, waiting for the other thread to also
995 * sync up, whereas another one could be trying to yield, while also
996 * waiting for the other threads to sync up too.
997 *
998 * Note, however, that this code is also used during remark and in
999 * this case we should not attempt to leave / enter the STS, otherwise
1000 * we'll either hit an assert (debug / fastdebug) or deadlock
1001 * (product). So we should only leave / enter the STS if we are
1002 * operating concurrently.
1003 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
1009 */
1010
1011 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
1012 bool barrier_aborted;
1013 {
1014 SuspendibleThreadSetLeaver sts_leave(concurrent());
1015 barrier_aborted = !_first_overflow_barrier_sync.enter();
1016 }
1017
1018 // at this point everyone should have synced up and not be doing any
1019 // more work
1020
1021 if (barrier_aborted) {
1022 // If the barrier aborted we ignore the overflow condition and
1023 // just abort the whole marking phase as quickly as possible.
1024 return;
1025 }
1026 }
1027
// Second rendezvous after overflow handling; the STS is left while waiting
// (only when operating concurrently — see the comment block above).
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1034
// Worker task driving the concurrent marking phase: each worker repeatedly
// runs bounded marking steps for its G1CMTask, yielding to safepoints in
// between, until marking completes or is aborted.
class G1CMConcurrentMarkingTask : public WorkerTask {
  G1ConcurrentMark* _cm;

public:
  void work(uint worker_id) {
    ResourceMark rm;

    // Join the suspendible thread set so safepoints can suspend this worker.
    SuspendibleThreadSetJoiner sts_join;

    assert(worker_id < _cm->active_tasks(), "invariant");

    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        // Bounded marking step; the task aborts the step when the time
        // budget is exhausted (among other conditions), so loop until the
        // step completes without aborting or the whole marking aborts.
        task->do_marking_step(G1ConcMarkStepDurationMillis,
                              true /* do_termination */,
                              false /* is_serial*/);

        _cm->do_yield_check();
      } while (!_cm->has_aborted() && task->has_aborted());
    }
    task->record_end_time();
    guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
      WorkerTask("Concurrent Mark"), _cm(cm) { }

  ~G1CMConcurrentMarkingTask() { }
};
1066
// Computes the number of concurrent marking workers to use for this cycle.
// Uses the full maximum unless dynamic sizing is enabled and ConcGCThreads
// was not set explicitly, in which case WorkerPolicy decides.
uint G1ConcurrentMark::calc_active_marking_workers() {
  uint result = 0;
  if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
    result = _max_concurrent_workers;
  } else {
    result =
      WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
                                                1, /* Minimum workers */
                                                _num_concurrent_workers,
                                                Threads::number_of_non_daemon_threads());
    // Don't scale the result down by scale_concurrent_workers() because
    // that scaling has already gone into "_max_concurrent_workers".
  }
  assert(result > 0 && result <= _max_concurrent_workers,
         "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
         _max_concurrent_workers, result);
  return result;
}
1085
// Scans one root MemRegion: iterates over all objects in [start, end),
// applying G1RootRegionScanClosure to each, with prefetching ahead of the
// cursor. The region's start must coincide with its heap region's TAMS.
void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
#ifdef ASSERT
  HeapWord* last = region->last();
  G1HeapRegion* hr = _g1h->heap_region_containing(last);
  assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
         "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
  assert(top_at_mark_start(hr) == region->start(),
         "MemRegion start should be equal to TAMS");
#endif

  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = region->start();
  const HeapWord* end = region->end();
  while (curr < end) {
    // Prefetch ahead to hide memory latency while walking objects linearly.
    Prefetch::read(curr, interval);
    oop obj = cast_to_oop(curr);
    size_t size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}
1109
1110 class G1CMRootRegionScanTask : public WorkerTask {
1111 G1ConcurrentMark* _cm;
1112 public:
1113 G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1114 WorkerTask("G1 Root Region Scan"), _cm(cm) { }
1115
1116 void work(uint worker_id) {
1117 G1CMRootMemRegions* root_regions = _cm->root_regions();
1118 const MemRegion* region = root_regions->claim_next();
1119 while (region != nullptr) {
1120 _cm->scan_root_region(region, worker_id);
1121 region = root_regions->claim_next();
1122 }
1123 }
1124 };
1125
// Runs the concurrent root region scan, if there is anything to scan. Uses at
// most one worker per root region, capped by the maximum concurrent workers.
void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    // Assign one worker to each root-region but subject to the max constraint.
    const uint num_workers = MIN2(root_regions()->num_root_regions(),
                                  _max_concurrent_workers);

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), num_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, num_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}
1148
// Blocks until the root region scan has completed; forwards the result of
// G1CMRootMemRegions::wait_until_scan_finished().
bool G1ConcurrentMark::wait_until_root_region_scan_finished() {
  return root_regions()->wait_until_scan_finished();
}
1152
// Registers [TAMS, top) of the given region as a root region to be scanned.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1156
// Returns true iff [TAMS, top) of the given region is a registered root region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1160
// Aborts the root region scan and blocks until in-flight scanning finishes.
void G1ConcurrentMark::root_region_scan_abort_and_wait() {
  root_regions()->abort();
  root_regions()->wait_until_scan_finished();
}
1165
// Bookkeeping at the start of a concurrent cycle: start the timer, report
// the cycle to the tracer, and record the pre-GC heap snapshot.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1173
// Returns the number of completed mark cycles (relaxed atomic read).
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1177
// Bookkeeping at the end of a concurrent cycle: records the post-GC heap
// snapshot, bumps the completed-cycle counter if marking finished, reports
// concurrent-mode failure on abort, and closes out the timer/tracer.
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->collector_state()->set_clear_bitmap_in_progress(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1196
// Runs the concurrent marking phase proper: sizes the worker set, sets the
// concurrency level, and executes G1CMConcurrentMarkingTask on the workers.
void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow.store_relaxed(false);

  uint active_workers = calc_active_marking_workers();

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->set_active_workers(active_workers);
  log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());

  _num_concurrent_workers = active_workers;

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}
1217
// Maps a VerifyLocation to its human-readable name. Relies on the enum's
// underlying values indexing this table in declaration order.
const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
  static const char* location_strings[] = { "Remark Before",
                                            "Remark After",
                                            "Remark Overflow",
                                            "Cleanup Before",
                                            "Cleanup After" };
  return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
}
1226
// Runs optional heap verification during a marking-related pause, labelled
// with the given location. Bitmap verification is skipped after Remark since
// the regions' TAMSes have already been reset by then.
void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
                                           VerifyLocation location) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  const char* caller = verify_location_string(location);

  if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(VerifyOption::G1UseConcMarking, buffer);

    // Only check bitmap in Remark, and not at After-Verification because the regions
    // already have their TAMS'es reset.
    if (location != VerifyLocation::RemarkAfter) {
      verifier->verify_bitmap_clear(true /* above_tams_only */);
    }
  }
}
1251
// Liveness predicate used for the post-remark object count report: an object
// is considered live iff the heap does not consider it dead.
class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
  G1CollectedHeap* _g1h;
public:
  G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}

  bool do_object_b(oop obj) {
    return !_g1h->is_obj_dead(obj);
  }
};
1261
// The Remark pause. Finalizes concurrent marking at a safepoint. If marking
// finished (the global mark stack did not overflow) this also processes weak
// references, unloads classes/code, deactivates the SATB barrier, selects
// regions for remembered set rebuild and reclaims empty regions. On overflow
// it instead resets state so concurrent marking can be restarted.
void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work();

    // Unload Klasses, String, Code Cache, etc.
    if (ClassUnloadingWithConcurrentMark) {
      G1CMIsAliveClosure is_alive(this);
      _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
    }

    SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
      flush_all_task_caches();
    }

    // All marking completed. Check bitmap now as we will start to reset TAMSes
    // in parallel below so that we can not do this in the After-Remark verification.
    _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);

    {
      GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);

      G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
      uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
                                    _g1h->workers()->active_workers());
      log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
      _g1h->workers()->run_task(&cl, num_workers);

      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_committed_regions(), cl.total_selected_for_rebuild());

      _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);

      if (_needs_remembered_set_rebuild) {
        // Prune rebuild candidates based on G1HeapWastePercent.
        // Improves rebuild time in addition to remembered set memory usage.
        G1CollectionSetChooser::build(_g1h->workers(), _g1h->num_committed_regions(), _g1h->policy()->candidates());
      }
    }

    if (log_is_enabled(Trace, gc, liveness)) {
      G1PrintRegionLivenessInfoClosure cl("Post-Marking");
      _g1h->heap_region_iterate(&cl);
    }

    // Potentially, some empty-regions have been reclaimed; make this a
    // "collection" so that pending allocation can retry before attempting a
    // GC pause.
    _g1h->increment_total_collections();

    // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
    // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
    // triggered, it likely means there are very few regular GCs, making resizing
    // based on gc heuristics less effective.
    if (_g1h->last_gc_was_periodic()) {
      _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state (except bitmaps) since marking completed.
    reset_at_marking_complete();

    G1CollectedHeap::finish_codecache_marking_cycle();

    {
      GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
      G1ObjectCountIsAliveClosure is_alive(_g1h);
      _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
    }
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow.store_relaxed(true);

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  _g1h->update_perf_counter_cpu_time();

  policy->record_concurrent_mark_remark_end();
}
1386
// Recomputes metaspace and heap monitoring sizes after regions have been
// reclaimed at the end of marking.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1398
// Per-region closure used in the Cleanup pause to finalize remembered set
// tracking state after the concurrent rebuild has completed.
class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
  G1CollectedHeap* _g1h;

public:
  G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  bool do_heap_region(G1HeapRegion* r) override {
    // Update the remset tracking state from updating to complete
    // if remembered sets have been rebuilt.
    _g1h->policy()->remset_tracker()->update_after_rebuild(r);
    // Never abort the iteration.
    return false;
  }
};
1412
// The Cleanup pause. Finalizes remembered set tracking after the concurrent
// rebuild phase (if one was needed) and records pause statistics.
void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);

  if (needs_remembered_set_rebuild()) {
    // Update the remset tracking information as well as marking all regions
    // as fully parsable.
    GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
    G1UpdateRegionsAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  } else {
    log_debug(gc, phases)("No Remembered Sets to update after rebuild");
  }

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);

  // Local statistics
  _cleanup_times.add((os::elapsedTime() - start) * 1000.0);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
    policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
  }
}
1448
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1450 // Uses the G1CMTask associated with a worker thread (for serial reference
1451 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1452 // trace referent objects.
1453 //
1454 // Using the G1CMTask and embedded local queues avoids having the worker
1455 // threads operating on the global mark stack. This reduces the risk
1456 // of overflowing the stack - which we would rather avoid at this late
1457 // state. Also using the tasks' local queues removes the potential
1458 // of the workers interfering with each other that could occur if
1459 // operating on the global stack.
1460
// Keep-alive closure for reference processing (see the comment block above):
// marks referents through the worker's G1CMTask and periodically drains the
// task's local queues every G1RefProcDrainInterval references.
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  uint _ref_counter_limit;  // Drain interval (G1RefProcDrainInterval).
  uint _ref_counter;        // Countdown until the next drain.
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    // Once the global mark stack has overflown, further marking is pointless;
    // the whole remark will be redone.
    if (_cm->has_overflown()) {
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1515
1516 // 'Drain' oop closure used by both serial and parallel reference processing.
1517 // Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
1519 // do_marking_step routine, with an unbelievably large timeout value,
1520 // to drain the marking data structures of the remaining entries
1521 // added by the 'keep alive' oop closure above.
1522
// Drain closure for reference processing (see the comment block above):
// completely drains the worker task's marking data structures via repeated
// unbounded marking steps.
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1558
// Proxy task adapting concurrent-mark reference processing to the shared
// RefProcProxyTask framework; wires up the is-alive, keep-alive, enqueue and
// drain closures for each worker.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h;
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In single-threaded mode always use the task for worker 0.
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1587
// Processes weak references and other weak roots discovered during marking.
// A mark-stack overflow while processing references is fatal since the
// liveness information can no longer be trusted at that point.
void G1ConcurrentMark::weak_refs_work() {
  ResourceMark rm;

  {
    GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Prefer to grow the stack until the max capacity.
    _global_mark_stack.set_should_grow();

    // Parallel processing task executor.
    G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());

    // Process the weak references.
    const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
    _gc_tracer_cm->report_gc_reference_stats(stats);
    pt.print_all_references();

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(has_overflown() || _global_mark_stack.is_empty(),
           "Mark stack should be empty (unless it has overflown)");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive and the contents of the heap if the marking stack
    // overflowed while processing references. Exit the VM.
    fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
          "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
          "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
          _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
    return;
  }

  assert(_global_mark_stack.is_empty(), "Marking should have completed");

  {
    GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
    G1CMIsAliveClosure is_alive(this);
    WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
  }
}
1639
1640 class G1PrecleanYieldClosure : public YieldClosure {
1641 G1ConcurrentMark* _cm;
1642
1643 public:
1644 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1645
1646 virtual bool should_return() {
1647 return _cm->has_aborted();
1648 }
1649
1650 virtual bool should_return_fine_grain() {
1651 _cm->do_yield_check();
1652 return _cm->has_aborted();
1653 }
1654 };
1655
// Concurrently precleans discovered references, dropping entries whose
// referents are already known to be live. Runs single-threaded with MT
// discovery temporarily disabled, yielding to safepoints via the STS.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  // Single worker; see G1PrecleanYieldClosure for yield/abort behavior.
  set_concurrency_and_phase(1, true);

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1675
// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  // NOTE(review): _g1h appears unused in this class as visible here —
  // presumably retained for historical reasons; confirm before removing.
  G1CollectedHeap* _g1h;

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    oop const obj = cast_to_oop(entry);
    _task->make_reference_grey(obj);
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  // Marks every entry of a completed SATB buffer.
  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};
1701
// Thread closure used during Remark to flush every thread's partially-filled
// SATB buffer into the global queue set so its entries get processed.
// NOTE(review): the g1h and task constructor parameters are unused here —
// presumably leftovers from an earlier implementation; confirm at call sites.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1SATBMarkQueueSet& _qset;

public:
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _qset(G1BarrierSet::satb_mark_queue_set()) {}

  void do_thread(Thread* thread) {
    // Transfer any partial buffer to the qset for completed buffer processing.
    _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
  }
};
1714
// Worker task for the parallel part of the Remark pause: flushes the SATB
// buffers of all threads, then drains the marking structures to completion
// with unbounded marking steps (aborting only on mark-stack overflow).
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      // Flush every thread's partial SATB buffer for processing below.
      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    do {
      task->do_marking_step(1000000000.0 /* something very large */,
                            true /* do_termination */,
                            false /* is_serial */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1745
// Finishes marking during the Remark pause: makes the heap parsable, then
// runs G1CMRemarkTask on all active workers to process remaining SATB
// buffers and drain the marking stacks.
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at it's
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  // Unless marking overflowed, all completed SATB buffers must have been
  // processed by now.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1776
1777 void G1ConcurrentMark::flush_all_task_caches() {
1778 size_t hits = 0;
1779 size_t misses = 0;
1780 for (uint i = 0; i < _max_num_tasks; i++) {
1781 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1782 hits += stats.first;
1783 misses += stats.second;
1784 }
1785 size_t sum = hits + misses;
1786 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1787 hits, misses, percent_of(hits, sum));
1788 }
1789
// Clears the part of the mark bitmap covering the given region.
// Must only be called at a safepoint.
void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
  assert_at_safepoint();
  _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
}
1794
// Claims the next region to scan by atomically moving the global finger
// past it. Returns the claimed region, or null when either the CAS lost
// to another worker, the claimed region has nothing to scan (TAMS at
// bottom), or the finger is past the end of the heap. Callers must retry
// on null and use out_of_regions() to detect completion.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // Read the finger again.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  return nullptr;
}
1840
1841 #ifndef PRODUCT
// Debug-only closure applied to every entry of the global mark stack and
// the task queues to verify that no entry refers to an object in the
// (optional) collection set, and that every oop entry is a valid oop.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase; // Name of the structure being verified, for error messages.
  int _info;          // Extra info (e.g. queue index); -1 if not applicable.

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // For partial array states we only check that the source object is in the heap.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1869
// Verifies that neither the marking data structures (global mark stack and
// task queues) nor the global/task fingers refer into the (optional)
// collection set. Only meaningful while marking or remembered set rebuild
// is in progress; debug builds only.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1912 #endif // PRODUCT
1913
1914 void G1ConcurrentMark::rebuild_and_scrub() {
1915 if (!needs_remembered_set_rebuild()) {
1916 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1917 }
1918
1919 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1920 }
1921
1922 void G1ConcurrentMark::print_stats() {
1923 if (!log_is_enabled(Debug, gc, stats)) {
1924 return;
1925 }
1926 log_debug(gc, stats)("---------------------------------------------------------------------");
1927 for (size_t i = 0; i < _num_active_tasks; ++i) {
1928 _tasks[i]->print_stats();
1929 log_debug(gc, stats)("---------------------------------------------------------------------");
1930 }
1931 }
1932
// Aborts an in-progress concurrent marking cycle ahead of a full
// collection. Returns true if marking state had to be torn down, false if
// no cycle was in progress (or marking is not yet fully initialized).
bool G1ConcurrentMark::concurrent_cycle_abort() {
  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. So let's wait until
  // they are done. By telling them to abort, they should complete
  // early.
  root_region_scan_abort_and_wait();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->in_progress() && !_g1h->concurrent_mark_is_terminating())) {
    return false;
  }

  reset_marking_for_restart();

  abort_marking_threads();

  // Discard all SATB marking data and deactivate the SATB barrier.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1967
// Signals all marking tasks to abort and releases any tasks currently
// waiting at the overflow barrier sync points.
void G1ConcurrentMark::abort_marking_threads() {
  assert(!_root_regions.scan_in_progress(), "still doing root region scan");
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
1974
1975 double G1ConcurrentMark::worker_threads_cpu_time_s() {
1976 class CountCpuTimeThreadClosure : public ThreadClosure {
1977 public:
1978 jlong _total_cpu_time;
1979
1980 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
1981
1982 void do_thread(Thread* t) {
1983 _total_cpu_time += os::thread_cpu_time(t);
1984 }
1985 } cl;
1986
1987 threads_do(&cl);
1988
1989 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
1990 }
1991
// Logs summary statistics for a series of pause times given in ms: sample
// count, total (seconds), average, and — when there is at least one
// sample — standard deviation and maximum.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}
2001
// Logs a summary of accumulated concurrent marking times (remarks and
// their sub-phases, cleanups, total STW and concurrent time) at trace
// level.
void G1ConcurrentMark::print_summary_info() {
  Log(gc, marking) log;
  if (!log.is_trace()) {
    return;
  }

  log.trace(" Concurrent marking:");
  if (!is_fully_initialized()) {
    log.trace("  has not been initialized yet");
    return;
  }
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    // Sub-phases of the remark pauses.
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
            _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
  log.trace("  Total stop_world time = %8.2f s.",
            (_remark_times.sum() + _cleanup_times.sum())/1000.0);
  log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
            cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
}
2027
// Applies the closure to the concurrent mark thread and all concurrent
// worker threads; does nothing before full initialization.
void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
  if (is_fully_initialized()) { // they are initialized late
    tc->do_thread(_cm_thread);
    _concurrent_workers->threads_do(tc);
  }
}
2034
// Prints the mark bitmap address and contents to the given stream.
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2039
2040 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2041 ReferenceProcessor* result = g1h->ref_processor_cm();
2042 assert(result != nullptr, "CM reference processor should not be null");
2043 return result;
2044 }
2045
// Oop closure used while marking; claims metadata strongly and uses the
// heap's concurrent-marking reference processor for discovery.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2051
// Makes the given region this task's current scan target: positions the
// local finger at the region's bottom and computes the scan limit.
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
         "claim_region() should have filtered out null regions");
  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}
2059
// Re-reads the current region's TAMS into _region_limit and adjusts
// _finger for the cases where the region changed underneath us since it
// was set up (see inline comments).
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2091
// Drops the current region; the task will have to claim a new one.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2096
// Resets the per-region scanning state of this task.
void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = nullptr;
  _finger = nullptr;
  _region_limit = nullptr;
}
2104
2105 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2106 if (cm_oop_closure == nullptr) {
2107 assert(_cm_oop_closure != nullptr, "invariant");
2108 } else {
2109 assert(_cm_oop_closure == nullptr, "invariant");
2110 }
2111 _cm_oop_closure = cm_oop_closure;
2112 }
2113
// Re-initializes this task for a new marking cycle with the given bitmap,
// clearing region state, timing counters and the mark stats cache.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap = mark_bitmap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;

  _mark_stats_cache.reset();
}
2125
// Clears per-task state (current region and local queue) so marking can
// be restarted, e.g. after a mark stack overflow.
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2132
// (Re-)constructs the partial array splitter in place, attaching it to
// the shared partial array state manager. Paired with
// unregister_partial_array_splitter().
void G1CMTask::register_partial_array_splitter() {

  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks(),
                                                        ObjArrayMarkingStride);
}
2139
// Destroys the in-place constructed partial array splitter; see
// register_partial_array_splitter().
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2143
2144 bool G1CMTask::should_exit_termination() {
2145 if (!regular_clock_call()) {
2146 return true;
2147 }
2148
2149 // This is called when we are in the termination protocol. We should
2150 // quit if, for some reason, this task wants to abort or the global
2151 // stack is not empty (this means that we can get work from it).
2152 return !_cm->mark_stack_empty() || has_aborted();
2153 }
2154
// Called when the words-scanned or refs-reached limit has been hit; runs
// the regular clock, aborting the task if the clock says so.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2161
// The periodic "clock" invoked from the marking loops. Recalculates the
// scanning limits and checks the various conditions under which the
// current marking step should be interrupted. Returns false if the caller
// should abort the step (overflow, full-gc abort, pending yield, expired
// time quota, or available SATB buffers), true to keep going.
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (4) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2215
// Sets the next words-scanned and refs-reached limits one full period
// ahead of the current counters.
void G1CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit = _real_refs_reached_limit;
}
2223
// Pulls the next clock call forward by three quarters of a period, so
// expensive operations are accounted for sooner.
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2233
// Moves up to one chunk of entries from the local task queue to the
// global mark stack. Aborts the task if the push fails (global stack
// overflow).
void G1CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  size_t n = 0;
  G1TaskQueueEntry task_entry;
  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
    buffer[n] = task_entry;
    ++n;
  }
  // Terminate a partially filled buffer with a null entry.
  if (n < G1CMMarkStack::EntriesPerChunk) {
    buffer[n] = G1TaskQueueEntry();
  }

  if (n > 0) {
    if (!_cm->mark_stack_push(buffer)) {
      set_has_aborted();
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}
2258
// Moves up to one chunk of entries from the global mark stack to the
// local task queue. Returns false if the global stack was empty.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    // A null entry terminates a partially filled chunk.
    if (task_entry.is_null()) {
      break;
    }
    assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}
2285
2286 void G1CMTask::drain_local_queue(bool partially) {
2287 if (has_aborted()) {
2288 return;
2289 }
2290
2291 // Decide what the target size is, depending whether we're going to
2292 // drain it partially (so that other tasks can steal if they run out
2293 // of things to do) or totally (at the very end).
2294 uint target_size;
2295 if (partially) {
2296 target_size = GCDrainStackTargetSize;
2297 } else {
2298 target_size = 0;
2299 }
2300
2301 if (_task_queue->size() > target_size) {
2302 G1TaskQueueEntry entry;
2303 bool ret = _task_queue->pop_local(entry);
2304 while (ret) {
2305 process_entry(entry, false /* stolen */);
2306 if (_task_queue->size() <= target_size || has_aborted()) {
2307 ret = false;
2308 } else {
2309 ret = _task_queue->pop_local(entry);
2310 }
2311 }
2312 }
2313 }
2314
// Starts sliced processing of a large object array: registers it with the
// partial array splitter, marks the array's klass metadata, and scans the
// initial chunk. Returns the number of words to account as scanned
// (object header plus the initial chunk).
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = objArrayOop(obj);
  size_t array_length = obj_array->length();

  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
}
2333
// Processes one partial array task entry: claims the next chunk of the
// array from the splitter and scans it. Returns the size of the scanned
// chunk in heap words.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = objArrayOop(state->source());

  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);
  return heap_word_size((claim._end - claim._start) * heapOopSize);
}
2345
// Repeatedly moves chunks from the global mark stack to the local queue
// and processes them, until the global stack drops below the partial
// target (partially == true) or is completely empty (partially == false),
// or this task aborts.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2376
// The SATB queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
// Claims and processes completed SATB buffers until none remain or this
// task aborts.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    abort_marking_if_regular_check_fail();
  }

  // Can't assert qset is empty here, even if not aborted. If concurrent,
  // some other thread might be adding to the queue. If not concurrent,
  // some other thread might have won the race for the last buffer, but
  // has not yet decremented the count.

  _draining_satb_buffers = false;

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
2413
// Drops any cached mark statistics for the given region.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2417
// Evicts all cached mark statistics, returning the cache's (hits, misses)
// counters.
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2421
// Logs this task's marking statistics: call count, elapsed and
// termination times, step time distribution, and mark stats cache
// hit/miss ratio.
void G1CMTask::print_stats() {
  log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
  log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                       _elapsed_time_ms, _termination_time_ms);
  log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
                       _step_times_ms.num(),
                       _step_times_ms.avg(),
                       _step_times_ms.sd(),
                       _step_times_ms.maximum(),
                       _step_times_ms.sum());
  size_t const hits = _mark_stats_cache.hits();
  size_t const misses = _mark_stats_cache.misses();
  log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
                       hits, misses, percent_of(hits, hits + misses));
}
2437
// Attempts to steal an entry from another task's queue on behalf of the
// given worker; returns true with task_entry set on success.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2441
// Scans (part of) the current region by iterating the mark bitmap between
// the local finger and the region limit, applying the given closure to
// marked objects. Gives up the region when finished; if the iteration is
// aborted, leaves _finger at the next object to scan on restart.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2514
// Repeatedly tries to claim a new region to scan until one is obtained,
// this task aborts, or the marking has run out of regions.
void G1CMTask::claim_new_region() {
  // Read the note on the claim_region() method on why it might
  // return null with potentially more regions available for
  // claiming and why we have to check out_of_regions() to determine
  // whether we're done or not.
  while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
    // We are going to try to claim a new region. We should have
    // given up on the previous one.
    // Separated the asserts so that we know which one fires.
    assert(_curr_region == nullptr, "invariant");
    assert(_finger == nullptr, "invariant");
    assert(_region_limit == nullptr, "invariant");
    G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
    if (claimed_region != nullptr) {
      // Yes, we managed to claim one
      setup_for_region(claimed_region);
      assert(_curr_region == claimed_region, "invariant");
    }
    // It is important to call the regular clock here. It might take
    // a while to claim a region if, for example, we hit a large
    // block of empty regions. So we need to call the regular clock
    // method once round the loop to make sure it's called
    // frequently enough.
    abort_marking_if_regular_check_fail();
  }
}
2541
2542 void G1CMTask::attempt_stealing() {
2543 // We cannot check whether the global stack is empty, since other
2544 // tasks might be pushing objects to it concurrently.
2545 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2546 "only way to reach here");
2547 while (!has_aborted()) {
2548 G1TaskQueueEntry entry;
2549 if (_cm->try_stealing(_worker_id, entry)) {
2550 process_entry(entry, true /* stolen */);
2551
2552 // And since we're towards the end, let's totally drain the
2553 // local queue and global stack.
2554 drain_local_queue(false);
2555 drain_global_stack(false);
2556 } else {
2557 break;
2558 }
2559 }
2560 }
2561
// Enters the termination protocol (unless running serially). If
// termination completes, all marking work is globally done; otherwise
// this task aborts so it can be restarted to look for more work.
void G1CMTask::attempt_termination(bool is_serial) {
  // We cannot check whether the global stack is empty, since other
  // tasks might be concurrently pushing objects on it.
  // Separated the asserts so that we know which one fires.
  assert(_cm->out_of_regions(), "only way to reach here");
  assert(_task_queue->size() == 0, "only way to reach here");
  double termination_start_time_ms = os::elapsedTime() * 1000.0;

  // The G1CMTask class also extends the TerminatorTerminator class,
  // hence its should_exit_termination() method will also decide
  // whether to exit the termination protocol or not.
  bool finished = (is_serial ||
                   _cm->terminator()->offer_termination(this));
  _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);

  if (finished) {
    // We're all done.

    // We can now guarantee that the global stack is empty, since
    // all other tasks have finished. We separated the guarantees so
    // that, if a condition is false, we can immediately find out
    // which one.
    guarantee(_cm->out_of_regions(), "only way to reach here");
    guarantee(_cm->mark_stack_empty(), "only way to reach here");
    guarantee(_task_queue->size() == 0, "only way to reach here");
    guarantee(!_cm->has_overflown(), "only way to reach here");
    guarantee(!has_aborted(), "should never happen if termination has completed");
  } else {
    // Apparently there's more work to do. Let's abort this task. We
    // will restart it and hopefully we can find more things to do.
    set_has_aborted();
  }
}
2595
// Bookkeeping after do_marking_step() aborted: record timing statistics
// when the abort was a time-out, and — if a global mark stack overflow was
// raised — run the two-barrier restart protocol so that all tasks
// re-initialize their marking state safely before marking restarts.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  // Aborts for any reason other than global overflow need no further
  // coordination with other tasks.
  if (!_cm->has_overflown()) {
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context.
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true
      // when we exit this method to abort the pause and restart concurrent
      // marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2649
2650 /*****************************************************************************
2651
2652 The do_marking_step(time_target_ms, ...) method is the building
2653 block of the parallel marking framework. It can be called in parallel
2654 with other invocations of do_marking_step() on different tasks
2655 (but only one per task, obviously) and concurrently with the
2656 mutator threads, or during remark, hence it eliminates the need
2657 for two versions of the code. When called during remark, it will
2658 pick up from where the task left off during the concurrent marking
2659 phase. Interestingly, tasks are also claimable during evacuation
2660 pauses too, since do_marking_step() ensures that it aborts before
2661 it needs to yield.
2662
2663 The data structures that it uses to do marking work are the
2664 following:
2665
2666 (1) Marking Bitmap. If there are grey objects that appear only
2667 on the bitmap (this happens either when dealing with an overflow
2668 or when the concurrent start pause has simply marked the roots
2669 and didn't push them on the stack), then tasks claim heap
2670 regions whose bitmap they then scan to find grey objects. A
2671 global finger indicates where the end of the last claimed region
2672 is. A local finger indicates how far into the region a task has
2673 scanned. The two fingers are used to determine how to grey an
2674 object (i.e. whether simply marking it is OK, as it will be
2675 visited by a task in the future, or whether it needs to be also
2676 pushed on a stack).
2677
2678 (2) Local Queue. The local queue of the task which is accessed
2679 reasonably efficiently by the task. Other tasks can steal from
2680 it when they run out of work. Throughout the marking phase, a
2681 task attempts to keep its local queue short but not totally
2682 empty, so that entries are available for stealing by other
2683 tasks. Only when there is no more work, a task will totally
2684 drain its local queue.
2685
2686 (3) Global Mark Stack. This handles local queue overflow. During
2687 marking only sets of entries are moved between it and the local
2688 queues, as access to it requires a mutex and more fine-grain
2689 interaction with it which might cause contention. If it
2690 overflows, then the marking phase should restart and iterate
2691 over the bitmap to identify grey objects. Throughout the marking
2692 phase, tasks attempt to keep the global mark stack at a small
2693 length but not totally empty, so that entries are available for
2694 popping by other tasks. Only when there is no more work, tasks
2695 will totally drain the global mark stack.
2696
2697 (4) SATB Buffer Queue. This is where completed SATB buffers are
2698 made available. Buffers are regularly removed from this queue
2699 and scanned for roots, so that the queue doesn't get too
2700 long. During remark, all completed buffers are processed, as
2701 well as the filled in parts of any uncompleted buffers.
2702
2703 The do_marking_step() method tries to abort when the time target
2704 has been reached. There are a few other cases when the
2705 do_marking_step() method also aborts:
2706
2707 (1) When the marking phase has been aborted (after a Full GC).
2708
2709 (2) When a global overflow (on the global stack) has been
2710 triggered. Before the task aborts, it will actually sync up with
2711 the other tasks to ensure that all the marking data structures
2712 (local queues, stacks, fingers etc.) are re-initialized so that
2713 when do_marking_step() completes, the marking phase can
2714 immediately restart.
2715
2716 (3) When enough completed SATB buffers are available. The
2717 do_marking_step() method only tries to drain SATB buffers right
2718 at the beginning. So, if enough buffers are available, the
2719 marking step aborts and the SATB buffers are processed at
2720 the beginning of the next invocation.
2721
    (4) To yield. When we have to yield, we abort and perform the yield
    right at the end of do_marking_step(). This saves us from a lot
2724 of hassle as, by yielding we might allow a Full GC. If this
2725 happens then objects will be compacted underneath our feet, the
2726 heap might shrink, etc. We save checking for this by just
2727 aborting and doing the yield right at the end.
2728
2729 From the above it follows that the do_marking_step() method should
2730 be called in a loop (or, otherwise, regularly) until it completes.
2731
2732 If a marking step completes without its has_aborted() flag being
2733 true, it means it has completed the current marking phase (and
2734 also all other marking tasks have done so and have all synced up).
2735
2736 A method called regular_clock_call() is invoked "regularly" (in
2737 sub ms intervals) throughout marking. It is this clock method that
2738 checks all the abort conditions which were mentioned above and
2739 decides when the task should abort. A work-based scheme is used to
2740 trigger this clock method: when the number of object words the
2741 marking phase has scanned or the number of references the marking
2742 phase has visited reach a given limit. Additional invocations to
    the clock method have been planted in a few other strategic places
2744 too. The initial reason for the clock method was to avoid calling
2745 cpu time gathering too regularly, as it is quite expensive. So,
2746 once it was in place, it was natural to piggy-back all the other
2747 conditions on it too and not constantly check them throughout the code.
2748
2749 If do_termination is true then do_marking_step will enter its
2750 termination protocol.
2751
2752 The value of is_serial must be true when do_marking_step is being
2753 called serially (i.e. by the VMThread) and do_marking_step should
2754 skip any synchronization in the termination and overflow code.
2755 Examples include the serial remark code and the serial reference
2756 processing closures.
2757
2758 The value of is_serial must be false when do_marking_step is
2759 being called by any of the worker threads.
2760 Examples include the concurrent marking code (CMMarkingTask),
2761 the MT remark code, and the MT reference processing closures.
2762
2763 *****************************************************************************/
2764
// Perform one marking step for this task; see the large comment block above
// for the full description of the protocol. The method tries to stay within
// time_target_ms (measured in thread CPU time) and otherwise aborts by
// setting has_aborted(); callers are expected to invoke it repeatedly until
// it completes without aborting.
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the predicted overshoot of previous steps,
  // so that on average we hit the requested target.
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: process the current region (if any), keep the local
  // queue and global stack short, and claim the next region, until we
  // either run out of regions or abort.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2874
// Construct a marking task for the given worker. The task starts with all
// work-related fields cleared; actual marking state is set up per
// invocation of do_marking_step().
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the overshoot prediction for do_marking_step() with a small
  // non-zero value so early predictions are not zero.
  _marking_step_diff_ms.add(0.5);
}
2911
2912 // These are formatting macros that are used below to ensure
2913 // consistent formatting. The *_H_* versions are used to format the
2914 // header for a particular value and they should be kept consistent
2915 // with the corresponding macro. Also note that most of the macros add
2916 // the necessary white space (as a prefix) which makes them a bit
2917 // easier to compose.
2918
2919 // All the output lines are prefixed with this string to be able to
2920 // identify them easily in a large log file.
2921 #define G1PPRL_LINE_PREFIX "###"
2922
2923 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
2924 #ifdef _LP64
2925 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
2926 #else // _LP64
2927 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
2928 #endif // _LP64
2929
2930 // For per-region info
2931 #define G1PPRL_TYPE_FORMAT " %-4s"
2932 #define G1PPRL_TYPE_H_FORMAT " %4s"
2933 #define G1PPRL_STATE_FORMAT " %-5s"
2934 #define G1PPRL_STATE_H_FORMAT " %5s"
2935 #define G1PPRL_BYTE_FORMAT " %9zu"
2936 #define G1PPRL_BYTE_H_FORMAT " %9s"
2937 #define G1PPRL_DOUBLE_FORMAT "%14.1f"
2938 #define G1PPRL_GCEFF_H_FORMAT " %14s"
2939 #define G1PPRL_GID_H_FORMAT " %9s"
2940 #define G1PPRL_GID_FORMAT " " UINT32_FORMAT_W(9)
2941 #define G1PPRL_LEN_FORMAT " " UINT32_FORMAT_W(14)
2942 #define G1PPRL_LEN_H_FORMAT " %14s"
2943 #define G1PPRL_GID_GCEFF_FORMAT " %14.1f"
2944 #define G1PPRL_GID_LIVENESS_FORMAT " %9.2f"
2945
2946 // For summary info
2947 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
2948 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": %zu"
2949 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
2950 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2951
// Emit the per-region liveness table header for the given phase. All output
// goes to the gc+liveness=trace log; if that log is not enabled, the
// closure does nothing at all (here and in do_heap_region()/the destructor).
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column titles...
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // ...and the units line beneath them.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
2999
3000 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3001 if (!log_is_enabled(Trace, gc, liveness)) {
3002 return false;
3003 }
3004
3005 const char* type = r->get_type_str();
3006 HeapWord* bottom = r->bottom();
3007 HeapWord* end = r->end();
3008 size_t capacity_bytes = r->capacity();
3009 size_t used_bytes = r->used();
3010 size_t live_bytes = r->live_bytes();
3011 size_t remset_bytes = r->rem_set()->mem_size();
3012 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3013 const char* remset_type = r->rem_set()->get_short_state_str();
3014 uint cset_group_id = r->rem_set()->has_cset_group()
3015 ? r->rem_set()->cset_group_id()
3016 : G1CSetCandidateGroup::NoRemSetId;
3017
3018 _total_used_bytes += used_bytes;
3019 _total_capacity_bytes += capacity_bytes;
3020 _total_live_bytes += live_bytes;
3021 _total_remset_bytes += remset_bytes;
3022 _total_code_roots_bytes += code_roots_bytes;
3023
3024 // Print a line for this particular region.
3025 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3026 G1PPRL_TYPE_FORMAT
3027 G1PPRL_ADDR_BASE_FORMAT
3028 G1PPRL_BYTE_FORMAT
3029 G1PPRL_BYTE_FORMAT
3030 G1PPRL_STATE_FORMAT
3031 G1PPRL_BYTE_FORMAT
3032 G1PPRL_GID_FORMAT,
3033 type, p2i(bottom), p2i(end),
3034 used_bytes, live_bytes,
3035 remset_type, code_roots_bytes,
3036 cset_group_id);
3037
3038 return false;
3039 }
3040
// Emit the collection set candidate group table and the summary footer
// with the totals accumulated by do_heap_region(), plus the static and
// free-list remembered set overheads.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // add static memory usages to remembered set sizes
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  // Note: this also adds the candidate groups' card set sizes to
  // _total_remset_bytes, so it must run before the summary below.
  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          percent_of(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_live_bytes),
                          percent_of(_total_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_code_roots_bytes));
}
3070
3071 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3072 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3073 G1PPRL_GID_FORMAT
3074 G1PPRL_LEN_FORMAT
3075 G1PPRL_GID_GCEFF_FORMAT
3076 G1PPRL_GID_LIVENESS_FORMAT
3077 G1PPRL_BYTE_FORMAT
3078 G1PPRL_TYPE_H_FORMAT,
3079 group->group_id(),
3080 group->length(),
3081 group->length() > 0 ? group->gc_efficiency() : 0.0,
3082 group->length() > 0 ? group->liveness_percent() : 0.0,
3083 group->card_set()->mem_size(),
3084 type);
3085 _total_remset_bytes += group->card_set()->mem_size();
3086 }
3087
3088 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3089 for (G1CSetCandidateGroup* group : gl) {
3090 log_cset_candidate_group_add_total(group, type);
3091 }
3092 }
3093
3094 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3095 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3096 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3097 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3098 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3099 G1PPRL_GID_H_FORMAT
3100 G1PPRL_LEN_H_FORMAT
3101 G1PPRL_GCEFF_H_FORMAT
3102 G1PPRL_BYTE_H_FORMAT
3103 G1PPRL_BYTE_H_FORMAT
3104 G1PPRL_TYPE_H_FORMAT,
3105 "groud-id", "num-regions",
3106 "gc-eff", "liveness",
3107 "remset", "type");
3108
3109 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3110 G1PPRL_GID_H_FORMAT
3111 G1PPRL_LEN_H_FORMAT
3112 G1PPRL_GCEFF_H_FORMAT
3113 G1PPRL_BYTE_H_FORMAT
3114 G1PPRL_BYTE_H_FORMAT
3115 G1PPRL_TYPE_H_FORMAT,
3116 "", "",
3117 "(bytes/ms)", "%",
3118 "(bytes)", "");
3119
3120 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3121
3122 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3123
3124 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3125 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3126 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3127 }