1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectorState.inline.hpp"
34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
38 #include "gc/g1/g1ConcurrentRefine.hpp"
39 #include "gc/g1/g1HeapRegion.inline.hpp"
40 #include "gc/g1/g1HeapRegionManager.hpp"
41 #include "gc/g1/g1HeapRegionPrinter.hpp"
42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
44 #include "gc/g1/g1HeapVerifier.hpp"
45 #include "gc/g1/g1OopClosures.inline.hpp"
46 #include "gc/g1/g1Policy.hpp"
47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
48 #include "gc/g1/g1ThreadLocalData.hpp"
49 #include "gc/g1/g1Trace.hpp"
50 #include "gc/shared/gcId.hpp"
51 #include "gc/shared/gcTimer.hpp"
52 #include "gc/shared/gcTraceTime.inline.hpp"
53 #include "gc/shared/gcVMOperations.hpp"
54 #include "gc/shared/partialArraySplitter.inline.hpp"
55 #include "gc/shared/partialArrayState.hpp"
56 #include "gc/shared/partialArrayTaskStats.hpp"
57 #include "gc/shared/referencePolicy.hpp"
58 #include "gc/shared/suspendibleThreadSet.hpp"
59 #include "gc/shared/taskqueue.inline.hpp"
60 #include "gc/shared/taskTerminator.hpp"
61 #include "gc/shared/weakProcessor.inline.hpp"
62 #include "gc/shared/workerPolicy.hpp"
63 #include "jvm.h"
64 #include "logging/log.hpp"
65 #include "memory/allocation.hpp"
66 #include "memory/iterator.hpp"
67 #include "memory/metaspaceUtils.hpp"
68 #include "memory/resourceArea.hpp"
69 #include "memory/universe.hpp"
70 #include "nmt/memTracker.hpp"
71 #include "oops/access.inline.hpp"
72 #include "oops/oop.inline.hpp"
73 #include "runtime/globals_extension.hpp"
74 #include "runtime/handles.inline.hpp"
75 #include "runtime/java.hpp"
76 #include "runtime/orderAccess.hpp"
77 #include "runtime/os.hpp"
78 #include "runtime/prefetch.inline.hpp"
79 #include "runtime/threads.hpp"
80 #include "utilities/align.hpp"
81 #include "utilities/checkedCast.hpp"
82 #include "utilities/formatBuffer.hpp"
83 #include "utilities/growableArray.hpp"
84 #include "utilities/powerOfTwo.hpp"
85
86 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
87
88 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
89 assert(cm != nullptr, "must be");
90 }
91
92 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
93 assert(cm != nullptr, "must be");
94 assert(_cm == nullptr, "double initialize");
95 _cm = cm;
96 }
97
98 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
99 assert(addr < _cm->finger(), "invariant");
100 assert(addr >= _task->finger(), "invariant");
101
102 // We move that task's local finger along.
103 _task->move_finger_to(addr);
104
105 _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
106 // we only partially drain the local queue and global stack
107 _task->drain_local_queue(true);
108 _task->drain_global_stack(true);
109
110 // if the has_aborted flag has been raised, we need to bail out of
111 // the iteration
112 return !_task->has_aborted();
113 }
114
115 G1CMMarkStack::G1CMMarkStack() :
116 _chunk_allocator() {
117 set_empty();
118 }
119
120 size_t G1CMMarkStack::capacity_alignment() {
121 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
122 }
123
124 bool G1CMMarkStack::initialize() {
125 guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
126
127 size_t initial_capacity = MarkStackSize;
128 size_t max_capacity = MarkStackSizeMax;
129
130 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
131
132 size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
133 size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
134
135 initial_num_chunks = round_up_power_of_2(initial_num_chunks);
136 max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
137
138 size_t limit = (INT_MAX - 1);
139 max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
140 initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
141
142 FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
143 FLAG_SET_ERGO(MarkStackSize, initial_capacity);
144
145 log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
146
147 log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
148 initial_num_chunks, max_capacity);
149
150 return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
151 }
152
153 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
154 if (_size.load_relaxed() >= _max_capacity) {
155 return nullptr;
156 }
157
158 size_t cur_idx = _size.fetch_then_add(1u);
159
160 if (cur_idx >= _max_capacity) {
161 return nullptr;
162 }
163
164 size_t bucket = get_bucket(cur_idx);
165 if (_buckets[bucket].load_acquire() == nullptr) {
166 if (!_should_grow) {
167 // Prefer to restart the CM.
168 return nullptr;
169 }
170
171 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
172 if (_buckets[bucket].load_acquire() == nullptr) {
173 size_t desired_capacity = bucket_size(bucket) * 2;
174 if (!try_expand_to(desired_capacity)) {
175 return nullptr;
176 }
177 }
178 }
179
180 size_t bucket_idx = get_bucket_index(cur_idx);
181 TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
182 result->next = nullptr;
183 return result;
184 }
185
186 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
187 _min_capacity(0),
188 _max_capacity(0),
189 _capacity(0),
190 _num_buckets(0),
191 _should_grow(false),
192 _buckets(nullptr),
193 _size(0)
194 { }
195
196 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
197 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
198
199 _min_capacity = initial_capacity;
200 _max_capacity = max_capacity;
201 _num_buckets = get_bucket(_max_capacity) + 1;
202
203 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
204
205 for (size_t i = 0; i < _num_buckets; i++) {
206 _buckets[i].store_relaxed(nullptr);
207 }
208
209 size_t new_capacity = bucket_size(0);
210
211 if (!reserve(new_capacity)) {
212 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
213 return false;
214 }
215 return true;
216 }
217
218 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
219 if (_capacity == _max_capacity) {
220 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
221 return false;
222 }
223
224 size_t old_capacity = _capacity;
225 desired_capacity = MIN2(desired_capacity, _max_capacity);
226
227 if (reserve(desired_capacity)) {
228 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
229 old_capacity, desired_capacity);
230 return true;
231 }
232 return false;
233 }
234
235 bool G1CMMarkStack::ChunkAllocator::try_expand() {
236 size_t new_capacity = _capacity * 2;
237 return try_expand_to(new_capacity);
238 }
239
240 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
241 if (_buckets == nullptr) {
242 return;
243 }
244
245 for (size_t i = 0; i < _num_buckets; i++) {
246 if (_buckets[i].load_relaxed() != nullptr) {
247 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
248 _buckets[i].store_relaxed(nullptr);
249 }
250 }
251
252 FREE_C_HEAP_ARRAY(_buckets);
253 }
254
255 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
256 assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
257
258 size_t highest_bucket = get_bucket(new_capacity - 1);
259 size_t i = get_bucket(_capacity);
260
261 // Allocate all buckets associated with indexes between the current capacity (_capacity)
262 // and the new capacity (new_capacity). This step ensures that there are no gaps in the
263 // array and that the capacity accurately reflects the reserved memory.
264 for (; i <= highest_bucket; i++) {
265 if (_buckets[i].load_acquire() != nullptr) {
266 continue; // Skip over already allocated buckets.
267 }
268
269 size_t bucket_capacity = bucket_size(i);
270
271 // Trim bucket size so that we do not exceed the _max_capacity.
272 bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
273 bucket_capacity :
274 _max_capacity - _capacity;
275
276
277 TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
278
279 if (bucket_base == nullptr) {
280 log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
281 bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
282 return false;
283 }
284 _capacity += bucket_capacity;
285 _buckets[i].release_store(bucket_base);
286 }
287 return true;
288 }
289
290 void G1CMMarkStack::expand() {
291 _chunk_allocator.try_expand();
292 }
293
294 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
295 elem->next = list->load_relaxed();
296 list->store_relaxed(elem);
297 }
298
299 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
300 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
301 add_chunk_to_list(&_chunk_list, elem);
302 _chunks_in_chunk_list++;
303 }
304
305 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
306 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
307 add_chunk_to_list(&_free_list, elem);
308 }
309
310 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
311 TaskQueueEntryChunk* result = list->load_relaxed();
312 if (result != nullptr) {
313 list->store_relaxed(list->load_relaxed()->next);
314 }
315 return result;
316 }
317
318 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
319 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
320 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
321 if (result != nullptr) {
322 _chunks_in_chunk_list--;
323 }
324 return result;
325 }
326
327 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
328 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
329 return remove_chunk_from_list(&_free_list);
330 }
331
332 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
333 // Get a new chunk.
334 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
335
336 if (new_chunk == nullptr) {
337 // Did not get a chunk from the free list. Allocate from backing memory.
338 new_chunk = _chunk_allocator.allocate_new_chunk();
339
340 if (new_chunk == nullptr) {
341 return false;
342 }
343 }
344
345 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
346
347 add_chunk_to_chunk_list(new_chunk);
348
349 return true;
350 }
351
352 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
353 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
354
355 if (cur == nullptr) {
356 return false;
357 }
358
359 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
360
361 add_chunk_to_free_list(cur);
362 return true;
363 }
364
365 void G1CMMarkStack::set_empty() {
366 _chunks_in_chunk_list = 0;
367 _chunk_list.store_relaxed(nullptr);
368 _free_list.store_relaxed(nullptr);
369 _chunk_allocator.reset();
370 }
371
372 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
373 _root_regions(MemRegion::create_array(max_regions, mtGC)),
374 _max_regions(max_regions),
375 _num_regions(0),
376 _num_claimed_regions(0) { }
377
378 G1CMRootMemRegions::~G1CMRootMemRegions() {
379 MemRegion::destroy_array(_root_regions, _max_regions);
380 }
381
382 void G1CMRootMemRegions::reset() {
383 assert_at_safepoint();
384 assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
385
386 _num_regions.store_relaxed(0);
387 _num_claimed_regions.store_relaxed(0);
388 }
389
390 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
391 assert_at_safepoint();
392 uint idx = _num_regions.fetch_then_add(1u);
393 assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
394 assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
395 "end (" PTR_FORMAT ")", p2i(start), p2i(end));
396 _root_regions[idx].set_start(start);
397 _root_regions[idx].set_end(end);
398 }
399
400 const MemRegion* G1CMRootMemRegions::claim_next() {
401 uint local_num_regions = num_regions();
402 if (num_claimed_regions() >= local_num_regions) {
403 return nullptr;
404 }
405
406 uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
407 if (claimed_index < local_num_regions) {
408 return &_root_regions[claimed_index];
409 }
410 return nullptr;
411 }
412
413 bool G1CMRootMemRegions::work_completed() const {
414 return num_remaining_regions() == 0;
415 }
416
417 uint G1CMRootMemRegions::num_remaining_regions() const {
418 uint total = num_regions();
419 uint claimed = num_claimed_regions();
420 return (total > claimed) ? total - claimed : 0;
421 }
422
423 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
424 uint local_num_root_regions = num_regions();
425 for (uint i = 0; i < local_num_root_regions; i++) {
426 if (_root_regions[i].equals(mr)) {
427 return true;
428 }
429 }
430 return false;
431 }
432
433 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
434 G1RegionToSpaceMapper* bitmap_storage) :
435 _cm_thread(nullptr),
436 _g1h(g1h),
437
438 _mark_bitmap(),
439
440 _heap(_g1h->reserved()),
441
442 _root_regions(_g1h->max_num_regions()),
443 _root_region_scan_aborted(false),
444
445 _global_mark_stack(),
446
447 _finger(nullptr), // _finger set in set_non_marking_state
448
449 _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
450 _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
451 _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
452 _tasks(nullptr), // _tasks set inside late_init()
453 _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
454 _terminator(_max_num_tasks, _task_queues),
455 _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
456
457 _first_overflow_barrier_sync(),
458 _second_overflow_barrier_sync(),
459
460 _completed_mark_cycles(0),
461 _has_overflown(false),
462 _concurrent(false),
463 _has_aborted(false),
464 _restart_for_overflow(false),
465 _gc_timer_cm(new ConcurrentGCTimer()),
466 _gc_tracer_cm(new G1OldTracer()),
467
468 // _verbose_level set below
469
470 _remark_times(),
471 _remark_mark_times(),
472 _remark_weak_ref_times(),
473 _cleanup_times(),
474
475 _concurrent_workers(nullptr),
476 _num_concurrent_workers(0),
477 _max_concurrent_workers(0),
478
479 _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
480 _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
481 _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
482 _needs_remembered_set_rebuild(false)
483 {
484 assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
485
486 _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
487 }
488
489 void G1ConcurrentMark::fully_initialize() {
490 if (is_fully_initialized()) {
491 return;
492 }
493
494 // Create & start ConcurrentMark thread.
495 _cm_thread = new G1ConcurrentMarkThread(this);
496 if (_cm_thread->osthread() == nullptr) {
497 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
498 }
499
500 log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
501 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
502
503 _max_concurrent_workers = ConcGCThreads;
504
505 _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
506 _concurrent_workers->initialize_workers();
507 _num_concurrent_workers = _concurrent_workers->active_workers();
508
509 if (!_global_mark_stack.initialize()) {
510 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
511 }
512
513 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
514
515 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
516 _num_active_tasks = _max_num_tasks;
517
518 for (uint i = 0; i < _max_num_tasks; ++i) {
519 G1CMTaskQueue* task_queue = new G1CMTaskQueue();
520 _task_queues->register_queue(i, task_queue);
521
522 _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
523 }
524
525 uint max_num_regions = _g1h->max_num_regions();
526 ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
527 for (uint i = 0; i < max_num_regions; i++) {
528 ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>(_g1h->bottom_addr_for_region(i));
529 }
530 // Contrary to TAMS, the default value of _top_at_rebuild_starts needs to be null.
531 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
532
533 reset_at_marking_complete();
534 }
535
536 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
537 return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
538 }
539
540 bool G1ConcurrentMark::is_in_marking() const {
541 return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
542 }
543
544 bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
545 return cm_thread()->is_in_rebuild_or_scrub();
546 }
547
548 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
549 return cm_thread()->is_in_reset_for_next_cycle();
550 }
551
552 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
553 return _partial_array_state_manager;
554 }
555
556 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
557 assert(is_fully_initialized(), "must be");
558 return _cm_thread;
559 }
560
561 void G1ConcurrentMark::reset() {
562 _has_aborted.store_relaxed(false);
563
564 reset_marking_for_restart();
565
566 // Reset all tasks, since different phases will use different number of active
567 // threads. So, it's easiest to have all of them ready.
568 for (uint i = 0; i < _max_num_tasks; ++i) {
569 _tasks[i]->reset(mark_bitmap());
570 }
571
572 uint max_num_regions = _g1h->max_num_regions();
573 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
574 for (uint i = 0; i < max_num_regions; i++) {
575 _region_mark_stats[i].clear();
576 }
577
578 _root_region_scan_aborted.store_relaxed(false);
579 _root_regions.reset();
580 }
581
582 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
583 uint region_idx = r->hrm_index();
584 for (uint j = 0; j < _max_num_tasks; ++j) {
585 _tasks[j]->clear_mark_stats_cache(region_idx);
586 }
587 _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
588 _region_mark_stats[region_idx].clear();
589 }
590
591 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
592 assert_at_safepoint();
593 assert(r->is_starts_humongous(), "Got humongous continues region here");
594
595 // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
596 mark_bitmap()->clear(r->bottom());
597
598 if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
599 return;
600 }
601
602 // Clear any statistics about the region gathered so far.
603 _g1h->humongous_obj_regions_iterate(r,
604 [&] (G1HeapRegion* r) {
605 clear_statistics(r);
606 });
607 }
608
609 void G1ConcurrentMark::reset_marking_for_restart() {
610 _global_mark_stack.set_empty();
611
612 // Expand the marking stack, if we have to and if we can.
613 if (has_overflown()) {
614 _global_mark_stack.expand();
615
616 uint max_num_regions = _g1h->max_num_regions();
617 for (uint i = 0; i < max_num_regions; i++) {
618 _region_mark_stats[i].clear_during_overflow();
619 }
620 }
621
622 clear_has_overflown();
623 _finger.store_relaxed(_heap.start());
624
625 for (uint i = 0; i < _max_num_tasks; ++i) {
626 _tasks[i]->reset_for_restart();
627 }
628 }
629
630 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
631 assert(active_tasks <= _max_num_tasks, "we should not have more");
632
633 _num_active_tasks = active_tasks;
634 // Need to update the three data structures below according to the
635 // number of active threads for this phase.
636 _terminator.reset_for_reuse(active_tasks);
637 _first_overflow_barrier_sync.set_n_workers(active_tasks);
638 _second_overflow_barrier_sync.set_n_workers(active_tasks);
639 }
640
641 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
642 set_concurrency(active_tasks);
643
644 _concurrent.store_relaxed(concurrent);
645
646 if (!concurrent) {
647 // At this point we should be in a STW phase, and completed marking.
648 assert_at_safepoint_on_vm_thread();
649 assert(out_of_regions(),
650 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
651 p2i(finger()), p2i(_heap.end()));
652 }
653 }
654
655 #if TASKQUEUE_STATS
656 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
657
658 _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
659
660 auto get_pa_stats = [&](uint i) {
661 return _tasks[i]->partial_array_task_stats();
662 };
663
664 PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
665 "Concurrent Mark Partial Array");
666
667 for (uint i = 0; i < _max_num_tasks; ++i) {
668 get_pa_stats(i)->reset();
669 }
670 }
671 #endif
672
673 void G1ConcurrentMark::reset_at_marking_complete() {
674 TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
675 // We set the global marking state to some default values when we're
676 // not doing marking.
677 reset_marking_for_restart();
678 _num_active_tasks = 0;
679 }
680
681 G1ConcurrentMark::~G1ConcurrentMark() {
682 FREE_C_HEAP_ARRAY(_top_at_mark_starts);
683 FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
684 FREE_C_HEAP_ARRAY(_region_mark_stats);
685 // The G1ConcurrentMark instance is never freed.
686 ShouldNotReachHere();
687 }
688
689 class G1ClearBitMapTask : public WorkerTask {
690 public:
691 static size_t chunk_size() { return M; }
692
693 private:
694 // Heap region closure used for clearing the _mark_bitmap.
695 class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
696 G1ConcurrentMark* _cm;
697 G1CMBitMap* _bitmap;
698 bool _suspendible; // If suspendible, do yield checks.
699
700 bool suspendible() {
701 return _suspendible;
702 }
703
704 bool is_clear_concurrent_undo() {
705 return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
706 }
707
708 bool has_aborted() {
709 if (suspendible()) {
710 _cm->do_yield_check();
711 return _cm->has_aborted();
712 }
713 return false;
714 }
715
716 HeapWord* region_clear_limit(G1HeapRegion* r) {
717 // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
718 // live_words data are current wrt to the _mark_bitmap. We use this information
719 // to only clear ranges of the bitmap that require clearing.
720 if (is_clear_concurrent_undo()) {
721 // No need to clear bitmaps for empty regions (which includes regions we
722 // did not mark through).
723 if (!_cm->contains_live_object(r->hrm_index())) {
724 assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
725 return r->bottom();
726 }
727 assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
728 }
729 return r->end();
730 }
731
732 public:
733 G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
734 G1HeapRegionClosure(),
735 _cm(cm),
736 _bitmap(cm->mark_bitmap()),
737 _suspendible(suspendible)
738 { }
739
740 virtual bool do_heap_region(G1HeapRegion* r) {
741 if (has_aborted()) {
742 return true;
743 }
744
745 HeapWord* cur = r->bottom();
746 HeapWord* const end = region_clear_limit(r);
747
748 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
749
750 while (cur < end) {
751
752 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
753 _bitmap->clear_range(mr);
754
755 cur += chunk_size_in_words;
756
757 // Repeat the asserts from before the start of the closure. We will do them
758 // as asserts here to minimize their overhead on the product. However, we
759 // will have them as guarantees at the beginning / end of the bitmap
760 // clearing to get some checking in the product.
761 assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
762
763 // Abort iteration if necessary.
764 if (has_aborted()) {
765 return true;
766 }
767 }
768 assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
769
770 _cm->reset_top_at_mark_start(r);
771
772 return false;
773 }
774 };
775
776 G1ClearBitmapHRClosure _cl;
777 G1HeapRegionClaimer _hr_claimer;
778 bool _suspendible; // If the task is suspendible, workers must join the STS.
779
780 public:
781 G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
782 WorkerTask("G1 Clear Bitmap"),
783 _cl(cm, suspendible),
784 _hr_claimer(n_workers),
785 _suspendible(suspendible)
786 { }
787
788 void work(uint worker_id) {
789 SuspendibleThreadSetJoiner sts_join(_suspendible);
790 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
791 }
792 };
793
794 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
795 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
796
797 size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
798 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
799
800 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
801
802 G1ClearBitMapTask cl(this, num_workers, may_yield);
803
804 log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
805 workers->run_task(&cl, num_workers);
806 }
807
808 void G1ConcurrentMark::cleanup_for_next_mark() {
809 // Make sure that the concurrent mark thread looks to still be in
810 // the current cycle.
811 guarantee(is_in_reset_for_next_cycle(), "invariant");
812
813 clear_bitmap(_concurrent_workers, true);
814
815 reset_partial_array_state_manager();
816
817 // Should not have changed state yet (even if a Full GC interrupted us).
818 guarantee(is_in_reset_for_next_cycle(), "invariant");
819 }
820
821 void G1ConcurrentMark::reset_partial_array_state_manager() {
822 for (uint i = 0; i < _max_num_tasks; ++i) {
823 _tasks[i]->unregister_partial_array_splitter();
824 }
825
826 partial_array_state_manager()->reset();
827
828 for (uint i = 0; i < _max_num_tasks; ++i) {
829 _tasks[i]->register_partial_array_splitter();
830 }
831 }
832
833 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
834 assert_at_safepoint_on_vm_thread();
835 // To avoid fragmentation the full collection requesting to clear the bitmap
836 // might use fewer workers than available. To ensure the bitmap is cleared
837 // as efficiently as possible the number of active workers are temporarily
838 // increased to include all currently created workers.
839 WithActiveWorkers update(workers, workers->created_workers());
840 clear_bitmap(workers, false);
841 }
842
843 class G1PreConcurrentStartTask : public G1BatchedTask {
844 // Reset marking state.
845 class ResetMarkingStateTask;
846 // For each region note start of marking.
847 class NoteStartOfMarkTask;
848
849 public:
850 G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
851 };
852
853 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
854 G1ConcurrentMark* _cm;
855 public:
856 ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
857
858 double worker_cost() const override { return 1.0; }
859 void do_work(uint worker_id) override;
860 };
861
862 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
863
864 class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
865 G1ConcurrentMark* _cm;
866
867 public:
868 NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
869
870 bool do_heap_region(G1HeapRegion* r) override {
871 if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
872 _cm->update_top_at_mark_start(r);
873 } else {
874 _cm->reset_top_at_mark_start(r);
875 }
876 return false;
877 }
878 } _region_cl;
879
880 G1HeapRegionClaimer _claimer;
881 public:
882 NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
883
884 double worker_cost() const override {
885 // The work done per region is very small, therefore we choose this magic number to cap the number
886 // of threads used when there are few regions.
887 const double regions_per_thread = 1000;
888 return _claimer.n_regions() / regions_per_thread;
889 }
890
891 void set_max_workers(uint max_workers) override {
892 _claimer.set_n_workers(max_workers);
893 }
894
895 void do_work(uint worker_id) override {
896 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
897 }
898 };
899
900 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
901 // Reset marking state.
902 _cm->reset();
903 }
904
905 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
906 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
907 add_serial_task(new ResetMarkingStateTask(cm));
908 add_parallel_task(new NoteStartOfMarkTask());
909 };
910
911 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
912 assert_at_safepoint_on_vm_thread();
913
914 G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
915
916 ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
917
918 G1PreConcurrentStartTask cl(cause, this);
919 G1CollectedHeap::heap()->run_batch_task(&cl);
920
921 _gc_tracer_cm->set_gc_cause(cause);
922 }
923
924 void G1ConcurrentMark::start_full_concurrent_cycle() {
925 // Start Concurrent Marking weak-reference discovery.
926 ReferenceProcessor* rp = _g1h->ref_processor_cm();
927 rp->start_discovery(false /* always_clear */);
928
929 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
930 // This is the start of the marking cycle, we're expected all
931 // threads to have SATB queues with active set to false.
932 satb_mq_set.set_active_all_threads(true, /* new active value */
933 false /* expected_active */);
934
935 // update_g1_committed() will be called at the end of an evac pause
936 // when marking is on. So, it's also called at the end of the
937 // concurrent start pause to update the heap end, if the heap expands
938 // during it. No need to call it here.
939
940 // Signal the thread to start work.
941 cm_thread()->start_full_cycle();
942 }
943
944 void G1ConcurrentMark::start_undo_concurrent_cycle() {
945 assert_at_safepoint_on_vm_thread();
946 // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
947 assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
948
949 abort_root_region_scan_at_safepoint();
950
951 // Signal the thread to start work.
952 cm_thread()->start_undo_cycle();
953 }
954
955 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
956 cm_thread()->set_idle();
957 }
958
959 void G1ConcurrentMark::stop() {
960 if (is_fully_initialized()) {
961 cm_thread()->stop();
962 }
963 }
964
965 /*
966 * Notice that in the next two methods, we actually leave the STS
967 * during the barrier sync and join it immediately afterwards. If we
968 * do not do this, the following deadlock can occur: one thread could
969 * be in the barrier sync code, waiting for the other thread to also
970 * sync up, whereas another one could be trying to yield, while also
971 * waiting for the other threads to sync up too.
972 *
973 * Note, however, that this code is also used during remark and in
974 * this case we should not attempt to leave / enter the STS, otherwise
975 * we'll either hit an assert (debug / fastdebug) or deadlock
976 * (product). So we should only leave / enter the STS if we are
977 * operating concurrently.
978 *
979 * Because the thread that does the sync barrier has left the STS, it
980 * is possible to be suspended for a Full GC or an evacuation pause
981 * could occur. This is actually safe, since the entering the sync
982 * barrier is one of the last things do_marking_step() does, and it
983 * doesn't manipulate any data structures afterwards.
984 */
985
986 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
987 bool barrier_aborted;
988 {
989 SuspendibleThreadSetLeaver sts_leave(concurrent());
990 barrier_aborted = !_first_overflow_barrier_sync.enter();
991 }
992
993 // at this point everyone should have synced up and not be doing any
994 // more work
995
996 if (barrier_aborted) {
997 // If the barrier aborted we ignore the overflow condition and
998 // just abort the whole marking phase as quickly as possible.
999 return;
1000 }
1001 }
1002
1003 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1004 SuspendibleThreadSetLeaver sts_leave(concurrent());
1005 _second_overflow_barrier_sync.enter();
1006
1007 // at this point everything should be re-initialized and ready to go
1008 }
1009
1010 class G1CMConcurrentMarkingTask : public WorkerTask {
1011 G1ConcurrentMark* _cm;
1012
1013 public:
1014 void work(uint worker_id) {
1015 ResourceMark rm;
1016
1017 SuspendibleThreadSetJoiner sts_join;
1018
1019 assert(worker_id < _cm->active_tasks(), "invariant");
1020
1021 G1CMTask* task = _cm->task(worker_id);
1022 task->record_start_time();
1023 if (!_cm->has_aborted()) {
1024 do {
1025 task->do_marking_step(G1ConcMarkStepDurationMillis,
1026 true /* do_termination */,
1027 false /* is_serial*/);
1028
1029 _cm->do_yield_check();
1030 } while (!_cm->has_aborted() && task->has_aborted());
1031 }
1032 task->record_end_time();
1033 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1034 }
1035
1036 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1037 WorkerTask("Concurrent Mark"), _cm(cm) { }
1038
1039 ~G1CMConcurrentMarkingTask() { }
1040 };
1041
1042 uint G1ConcurrentMark::calc_active_marking_workers() {
1043 uint result = 0;
1044 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1045 result = _max_concurrent_workers;
1046 } else {
1047 result =
1048 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1049 1, /* Minimum workers */
1050 _num_concurrent_workers,
1051 Threads::number_of_non_daemon_threads());
1052 // Don't scale the result down by scale_concurrent_workers() because
1053 // that scaling has already gone into "_max_concurrent_workers".
1054 }
1055 assert(result > 0 && result <= _max_concurrent_workers,
1056 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1057 _max_concurrent_workers, result);
1058 return result;
1059 }
1060
1061 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1062 return _root_region_scan_aborted.load_relaxed();
1063 }
1064
1065 #ifndef PRODUCT
1066 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1067 assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1068 }
1069 #endif
1070
1071 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1072 #ifdef ASSERT
1073 HeapWord* last = region->last();
1074 G1HeapRegion* hr = _g1h->heap_region_containing(last);
1075 assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1076 "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1077 assert(top_at_mark_start(hr) == region->start(),
1078 "MemRegion start should be equal to TAMS");
1079 #endif
1080
1081 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1082
1083 const uintx interval = PrefetchScanIntervalInBytes;
1084 HeapWord* curr = region->start();
1085 const HeapWord* end = region->end();
1086 while (curr < end) {
1087 Prefetch::read(curr, interval);
1088 oop obj = cast_to_oop(curr);
1089 size_t size = obj->oop_iterate_size(&cl);
1090 assert(size == obj->size(), "sanity");
1091 curr += size;
1092 }
1093 }
1094
1095 class G1CMRootRegionScanTask : public WorkerTask {
1096 G1ConcurrentMark* _cm;
1097 bool _should_yield;
1098
1099 public:
1100 G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1101 WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1102
1103 void work(uint worker_id) {
1104 SuspendibleThreadSetJoiner sts_join(_should_yield);
1105
1106 while (true) {
1107 if (_cm->has_root_region_scan_aborted()) {
1108 return;
1109 }
1110 G1CMRootMemRegions* root_regions = _cm->root_regions();
1111 const MemRegion* region = root_regions->claim_next();
1112 if (region == nullptr) {
1113 return;
1114 }
1115 _cm->scan_root_region(region, worker_id);
1116 if (_should_yield) {
1117 SuspendibleThreadSet::yield();
1118 // If we yielded, a GC may have processed all root regions,
1119 // so this loop will naturally exit on the next claim_next() call.
1120 // Same if a Full GC signalled abort of the concurrent mark.
1121 }
1122 }
1123 }
1124 };
1125
1126 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1127 // We first check whether there is any work to do as we might have already aborted
1128 // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
1129 // We want to avoid spinning up the worker threads if that happened.
1130 // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1131 //
1132 // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1133 // a young gc doing the work.
1134 //
1135 // Concurrent gc threads enter an STS when starting the task, so they stop, then
1136 // continue after that safepoint.
1137 //
1138 // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1139 // consistent view of the value containing the number of remaining regions across the
1140 // usages below. The safepoint/gc may already be running and modifying it
1141 // while this code is still executing.
1142 uint num_remaining = root_regions()->num_remaining_regions();
1143 bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1144 if (do_scan) {
1145 // Assign one worker to each root-region but subject to the max constraint.
1146 // The constraint is also important to avoid accesses beyond the allocated per-worker
1147 // marking helper data structures. We might get passed different WorkerThreads with
1148 // different number of threads (potential worker ids) than helper data structures when
1149 // completing this work during GC.
1150 const uint num_workers = MIN2(num_remaining,
1151 _max_concurrent_workers);
1152
1153 G1CMRootRegionScanTask task(this, concurrent);
1154 log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1155 task.name(), num_workers, num_remaining);
1156 workers->run_task(&task, num_workers);
1157 }
1158
1159 // At the end of this method, we can re-read num_remaining() in the assert: either
1160 // we got non-zero above and we processed all root regions (and it must be zero
1161 // after the worker task synchronization) or it had already been zero. We also
1162 // can't have started another concurrent cycle that could have set it to something else
1163 // while still in the concurrent cycle (if called concurrently).
1164 assert_root_region_scan_completed_or_aborted();
1165
1166 return do_scan;
1167 }
1168
1169 void G1ConcurrentMark::scan_root_regions_concurrently() {
1170 assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1171 scan_root_regions(_concurrent_workers, true /* concurrent */);
1172 }
1173
1174 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1175 assert_at_safepoint_on_vm_thread();
1176 return scan_root_regions(_g1h->workers(), false /* concurrent */);
1177 }
1178
1179 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1180 root_regions()->add(top_at_mark_start(r), r->top());
1181 }
1182
1183 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1184 return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1185 }
1186
1187 void G1ConcurrentMark::abort_root_region_scan() {
1188 assert_not_at_safepoint();
1189
1190 _root_region_scan_aborted.store_relaxed(true);
1191 }
1192
1193 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1194 assert_at_safepoint_on_vm_thread();
1195
1196 _root_region_scan_aborted.store_relaxed(true);
1197 }
1198
1199 void G1ConcurrentMark::concurrent_cycle_start() {
1200 _gc_timer_cm->register_gc_start();
1201
1202 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1203
1204 _g1h->trace_heap_before_gc(_gc_tracer_cm);
1205 }
1206
1207 uint G1ConcurrentMark::completed_mark_cycles() const {
1208 return _completed_mark_cycles.load_relaxed();
1209 }
1210
1211 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1212 _g1h->trace_heap_after_gc(_gc_tracer_cm);
1213
1214 if (mark_cycle_completed) {
1215 _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1216 }
1217
1218 if (has_aborted()) {
1219 log_info(gc, marking)("Concurrent Mark Abort");
1220 _gc_tracer_cm->report_concurrent_mode_failure();
1221 }
1222
1223 _gc_timer_cm->register_gc_end();
1224
1225 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1226 }
1227
1228 void G1ConcurrentMark::mark_from_roots() {
1229 _restart_for_overflow.store_relaxed(false);
1230
1231 uint active_workers = calc_active_marking_workers();
1232
1233 // Setting active workers is not guaranteed since fewer
1234 // worker threads may currently exist and more may not be
1235 // available.
1236 active_workers = _concurrent_workers->set_active_workers(active_workers);
1237 log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1238
1239 _num_concurrent_workers = active_workers;
1240
1241 // Parallel task terminator is set in "set_concurrency_and_phase()"
1242 set_concurrency_and_phase(active_workers, true /* concurrent */);
1243
1244 G1CMConcurrentMarkingTask marking_task(this);
1245 _concurrent_workers->run_task(&marking_task);
1246 print_stats();
1247 }
1248
1249 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1250 static const char* location_strings[] = { "Remark Before",
1251 "Remark After",
1252 "Remark Overflow",
1253 "Cleanup Before",
1254 "Cleanup After" };
1255 return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1256 }
1257
1258 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1259 VerifyLocation location) {
1260 G1HeapVerifier* verifier = _g1h->verifier();
1261
1262 verifier->verify_region_sets_optional();
1263
1264 const char* caller = verify_location_string(location);
1265
1266 if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1267 GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1268
1269 size_t const BufLen = 512;
1270 char buffer[BufLen];
1271
1272 jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1273 verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1274
1275 // Only check bitmap in Remark, and not at After-Verification because the regions
1276 // already have their TAMS'es reset.
1277 if (location != VerifyLocation::RemarkAfter) {
1278 verifier->verify_bitmap_clear(true /* above_tams_only */);
1279 }
1280 }
1281 }
1282
1283 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1284 G1CollectedHeap* _g1h;
1285 public:
1286 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1287
1288 bool do_object_b(oop obj) {
1289 return !_g1h->is_obj_dead(obj);
1290 }
1291 };
1292
1293 void G1ConcurrentMark::remark() {
1294 assert_at_safepoint_on_vm_thread();
1295
1296 // If a full collection has happened, we should not continue. However we might
1297 // have ended up here as the Remark VM operation has been scheduled already.
1298 if (has_aborted()) {
1299 return;
1300 }
1301
1302 G1Policy* policy = _g1h->policy();
1303 policy->record_pause_start_time();
1304
1305 double start = os::elapsedTime();
1306
1307 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1308
1309 {
1310 GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1311 finalize_marking();
1312 }
1313
1314 double mark_work_end = os::elapsedTime();
1315
1316 bool const mark_finished = !has_overflown();
1317 if (mark_finished) {
1318 weak_refs_work();
1319
1320 // Unload Klasses, String, Code Cache, etc.
1321 if (ClassUnloadingWithConcurrentMark) {
1322 G1CMIsAliveClosure is_alive(this);
1323 _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1324 }
1325
1326 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1327 // We're done with marking.
1328 // This is the end of the marking cycle, we're expected all
1329 // threads to have SATB queues with active set to true.
1330 satb_mq_set.set_active_all_threads(false, /* new active value */
1331 true /* expected_active */);
1332
1333 {
1334 GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1335 flush_all_task_caches();
1336 }
1337
1338 // All marking completed. Check bitmap now as we will start to reset TAMSes
1339 // in parallel below so that we can not do this in the After-Remark verification.
1340 _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1341
1342 {
1343 GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1344
1345 G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1346 uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1347 _g1h->workers()->active_workers());
1348 log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1349 _g1h->workers()->run_task(&cl, num_workers);
1350
1351 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1352 _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1353
1354 _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1355
1356 if (_needs_remembered_set_rebuild) {
1357 GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1358 _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1359 }
1360 }
1361
1362 if (log_is_enabled(Trace, gc, liveness)) {
1363 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1364 _g1h->heap_region_iterate(&cl);
1365 }
1366
1367 // Potentially, some empty-regions have been reclaimed; make this a
1368 // "collection" so that pending allocation can retry before attempting a
1369 // GC pause.
1370 _g1h->increment_total_collections();
1371
1372 // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1373 // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1374 // triggered, it likely means there are very few regular GCs, making resizing
1375 // based on gc heuristics less effective.
1376 if (_g1h->last_gc_was_periodic()) {
1377 _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1378 }
1379
1380 compute_new_sizes();
1381
1382 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1383
1384 assert(!restart_for_overflow(), "sanity");
1385 // Completely reset the marking state (except bitmaps) since marking completed.
1386 reset_at_marking_complete();
1387
1388 G1CollectedHeap::finish_codecache_marking_cycle();
1389
1390 {
1391 GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1392 G1ObjectCountIsAliveClosure is_alive(_g1h);
1393 _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1394 }
1395
1396 // Successfully completed marking, advance state.
1397 cm_thread()->set_full_cycle_rebuild_and_scrub();
1398 } else {
1399 // We overflowed. Restart concurrent marking.
1400 _restart_for_overflow.store_relaxed(true);
1401
1402 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1403
1404 // Clear the marking state because we will be restarting
1405 // marking due to overflowing the global mark stack.
1406 reset_marking_for_restart();
1407 }
1408
1409 // Statistics
1410 double now = os::elapsedTime();
1411 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1412 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1413 _remark_times.add((now - start) * 1000.0);
1414
1415 _g1h->update_perf_counter_cpu_time();
1416
1417 policy->record_concurrent_mark_remark_end();
1418
1419 return;
1420 }
1421
1422 void G1ConcurrentMark::compute_new_sizes() {
1423 MetaspaceGC::compute_new_size();
1424
1425 // Cleanup will have freed any regions completely full of garbage.
1426 // Update the soft reference policy with the new heap occupancy.
1427 Universe::heap()->update_capacity_and_used_at_gc();
1428
1429 // We reclaimed old regions so we should calculate the sizes to make
1430 // sure we update the old gen/space data.
1431 _g1h->monitoring_support()->update_sizes();
1432 }
1433
1434 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1435 G1CollectedHeap* _g1h;
1436
1437 public:
1438 G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1439
1440 bool do_heap_region(G1HeapRegion* r) override {
1441 // Update the remset tracking state from updating to complete
1442 // if remembered sets have been rebuilt.
1443 _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1444 return false;
1445 }
1446 };
1447
1448 void G1ConcurrentMark::cleanup() {
1449 assert_at_safepoint_on_vm_thread();
1450
1451 // If a full collection has happened, we shouldn't do this.
1452 if (has_aborted()) {
1453 return;
1454 }
1455
1456 G1Policy* policy = _g1h->policy();
1457 policy->record_pause_start_time();
1458
1459 double start = os::elapsedTime();
1460
1461 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1462
1463 if (needs_remembered_set_rebuild()) {
1464 // Update the remset tracking information as well as marking all regions
1465 // as fully parsable.
1466 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1467 G1UpdateRegionsAfterRebuild cl(_g1h);
1468 _g1h->heap_region_iterate(&cl);
1469 } else {
1470 log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1471 }
1472
1473 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1474
1475 // Local statistics
1476 _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1477
1478 {
1479 GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1480 policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1481 }
1482
1483 // Advance state.
1484 cm_thread()->set_full_cycle_reset_for_next_cycle();
1485 return;
1486 }
1487
1488 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1489 // Uses the G1CMTask associated with a worker thread (for serial reference
1490 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1491 // trace referent objects.
1492 //
1493 // Using the G1CMTask and embedded local queues avoids having the worker
1494 // threads operating on the global mark stack. This reduces the risk
1495 // of overflowing the stack - which we would rather avoid at this late
1496 // state. Also using the tasks' local queues removes the potential
1497 // of the workers interfering with each other that could occur if
1498 // operating on the global stack.
1499
1500 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1501 G1ConcurrentMark* _cm;
1502 G1CMTask* _task;
1503 uint _ref_counter_limit;
1504 uint _ref_counter;
1505 bool _is_serial;
1506 public:
1507 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1508 _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1509 _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1510 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1511 }
1512
1513 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1514 virtual void do_oop( oop* p) { do_oop_work(p); }
1515
1516 template <class T> void do_oop_work(T* p) {
1517 if (_cm->has_overflown()) {
1518 return;
1519 }
1520 if (!_task->deal_with_reference(p)) {
1521 // We did not add anything to the mark bitmap (or mark stack), so there is
1522 // no point trying to drain it.
1523 return;
1524 }
1525 _ref_counter--;
1526
1527 if (_ref_counter == 0) {
1528 // We have dealt with _ref_counter_limit references, pushing them
1529 // and objects reachable from them on to the local stack (and
1530 // possibly the global stack). Call G1CMTask::do_marking_step() to
1531 // process these entries.
1532 //
1533 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1534 // there's nothing more to do (i.e. we're done with the entries that
1535 // were pushed as a result of the G1CMTask::deal_with_reference() calls
1536 // above) or we overflow.
1537 //
1538 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1539 // flag while there may still be some work to do. (See the comment at
1540 // the beginning of G1CMTask::do_marking_step() for those conditions -
1541 // one of which is reaching the specified time target.) It is only
1542 // when G1CMTask::do_marking_step() returns without setting the
1543 // has_aborted() flag that the marking step has completed.
1544 do {
1545 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1546 _task->do_marking_step(mark_step_duration_ms,
1547 false /* do_termination */,
1548 _is_serial);
1549 } while (_task->has_aborted() && !_cm->has_overflown());
1550 _ref_counter = _ref_counter_limit;
1551 }
1552 }
1553 };
1554
1555 // 'Drain' oop closure used by both serial and parallel reference processing.
1556 // Uses the G1CMTask associated with a given worker thread (for serial
1557 // reference processing the G1CMtask for worker 0 is used). Calls the
1558 // do_marking_step routine, with an unbelievably large timeout value,
1559 // to drain the marking data structures of the remaining entries
1560 // added by the 'keep alive' oop closure above.
1561
1562 class G1CMDrainMarkingStackClosure : public VoidClosure {
1563 G1ConcurrentMark* _cm;
1564 G1CMTask* _task;
1565 bool _is_serial;
1566 public:
1567 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1568 _cm(cm), _task(task), _is_serial(is_serial) {
1569 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1570 }
1571
1572 void do_void() {
1573 do {
1574 // We call G1CMTask::do_marking_step() to completely drain the local
1575 // and global marking stacks of entries pushed by the 'keep alive'
1576 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1577 //
1578 // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1579 // if there's nothing more to do (i.e. we've completely drained the
1580 // entries that were pushed as a result of applying the 'keep alive'
1581 // closure to the entries on the discovered ref lists) or we overflow
1582 // the global marking stack.
1583 //
1584 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1585 // flag while there may still be some work to do. (See the comment at
1586 // the beginning of G1CMTask::do_marking_step() for those conditions -
1587 // one of which is reaching the specified time target.) It is only
1588 // when G1CMTask::do_marking_step() returns without setting the
1589 // has_aborted() flag that the marking step has completed.
1590
1591 _task->do_marking_step(1000000000.0 /* something very large */,
1592 true /* do_termination */,
1593 _is_serial);
1594 } while (_task->has_aborted() && !_cm->has_overflown());
1595 }
1596 };
1597
1598 class G1CMRefProcProxyTask : public RefProcProxyTask {
1599 G1CollectedHeap& _g1h;
1600 G1ConcurrentMark& _cm;
1601
1602 public:
1603 G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1604 : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1605 _g1h(g1h),
1606 _cm(cm) {}
1607
1608 void work(uint worker_id) override {
1609 assert(worker_id < _max_workers, "sanity");
1610 G1CMIsAliveClosure is_alive(&_cm);
1611 uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1612 G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1613 BarrierEnqueueDiscoveredFieldClosure enqueue;
1614 G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1615 _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1616 }
1617
1618 void prepare_run_task_hook() override {
1619 // We need to reset the concurrency level before each
1620 // proxy task execution, so that the termination protocol
1621 // and overflow handling in G1CMTask::do_marking_step() knows
1622 // how many workers to wait for.
1623 _cm.set_concurrency(_queue_count);
1624 }
1625 };
1626
1627 void G1ConcurrentMark::weak_refs_work() {
1628 ResourceMark rm;
1629
1630 {
1631 GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1632
1633 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1634
1635 // See the comment in G1CollectedHeap::ref_processing_init()
1636 // about how reference processing currently works in G1.
1637
1638 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1639
1640 // Prefer to grow the stack until the max capacity.
1641 _global_mark_stack.set_should_grow();
1642
1643 // Parallel processing task executor.
1644 G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1645 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1646
1647 // Process the weak references.
1648 const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1649 _gc_tracer_cm->report_gc_reference_stats(stats);
1650 pt.print_all_references();
1651
1652 // The do_oop work routines of the keep_alive and drain_marking_stack
1653 // oop closures will set the has_overflown flag if we overflow the
1654 // global marking stack.
1655
1656 assert(has_overflown() || _global_mark_stack.is_empty(),
1657 "Mark stack should be empty (unless it has overflown)");
1658 }
1659
1660 if (has_overflown()) {
1661 // We can not trust g1_is_alive and the contents of the heap if the marking stack
1662 // overflowed while processing references. Exit the VM.
1663 fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1664 "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1665 "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1666 _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1667 return;
1668 }
1669
1670 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1671
1672 {
1673 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1674 G1CMIsAliveClosure is_alive(this);
1675 WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1676 }
1677 }
1678
1679 class G1PrecleanYieldClosure : public YieldClosure {
1680 G1ConcurrentMark* _cm;
1681
1682 public:
1683 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1684
1685 virtual bool should_return() {
1686 return _cm->has_aborted();
1687 }
1688
1689 virtual bool should_return_fine_grain() {
1690 _cm->do_yield_check();
1691 return _cm->has_aborted();
1692 }
1693 };
1694
1695 void G1ConcurrentMark::preclean() {
1696 assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1697
1698 SuspendibleThreadSetJoiner joiner;
1699
1700 BarrierEnqueueDiscoveredFieldClosure enqueue;
1701
1702 set_concurrency_and_phase(1, true);
1703
1704 G1PrecleanYieldClosure yield_cl(this);
1705
1706 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1707 // Precleaning is single threaded. Temporarily disable MT discovery.
1708 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1709 rp->preclean_discovered_references(rp->is_alive_non_header(),
1710 &enqueue,
1711 &yield_cl,
1712 _gc_timer_cm);
1713 }
1714
1715 // Closure for marking entries in SATB buffers.
1716 class G1CMSATBBufferClosure : public SATBBufferClosure {
1717 private:
1718 G1CMTask* _task;
1719 G1CollectedHeap* _g1h;
1720
1721 // This is very similar to G1CMTask::deal_with_reference, but with
1722 // more relaxed requirements for the argument, so this must be more
1723 // circumspect about treating the argument as an object.
1724 void do_entry(void* entry) const {
1725 _task->increment_refs_reached();
1726 oop const obj = cast_to_oop(entry);
1727 _task->make_reference_grey(obj);
1728 }
1729
1730 public:
1731 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1732 : _task(task), _g1h(g1h) { }
1733
1734 virtual void do_buffer(void** buffer, size_t size) {
1735 for (size_t i = 0; i < size; ++i) {
1736 do_entry(buffer[i]);
1737 }
1738 }
1739 };
1740
1741 class G1RemarkThreadsClosure : public ThreadClosure {
1742 G1SATBMarkQueueSet& _qset;
1743
1744 public:
1745 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1746 _qset(G1BarrierSet::satb_mark_queue_set()) {}
1747
1748 void do_thread(Thread* thread) {
1749 // Transfer any partial buffer to the qset for completed buffer processing.
1750 _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1751 }
1752 };
1753
1754 class G1CMRemarkTask : public WorkerTask {
1755 // For Threads::possibly_parallel_threads_do
1756 ThreadsClaimTokenScope _threads_claim_token_scope;
1757 G1ConcurrentMark* _cm;
1758 public:
1759 void work(uint worker_id) {
1760 G1CMTask* task = _cm->task(worker_id);
1761 task->record_start_time();
1762 {
1763 ResourceMark rm;
1764
1765 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1766 Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1767 }
1768
1769 do {
1770 task->do_marking_step(1000000000.0 /* something very large */,
1771 true /* do_termination */,
1772 false /* is_serial */);
1773 } while (task->has_aborted() && !_cm->has_overflown());
1774 // If we overflow, then we do not want to restart. We instead
1775 // want to abort remark and do concurrent marking again.
1776 task->record_end_time();
1777 }
1778
1779 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1780 WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1781 _cm->terminator()->reset_for_reuse(active_workers);
1782 }
1783 };
1784
1785 void G1ConcurrentMark::finalize_marking() {
1786 ResourceMark rm;
1787
1788 _g1h->ensure_parsability(false);
1789
1790 // this is remark, so we'll use up all active threads
1791 uint active_workers = _g1h->workers()->active_workers();
1792 set_concurrency_and_phase(active_workers, false /* concurrent */);
1793 // Leave _parallel_marking_threads at it's
1794 // value originally calculated in the G1ConcurrentMark
1795 // constructor and pass values of the active workers
1796 // through the task.
1797
1798 {
1799 G1CMRemarkTask remarkTask(this, active_workers);
1800 // We will start all available threads, even if we decide that the
1801 // active_workers will be fewer. The extra ones will just bail out
1802 // immediately.
1803 _g1h->workers()->run_task(&remarkTask);
1804 }
1805
1806 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1807 guarantee(has_overflown() ||
1808 satb_mq_set.completed_buffers_num() == 0,
1809 "Invariant: has_overflown = %s, num buffers = %zu",
1810 BOOL_TO_STR(has_overflown()),
1811 satb_mq_set.completed_buffers_num());
1812
1813 print_stats();
1814 }
1815
1816 void G1ConcurrentMark::flush_all_task_caches() {
1817 size_t hits = 0;
1818 size_t misses = 0;
1819 for (uint i = 0; i < _max_num_tasks; i++) {
1820 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1821 hits += stats.first;
1822 misses += stats.second;
1823 }
1824 size_t sum = hits + misses;
1825 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1826 hits, misses, percent_of(hits, sum));
1827 }
1828
1829 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1830 assert_at_safepoint();
1831 _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1832 }
1833
1834 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1835 // "Checkpoint" the finger.
1836 HeapWord* local_finger = finger();
1837
1838 while (local_finger < _heap.end()) {
1839 assert(_g1h->is_in_reserved(local_finger), "invariant");
1840
1841 G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1842 // Make sure that the reads below do not float before loading curr_region.
1843 OrderAccess::loadload();
1844 // Above heap_region_containing may return null as we always scan claim
1845 // until the end of the heap. In this case, just jump to the next region.
1846 HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1847
1848 // Is the gap between reading the finger and doing the CAS too long?
1849 HeapWord* res = _finger.compare_exchange(local_finger, end);
1850 if (res == local_finger && curr_region != nullptr) {
1851 // We succeeded.
1852 HeapWord* bottom = curr_region->bottom();
1853 HeapWord* limit = top_at_mark_start(curr_region);
1854
1855 log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1856 // Notice that _finger == end cannot be guaranteed here since,
1857 // someone else might have moved the finger even further.
1858 assert(finger() >= end, "The finger should have moved forward");
1859
1860 if (limit > bottom) {
1861 return curr_region;
1862 } else {
1863 assert(limit == bottom,
1864 "The region limit should be at bottom");
1865 // We return null and the caller should try calling
1866 // claim_region() again.
1867 return nullptr;
1868 }
1869 } else {
1870 // Read the finger again.
1871 HeapWord* next_finger = finger();
1872 assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
1873 local_finger = next_finger;
1874 }
1875 }
1876
1877 return nullptr;
1878 }
1879
1880 #ifndef PRODUCT
1881 class VerifyNoCSetOops {
1882 G1CollectedHeap* _g1h;
1883 const char* _phase;
1884 int _info;
1885
1886 public:
1887 VerifyNoCSetOops(const char* phase, int info = -1) :
1888 _g1h(G1CollectedHeap::heap()),
1889 _phase(phase),
1890 _info(info)
1891 { }
1892
1893 void operator()(G1TaskQueueEntry task_entry) const {
1894 if (task_entry.is_partial_array_state()) {
1895 oop obj = task_entry.to_partial_array_state()->source();
1896 guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1897 return;
1898 }
1899 guarantee(oopDesc::is_oop(task_entry.to_oop()),
1900 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1901 p2i(task_entry.to_oop()), _phase, _info);
1902 G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1903 guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1904 "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1905 p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1906 }
1907 };
1908
1909 void G1ConcurrentMark::verify_no_collection_set_oops() {
1910 assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1911 "should be at a safepoint or initializing");
1912 if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1913 return;
1914 }
1915
1916 // Verify entries on the global mark stack
1917 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1918
1919 // Verify entries on the task queues
1920 for (uint i = 0; i < _max_num_tasks; ++i) {
1921 G1CMTaskQueue* queue = _task_queues->queue(i);
1922 queue->iterate(VerifyNoCSetOops("Queue", i));
1923 }
1924
1925 // Verify the global finger
1926 HeapWord* global_finger = finger();
1927 if (global_finger != nullptr && global_finger < _heap.end()) {
1928 // Since we always iterate over all regions, we might get a null G1HeapRegion
1929 // here.
1930 G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1931 guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1932 "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1933 p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1934 }
1935
1936 // Verify the task fingers
1937 assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1938 for (uint i = 0; i < _num_concurrent_workers; ++i) {
1939 G1CMTask* task = _tasks[i];
1940 HeapWord* task_finger = task->finger();
1941 if (task_finger != nullptr && task_finger < _heap.end()) {
1942 // See above note on the global finger verification.
1943 G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
1944 guarantee(r == nullptr || task_finger == r->bottom() ||
1945 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1946 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1947 p2i(task_finger), HR_FORMAT_PARAMS(r));
1948 }
1949 }
1950 }
1951 #endif // PRODUCT
1952
1953 void G1ConcurrentMark::rebuild_and_scrub() {
1954 if (!needs_remembered_set_rebuild()) {
1955 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1956 }
1957
1958 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1959 }
1960
1961 void G1ConcurrentMark::print_stats() {
1962 if (!log_is_enabled(Debug, gc, stats)) {
1963 return;
1964 }
1965 log_debug(gc, stats)("---------------------------------------------------------------------");
1966 for (size_t i = 0; i < _num_active_tasks; ++i) {
1967 _tasks[i]->print_stats();
1968 log_debug(gc, stats)("---------------------------------------------------------------------");
1969 }
1970 }
1971
1972 bool G1ConcurrentMark::concurrent_cycle_abort() {
1973 assert_at_safepoint_on_vm_thread();
1974 assert(_g1h->collector_state()->is_in_full_gc(), "must be");
1975
1976 // If we start the compaction before the CM threads finish
1977 // scanning the root regions we might trip them over as we'll
1978 // be moving objects / updating references. Since the root region
1979 // scan synchronized with the safepoint, just tell it to abort.
1980 // It will notice when the threads start up again later.
1981 abort_root_region_scan_at_safepoint();
1982
1983 // We haven't started a concurrent cycle no need to do anything; we might have
1984 // aborted the marking because of shutting down though. In this case the marking
1985 // might have already completed the abort (leading to in_progress() below to
1986 // return false), however this still left marking state particularly in the
1987 // shared marking bitmap that must be cleaned up.
1988 // If there are multiple full gcs during shutdown we do this work repeatedly for
1989 // nothing, but this situation should be extremely rare (a full gc after shutdown
1990 // has been signalled is already rare), and this work should be negligible compared
1991 // to actual full gc work.
1992
1993 if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
1994 return false;
1995 }
1996
1997 reset_marking_for_restart();
1998
1999 abort_marking_threads();
2000
2001 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2002 satb_mq_set.abandon_partial_marking();
2003 // This can be called either during or outside marking, we'll read
2004 // the expected_active value from the SATB queue set.
2005 satb_mq_set.set_active_all_threads(false, /* new active value */
2006 satb_mq_set.is_active() /* expected_active */);
2007 return true;
2008 }
2009
2010 void G1ConcurrentMark::abort_marking_threads() {
2011 assert_root_region_scan_completed_or_aborted();
2012 _has_aborted.store_relaxed(true);
2013 _first_overflow_barrier_sync.abort();
2014 _second_overflow_barrier_sync.abort();
2015 }
2016
2017 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2018 class CountCpuTimeThreadClosure : public ThreadClosure {
2019 public:
2020 jlong _total_cpu_time;
2021
2022 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2023
2024 void do_thread(Thread* t) {
2025 _total_cpu_time += os::thread_cpu_time(t);
2026 }
2027 } cl;
2028
2029 threads_do(&cl);
2030
2031 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2032 }
2033
2034 static void print_ms_time_info(const char* prefix, const char* name,
2035 NumberSeq& ns) {
2036 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2037 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2038 if (ns.num() > 0) {
2039 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2040 prefix, ns.sd(), ns.maximum());
2041 }
2042 }
2043
2044 void G1ConcurrentMark::print_summary_info() {
2045 Log(gc, marking) log;
2046 if (!log.is_trace()) {
2047 return;
2048 }
2049
2050 log.trace(" Concurrent marking:");
2051 if (!is_fully_initialized()) {
2052 log.trace(" has not been initialized yet");
2053 return;
2054 }
2055 print_ms_time_info(" ", "remarks", _remark_times);
2056 {
2057 print_ms_time_info(" ", "final marks", _remark_mark_times);
2058 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2059
2060 }
2061 print_ms_time_info(" ", "cleanups", _cleanup_times);
2062 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2063 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2064 log.trace(" Total stop_world time = %8.2f s.",
2065 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2066 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2067 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2068 }
2069
2070 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2071 if (is_fully_initialized()) { // they are initialized late
2072 tc->do_thread(_cm_thread);
2073 _concurrent_workers->threads_do(tc);
2074 }
2075 }
2076
2077 void G1ConcurrentMark::print_on(outputStream* st) const {
2078 st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2079 _mark_bitmap.print_on(st, " Bits: ");
2080 }
2081
2082 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2083 ReferenceProcessor* result = g1h->ref_processor_cm();
2084 assert(result != nullptr, "CM reference processor should not be null");
2085 return result;
2086 }
2087
2088 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2089 G1CMTask* task)
2090 : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2091 _g1h(g1h), _task(task)
2092 { }
2093
2094 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2095 assert(hr != nullptr,
2096 "claim_region() should have filtered out null regions");
2097 _curr_region = hr;
2098 _finger = hr->bottom();
2099 update_region_limit();
2100 }
2101
2102 void G1CMTask::update_region_limit() {
2103 G1HeapRegion* hr = _curr_region;
2104 HeapWord* bottom = hr->bottom();
2105 HeapWord* limit = _cm->top_at_mark_start(hr);
2106
2107 if (limit == bottom) {
2108 // The region was collected underneath our feet.
2109 // We set the finger to bottom to ensure that the bitmap
2110 // iteration that will follow this will not do anything.
2111 // (this is not a condition that holds when we set the region up,
2112 // as the region is not supposed to be empty in the first place)
2113 _finger = bottom;
2114 } else if (limit >= _region_limit) {
2115 assert(limit >= _finger, "peace of mind");
2116 } else {
2117 assert(limit < _region_limit, "only way to get here");
2118 // This can happen under some pretty unusual circumstances. An
2119 // evacuation pause empties the region underneath our feet (TAMS
2120 // at bottom). We then do some allocation in the region (TAMS
2121 // stays at bottom), followed by the region being used as a GC
2122 // alloc region (TAMS will move to top() and the objects
2123 // originally below it will be greyed). All objects now marked in
2124 // the region are explicitly greyed, if below the global finger,
2125 // and we do not need in fact to scan anything else. So, we simply
2126 // set _finger to be limit to ensure that the bitmap iteration
2127 // doesn't do anything.
2128 _finger = limit;
2129 }
2130
2131 _region_limit = limit;
2132 }
2133
2134 void G1CMTask::giveup_current_region() {
2135 assert(_curr_region != nullptr, "invariant");
2136 clear_region_fields();
2137 }
2138
2139 void G1CMTask::clear_region_fields() {
2140 // Values for these three fields that indicate that we're not
2141 // holding on to a region.
2142 _curr_region = nullptr;
2143 _finger = nullptr;
2144 _region_limit = nullptr;
2145 }
2146
2147 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2148 if (cm_oop_closure == nullptr) {
2149 assert(_cm_oop_closure != nullptr, "invariant");
2150 } else {
2151 assert(_cm_oop_closure == nullptr, "invariant");
2152 }
2153 _cm_oop_closure = cm_oop_closure;
2154 }
2155
2156 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2157 guarantee(mark_bitmap != nullptr, "invariant");
2158 _mark_bitmap = mark_bitmap;
2159 clear_region_fields();
2160
2161 _calls = 0;
2162 _elapsed_time_ms = 0.0;
2163 _termination_time_ms = 0.0;
2164
2165 _mark_stats_cache.reset();
2166 }
2167
2168 void G1CMTask::reset_for_restart() {
2169 clear_region_fields();
2170 _task_queue->set_empty();
2171 TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2172 TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2173 }
2174
2175 void G1CMTask::register_partial_array_splitter() {
2176
2177 ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2178 _cm->max_num_tasks());
2179 }
2180
2181 void G1CMTask::unregister_partial_array_splitter() {
2182 _partial_array_splitter.~PartialArraySplitter();
2183 }
2184
2185 bool G1CMTask::should_exit_termination() {
2186 if (!regular_clock_call()) {
2187 return true;
2188 }
2189
2190 // This is called when we are in the termination protocol. We should
2191 // quit if, for some reason, this task wants to abort or the global
2192 // stack is not empty (this means that we can get work from it).
2193 return !_cm->mark_stack_empty() || has_aborted();
2194 }
2195
2196 void G1CMTask::reached_limit() {
2197 assert(_words_scanned >= _words_scanned_limit ||
2198 _refs_reached >= _refs_reached_limit ,
2199 "shouldn't have been called otherwise");
2200 abort_marking_if_regular_check_fail();
2201 }
2202
2203 bool G1CMTask::regular_clock_call() {
2204 if (has_aborted()) {
2205 return false;
2206 }
2207
2208 // First, we need to recalculate the words scanned and refs reached
2209 // limits for the next clock call.
2210 recalculate_limits();
2211
2212 // During the regular clock call we do the following
2213
2214 // (1) If an overflow has been flagged, then we abort.
2215 if (_cm->has_overflown()) {
2216 return false;
2217 }
2218
2219 // If we are not concurrent (i.e. we're doing remark) we don't need
2220 // to check anything else. The other steps are only needed during
2221 // the concurrent marking phase.
2222 if (!_cm->concurrent()) {
2223 return true;
2224 }
2225
2226 // (2) If marking has been aborted for Full GC, then we also abort.
2227 if (_cm->has_aborted()) {
2228 return false;
2229 }
2230
2231 // (4) We check whether we should yield. If we have to, then we abort.
2232 if (SuspendibleThreadSet::should_yield()) {
2233 // We should yield. To do this we abort the task. The caller is
2234 // responsible for yielding.
2235 return false;
2236 }
2237
2238 // (5) We check whether we've reached our time quota. If we have,
2239 // then we abort.
2240 double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2241 if (elapsed_time_ms > _time_target_ms) {
2242 _has_timed_out = true;
2243 return false;
2244 }
2245
2246 // (6) Finally, we check whether there are enough completed STAB
2247 // buffers available for processing. If there are, we abort.
2248 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2249 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2250 // we do need to process SATB buffers, we'll abort and restart
2251 // the marking task to do so
2252 return false;
2253 }
2254 return true;
2255 }
2256
2257 void G1CMTask::recalculate_limits() {
2258 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2259 _words_scanned_limit = _real_words_scanned_limit;
2260
2261 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2262 _refs_reached_limit = _real_refs_reached_limit;
2263 }
2264
2265 void G1CMTask::decrease_limits() {
2266 // This is called when we believe that we're going to do an infrequent
2267 // operation which will increase the per byte scanned cost (i.e. move
2268 // entries to/from the global stack). It basically tries to decrease the
2269 // scanning limit so that the clock is called earlier.
2270
2271 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2272 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2273 }
2274
2275 void G1CMTask::move_entries_to_global_stack() {
2276 // Local array where we'll store the entries that will be popped
2277 // from the local queue.
2278 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2279
2280 size_t n = 0;
2281 G1TaskQueueEntry task_entry;
2282 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2283 buffer[n] = task_entry;
2284 ++n;
2285 }
2286 if (n < G1CMMarkStack::EntriesPerChunk) {
2287 buffer[n] = G1TaskQueueEntry();
2288 }
2289
2290 if (n > 0) {
2291 if (!_cm->mark_stack_push(buffer)) {
2292 set_has_aborted();
2293 }
2294 }
2295
2296 // This operation was quite expensive, so decrease the limits.
2297 decrease_limits();
2298 }
2299
2300 bool G1CMTask::get_entries_from_global_stack() {
2301 // Local array where we'll store the entries that will be popped
2302 // from the global stack.
2303 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2304
2305 if (!_cm->mark_stack_pop(buffer)) {
2306 return false;
2307 }
2308
2309 // We did actually pop at least one entry.
2310 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2311 G1TaskQueueEntry task_entry = buffer[i];
2312 if (task_entry.is_null()) {
2313 break;
2314 }
2315 assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2316 bool success = _task_queue->push(task_entry);
2317 // We only call this when the local queue is empty or under a
2318 // given target limit. So, we do not expect this push to fail.
2319 assert(success, "invariant");
2320 }
2321
2322 // This operation was quite expensive, so decrease the limits
2323 decrease_limits();
2324 return true;
2325 }
2326
2327 void G1CMTask::drain_local_queue(bool partially) {
2328 if (has_aborted()) {
2329 return;
2330 }
2331
2332 // Decide what the target size is, depending whether we're going to
2333 // drain it partially (so that other tasks can steal if they run out
2334 // of things to do) or totally (at the very end).
2335 uint target_size;
2336 if (partially) {
2337 target_size = GCDrainStackTargetSize;
2338 } else {
2339 target_size = 0;
2340 }
2341
2342 if (_task_queue->size() > target_size) {
2343 G1TaskQueueEntry entry;
2344 bool ret = _task_queue->pop_local(entry);
2345 while (ret) {
2346 process_entry(entry, false /* stolen */);
2347 if (_task_queue->size() <= target_size || has_aborted()) {
2348 ret = false;
2349 } else {
2350 ret = _task_queue->pop_local(entry);
2351 }
2352 }
2353 }
2354 }
2355
2356 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2357 assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2358
2359 // Mark objArray klass metadata
2360 process_klass(obj->klass());
2361
2362 size_t array_length = obj->length();
2363 size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2364
2365 process_array_chunk(obj, 0, initial_chunk_size);
2366
2367 // Include object header size
2368 return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2369 }
2370
2371 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2372 PartialArrayState* state = task.to_partial_array_state();
2373 // Access state before release by claim().
2374 objArrayOop obj = objArrayOop(state->source());
2375
2376 PartialArraySplitter::Claim claim =
2377 _partial_array_splitter.claim(state, _task_queue, stolen);
2378
2379 process_array_chunk(obj, claim._start, claim._end);
2380 return heap_word_size((claim._end - claim._start) * heapOopSize);
2381 }
2382
2383 void G1CMTask::drain_global_stack(bool partially) {
2384 if (has_aborted()) {
2385 return;
2386 }
2387
2388 // We have a policy to drain the local queue before we attempt to
2389 // drain the global stack.
2390 assert(partially || _task_queue->size() == 0, "invariant");
2391
2392 // Decide what the target size is, depending whether we're going to
2393 // drain it partially (so that other tasks can steal if they run out
2394 // of things to do) or totally (at the very end).
2395 // Notice that when draining the global mark stack partially, due to the racyness
2396 // of the mark stack size update we might in fact drop below the target. But,
2397 // this is not a problem.
2398 // In case of total draining, we simply process until the global mark stack is
2399 // totally empty, disregarding the size counter.
2400 if (partially) {
2401 size_t const target_size = _cm->partial_mark_stack_size_target();
2402 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2403 if (get_entries_from_global_stack()) {
2404 drain_local_queue(partially);
2405 }
2406 }
2407 } else {
2408 while (!has_aborted() && get_entries_from_global_stack()) {
2409 drain_local_queue(partially);
2410 }
2411 }
2412 }
2413
2414 // SATB Queue has several assumptions on whether to call the par or
2415 // non-par versions of the methods. this is why some of the code is
2416 // replicated. We should really get rid of the single-threaded version
2417 // of the code to simplify things.
2418 void G1CMTask::drain_satb_buffers() {
2419 if (has_aborted()) {
2420 return;
2421 }
2422
2423 // We set this so that the regular clock knows that we're in the
2424 // middle of draining buffers and doesn't set the abort flag when it
2425 // notices that SATB buffers are available for draining. It'd be
2426 // very counter productive if it did that. :-)
2427 _draining_satb_buffers = true;
2428
2429 G1CMSATBBufferClosure satb_cl(this, _g1h);
2430 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2431
2432 // This keeps claiming and applying the closure to completed buffers
2433 // until we run out of buffers or we need to abort.
2434 while (!has_aborted() &&
2435 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2436 abort_marking_if_regular_check_fail();
2437 }
2438
2439 // Can't assert qset is empty here, even if not aborted. If concurrent,
2440 // some other thread might be adding to the queue. If not concurrent,
2441 // some other thread might have won the race for the last buffer, but
2442 // has not yet decremented the count.
2443
2444 _draining_satb_buffers = false;
2445
2446 // again, this was a potentially expensive operation, decrease the
2447 // limits to get the regular clock call early
2448 decrease_limits();
2449 }
2450
2451 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2452 _mark_stats_cache.reset(region_idx);
2453 }
2454
2455 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2456 return _mark_stats_cache.evict_all();
2457 }
2458
2459 void G1CMTask::print_stats() {
2460 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2461 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2462 _elapsed_time_ms, _termination_time_ms);
2463 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2464 _step_times_ms.num(),
2465 _step_times_ms.avg(),
2466 _step_times_ms.sd(),
2467 _step_times_ms.maximum(),
2468 _step_times_ms.sum());
2469 size_t const hits = _mark_stats_cache.hits();
2470 size_t const misses = _mark_stats_cache.misses();
2471 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2472 hits, misses, percent_of(hits, hits + misses));
2473 }
2474
2475 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2476 return _task_queues->steal(worker_id, task_entry);
2477 }
2478
2479 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2480 if (has_aborted() || _curr_region == nullptr) {
2481 return;
2482 }
2483
2484 // This means that we're already holding on to a region.
2485 assert(_finger != nullptr, "if region is not null, then the finger "
2486 "should not be null either");
2487
2488 // We might have restarted this task after an evacuation pause
2489 // which might have evacuated the region we're holding on to
2490 // underneath our feet. Let's read its limit again to make sure
2491 // that we do not iterate over a region of the heap that
2492 // contains garbage (update_region_limit() will also move
2493 // _finger to the start of the region if it is found empty).
2494 update_region_limit();
2495 // We will start from _finger not from the start of the region,
2496 // as we might be restarting this task after aborting half-way
2497 // through scanning this region. In this case, _finger points to
2498 // the address where we last found a marked object. If this is a
2499 // fresh region, _finger points to start().
2500 MemRegion mr = MemRegion(_finger, _region_limit);
2501
2502 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2503 "humongous regions should go around loop once only");
2504
2505 // Some special cases:
2506 // If the memory region is empty, we can just give up the region.
2507 // If the current region is humongous then we only need to check
2508 // the bitmap for the bit associated with the start of the object,
2509 // scan the object if it's live, and give up the region.
2510 // Otherwise, let's iterate over the bitmap of the part of the region
2511 // that is left.
2512 // If the iteration is successful, give up the region.
2513 if (mr.is_empty()) {
2514 giveup_current_region();
2515 abort_marking_if_regular_check_fail();
2516 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2517 if (_mark_bitmap->is_marked(mr.start())) {
2518 // The object is marked - apply the closure
2519 bitmap_closure.do_addr(mr.start());
2520 }
2521 // Even if this task aborted while scanning the humongous object
2522 // we can (and should) give up the current region.
2523 giveup_current_region();
2524 abort_marking_if_regular_check_fail();
2525 } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2526 giveup_current_region();
2527 abort_marking_if_regular_check_fail();
2528 } else {
2529 assert(has_aborted(), "currently the only way to do so");
2530 // The only way to abort the bitmap iteration is to return
2531 // false from the do_bit() method. However, inside the
2532 // do_bit() method we move the _finger to point to the
2533 // object currently being looked at. So, if we bail out, we
2534 // have definitely set _finger to something non-null.
2535 assert(_finger != nullptr, "invariant");
2536
2537 // Region iteration was actually aborted. So now _finger
2538 // points to the address of the object we last scanned. If we
2539 // leave it there, when we restart this task, we will rescan
2540 // the object. It is easy to avoid this. We move the finger by
2541 // enough to point to the next possible object header.
2542 assert(_finger < _region_limit, "invariant");
2543 HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2544 if (new_finger >= _region_limit) {
2545 giveup_current_region();
2546 } else {
2547 move_finger_to(new_finger);
2548 }
2549 }
2550 }
2551
2552 void G1CMTask::claim_new_region() {
2553 // Read the note on the claim_region() method on why it might
2554 // return null with potentially more regions available for
2555 // claiming and why we have to check out_of_regions() to determine
2556 // whether we're done or not.
2557 while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2558 // We are going to try to claim a new region. We should have
2559 // given up on the previous one.
2560 // Separated the asserts so that we know which one fires.
2561 assert(_curr_region == nullptr, "invariant");
2562 assert(_finger == nullptr, "invariant");
2563 assert(_region_limit == nullptr, "invariant");
2564 G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2565 if (claimed_region != nullptr) {
2566 // Yes, we managed to claim one
2567 setup_for_region(claimed_region);
2568 assert(_curr_region == claimed_region, "invariant");
2569 }
2570 // It is important to call the regular clock here. It might take
2571 // a while to claim a region if, for example, we hit a large
2572 // block of empty regions. So we need to call the regular clock
2573 // method once round the loop to make sure it's called
2574 // frequently enough.
2575 abort_marking_if_regular_check_fail();
2576 }
2577 }
2578
2579 void G1CMTask::attempt_stealing() {
2580 // We cannot check whether the global stack is empty, since other
2581 // tasks might be pushing objects to it concurrently.
2582 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2583 "only way to reach here");
2584 while (!has_aborted()) {
2585 G1TaskQueueEntry entry;
2586 if (_cm->try_stealing(_worker_id, entry)) {
2587 process_entry(entry, true /* stolen */);
2588
2589 // And since we're towards the end, let's totally drain the
2590 // local queue and global stack.
2591 drain_local_queue(false);
2592 drain_global_stack(false);
2593 } else {
2594 break;
2595 }
2596 }
2597 }
2598
2599 void G1CMTask::attempt_termination(bool is_serial) {
2600 // We cannot check whether the global stack is empty, since other
2601 // tasks might be concurrently pushing objects on it.
2602 // Separated the asserts so that we know which one fires.
2603 assert(_cm->out_of_regions(), "only way to reach here");
2604 assert(_task_queue->size() == 0, "only way to reach here");
2605 double termination_start_time_ms = os::elapsedTime() * 1000.0;
2606
2607 // The G1CMTask class also extends the TerminatorTerminator class,
2608 // hence its should_exit_termination() method will also decide
2609 // whether to exit the termination protocol or not.
2610 bool finished = (is_serial ||
2611 _cm->terminator()->offer_termination(this));
2612 _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2613
2614 if (finished) {
2615 // We're all done.
2616
2617 // We can now guarantee that the global stack is empty, since
2618 // all other tasks have finished. We separated the guarantees so
2619 // that, if a condition is false, we can immediately find out
2620 // which one.
2621 guarantee(_cm->out_of_regions(), "only way to reach here");
2622 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2623 guarantee(_task_queue->size() == 0, "only way to reach here");
2624 guarantee(!_cm->has_overflown(), "only way to reach here");
2625 guarantee(!has_aborted(), "should never happen if termination has completed");
2626 } else {
2627 // Apparently there's more work to do. Let's abort this task. We
2628 // will restart it and hopefully we can find more things to do.
2629 set_has_aborted();
2630 }
2631 }
2632
2633 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2634 if (_has_timed_out) {
2635 double diff_ms = elapsed_time_ms - _time_target_ms;
2636 // Keep statistics of how well we did with respect to hitting
2637 // our target only if we actually timed out (if we aborted for
2638 // other reasons, then the results might get skewed).
2639 _marking_step_diff_ms.add(diff_ms);
2640 }
2641
2642 if (!_cm->has_overflown()) {
2643 return;
2644 }
2645
2646 // This is the interesting one. We aborted because a global
2647 // overflow was raised. This means we have to restart the
2648 // marking phase and start iterating over regions. However, in
2649 // order to do this we have to make sure that all tasks stop
2650 // what they are doing and re-initialize in a safe manner. We
2651 // will achieve this with the use of two barrier sync points.
2652 if (!is_serial) {
2653 // We only need to enter the sync barrier if being called
2654 // from a parallel context
2655 _cm->enter_first_sync_barrier(_worker_id);
2656
2657 // When we exit this sync barrier we know that all tasks have
2658 // stopped doing marking work. So, it's now safe to
2659 // re-initialize our data structures.
2660 }
2661
2662 clear_region_fields();
2663 flush_mark_stats_cache();
2664
2665 if (!is_serial) {
2666 // If we're executing the concurrent phase of marking, reset the marking
2667 // state; otherwise the marking state is reset after reference processing,
2668 // during the remark pause.
2669 // If we reset here as a result of an overflow during the remark we will
2670 // see assertion failures from any subsequent set_concurrency_and_phase()
2671 // calls.
2672 if (_cm->concurrent() && _worker_id == 0) {
2673 // Worker 0 is responsible for clearing the global data structures because
2674 // of an overflow. During STW we should not clear the overflow flag (in
2675 // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2676 // method to abort the pause and restart concurrent marking.
2677 _cm->reset_marking_for_restart();
2678
2679 log_info(gc, marking)("Concurrent Mark reset for overflow");
2680 }
2681
2682 // ...and enter the second barrier.
2683 _cm->enter_second_sync_barrier(_worker_id);
2684 }
2685 }
2686
2687 /*****************************************************************************
2688
2689 The do_marking_step(time_target_ms, ...) method is the building
2690 block of the parallel marking framework. It can be called in parallel
2691 with other invocations of do_marking_step() on different tasks
2692 (but only one per task, obviously) and concurrently with the
2693 mutator threads, or during remark, hence it eliminates the need
2694 for two versions of the code. When called during remark, it will
2695 pick up from where the task left off during the concurrent marking
2696 phase. Interestingly, tasks are also claimable during evacuation
2697 pauses too, since do_marking_step() ensures that it aborts before
2698 it needs to yield.
2699
2700 The data structures that it uses to do marking work are the
2701 following:
2702
2703 (1) Marking Bitmap. If there are grey objects that appear only
2704 on the bitmap (this happens either when dealing with an overflow
2705 or when the concurrent start pause has simply marked the roots
2706 and didn't push them on the stack), then tasks claim heap
2707 regions whose bitmap they then scan to find grey objects. A
2708 global finger indicates where the end of the last claimed region
2709 is. A local finger indicates how far into the region a task has
2710 scanned. The two fingers are used to determine how to grey an
2711 object (i.e. whether simply marking it is OK, as it will be
2712 visited by a task in the future, or whether it needs to be also
2713 pushed on a stack).
2714
2715 (2) Local Queue. The local queue of the task which is accessed
2716 reasonably efficiently by the task. Other tasks can steal from
2717 it when they run out of work. Throughout the marking phase, a
2718 task attempts to keep its local queue short but not totally
2719 empty, so that entries are available for stealing by other
2720 tasks. Only when there is no more work, a task will totally
2721 drain its local queue.
2722
2723 (3) Global Mark Stack. This handles local queue overflow. During
2724 marking only sets of entries are moved between it and the local
2725 queues, as access to it requires a mutex and more fine-grain
2726 interaction with it which might cause contention. If it
2727 overflows, then the marking phase should restart and iterate
2728 over the bitmap to identify grey objects. Throughout the marking
2729 phase, tasks attempt to keep the global mark stack at a small
2730 length but not totally empty, so that entries are available for
2731 popping by other tasks. Only when there is no more work, tasks
2732 will totally drain the global mark stack.
2733
2734 (4) SATB Buffer Queue. This is where completed SATB buffers are
2735 made available. Buffers are regularly removed from this queue
2736 and scanned for roots, so that the queue doesn't get too
2737 long. During remark, all completed buffers are processed, as
2738 well as the filled in parts of any uncompleted buffers.
2739
2740 The do_marking_step() method tries to abort when the time target
2741 has been reached. There are a few other cases when the
2742 do_marking_step() method also aborts:
2743
2744 (1) When the marking phase has been aborted (after a Full GC).
2745
2746 (2) When a global overflow (on the global stack) has been
2747 triggered. Before the task aborts, it will actually sync up with
2748 the other tasks to ensure that all the marking data structures
2749 (local queues, stacks, fingers etc.) are re-initialized so that
2750 when do_marking_step() completes, the marking phase can
2751 immediately restart.
2752
2753 (3) When enough completed SATB buffers are available. The
2754 do_marking_step() method only tries to drain SATB buffers right
2755 at the beginning. So, if enough buffers are available, the
2756 marking step aborts and the SATB buffers are processed at
2757 the beginning of the next invocation.
2758
2759 (4) To yield. when we have to yield then we abort and yield
2760 right at the end of do_marking_step(). This saves us from a lot
2761 of hassle as, by yielding we might allow a Full GC. If this
2762 happens then objects will be compacted underneath our feet, the
2763 heap might shrink, etc. We save checking for this by just
2764 aborting and doing the yield right at the end.
2765
2766 From the above it follows that the do_marking_step() method should
2767 be called in a loop (or, otherwise, regularly) until it completes.
2768
2769 If a marking step completes without its has_aborted() flag being
2770 true, it means it has completed the current marking phase (and
2771 also all other marking tasks have done so and have all synced up).
2772
2773 A method called regular_clock_call() is invoked "regularly" (in
2774 sub ms intervals) throughout marking. It is this clock method that
2775 checks all the abort conditions which were mentioned above and
2776 decides when the task should abort. A work-based scheme is used to
2777 trigger this clock method: when the number of object words the
2778 marking phase has scanned or the number of references the marking
2779 phase has visited reach a given limit. Additional invocations to
2780 the method clock have been planted in a few other strategic places
2781 too. The initial reason for the clock method was to avoid calling
2782 cpu time gathering too regularly, as it is quite expensive. So,
2783 once it was in place, it was natural to piggy-back all the other
2784 conditions on it too and not constantly check them throughout the code.
2785
2786 If do_termination is true then do_marking_step will enter its
2787 termination protocol.
2788
2789 The value of is_serial must be true when do_marking_step is being
2790 called serially (i.e. by the VMThread) and do_marking_step should
2791 skip any synchronization in the termination and overflow code.
2792 Examples include the serial remark code and the serial reference
2793 processing closures.
2794
2795 The value of is_serial must be false when do_marking_step is
2796 being called by any of the worker threads.
2797 Examples include the concurrent marking code (CMMarkingTask),
2798 the MT remark code, and the MT reference processing closures.
2799
2800 *****************************************************************************/
2801
2802 void G1CMTask::do_marking_step(double time_target_ms,
2803 bool do_termination,
2804 bool is_serial) {
2805 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2806
2807 _start_cpu_time_ns = os::current_thread_cpu_time();
2808
2809 // If do_stealing is true then do_marking_step will attempt to
2810 // steal work from the other G1CMTasks. It only makes sense to
2811 // enable stealing when the termination protocol is enabled
2812 // and do_marking_step() is not being called serially.
2813 bool do_stealing = do_termination && !is_serial;
2814
2815 G1Predictions const& predictor = _g1h->policy()->predictor();
2816 double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2817 _time_target_ms = time_target_ms - diff_prediction_ms;
2818
2819 // set up the variables that are used in the work-based scheme to
2820 // call the regular clock method
2821 _words_scanned = 0;
2822 _refs_reached = 0;
2823 recalculate_limits();
2824
2825 // clear all flags
2826 clear_has_aborted();
2827 _has_timed_out = false;
2828 _draining_satb_buffers = false;
2829
2830 ++_calls;
2831
2832 // Set up the bitmap and oop closures. Anything that uses them is
2833 // eventually called from this method, so it is OK to allocate these
2834 // statically.
2835 G1CMBitMapClosure bitmap_closure(this, _cm);
2836 G1CMOopClosure cm_oop_closure(_g1h, this);
2837 set_cm_oop_closure(&cm_oop_closure);
2838
2839 if (_cm->has_overflown()) {
2840 // This can happen if the mark stack overflows during a GC pause
2841 // and this task, after a yield point, restarts. We have to abort
2842 // as we need to get into the overflow protocol which happens
2843 // right at the end of this task.
2844 set_has_aborted();
2845 }
2846
2847 // First drain any available SATB buffers. After this, we will not
2848 // look at SATB buffers before the next invocation of this method.
2849 // If enough completed SATB buffers are queued up, the regular clock
2850 // will abort this task so that it restarts.
2851 drain_satb_buffers();
2852 // ...then partially drain the local queue and the global stack
2853 drain_local_queue(true);
2854 drain_global_stack(true);
2855
2856 do {
2857 process_current_region(bitmap_closure);
2858 // At this point we have either completed iterating over the
2859 // region we were holding on to, or we have aborted.
2860
2861 // We then partially drain the local queue and the global stack.
2862 drain_local_queue(true);
2863 drain_global_stack(true);
2864
2865 claim_new_region();
2866
2867 assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2868 "at this point we should be out of regions");
2869 } while ( _curr_region != nullptr && !has_aborted());
2870
2871 // We cannot check whether the global stack is empty, since other
2872 // tasks might be pushing objects to it concurrently.
2873 assert(has_aborted() || _cm->out_of_regions(),
2874 "at this point we should be out of regions");
2875 // Try to reduce the number of available SATB buffers so that
2876 // remark has less work to do.
2877 drain_satb_buffers();
2878
2879 // Since we've done everything else, we can now totally drain the
2880 // local queue and global stack.
2881 drain_local_queue(false);
2882 drain_global_stack(false);
2883
2884 // Attempt at work stealing from other task's queues.
2885 if (do_stealing && !has_aborted()) {
2886 // We have not aborted. This means that we have finished all that
2887 // we could. Let's try to do some stealing...
2888 attempt_stealing();
2889 }
2890
2891 // We still haven't aborted. Now, let's try to get into the
2892 // termination protocol.
2893 if (do_termination && !has_aborted()) {
2894 attempt_termination(is_serial);
2895 }
2896
2897 // Mainly for debugging purposes to make sure that a pointer to the
2898 // closure which was statically allocated in this frame doesn't
2899 // escape it by accident.
2900 set_cm_oop_closure(nullptr);
2901 jlong end_cpu_time_ns = os::current_thread_cpu_time();
2902 double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2903 // Update the step history.
2904 _step_times_ms.add(elapsed_time_ms);
2905
2906 if (has_aborted()) {
2907 // The task was aborted for some reason.
2908 handle_abort(is_serial, elapsed_time_ms);
2909 }
2910 }
2911
2912 G1CMTask::G1CMTask(uint worker_id,
2913 G1ConcurrentMark* cm,
2914 G1CMTaskQueue* task_queue,
2915 G1RegionMarkStats* mark_stats) :
2916 _worker_id(worker_id),
2917 _g1h(G1CollectedHeap::heap()),
2918 _cm(cm),
2919 _mark_bitmap(nullptr),
2920 _task_queue(task_queue),
2921 _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
2922 _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
2923 _calls(0),
2924 _time_target_ms(0.0),
2925 _start_cpu_time_ns(0),
2926 _cm_oop_closure(nullptr),
2927 _curr_region(nullptr),
2928 _finger(nullptr),
2929 _region_limit(nullptr),
2930 _words_scanned(0),
2931 _words_scanned_limit(0),
2932 _real_words_scanned_limit(0),
2933 _refs_reached(0),
2934 _refs_reached_limit(0),
2935 _real_refs_reached_limit(0),
2936 _has_aborted(false),
2937 _has_timed_out(false),
2938 _draining_satb_buffers(false),
2939 _step_times_ms(),
2940 _elapsed_time_ms(0.0),
2941 _termination_time_ms(0.0),
2942 _marking_step_diff_ms()
2943 {
2944 guarantee(task_queue != nullptr, "invariant");
2945
2946 _marking_step_diff_ms.add(0.5);
2947 }
2948
2949 // These are formatting macros that are used below to ensure
2950 // consistent formatting. The *_H_* versions are used to format the
2951 // header for a particular value and they should be kept consistent
2952 // with the corresponding macro. Also note that most of the macros add
2953 // the necessary white space (as a prefix) which makes them a bit
2954 // easier to compose.
2955
2956 // All the output lines are prefixed with this string to be able to
2957 // identify them easily in a large log file.
2958 #define G1PPRL_LINE_PREFIX "###"
2959
2960 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
2961 #ifdef _LP64
2962 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
2963 #else // _LP64
2964 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
2965 #endif // _LP64
2966
2967 // For per-region info
2968 #define G1PPRL_TYPE_FORMAT " %-4s"
2969 #define G1PPRL_TYPE_H_FORMAT " %4s"
2970 #define G1PPRL_STATE_FORMAT " %-5s"
2971 #define G1PPRL_STATE_H_FORMAT " %5s"
2972 #define G1PPRL_BYTE_FORMAT " %9zu"
2973 #define G1PPRL_BYTE_H_FORMAT " %9s"
2974 #define G1PPRL_DOUBLE_FORMAT "%14.1f"
2975 #define G1PPRL_GCEFF_H_FORMAT " %14s"
2976 #define G1PPRL_GID_H_FORMAT " %9s"
2977 #define G1PPRL_GID_FORMAT " " UINT32_FORMAT_W(9)
2978 #define G1PPRL_LEN_FORMAT " " UINT32_FORMAT_W(14)
2979 #define G1PPRL_LEN_H_FORMAT " %14s"
2980 #define G1PPRL_GID_GCEFF_FORMAT " %14.1f"
2981 #define G1PPRL_GID_LIVENESS_FORMAT " %9.2f"
2982
2983 // For summary info
2984 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
2985 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": %zu"
2986 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
2987 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2988
2989 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2990 _total_used_bytes(0),
2991 _total_capacity_bytes(0),
2992 _total_live_bytes(0),
2993 _total_remset_bytes(0),
2994 _total_code_roots_bytes(0)
2995 {
2996 if (!log_is_enabled(Trace, gc, liveness)) {
2997 return;
2998 }
2999
3000 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3001 MemRegion reserved = g1h->reserved();
3002 double now = os::elapsedTime();
3003
3004 // Print the header of the output.
3005 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3006 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3007 G1PPRL_SUM_ADDR_FORMAT("reserved")
3008 G1PPRL_SUM_BYTE_FORMAT("region-size"),
3009 p2i(reserved.start()), p2i(reserved.end()),
3010 G1HeapRegion::GrainBytes);
3011 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3012 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3013 G1PPRL_TYPE_H_FORMAT
3014 G1PPRL_ADDR_BASE_H_FORMAT
3015 G1PPRL_BYTE_H_FORMAT
3016 G1PPRL_BYTE_H_FORMAT
3017 G1PPRL_STATE_H_FORMAT
3018 G1PPRL_BYTE_H_FORMAT
3019 G1PPRL_GID_H_FORMAT,
3020 "type", "address-range",
3021 "used", "live",
3022 "state", "code-roots",
3023 "group-id");
3024 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3025 G1PPRL_TYPE_H_FORMAT
3026 G1PPRL_ADDR_BASE_H_FORMAT
3027 G1PPRL_BYTE_H_FORMAT
3028 G1PPRL_BYTE_H_FORMAT
3029 G1PPRL_STATE_H_FORMAT
3030 G1PPRL_BYTE_H_FORMAT
3031 G1PPRL_GID_H_FORMAT,
3032 "", "",
3033 "(bytes)", "(bytes)",
3034 "", "(bytes)", "");
3035 }
3036
3037 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3038 if (!log_is_enabled(Trace, gc, liveness)) {
3039 return false;
3040 }
3041
3042 const char* type = r->get_type_str();
3043 HeapWord* bottom = r->bottom();
3044 HeapWord* end = r->end();
3045 size_t capacity_bytes = r->capacity();
3046 size_t used_bytes = r->used();
3047 size_t live_bytes = r->live_bytes();
3048 size_t remset_bytes = r->rem_set()->mem_size();
3049 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3050 const char* remset_type = r->rem_set()->get_short_state_str();
3051 uint cset_group_id = r->rem_set()->has_cset_group()
3052 ? r->rem_set()->cset_group_id()
3053 : G1CSetCandidateGroup::NoRemSetId;
3054
3055 _total_used_bytes += used_bytes;
3056 _total_capacity_bytes += capacity_bytes;
3057 _total_live_bytes += live_bytes;
3058 _total_remset_bytes += remset_bytes;
3059 _total_code_roots_bytes += code_roots_bytes;
3060
3061 // Print a line for this particular region.
3062 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3063 G1PPRL_TYPE_FORMAT
3064 G1PPRL_ADDR_BASE_FORMAT
3065 G1PPRL_BYTE_FORMAT
3066 G1PPRL_BYTE_FORMAT
3067 G1PPRL_STATE_FORMAT
3068 G1PPRL_BYTE_FORMAT
3069 G1PPRL_GID_FORMAT,
3070 type, p2i(bottom), p2i(end),
3071 used_bytes, live_bytes,
3072 remset_type, code_roots_bytes,
3073 cset_group_id);
3074
3075 return false;
3076 }
3077
3078 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3079 if (!log_is_enabled(Trace, gc, liveness)) {
3080 return;
3081 }
3082
3083 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3084 _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3085 // add static memory usages to remembered set sizes
3086 _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3087
3088 log_cset_candidate_groups();
3089
3090 // Print the footer of the output.
3091 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3092 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3093 " SUMMARY"
3094 G1PPRL_SUM_MB_FORMAT("capacity")
3095 G1PPRL_SUM_MB_PERC_FORMAT("used")
3096 G1PPRL_SUM_MB_PERC_FORMAT("live")
3097 G1PPRL_SUM_MB_FORMAT("remset")
3098 G1PPRL_SUM_MB_FORMAT("code-roots"),
3099 bytes_to_mb(_total_capacity_bytes),
3100 bytes_to_mb(_total_used_bytes),
3101 percent_of(_total_used_bytes, _total_capacity_bytes),
3102 bytes_to_mb(_total_live_bytes),
3103 percent_of(_total_live_bytes, _total_capacity_bytes),
3104 bytes_to_mb(_total_remset_bytes),
3105 bytes_to_mb(_total_code_roots_bytes));
3106 }
3107
3108 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3109 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3110 G1PPRL_GID_FORMAT
3111 G1PPRL_LEN_FORMAT
3112 G1PPRL_GID_GCEFF_FORMAT
3113 G1PPRL_GID_LIVENESS_FORMAT
3114 G1PPRL_BYTE_FORMAT
3115 G1PPRL_TYPE_H_FORMAT,
3116 group->group_id(),
3117 group->length(),
3118 group->length() > 0 ? group->gc_efficiency() : 0.0,
3119 group->length() > 0 ? group->liveness_percent() : 0.0,
3120 group->card_set()->mem_size(),
3121 type);
3122 _total_remset_bytes += group->card_set()->mem_size();
3123 }
3124
3125 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3126 for (G1CSetCandidateGroup* group : gl) {
3127 log_cset_candidate_group_add_total(group, type);
3128 }
3129 }
3130
3131 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3132 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3133 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3134 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3135 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3136 G1PPRL_GID_H_FORMAT
3137 G1PPRL_LEN_H_FORMAT
3138 G1PPRL_GCEFF_H_FORMAT
3139 G1PPRL_BYTE_H_FORMAT
3140 G1PPRL_BYTE_H_FORMAT
3141 G1PPRL_TYPE_H_FORMAT,
3142 "groud-id", "num-regions",
3143 "gc-eff", "liveness",
3144 "remset", "type");
3145
3146 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3147 G1PPRL_GID_H_FORMAT
3148 G1PPRL_LEN_H_FORMAT
3149 G1PPRL_GCEFF_H_FORMAT
3150 G1PPRL_BYTE_H_FORMAT
3151 G1PPRL_BYTE_H_FORMAT
3152 G1PPRL_TYPE_H_FORMAT,
3153 "", "",
3154 "(bytes/ms)", "%",
3155 "(bytes)", "");
3156
3157 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3158
3159 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3160
3161 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3162 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3163 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3164 }