1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectorState.inline.hpp"
34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
38 #include "gc/g1/g1ConcurrentRefine.hpp"
39 #include "gc/g1/g1HeapRegion.inline.hpp"
40 #include "gc/g1/g1HeapRegionManager.hpp"
41 #include "gc/g1/g1HeapRegionPrinter.hpp"
42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
44 #include "gc/g1/g1HeapVerifier.hpp"
45 #include "gc/g1/g1OopClosures.inline.hpp"
46 #include "gc/g1/g1Policy.hpp"
47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
48 #include "gc/g1/g1ThreadLocalData.hpp"
49 #include "gc/g1/g1Trace.hpp"
50 #include "gc/shared/gcId.hpp"
51 #include "gc/shared/gcTimer.hpp"
52 #include "gc/shared/gcTraceTime.inline.hpp"
53 #include "gc/shared/gcVMOperations.hpp"
54 #include "gc/shared/partialArraySplitter.inline.hpp"
55 #include "gc/shared/partialArrayState.hpp"
56 #include "gc/shared/partialArrayTaskStats.hpp"
57 #include "gc/shared/referencePolicy.hpp"
58 #include "gc/shared/suspendibleThreadSet.hpp"
59 #include "gc/shared/taskqueue.inline.hpp"
60 #include "gc/shared/taskTerminator.hpp"
61 #include "gc/shared/weakProcessor.inline.hpp"
62 #include "gc/shared/workerPolicy.hpp"
63 #include "jvm.h"
64 #include "logging/log.hpp"
65 #include "memory/allocation.hpp"
66 #include "memory/iterator.hpp"
67 #include "memory/metaspaceUtils.hpp"
68 #include "memory/resourceArea.hpp"
69 #include "memory/universe.hpp"
70 #include "nmt/memTracker.hpp"
71 #include "oops/access.inline.hpp"
72 #include "oops/oop.inline.hpp"
73 #include "runtime/globals_extension.hpp"
74 #include "runtime/handles.inline.hpp"
75 #include "runtime/java.hpp"
76 #include "runtime/orderAccess.hpp"
77 #include "runtime/os.hpp"
78 #include "runtime/prefetch.inline.hpp"
79 #include "runtime/threads.hpp"
80 #include "utilities/align.hpp"
81 #include "utilities/checkedCast.hpp"
82 #include "utilities/formatBuffer.hpp"
83 #include "utilities/growableArray.hpp"
84 #include "utilities/powerOfTwo.hpp"
85
86 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
87
88 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
89 assert(cm != nullptr, "must be");
90 }
91
92 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
93 assert(cm != nullptr, "must be");
94 assert(_cm == nullptr, "double initialize");
95 _cm = cm;
96 }
97
98 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
99 assert(addr < _cm->finger(), "invariant");
100 assert(addr >= _task->finger(), "invariant");
101
102 // We move that task's local finger along.
103 _task->move_finger_to(addr);
104
105 _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
106 // we only partially drain the local queue and global stack
107 _task->drain_local_queue(true);
108 _task->drain_global_stack(true);
109
110 // if the has_aborted flag has been raised, we need to bail out of
111 // the iteration
112 return !_task->has_aborted();
113 }
114
115 G1CMMarkStack::G1CMMarkStack() :
116 _chunk_allocator() {
117 set_empty();
118 }
119
120 size_t G1CMMarkStack::capacity_alignment() {
121 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
122 }
123
124 bool G1CMMarkStack::initialize() {
125 guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
126
127 size_t initial_capacity = MarkStackSize;
128 size_t max_capacity = MarkStackSizeMax;
129
130 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
131
132 size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
133 size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
134
135 initial_num_chunks = round_up_power_of_2(initial_num_chunks);
136 max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
137
138 size_t limit = (INT_MAX - 1);
139 max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
140 initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
141
142 FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
143 FLAG_SET_ERGO(MarkStackSize, initial_capacity);
144
145 log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
146
147 log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
148 initial_num_chunks, max_capacity);
149
150 return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
151 }
152
153 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
154 if (_size.load_relaxed() >= _max_capacity) {
155 return nullptr;
156 }
157
158 size_t cur_idx = _size.fetch_then_add(1u);
159
160 if (cur_idx >= _max_capacity) {
161 return nullptr;
162 }
163
164 size_t bucket = get_bucket(cur_idx);
165 if (_buckets[bucket].load_acquire() == nullptr) {
166 if (!_should_grow) {
167 // Prefer to restart the CM.
168 return nullptr;
169 }
170
171 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
172 if (_buckets[bucket].load_acquire() == nullptr) {
173 size_t desired_capacity = bucket_size(bucket) * 2;
174 if (!try_expand_to(desired_capacity)) {
175 return nullptr;
176 }
177 }
178 }
179
180 size_t bucket_idx = get_bucket_index(cur_idx);
181 TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
182 result->next = nullptr;
183 return result;
184 }
185
186 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
187 _min_capacity(0),
188 _max_capacity(0),
189 _capacity(0),
190 _num_buckets(0),
191 _should_grow(false),
192 _buckets(nullptr),
193 _size(0)
194 { }
195
196 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
197 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
198
199 _min_capacity = initial_capacity;
200 _max_capacity = max_capacity;
201 _num_buckets = get_bucket(_max_capacity) + 1;
202
203 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
204
205 for (size_t i = 0; i < _num_buckets; i++) {
206 _buckets[i].store_relaxed(nullptr);
207 }
208
209 size_t new_capacity = bucket_size(0);
210
211 if (!reserve(new_capacity)) {
212 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
213 return false;
214 }
215 return true;
216 }
217
218 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
219 if (_capacity == _max_capacity) {
220 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
221 return false;
222 }
223
224 size_t old_capacity = _capacity;
225 desired_capacity = MIN2(desired_capacity, _max_capacity);
226
227 if (reserve(desired_capacity)) {
228 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
229 old_capacity, desired_capacity);
230 return true;
231 }
232 return false;
233 }
234
235 bool G1CMMarkStack::ChunkAllocator::try_expand() {
236 size_t new_capacity = _capacity * 2;
237 return try_expand_to(new_capacity);
238 }
239
240 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
241 if (_buckets == nullptr) {
242 return;
243 }
244
245 for (size_t i = 0; i < _num_buckets; i++) {
246 if (_buckets[i].load_relaxed() != nullptr) {
247 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
248 _buckets[i].store_relaxed(nullptr);
249 }
250 }
251
252 FREE_C_HEAP_ARRAY(_buckets);
253 }
254
255 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
256 assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
257
258 size_t highest_bucket = get_bucket(new_capacity - 1);
259 size_t i = get_bucket(_capacity);
260
261 // Allocate all buckets associated with indexes between the current capacity (_capacity)
262 // and the new capacity (new_capacity). This step ensures that there are no gaps in the
263 // array and that the capacity accurately reflects the reserved memory.
264 for (; i <= highest_bucket; i++) {
265 if (_buckets[i].load_acquire() != nullptr) {
266 continue; // Skip over already allocated buckets.
267 }
268
269 size_t bucket_capacity = bucket_size(i);
270
271 // Trim bucket size so that we do not exceed the _max_capacity.
272 bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
273 bucket_capacity :
274 _max_capacity - _capacity;
275
276
277 TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
278
279 if (bucket_base == nullptr) {
280 log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
281 bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
282 return false;
283 }
284 _capacity += bucket_capacity;
285 _buckets[i].release_store(bucket_base);
286 }
287 return true;
288 }
289
290 void G1CMMarkStack::expand() {
291 _chunk_allocator.try_expand();
292 }
293
294 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
295 elem->next = list->load_relaxed();
296 list->store_relaxed(elem);
297 }
298
299 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
300 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
301 add_chunk_to_list(&_chunk_list, elem);
302 _chunks_in_chunk_list++;
303 }
304
305 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
306 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
307 add_chunk_to_list(&_free_list, elem);
308 }
309
310 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
311 TaskQueueEntryChunk* result = list->load_relaxed();
312 if (result != nullptr) {
313 list->store_relaxed(list->load_relaxed()->next);
314 }
315 return result;
316 }
317
318 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
319 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
320 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
321 if (result != nullptr) {
322 _chunks_in_chunk_list--;
323 }
324 return result;
325 }
326
327 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
328 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
329 return remove_chunk_from_list(&_free_list);
330 }
331
332 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
333 // Get a new chunk.
334 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
335
336 if (new_chunk == nullptr) {
337 // Did not get a chunk from the free list. Allocate from backing memory.
338 new_chunk = _chunk_allocator.allocate_new_chunk();
339
340 if (new_chunk == nullptr) {
341 return false;
342 }
343 }
344
345 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
346
347 add_chunk_to_chunk_list(new_chunk);
348
349 return true;
350 }
351
352 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
353 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
354
355 if (cur == nullptr) {
356 return false;
357 }
358
359 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
360
361 add_chunk_to_free_list(cur);
362 return true;
363 }
364
365 void G1CMMarkStack::set_empty() {
366 _chunks_in_chunk_list = 0;
367 _chunk_list.store_relaxed(nullptr);
368 _free_list.store_relaxed(nullptr);
369 _chunk_allocator.reset();
370 }
371
372 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
373 _root_regions(MemRegion::create_array(max_regions, mtGC)),
374 _max_regions(max_regions),
375 _num_regions(0),
376 _num_claimed_regions(0) { }
377
378 G1CMRootMemRegions::~G1CMRootMemRegions() {
379 MemRegion::destroy_array(_root_regions, _max_regions);
380 }
381
382 void G1CMRootMemRegions::reset() {
383 assert_at_safepoint();
384 assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
385
386 _num_regions.store_relaxed(0);
387 _num_claimed_regions.store_relaxed(0);
388 }
389
390 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
391 assert_at_safepoint();
392 uint idx = _num_regions.fetch_then_add(1u);
393 assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
394 assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
395 "end (" PTR_FORMAT ")", p2i(start), p2i(end));
396 _root_regions[idx].set_start(start);
397 _root_regions[idx].set_end(end);
398 }
399
400 const MemRegion* G1CMRootMemRegions::claim_next() {
401 uint local_num_regions = num_regions();
402 if (num_claimed_regions() >= local_num_regions) {
403 return nullptr;
404 }
405
406 uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
407 if (claimed_index < local_num_regions) {
408 return &_root_regions[claimed_index];
409 }
410 return nullptr;
411 }
412
413 bool G1CMRootMemRegions::work_completed() const {
414 return num_remaining_regions() == 0;
415 }
416
417 uint G1CMRootMemRegions::num_remaining_regions() const {
418 uint total = num_regions();
419 uint claimed = num_claimed_regions();
420 return (total > claimed) ? total - claimed : 0;
421 }
422
423 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
424 uint local_num_root_regions = num_regions();
425 for (uint i = 0; i < local_num_root_regions; i++) {
426 if (_root_regions[i].equals(mr)) {
427 return true;
428 }
429 }
430 return false;
431 }
432
433 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
434 G1RegionToSpaceMapper* bitmap_storage) :
435 _cm_thread(nullptr),
436 _g1h(g1h),
437
438 _mark_bitmap(),
439
440 _heap(_g1h->reserved()),
441
442 _root_regions(_g1h->max_num_regions()),
443 _root_region_scan_aborted(false),
444
445 _global_mark_stack(),
446
447 _finger(nullptr), // _finger set in set_non_marking_state
448
449 _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
450 _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
451 _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
452 _tasks(nullptr), // _tasks set inside late_init()
453 _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
454 _terminator(_max_num_tasks, _task_queues),
455 _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
456
457 _first_overflow_barrier_sync(),
458 _second_overflow_barrier_sync(),
459
460 _completed_mark_cycles(0),
461 _has_overflown(false),
462 _concurrent(false),
463 _has_aborted(false),
464 _restart_for_overflow(false),
465 _gc_timer_cm(new ConcurrentGCTimer()),
466 _gc_tracer_cm(new G1OldTracer()),
467
468 // _verbose_level set below
469
470 _remark_times(),
471 _remark_mark_times(),
472 _remark_weak_ref_times(),
473 _cleanup_times(),
474
475 _concurrent_workers(nullptr),
476 _num_concurrent_workers(0),
477 _max_concurrent_workers(0),
478
479 _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
480 _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
481 _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
482 _needs_remembered_set_rebuild(false)
483 {
484 assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
485
486 _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
487 }
488
489 void G1ConcurrentMark::fully_initialize() {
490 if (is_fully_initialized()) {
491 return;
492 }
493
494 // Create & start ConcurrentMark thread.
495 _cm_thread = new G1ConcurrentMarkThread(this);
496 if (_cm_thread->osthread() == nullptr) {
497 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
498 }
499
500 log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
501 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
502
503 _max_concurrent_workers = ConcGCThreads;
504
505 _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
506 _concurrent_workers->initialize_workers();
507 _num_concurrent_workers = _concurrent_workers->active_workers();
508
509 if (!_global_mark_stack.initialize()) {
510 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
511 }
512
513 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
514
515 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
516 _num_active_tasks = _max_num_tasks;
517
518 for (uint i = 0; i < _max_num_tasks; ++i) {
519 G1CMTaskQueue* task_queue = new G1CMTaskQueue();
520 _task_queues->register_queue(i, task_queue);
521
522 _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
523 }
524
525 uint max_num_regions = _g1h->max_num_regions();
526 ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
527 ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
528 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
529
530 reset_at_marking_complete();
531 }
532
533 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
534 return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
535 }
536
537 bool G1ConcurrentMark::is_in_marking() const {
538 return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
539 }
540
541 bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
542 return cm_thread()->is_in_rebuild_or_scrub();
543 }
544
545 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
546 return cm_thread()->is_in_reset_for_next_cycle();
547 }
548
549 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
550 return _partial_array_state_manager;
551 }
552
553 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
554 assert(is_fully_initialized(), "must be");
555 return _cm_thread;
556 }
557
558 void G1ConcurrentMark::reset() {
559 _has_aborted.store_relaxed(false);
560
561 reset_marking_for_restart();
562
563 // Reset all tasks, since different phases will use different number of active
564 // threads. So, it's easiest to have all of them ready.
565 for (uint i = 0; i < _max_num_tasks; ++i) {
566 _tasks[i]->reset(mark_bitmap());
567 }
568
569 uint max_num_regions = _g1h->max_num_regions();
570 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
571 for (uint i = 0; i < max_num_regions; i++) {
572 _region_mark_stats[i].clear();
573 }
574
575 _root_region_scan_aborted.store_relaxed(false);
576 _root_regions.reset();
577 }
578
579 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
580 uint region_idx = r->hrm_index();
581 for (uint j = 0; j < _max_num_tasks; ++j) {
582 _tasks[j]->clear_mark_stats_cache(region_idx);
583 }
584 _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
585 _region_mark_stats[region_idx].clear();
586 }
587
588 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
589 assert_at_safepoint();
590 assert(r->is_starts_humongous(), "Got humongous continues region here");
591
592 // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
593 mark_bitmap()->clear(r->bottom());
594
595 if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
596 return;
597 }
598
599 // Clear any statistics about the region gathered so far.
600 _g1h->humongous_obj_regions_iterate(r,
601 [&] (G1HeapRegion* r) {
602 clear_statistics(r);
603 });
604 }
605
606 void G1ConcurrentMark::reset_marking_for_restart() {
607 _global_mark_stack.set_empty();
608
609 // Expand the marking stack, if we have to and if we can.
610 if (has_overflown()) {
611 _global_mark_stack.expand();
612
613 uint max_num_regions = _g1h->max_num_regions();
614 for (uint i = 0; i < max_num_regions; i++) {
615 _region_mark_stats[i].clear_during_overflow();
616 }
617 }
618
619 clear_has_overflown();
620 _finger.store_relaxed(_heap.start());
621
622 for (uint i = 0; i < _max_num_tasks; ++i) {
623 _tasks[i]->reset_for_restart();
624 }
625 }
626
627 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
628 assert(active_tasks <= _max_num_tasks, "we should not have more");
629
630 _num_active_tasks = active_tasks;
631 // Need to update the three data structures below according to the
632 // number of active threads for this phase.
633 _terminator.reset_for_reuse(active_tasks);
634 _first_overflow_barrier_sync.set_n_workers(active_tasks);
635 _second_overflow_barrier_sync.set_n_workers(active_tasks);
636 }
637
638 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
639 set_concurrency(active_tasks);
640
641 _concurrent.store_relaxed(concurrent);
642
643 if (!concurrent) {
644 // At this point we should be in a STW phase, and completed marking.
645 assert_at_safepoint_on_vm_thread();
646 assert(out_of_regions(),
647 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
648 p2i(finger()), p2i(_heap.end()));
649 }
650 }
651
652 #if TASKQUEUE_STATS
653 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
654
655 _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
656
657 auto get_pa_stats = [&](uint i) {
658 return _tasks[i]->partial_array_task_stats();
659 };
660
661 PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
662 "Concurrent Mark Partial Array");
663
664 for (uint i = 0; i < _max_num_tasks; ++i) {
665 get_pa_stats(i)->reset();
666 }
667 }
668 #endif
669
670 void G1ConcurrentMark::reset_at_marking_complete() {
671 TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
672 // We set the global marking state to some default values when we're
673 // not doing marking.
674 reset_marking_for_restart();
675 _num_active_tasks = 0;
676 }
677
678 G1ConcurrentMark::~G1ConcurrentMark() {
679 FREE_C_HEAP_ARRAY(_top_at_mark_starts);
680 FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
681 FREE_C_HEAP_ARRAY(_region_mark_stats);
682 // The G1ConcurrentMark instance is never freed.
683 ShouldNotReachHere();
684 }
685
686 class G1ClearBitMapTask : public WorkerTask {
687 public:
688 static size_t chunk_size() { return M; }
689
690 private:
691 // Heap region closure used for clearing the _mark_bitmap.
692 class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
693 G1ConcurrentMark* _cm;
694 G1CMBitMap* _bitmap;
695 bool _suspendible; // If suspendible, do yield checks.
696
697 bool suspendible() {
698 return _suspendible;
699 }
700
701 bool is_clear_concurrent_undo() {
702 return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
703 }
704
705 bool has_aborted() {
706 if (suspendible()) {
707 _cm->do_yield_check();
708 return _cm->has_aborted();
709 }
710 return false;
711 }
712
713 HeapWord* region_clear_limit(G1HeapRegion* r) {
714 // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
715 // live_words data are current wrt to the _mark_bitmap. We use this information
716 // to only clear ranges of the bitmap that require clearing.
717 if (is_clear_concurrent_undo()) {
718 // No need to clear bitmaps for empty regions (which includes regions we
719 // did not mark through).
720 if (!_cm->contains_live_object(r->hrm_index())) {
721 assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
722 return r->bottom();
723 }
724 assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
725 }
726 return r->end();
727 }
728
729 public:
730 G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
731 G1HeapRegionClosure(),
732 _cm(cm),
733 _bitmap(cm->mark_bitmap()),
734 _suspendible(suspendible)
735 { }
736
737 virtual bool do_heap_region(G1HeapRegion* r) {
738 if (has_aborted()) {
739 return true;
740 }
741
742 HeapWord* cur = r->bottom();
743 HeapWord* const end = region_clear_limit(r);
744
745 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
746
747 while (cur < end) {
748
749 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
750 _bitmap->clear_range(mr);
751
752 cur += chunk_size_in_words;
753
754 // Repeat the asserts from before the start of the closure. We will do them
755 // as asserts here to minimize their overhead on the product. However, we
756 // will have them as guarantees at the beginning / end of the bitmap
757 // clearing to get some checking in the product.
758 assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
759
760 // Abort iteration if necessary.
761 if (has_aborted()) {
762 return true;
763 }
764 }
765 assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
766
767 _cm->reset_top_at_mark_start(r);
768
769 return false;
770 }
771 };
772
773 G1ClearBitmapHRClosure _cl;
774 G1HeapRegionClaimer _hr_claimer;
775 bool _suspendible; // If the task is suspendible, workers must join the STS.
776
777 public:
778 G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
779 WorkerTask("G1 Clear Bitmap"),
780 _cl(cm, suspendible),
781 _hr_claimer(n_workers),
782 _suspendible(suspendible)
783 { }
784
785 void work(uint worker_id) {
786 SuspendibleThreadSetJoiner sts_join(_suspendible);
787 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
788 }
789 };
790
791 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
792 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
793
794 size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
795 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
796
797 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
798
799 G1ClearBitMapTask cl(this, num_workers, may_yield);
800
801 log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
802 workers->run_task(&cl, num_workers);
803 }
804
805 void G1ConcurrentMark::cleanup_for_next_mark() {
806 // Make sure that the concurrent mark thread looks to still be in
807 // the current cycle.
808 guarantee(is_in_reset_for_next_cycle(), "invariant");
809
810 clear_bitmap(_concurrent_workers, true);
811
812 reset_partial_array_state_manager();
813
814 // Should not have changed state yet (even if a Full GC interrupted us).
815 guarantee(is_in_reset_for_next_cycle(), "invariant");
816 }
817
818 void G1ConcurrentMark::reset_partial_array_state_manager() {
819 for (uint i = 0; i < _max_num_tasks; ++i) {
820 _tasks[i]->unregister_partial_array_splitter();
821 }
822
823 partial_array_state_manager()->reset();
824
825 for (uint i = 0; i < _max_num_tasks; ++i) {
826 _tasks[i]->register_partial_array_splitter();
827 }
828 }
829
830 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
831 assert_at_safepoint_on_vm_thread();
832 // To avoid fragmentation the full collection requesting to clear the bitmap
833 // might use fewer workers than available. To ensure the bitmap is cleared
834 // as efficiently as possible the number of active workers are temporarily
835 // increased to include all currently created workers.
836 WithActiveWorkers update(workers, workers->created_workers());
837 clear_bitmap(workers, false);
838 }
839
840 class G1PreConcurrentStartTask : public G1BatchedTask {
841 // Reset marking state.
842 class ResetMarkingStateTask;
843 // For each region note start of marking.
844 class NoteStartOfMarkTask;
845
846 public:
847 G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
848 };
849
850 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
851 G1ConcurrentMark* _cm;
852 public:
853 ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
854
855 double worker_cost() const override { return 1.0; }
856 void do_work(uint worker_id) override;
857 };
858
859 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
860
861 class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
862 G1ConcurrentMark* _cm;
863
864 public:
865 NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
866
867 bool do_heap_region(G1HeapRegion* r) override {
868 if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
869 _cm->update_top_at_mark_start(r);
870 } else {
871 _cm->reset_top_at_mark_start(r);
872 }
873 return false;
874 }
875 } _region_cl;
876
877 G1HeapRegionClaimer _claimer;
878 public:
879 NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
880
881 double worker_cost() const override {
882 // The work done per region is very small, therefore we choose this magic number to cap the number
883 // of threads used when there are few regions.
884 const double regions_per_thread = 1000;
885 return _claimer.n_regions() / regions_per_thread;
886 }
887
888 void set_max_workers(uint max_workers) override {
889 _claimer.set_n_workers(max_workers);
890 }
891
892 void do_work(uint worker_id) override {
893 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
894 }
895 };
896
897 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
898 // Reset marking state.
899 _cm->reset();
900 }
901
902 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
903 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
904 add_serial_task(new ResetMarkingStateTask(cm));
905 add_parallel_task(new NoteStartOfMarkTask());
906 };
907
908 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
909 assert_at_safepoint_on_vm_thread();
910
911 G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
912
913 ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
914
915 G1PreConcurrentStartTask cl(cause, this);
916 G1CollectedHeap::heap()->run_batch_task(&cl);
917
918 _gc_tracer_cm->set_gc_cause(cause);
919 }
920
921 void G1ConcurrentMark::start_full_concurrent_cycle() {
922 // Start Concurrent Marking weak-reference discovery.
923 ReferenceProcessor* rp = _g1h->ref_processor_cm();
924 rp->start_discovery(false /* always_clear */);
925
926 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
927 // This is the start of the marking cycle, we're expected all
928 // threads to have SATB queues with active set to false.
929 satb_mq_set.set_active_all_threads(true, /* new active value */
930 false /* expected_active */);
931
932 // update_g1_committed() will be called at the end of an evac pause
933 // when marking is on. So, it's also called at the end of the
934 // concurrent start pause to update the heap end, if the heap expands
935 // during it. No need to call it here.
936
937 // Signal the thread to start work.
938 cm_thread()->start_full_cycle();
939 }
940
941 void G1ConcurrentMark::start_undo_concurrent_cycle() {
942 assert_at_safepoint_on_vm_thread();
943 // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
944 assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
945
946 abort_root_region_scan_at_safepoint();
947
948 // Signal the thread to start work.
949 cm_thread()->start_undo_cycle();
950 }
951
952 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
953 cm_thread()->set_idle();
954 }
955
956 void G1ConcurrentMark::stop() {
957 if (is_fully_initialized()) {
958 cm_thread()->stop();
959 }
960 }
961
962 /*
963 * Notice that in the next two methods, we actually leave the STS
964 * during the barrier sync and join it immediately afterwards. If we
965 * do not do this, the following deadlock can occur: one thread could
966 * be in the barrier sync code, waiting for the other thread to also
967 * sync up, whereas another one could be trying to yield, while also
968 * waiting for the other threads to sync up too.
969 *
970 * Note, however, that this code is also used during remark and in
971 * this case we should not attempt to leave / enter the STS, otherwise
972 * we'll either hit an assert (debug / fastdebug) or deadlock
973 * (product). So we should only leave / enter the STS if we are
974 * operating concurrently.
975 *
976 * Because the thread that does the sync barrier has left the STS, it
977 * is possible to be suspended for a Full GC or an evacuation pause
978 * could occur. This is actually safe, since the entering the sync
979 * barrier is one of the last things do_marking_step() does, and it
980 * doesn't manipulate any data structures afterwards.
981 */
982
983 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
984 bool barrier_aborted;
985 {
986 SuspendibleThreadSetLeaver sts_leave(concurrent());
987 barrier_aborted = !_first_overflow_barrier_sync.enter();
988 }
989
990 // at this point everyone should have synced up and not be doing any
991 // more work
992
993 if (barrier_aborted) {
994 // If the barrier aborted we ignore the overflow condition and
995 // just abort the whole marking phase as quickly as possible.
996 return;
997 }
998 }
999
1000 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1001 SuspendibleThreadSetLeaver sts_leave(concurrent());
1002 _second_overflow_barrier_sync.enter();
1003
1004 // at this point everything should be re-initialized and ready to go
1005 }
1006
1007 class G1CMConcurrentMarkingTask : public WorkerTask {
1008 G1ConcurrentMark* _cm;
1009
1010 public:
1011 void work(uint worker_id) {
1012 ResourceMark rm;
1013
1014 SuspendibleThreadSetJoiner sts_join;
1015
1016 assert(worker_id < _cm->active_tasks(), "invariant");
1017
1018 G1CMTask* task = _cm->task(worker_id);
1019 task->record_start_time();
1020 if (!_cm->has_aborted()) {
1021 do {
1022 task->do_marking_step(G1ConcMarkStepDurationMillis,
1023 true /* do_termination */,
1024 false /* is_serial*/);
1025
1026 _cm->do_yield_check();
1027 } while (!_cm->has_aborted() && task->has_aborted());
1028 }
1029 task->record_end_time();
1030 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1031 }
1032
1033 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1034 WorkerTask("Concurrent Mark"), _cm(cm) { }
1035
1036 ~G1CMConcurrentMarkingTask() { }
1037 };
1038
1039 uint G1ConcurrentMark::calc_active_marking_workers() {
1040 uint result = 0;
1041 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1042 result = _max_concurrent_workers;
1043 } else {
1044 result =
1045 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1046 1, /* Minimum workers */
1047 _num_concurrent_workers,
1048 Threads::number_of_non_daemon_threads());
1049 // Don't scale the result down by scale_concurrent_workers() because
1050 // that scaling has already gone into "_max_concurrent_workers".
1051 }
1052 assert(result > 0 && result <= _max_concurrent_workers,
1053 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1054 _max_concurrent_workers, result);
1055 return result;
1056 }
1057
1058 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1059 return _root_region_scan_aborted.load_relaxed();
1060 }
1061
1062 #ifndef PRODUCT
1063 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1064 assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1065 }
1066 #endif
1067
1068 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1069 #ifdef ASSERT
1070 HeapWord* last = region->last();
1071 G1HeapRegion* hr = _g1h->heap_region_containing(last);
1072 assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1073 "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1074 assert(top_at_mark_start(hr) == region->start(),
1075 "MemRegion start should be equal to TAMS");
1076 #endif
1077
1078 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1079
1080 const uintx interval = PrefetchScanIntervalInBytes;
1081 HeapWord* curr = region->start();
1082 const HeapWord* end = region->end();
1083 while (curr < end) {
1084 Prefetch::read(curr, interval);
1085 oop obj = cast_to_oop(curr);
1086 size_t size = obj->oop_iterate_size(&cl);
1087 assert(size == obj->size(), "sanity");
1088 curr += size;
1089 }
1090 }
1091
1092 class G1CMRootRegionScanTask : public WorkerTask {
1093 G1ConcurrentMark* _cm;
1094 bool _should_yield;
1095
1096 public:
1097 G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1098 WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1099
1100 void work(uint worker_id) {
1101 SuspendibleThreadSetJoiner sts_join(_should_yield);
1102
1103 while (true) {
1104 if (_cm->has_root_region_scan_aborted()) {
1105 return;
1106 }
1107 G1CMRootMemRegions* root_regions = _cm->root_regions();
1108 const MemRegion* region = root_regions->claim_next();
1109 if (region == nullptr) {
1110 return;
1111 }
1112 _cm->scan_root_region(region, worker_id);
1113 if (_should_yield) {
1114 SuspendibleThreadSet::yield();
1115 // If we yielded, a GC may have processed all root regions,
1116 // so this loop will naturally exit on the next claim_next() call.
1117 // Same if a Full GC signalled abort of the concurrent mark.
1118 }
1119 }
1120 }
1121 };
1122
1123 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1124 // We first check whether there is any work to do as we might have already aborted
1125 // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
1126 // We want to avoid spinning up the worker threads if that happened.
1127 // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1128 //
1129 // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1130 // a young gc doing the work.
1131 //
1132 // Concurrent gc threads enter an STS when starting the task, so they stop, then
1133 // continue after that safepoint.
1134 //
1135 // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1136 // consistent view of the value containing the number of remaining regions across the
1137 // usages below. The safepoint/gc may already be running and modifying it
1138 // while this code is still executing.
1139 uint num_remaining = root_regions()->num_remaining_regions();
1140 bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1141 if (do_scan) {
1142 // Assign one worker to each root-region but subject to the max constraint.
1143 // The constraint is also important to avoid accesses beyond the allocated per-worker
1144 // marking helper data structures. We might get passed different WorkerThreads with
1145 // different number of threads (potential worker ids) than helper data structures when
1146 // completing this work during GC.
1147 const uint num_workers = MIN2(num_remaining,
1148 _max_concurrent_workers);
1149 assert(num_workers > 0, "no more remaining root regions to process");
1150
1151 G1CMRootRegionScanTask task(this, concurrent);
1152 log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1153 task.name(), num_workers, num_remaining);
1154 workers->run_task(&task, num_workers);
1155 }
1156
1157 // At the end of this method, we can re-read num_remaining() in the assert: either
1158 // we got non-zero above and we processed all root regions (and it must be zero
1159 // after the worker task synchronization) or it had already been zero. We also
1160 // can't have started another concurrent cycle that could have set it to something else
1161 // while still in the concurrent cycle (if called concurrently).
1162 assert_root_region_scan_completed_or_aborted();
1163
1164 return do_scan;
1165 }
1166
1167 void G1ConcurrentMark::scan_root_regions_concurrently() {
1168 assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1169 scan_root_regions(_concurrent_workers, true /* concurrent */);
1170 }
1171
1172 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1173 assert_at_safepoint_on_vm_thread();
1174 return scan_root_regions(_g1h->workers(), false /* concurrent */);
1175 }
1176
1177 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1178 root_regions()->add(top_at_mark_start(r), r->top());
1179 }
1180
1181 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1182 return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1183 }
1184
1185 void G1ConcurrentMark::abort_root_region_scan() {
1186 assert_not_at_safepoint();
1187
1188 _root_region_scan_aborted.store_relaxed(true);
1189 }
1190
1191 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1192 assert_at_safepoint_on_vm_thread();
1193
1194 _root_region_scan_aborted.store_relaxed(true);
1195 }
1196
1197 void G1ConcurrentMark::concurrent_cycle_start() {
1198 _gc_timer_cm->register_gc_start();
1199
1200 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1201
1202 _g1h->trace_heap_before_gc(_gc_tracer_cm);
1203 }
1204
1205 uint G1ConcurrentMark::completed_mark_cycles() const {
1206 return _completed_mark_cycles.load_relaxed();
1207 }
1208
1209 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1210 _g1h->trace_heap_after_gc(_gc_tracer_cm);
1211
1212 if (mark_cycle_completed) {
1213 _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1214 }
1215
1216 if (has_aborted()) {
1217 log_info(gc, marking)("Concurrent Mark Abort");
1218 _gc_tracer_cm->report_concurrent_mode_failure();
1219 }
1220
1221 _gc_timer_cm->register_gc_end();
1222
1223 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1224 }
1225
1226 void G1ConcurrentMark::mark_from_roots() {
1227 _restart_for_overflow.store_relaxed(false);
1228
1229 uint active_workers = calc_active_marking_workers();
1230
1231 // Setting active workers is not guaranteed since fewer
1232 // worker threads may currently exist and more may not be
1233 // available.
1234 active_workers = _concurrent_workers->set_active_workers(active_workers);
1235 log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1236
1237 _num_concurrent_workers = active_workers;
1238
1239 // Parallel task terminator is set in "set_concurrency_and_phase()"
1240 set_concurrency_and_phase(active_workers, true /* concurrent */);
1241
1242 G1CMConcurrentMarkingTask marking_task(this);
1243 _concurrent_workers->run_task(&marking_task);
1244 print_stats();
1245 }
1246
1247 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1248 static const char* location_strings[] = { "Remark Before",
1249 "Remark After",
1250 "Remark Overflow",
1251 "Cleanup Before",
1252 "Cleanup After" };
1253 return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1254 }
1255
1256 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1257 VerifyLocation location) {
1258 G1HeapVerifier* verifier = _g1h->verifier();
1259
1260 verifier->verify_region_sets_optional();
1261
1262 const char* caller = verify_location_string(location);
1263
1264 if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1265 GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1266
1267 size_t const BufLen = 512;
1268 char buffer[BufLen];
1269
1270 jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1271 verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1272
1273 // Only check bitmap in Remark, and not at After-Verification because the regions
1274 // already have their TAMS'es reset.
1275 if (location != VerifyLocation::RemarkAfter) {
1276 verifier->verify_bitmap_clear(true /* above_tams_only */);
1277 }
1278 }
1279 }
1280
1281 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1282 G1CollectedHeap* _g1h;
1283 public:
1284 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1285
1286 bool do_object_b(oop obj) {
1287 return !_g1h->is_obj_dead(obj);
1288 }
1289 };
1290
1291 void G1ConcurrentMark::remark() {
1292 assert_at_safepoint_on_vm_thread();
1293
1294 // If a full collection has happened, we should not continue. However we might
1295 // have ended up here as the Remark VM operation has been scheduled already.
1296 if (has_aborted()) {
1297 return;
1298 }
1299
1300 G1Policy* policy = _g1h->policy();
1301 policy->record_pause_start_time();
1302
1303 double start = os::elapsedTime();
1304
1305 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1306
1307 {
1308 GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1309 finalize_marking();
1310 }
1311
1312 double mark_work_end = os::elapsedTime();
1313
1314 bool const mark_finished = !has_overflown();
1315 if (mark_finished) {
1316 weak_refs_work();
1317
1318 // Unload Klasses, String, Code Cache, etc.
1319 if (ClassUnloadingWithConcurrentMark) {
1320 G1CMIsAliveClosure is_alive(this);
1321 _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1322 }
1323
1324 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1325 // We're done with marking.
1326 // This is the end of the marking cycle, we're expected all
1327 // threads to have SATB queues with active set to true.
1328 satb_mq_set.set_active_all_threads(false, /* new active value */
1329 true /* expected_active */);
1330
1331 {
1332 GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1333 flush_all_task_caches();
1334 }
1335
1336 // All marking completed. Check bitmap now as we will start to reset TAMSes
1337 // in parallel below so that we can not do this in the After-Remark verification.
1338 _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1339
1340 {
1341 GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1342
1343 G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1344 uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1345 _g1h->workers()->active_workers());
1346 log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1347 _g1h->workers()->run_task(&cl, num_workers);
1348
1349 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1350 _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1351
1352 _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1353
1354 if (_needs_remembered_set_rebuild) {
1355 GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1356 _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1357 }
1358 }
1359
1360 if (log_is_enabled(Trace, gc, liveness)) {
1361 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1362 _g1h->heap_region_iterate(&cl);
1363 }
1364
1365 // Potentially, some empty-regions have been reclaimed; make this a
1366 // "collection" so that pending allocation can retry before attempting a
1367 // GC pause.
1368 _g1h->increment_total_collections();
1369
1370 // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1371 // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1372 // triggered, it likely means there are very few regular GCs, making resizing
1373 // based on gc heuristics less effective.
1374 if (_g1h->last_gc_was_periodic()) {
1375 _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1376 }
1377
1378 compute_new_sizes();
1379
1380 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1381
1382 assert(!restart_for_overflow(), "sanity");
1383 // Completely reset the marking state (except bitmaps) since marking completed.
1384 reset_at_marking_complete();
1385
1386 G1CollectedHeap::finish_codecache_marking_cycle();
1387
1388 {
1389 GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1390 G1ObjectCountIsAliveClosure is_alive(_g1h);
1391 _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1392 }
1393
1394 // Successfully completed marking, advance state.
1395 cm_thread()->set_full_cycle_rebuild_and_scrub();
1396 } else {
1397 // We overflowed. Restart concurrent marking.
1398 _restart_for_overflow.store_relaxed(true);
1399
1400 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1401
1402 // Clear the marking state because we will be restarting
1403 // marking due to overflowing the global mark stack.
1404 reset_marking_for_restart();
1405 }
1406
1407 // Statistics
1408 double now = os::elapsedTime();
1409 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1410 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1411 _remark_times.add((now - start) * 1000.0);
1412
1413 _g1h->update_perf_counter_cpu_time();
1414
1415 policy->record_concurrent_mark_remark_end();
1416
1417 return;
1418 }
1419
1420 void G1ConcurrentMark::compute_new_sizes() {
1421 MetaspaceGC::compute_new_size();
1422
1423 // Cleanup will have freed any regions completely full of garbage.
1424 // Update the soft reference policy with the new heap occupancy.
1425 Universe::heap()->update_capacity_and_used_at_gc();
1426
1427 // We reclaimed old regions so we should calculate the sizes to make
1428 // sure we update the old gen/space data.
1429 _g1h->monitoring_support()->update_sizes();
1430 }
1431
1432 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1433 G1CollectedHeap* _g1h;
1434
1435 public:
1436 G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1437
1438 bool do_heap_region(G1HeapRegion* r) override {
1439 // Update the remset tracking state from updating to complete
1440 // if remembered sets have been rebuilt.
1441 _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1442 return false;
1443 }
1444 };
1445
1446 void G1ConcurrentMark::cleanup() {
1447 assert_at_safepoint_on_vm_thread();
1448
1449 // If a full collection has happened, we shouldn't do this.
1450 if (has_aborted()) {
1451 return;
1452 }
1453
1454 G1Policy* policy = _g1h->policy();
1455 policy->record_pause_start_time();
1456
1457 double start = os::elapsedTime();
1458
1459 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1460
1461 if (needs_remembered_set_rebuild()) {
1462 // Update the remset tracking information as well as marking all regions
1463 // as fully parsable.
1464 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1465 G1UpdateRegionsAfterRebuild cl(_g1h);
1466 _g1h->heap_region_iterate(&cl);
1467 } else {
1468 log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1469 }
1470
1471 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1472
1473 // Local statistics
1474 _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1475
1476 {
1477 GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1478 policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1479 }
1480
1481 // Advance state.
1482 cm_thread()->set_full_cycle_reset_for_next_cycle();
1483 return;
1484 }
1485
1486 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1487 // Uses the G1CMTask associated with a worker thread (for serial reference
1488 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1489 // trace referent objects.
1490 //
1491 // Using the G1CMTask and embedded local queues avoids having the worker
1492 // threads operating on the global mark stack. This reduces the risk
1493 // of overflowing the stack - which we would rather avoid at this late
1494 // state. Also using the tasks' local queues removes the potential
1495 // of the workers interfering with each other that could occur if
1496 // operating on the global stack.
1497
1498 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1499 G1ConcurrentMark* _cm;
1500 G1CMTask* _task;
1501 uint _ref_counter_limit;
1502 uint _ref_counter;
1503 bool _is_serial;
1504 public:
1505 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1506 _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1507 _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1508 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1509 }
1510
1511 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1512 virtual void do_oop( oop* p) { do_oop_work(p); }
1513
1514 template <class T> void do_oop_work(T* p) {
1515 if (_cm->has_overflown()) {
1516 return;
1517 }
1518 if (!_task->deal_with_reference(p)) {
1519 // We did not add anything to the mark bitmap (or mark stack), so there is
1520 // no point trying to drain it.
1521 return;
1522 }
1523 _ref_counter--;
1524
1525 if (_ref_counter == 0) {
1526 // We have dealt with _ref_counter_limit references, pushing them
1527 // and objects reachable from them on to the local stack (and
1528 // possibly the global stack). Call G1CMTask::do_marking_step() to
1529 // process these entries.
1530 //
1531 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1532 // there's nothing more to do (i.e. we're done with the entries that
1533 // were pushed as a result of the G1CMTask::deal_with_reference() calls
1534 // above) or we overflow.
1535 //
1536 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1537 // flag while there may still be some work to do. (See the comment at
1538 // the beginning of G1CMTask::do_marking_step() for those conditions -
1539 // one of which is reaching the specified time target.) It is only
1540 // when G1CMTask::do_marking_step() returns without setting the
1541 // has_aborted() flag that the marking step has completed.
1542 do {
1543 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1544 _task->do_marking_step(mark_step_duration_ms,
1545 false /* do_termination */,
1546 _is_serial);
1547 } while (_task->has_aborted() && !_cm->has_overflown());
1548 _ref_counter = _ref_counter_limit;
1549 }
1550 }
1551 };
1552
1553 // 'Drain' oop closure used by both serial and parallel reference processing.
1554 // Uses the G1CMTask associated with a given worker thread (for serial
1555 // reference processing the G1CMtask for worker 0 is used). Calls the
1556 // do_marking_step routine, with an unbelievably large timeout value,
1557 // to drain the marking data structures of the remaining entries
1558 // added by the 'keep alive' oop closure above.
1559
1560 class G1CMDrainMarkingStackClosure : public VoidClosure {
1561 G1ConcurrentMark* _cm;
1562 G1CMTask* _task;
1563 bool _is_serial;
1564 public:
1565 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1566 _cm(cm), _task(task), _is_serial(is_serial) {
1567 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1568 }
1569
1570 void do_void() {
1571 do {
1572 // We call G1CMTask::do_marking_step() to completely drain the local
1573 // and global marking stacks of entries pushed by the 'keep alive'
1574 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1575 //
1576 // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1577 // if there's nothing more to do (i.e. we've completely drained the
1578 // entries that were pushed as a result of applying the 'keep alive'
1579 // closure to the entries on the discovered ref lists) or we overflow
1580 // the global marking stack.
1581 //
1582 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1583 // flag while there may still be some work to do. (See the comment at
1584 // the beginning of G1CMTask::do_marking_step() for those conditions -
1585 // one of which is reaching the specified time target.) It is only
1586 // when G1CMTask::do_marking_step() returns without setting the
1587 // has_aborted() flag that the marking step has completed.
1588
1589 _task->do_marking_step(1000000000.0 /* something very large */,
1590 true /* do_termination */,
1591 _is_serial);
1592 } while (_task->has_aborted() && !_cm->has_overflown());
1593 }
1594 };
1595
1596 class G1CMRefProcProxyTask : public RefProcProxyTask {
1597 G1CollectedHeap& _g1h;
1598 G1ConcurrentMark& _cm;
1599
1600 public:
1601 G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1602 : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1603 _g1h(g1h),
1604 _cm(cm) {}
1605
1606 void work(uint worker_id) override {
1607 assert(worker_id < _max_workers, "sanity");
1608 G1CMIsAliveClosure is_alive(&_cm);
1609 uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1610 G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1611 BarrierEnqueueDiscoveredFieldClosure enqueue;
1612 G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1613 _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1614 }
1615
1616 void prepare_run_task_hook() override {
1617 // We need to reset the concurrency level before each
1618 // proxy task execution, so that the termination protocol
1619 // and overflow handling in G1CMTask::do_marking_step() knows
1620 // how many workers to wait for.
1621 _cm.set_concurrency(_queue_count);
1622 }
1623 };
1624
1625 void G1ConcurrentMark::weak_refs_work() {
1626 ResourceMark rm;
1627
1628 {
1629 GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1630
1631 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1632
1633 // See the comment in G1CollectedHeap::ref_processing_init()
1634 // about how reference processing currently works in G1.
1635
1636 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1637
1638 // Prefer to grow the stack until the max capacity.
1639 _global_mark_stack.set_should_grow();
1640
1641 // Parallel processing task executor.
1642 G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1643 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1644
1645 // Process the weak references.
1646 const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1647 _gc_tracer_cm->report_gc_reference_stats(stats);
1648 pt.print_all_references();
1649
1650 // The do_oop work routines of the keep_alive and drain_marking_stack
1651 // oop closures will set the has_overflown flag if we overflow the
1652 // global marking stack.
1653
1654 assert(has_overflown() || _global_mark_stack.is_empty(),
1655 "Mark stack should be empty (unless it has overflown)");
1656 }
1657
1658 if (has_overflown()) {
1659 // We can not trust g1_is_alive and the contents of the heap if the marking stack
1660 // overflowed while processing references. Exit the VM.
1661 fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1662 "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1663 "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1664 _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1665 return;
1666 }
1667
1668 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1669
1670 {
1671 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1672 G1CMIsAliveClosure is_alive(this);
1673 WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1674 }
1675 }
1676
1677 class G1PrecleanYieldClosure : public YieldClosure {
1678 G1ConcurrentMark* _cm;
1679
1680 public:
1681 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1682
1683 virtual bool should_return() {
1684 return _cm->has_aborted();
1685 }
1686
1687 virtual bool should_return_fine_grain() {
1688 _cm->do_yield_check();
1689 return _cm->has_aborted();
1690 }
1691 };
1692
1693 void G1ConcurrentMark::preclean() {
1694 assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1695
1696 SuspendibleThreadSetJoiner joiner;
1697
1698 BarrierEnqueueDiscoveredFieldClosure enqueue;
1699
1700 set_concurrency_and_phase(1, true);
1701
1702 G1PrecleanYieldClosure yield_cl(this);
1703
1704 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1705 // Precleaning is single threaded. Temporarily disable MT discovery.
1706 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1707 rp->preclean_discovered_references(rp->is_alive_non_header(),
1708 &enqueue,
1709 &yield_cl,
1710 _gc_timer_cm);
1711 }
1712
1713 // Closure for marking entries in SATB buffers.
1714 class G1CMSATBBufferClosure : public SATBBufferClosure {
1715 private:
1716 G1CMTask* _task;
1717 G1CollectedHeap* _g1h;
1718
1719 // This is very similar to G1CMTask::deal_with_reference, but with
1720 // more relaxed requirements for the argument, so this must be more
1721 // circumspect about treating the argument as an object.
1722 void do_entry(void* entry) const {
1723 _task->increment_refs_reached();
1724 oop const obj = cast_to_oop(entry);
1725 _task->make_reference_grey(obj);
1726 }
1727
1728 public:
1729 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1730 : _task(task), _g1h(g1h) { }
1731
1732 virtual void do_buffer(void** buffer, size_t size) {
1733 for (size_t i = 0; i < size; ++i) {
1734 do_entry(buffer[i]);
1735 }
1736 }
1737 };
1738
1739 class G1RemarkThreadsClosure : public ThreadClosure {
1740 G1SATBMarkQueueSet& _qset;
1741
1742 public:
1743 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1744 _qset(G1BarrierSet::satb_mark_queue_set()) {}
1745
1746 void do_thread(Thread* thread) {
1747 // Transfer any partial buffer to the qset for completed buffer processing.
1748 _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1749 }
1750 };
1751
1752 class G1CMRemarkTask : public WorkerTask {
1753 // For Threads::possibly_parallel_threads_do
1754 ThreadsClaimTokenScope _threads_claim_token_scope;
1755 G1ConcurrentMark* _cm;
1756 public:
1757 void work(uint worker_id) {
1758 G1CMTask* task = _cm->task(worker_id);
1759 task->record_start_time();
1760 {
1761 ResourceMark rm;
1762
1763 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1764 Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1765 }
1766
1767 do {
1768 task->do_marking_step(1000000000.0 /* something very large */,
1769 true /* do_termination */,
1770 false /* is_serial */);
1771 } while (task->has_aborted() && !_cm->has_overflown());
1772 // If we overflow, then we do not want to restart. We instead
1773 // want to abort remark and do concurrent marking again.
1774 task->record_end_time();
1775 }
1776
1777 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1778 WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1779 _cm->terminator()->reset_for_reuse(active_workers);
1780 }
1781 };
1782
1783 void G1ConcurrentMark::finalize_marking() {
1784 ResourceMark rm;
1785
1786 _g1h->ensure_parsability(false);
1787
1788 // this is remark, so we'll use up all active threads
1789 uint active_workers = _g1h->workers()->active_workers();
1790 set_concurrency_and_phase(active_workers, false /* concurrent */);
1791 // Leave _parallel_marking_threads at it's
1792 // value originally calculated in the G1ConcurrentMark
1793 // constructor and pass values of the active workers
1794 // through the task.
1795
1796 {
1797 G1CMRemarkTask remarkTask(this, active_workers);
1798 // We will start all available threads, even if we decide that the
1799 // active_workers will be fewer. The extra ones will just bail out
1800 // immediately.
1801 _g1h->workers()->run_task(&remarkTask);
1802 }
1803
1804 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1805 guarantee(has_overflown() ||
1806 satb_mq_set.completed_buffers_num() == 0,
1807 "Invariant: has_overflown = %s, num buffers = %zu",
1808 BOOL_TO_STR(has_overflown()),
1809 satb_mq_set.completed_buffers_num());
1810
1811 print_stats();
1812 }
1813
1814 void G1ConcurrentMark::flush_all_task_caches() {
1815 size_t hits = 0;
1816 size_t misses = 0;
1817 for (uint i = 0; i < _max_num_tasks; i++) {
1818 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1819 hits += stats.first;
1820 misses += stats.second;
1821 }
1822 size_t sum = hits + misses;
1823 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1824 hits, misses, percent_of(hits, sum));
1825 }
1826
1827 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1828 assert_at_safepoint();
1829 _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1830 }
1831
1832 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1833 // "Checkpoint" the finger.
1834 HeapWord* local_finger = finger();
1835
1836 while (local_finger < _heap.end()) {
1837 assert(_g1h->is_in_reserved(local_finger), "invariant");
1838
1839 G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1840 // Make sure that the reads below do not float before loading curr_region.
1841 OrderAccess::loadload();
1842 // Above heap_region_containing may return null as we always scan claim
1843 // until the end of the heap. In this case, just jump to the next region.
1844 HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1845
1846 // Is the gap between reading the finger and doing the CAS too long?
1847 HeapWord* res = _finger.compare_exchange(local_finger, end);
1848 if (res == local_finger && curr_region != nullptr) {
1849 // We succeeded.
1850 HeapWord* bottom = curr_region->bottom();
1851 HeapWord* limit = top_at_mark_start(curr_region);
1852
1853 log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1854 // Notice that _finger == end cannot be guaranteed here since,
1855 // someone else might have moved the finger even further.
1856 assert(finger() >= end, "The finger should have moved forward");
1857
1858 if (limit > bottom) {
1859 return curr_region;
1860 } else {
1861 assert(limit == bottom,
1862 "The region limit should be at bottom");
1863 // We return null and the caller should try calling
1864 // claim_region() again.
1865 return nullptr;
1866 }
1867 } else {
1868 // Read the finger again.
1869 HeapWord* next_finger = finger();
1870 assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
1871 local_finger = next_finger;
1872 }
1873 }
1874
1875 return nullptr;
1876 }
1877
1878 #ifndef PRODUCT
1879 class VerifyNoCSetOops {
1880 G1CollectedHeap* _g1h;
1881 const char* _phase;
1882 int _info;
1883
1884 public:
1885 VerifyNoCSetOops(const char* phase, int info = -1) :
1886 _g1h(G1CollectedHeap::heap()),
1887 _phase(phase),
1888 _info(info)
1889 { }
1890
1891 void operator()(G1TaskQueueEntry task_entry) const {
1892 if (task_entry.is_partial_array_state()) {
1893 oop obj = task_entry.to_partial_array_state()->source();
1894 guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1895 return;
1896 }
1897 guarantee(oopDesc::is_oop(task_entry.to_oop()),
1898 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1899 p2i(task_entry.to_oop()), _phase, _info);
1900 G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1901 guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1902 "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1903 p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1904 }
1905 };
1906
1907 void G1ConcurrentMark::verify_no_collection_set_oops() {
1908 assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1909 "should be at a safepoint or initializing");
1910 if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1911 return;
1912 }
1913
1914 // Verify entries on the global mark stack
1915 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1916
1917 // Verify entries on the task queues
1918 for (uint i = 0; i < _max_num_tasks; ++i) {
1919 G1CMTaskQueue* queue = _task_queues->queue(i);
1920 queue->iterate(VerifyNoCSetOops("Queue", i));
1921 }
1922
1923 // Verify the global finger
1924 HeapWord* global_finger = finger();
1925 if (global_finger != nullptr && global_finger < _heap.end()) {
1926 // Since we always iterate over all regions, we might get a null G1HeapRegion
1927 // here.
1928 G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1929 guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1930 "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1931 p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1932 }
1933
1934 // Verify the task fingers
1935 assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1936 for (uint i = 0; i < _num_concurrent_workers; ++i) {
1937 G1CMTask* task = _tasks[i];
1938 HeapWord* task_finger = task->finger();
1939 if (task_finger != nullptr && task_finger < _heap.end()) {
1940 // See above note on the global finger verification.
1941 G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
1942 guarantee(r == nullptr || task_finger == r->bottom() ||
1943 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1944 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1945 p2i(task_finger), HR_FORMAT_PARAMS(r));
1946 }
1947 }
1948 }
1949 #endif // PRODUCT
1950
1951 void G1ConcurrentMark::rebuild_and_scrub() {
1952 if (!needs_remembered_set_rebuild()) {
1953 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1954 }
1955
1956 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1957 }
1958
1959 void G1ConcurrentMark::print_stats() {
1960 if (!log_is_enabled(Debug, gc, stats)) {
1961 return;
1962 }
1963 log_debug(gc, stats)("---------------------------------------------------------------------");
1964 for (size_t i = 0; i < _num_active_tasks; ++i) {
1965 _tasks[i]->print_stats();
1966 log_debug(gc, stats)("---------------------------------------------------------------------");
1967 }
1968 }
1969
1970 bool G1ConcurrentMark::concurrent_cycle_abort() {
1971 assert_at_safepoint_on_vm_thread();
1972 assert(_g1h->collector_state()->is_in_full_gc(), "must be");
1973
1974 // If we start the compaction before the CM threads finish
1975 // scanning the root regions we might trip them over as we'll
1976 // be moving objects / updating references. Since the root region
1977 // scan synchronized with the safepoint, just tell it to abort.
1978 // It will notice when the threads start up again later.
1979 abort_root_region_scan_at_safepoint();
1980
1981 // We haven't started a concurrent cycle no need to do anything; we might have
1982 // aborted the marking because of shutting down though. In this case the marking
1983 // might have already completed the abort (leading to in_progress() below to
1984 // return false), however this still left marking state particularly in the
1985 // shared marking bitmap that must be cleaned up.
1986 // If there are multiple full gcs during shutdown we do this work repeatedly for
1987 // nothing, but this situation should be extremely rare (a full gc after shutdown
1988 // has been signalled is already rare), and this work should be negligible compared
1989 // to actual full gc work.
1990
1991 if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
1992 return false;
1993 }
1994
1995 reset_marking_for_restart();
1996
1997 abort_marking_threads();
1998
1999 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2000 satb_mq_set.abandon_partial_marking();
2001 // This can be called either during or outside marking, we'll read
2002 // the expected_active value from the SATB queue set.
2003 satb_mq_set.set_active_all_threads(false, /* new active value */
2004 satb_mq_set.is_active() /* expected_active */);
2005 return true;
2006 }
2007
2008 void G1ConcurrentMark::abort_marking_threads() {
2009 assert_root_region_scan_completed_or_aborted();
2010 _has_aborted.store_relaxed(true);
2011 _first_overflow_barrier_sync.abort();
2012 _second_overflow_barrier_sync.abort();
2013 }
2014
2015 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2016 class CountCpuTimeThreadClosure : public ThreadClosure {
2017 public:
2018 jlong _total_cpu_time;
2019
2020 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2021
2022 void do_thread(Thread* t) {
2023 _total_cpu_time += os::thread_cpu_time(t);
2024 }
2025 } cl;
2026
2027 threads_do(&cl);
2028
2029 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2030 }
2031
2032 static void print_ms_time_info(const char* prefix, const char* name,
2033 NumberSeq& ns) {
2034 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2035 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2036 if (ns.num() > 0) {
2037 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2038 prefix, ns.sd(), ns.maximum());
2039 }
2040 }
2041
2042 void G1ConcurrentMark::print_summary_info() {
2043 Log(gc, marking) log;
2044 if (!log.is_trace()) {
2045 return;
2046 }
2047
2048 log.trace(" Concurrent marking:");
2049 if (!is_fully_initialized()) {
2050 log.trace(" has not been initialized yet");
2051 return;
2052 }
2053 print_ms_time_info(" ", "remarks", _remark_times);
2054 {
2055 print_ms_time_info(" ", "final marks", _remark_mark_times);
2056 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2057
2058 }
2059 print_ms_time_info(" ", "cleanups", _cleanup_times);
2060 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2061 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2062 log.trace(" Total stop_world time = %8.2f s.",
2063 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2064 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2065 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2066 }
2067
2068 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2069 if (is_fully_initialized()) { // they are initialized late
2070 tc->do_thread(_cm_thread);
2071 _concurrent_workers->threads_do(tc);
2072 }
2073 }
2074
2075 void G1ConcurrentMark::print_on(outputStream* st) const {
2076 st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2077 _mark_bitmap.print_on(st, " Bits: ");
2078 }
2079
2080 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2081 ReferenceProcessor* result = g1h->ref_processor_cm();
2082 assert(result != nullptr, "CM reference processor should not be null");
2083 return result;
2084 }
2085
2086 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2087 G1CMTask* task)
2088 : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2089 _g1h(g1h), _task(task)
2090 { }
2091
2092 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2093 assert(hr != nullptr,
2094 "claim_region() should have filtered out null regions");
2095 _curr_region = hr;
2096 _finger = hr->bottom();
2097 update_region_limit();
2098 }
2099
2100 void G1CMTask::update_region_limit() {
2101 G1HeapRegion* hr = _curr_region;
2102 HeapWord* bottom = hr->bottom();
2103 HeapWord* limit = _cm->top_at_mark_start(hr);
2104
2105 if (limit == bottom) {
2106 // The region was collected underneath our feet.
2107 // We set the finger to bottom to ensure that the bitmap
2108 // iteration that will follow this will not do anything.
2109 // (this is not a condition that holds when we set the region up,
2110 // as the region is not supposed to be empty in the first place)
2111 _finger = bottom;
2112 } else if (limit >= _region_limit) {
2113 assert(limit >= _finger, "peace of mind");
2114 } else {
2115 assert(limit < _region_limit, "only way to get here");
2116 // This can happen under some pretty unusual circumstances. An
2117 // evacuation pause empties the region underneath our feet (TAMS
2118 // at bottom). We then do some allocation in the region (TAMS
2119 // stays at bottom), followed by the region being used as a GC
2120 // alloc region (TAMS will move to top() and the objects
2121 // originally below it will be greyed). All objects now marked in
2122 // the region are explicitly greyed, if below the global finger,
2123 // and we do not need in fact to scan anything else. So, we simply
2124 // set _finger to be limit to ensure that the bitmap iteration
2125 // doesn't do anything.
2126 _finger = limit;
2127 }
2128
2129 _region_limit = limit;
2130 }
2131
2132 void G1CMTask::giveup_current_region() {
2133 assert(_curr_region != nullptr, "invariant");
2134 clear_region_fields();
2135 }
2136
2137 void G1CMTask::clear_region_fields() {
2138 // Values for these three fields that indicate that we're not
2139 // holding on to a region.
2140 _curr_region = nullptr;
2141 _finger = nullptr;
2142 _region_limit = nullptr;
2143 }
2144
2145 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2146 if (cm_oop_closure == nullptr) {
2147 assert(_cm_oop_closure != nullptr, "invariant");
2148 } else {
2149 assert(_cm_oop_closure == nullptr, "invariant");
2150 }
2151 _cm_oop_closure = cm_oop_closure;
2152 }
2153
2154 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2155 guarantee(mark_bitmap != nullptr, "invariant");
2156 _mark_bitmap = mark_bitmap;
2157 clear_region_fields();
2158
2159 _calls = 0;
2160 _elapsed_time_ms = 0.0;
2161 _termination_time_ms = 0.0;
2162
2163 _mark_stats_cache.reset();
2164 }
2165
2166 void G1CMTask::reset_for_restart() {
2167 clear_region_fields();
2168 _task_queue->set_empty();
2169 TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2170 TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2171 }
2172
2173 void G1CMTask::register_partial_array_splitter() {
2174
2175 ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2176 _cm->max_num_tasks());
2177 }
2178
2179 void G1CMTask::unregister_partial_array_splitter() {
2180 _partial_array_splitter.~PartialArraySplitter();
2181 }
2182
2183 bool G1CMTask::should_exit_termination() {
2184 if (!regular_clock_call()) {
2185 return true;
2186 }
2187
2188 // This is called when we are in the termination protocol. We should
2189 // quit if, for some reason, this task wants to abort or the global
2190 // stack is not empty (this means that we can get work from it).
2191 return !_cm->mark_stack_empty() || has_aborted();
2192 }
2193
2194 void G1CMTask::reached_limit() {
2195 assert(_words_scanned >= _words_scanned_limit ||
2196 _refs_reached >= _refs_reached_limit ,
2197 "shouldn't have been called otherwise");
2198 abort_marking_if_regular_check_fail();
2199 }
2200
2201 bool G1CMTask::regular_clock_call() {
2202 if (has_aborted()) {
2203 return false;
2204 }
2205
2206 // First, we need to recalculate the words scanned and refs reached
2207 // limits for the next clock call.
2208 recalculate_limits();
2209
2210 // During the regular clock call we do the following
2211
2212 // (1) If an overflow has been flagged, then we abort.
2213 if (_cm->has_overflown()) {
2214 return false;
2215 }
2216
2217 // If we are not concurrent (i.e. we're doing remark) we don't need
2218 // to check anything else. The other steps are only needed during
2219 // the concurrent marking phase.
2220 if (!_cm->concurrent()) {
2221 return true;
2222 }
2223
2224 // (2) If marking has been aborted for Full GC, then we also abort.
2225 if (_cm->has_aborted()) {
2226 return false;
2227 }
2228
2229 // (4) We check whether we should yield. If we have to, then we abort.
2230 if (SuspendibleThreadSet::should_yield()) {
2231 // We should yield. To do this we abort the task. The caller is
2232 // responsible for yielding.
2233 return false;
2234 }
2235
2236 // (5) We check whether we've reached our time quota. If we have,
2237 // then we abort.
2238 double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2239 if (elapsed_time_ms > _time_target_ms) {
2240 _has_timed_out = true;
2241 return false;
2242 }
2243
2244 // (6) Finally, we check whether there are enough completed STAB
2245 // buffers available for processing. If there are, we abort.
2246 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2247 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2248 // we do need to process SATB buffers, we'll abort and restart
2249 // the marking task to do so
2250 return false;
2251 }
2252 return true;
2253 }
2254
2255 void G1CMTask::recalculate_limits() {
2256 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2257 _words_scanned_limit = _real_words_scanned_limit;
2258
2259 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2260 _refs_reached_limit = _real_refs_reached_limit;
2261 }
2262
2263 void G1CMTask::decrease_limits() {
2264 // This is called when we believe that we're going to do an infrequent
2265 // operation which will increase the per byte scanned cost (i.e. move
2266 // entries to/from the global stack). It basically tries to decrease the
2267 // scanning limit so that the clock is called earlier.
2268
2269 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2270 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2271 }
2272
2273 void G1CMTask::move_entries_to_global_stack() {
2274 // Local array where we'll store the entries that will be popped
2275 // from the local queue.
2276 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2277
2278 size_t n = 0;
2279 G1TaskQueueEntry task_entry;
2280 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2281 buffer[n] = task_entry;
2282 ++n;
2283 }
2284 if (n < G1CMMarkStack::EntriesPerChunk) {
2285 buffer[n] = G1TaskQueueEntry();
2286 }
2287
2288 if (n > 0) {
2289 if (!_cm->mark_stack_push(buffer)) {
2290 set_has_aborted();
2291 }
2292 }
2293
2294 // This operation was quite expensive, so decrease the limits.
2295 decrease_limits();
2296 }
2297
2298 bool G1CMTask::get_entries_from_global_stack() {
2299 // Local array where we'll store the entries that will be popped
2300 // from the global stack.
2301 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2302
2303 if (!_cm->mark_stack_pop(buffer)) {
2304 return false;
2305 }
2306
2307 // We did actually pop at least one entry.
2308 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2309 G1TaskQueueEntry task_entry = buffer[i];
2310 if (task_entry.is_null()) {
2311 break;
2312 }
2313 assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2314 bool success = _task_queue->push(task_entry);
2315 // We only call this when the local queue is empty or under a
2316 // given target limit. So, we do not expect this push to fail.
2317 assert(success, "invariant");
2318 }
2319
2320 // This operation was quite expensive, so decrease the limits
2321 decrease_limits();
2322 return true;
2323 }
2324
2325 void G1CMTask::drain_local_queue(bool partially) {
2326 if (has_aborted()) {
2327 return;
2328 }
2329
2330 // Decide what the target size is, depending whether we're going to
2331 // drain it partially (so that other tasks can steal if they run out
2332 // of things to do) or totally (at the very end).
2333 uint target_size;
2334 if (partially) {
2335 target_size = GCDrainStackTargetSize;
2336 } else {
2337 target_size = 0;
2338 }
2339
2340 if (_task_queue->size() > target_size) {
2341 G1TaskQueueEntry entry;
2342 bool ret = _task_queue->pop_local(entry);
2343 while (ret) {
2344 process_entry(entry, false /* stolen */);
2345 if (_task_queue->size() <= target_size || has_aborted()) {
2346 ret = false;
2347 } else {
2348 ret = _task_queue->pop_local(entry);
2349 }
2350 }
2351 }
2352 }
2353
2354 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2355 assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2356
2357 // Mark objArray klass metadata
2358 process_klass(obj->klass());
2359
2360 size_t array_length = obj->length();
2361 size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2362
2363 process_array_chunk(obj, 0, initial_chunk_size);
2364
2365 // Include object header size
2366 return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2367 }
2368
2369 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2370 PartialArrayState* state = task.to_partial_array_state();
2371 // Access state before release by claim().
2372 objArrayOop obj = objArrayOop(state->source());
2373
2374 PartialArraySplitter::Claim claim =
2375 _partial_array_splitter.claim(state, _task_queue, stolen);
2376
2377 process_array_chunk(obj, claim._start, claim._end);
2378 return heap_word_size((claim._end - claim._start) * heapOopSize);
2379 }
2380
2381 void G1CMTask::drain_global_stack(bool partially) {
2382 if (has_aborted()) {
2383 return;
2384 }
2385
2386 // We have a policy to drain the local queue before we attempt to
2387 // drain the global stack.
2388 assert(partially || _task_queue->size() == 0, "invariant");
2389
2390 // Decide what the target size is, depending whether we're going to
2391 // drain it partially (so that other tasks can steal if they run out
2392 // of things to do) or totally (at the very end).
2393 // Notice that when draining the global mark stack partially, due to the racyness
2394 // of the mark stack size update we might in fact drop below the target. But,
2395 // this is not a problem.
2396 // In case of total draining, we simply process until the global mark stack is
2397 // totally empty, disregarding the size counter.
2398 if (partially) {
2399 size_t const target_size = _cm->partial_mark_stack_size_target();
2400 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2401 if (get_entries_from_global_stack()) {
2402 drain_local_queue(partially);
2403 }
2404 }
2405 } else {
2406 while (!has_aborted() && get_entries_from_global_stack()) {
2407 drain_local_queue(partially);
2408 }
2409 }
2410 }
2411
2412 // SATB Queue has several assumptions on whether to call the par or
2413 // non-par versions of the methods. this is why some of the code is
2414 // replicated. We should really get rid of the single-threaded version
2415 // of the code to simplify things.
2416 void G1CMTask::drain_satb_buffers() {
2417 if (has_aborted()) {
2418 return;
2419 }
2420
2421 // We set this so that the regular clock knows that we're in the
2422 // middle of draining buffers and doesn't set the abort flag when it
2423 // notices that SATB buffers are available for draining. It'd be
2424 // very counter productive if it did that. :-)
2425 _draining_satb_buffers = true;
2426
2427 G1CMSATBBufferClosure satb_cl(this, _g1h);
2428 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2429
2430 // This keeps claiming and applying the closure to completed buffers
2431 // until we run out of buffers or we need to abort.
2432 while (!has_aborted() &&
2433 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2434 abort_marking_if_regular_check_fail();
2435 }
2436
2437 // Can't assert qset is empty here, even if not aborted. If concurrent,
2438 // some other thread might be adding to the queue. If not concurrent,
2439 // some other thread might have won the race for the last buffer, but
2440 // has not yet decremented the count.
2441
2442 _draining_satb_buffers = false;
2443
2444 // again, this was a potentially expensive operation, decrease the
2445 // limits to get the regular clock call early
2446 decrease_limits();
2447 }
2448
2449 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2450 _mark_stats_cache.reset(region_idx);
2451 }
2452
2453 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2454 return _mark_stats_cache.evict_all();
2455 }
2456
2457 void G1CMTask::print_stats() {
2458 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2459 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2460 _elapsed_time_ms, _termination_time_ms);
2461 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2462 _step_times_ms.num(),
2463 _step_times_ms.avg(),
2464 _step_times_ms.sd(),
2465 _step_times_ms.maximum(),
2466 _step_times_ms.sum());
2467 size_t const hits = _mark_stats_cache.hits();
2468 size_t const misses = _mark_stats_cache.misses();
2469 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2470 hits, misses, percent_of(hits, hits + misses));
2471 }
2472
2473 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2474 return _task_queues->steal(worker_id, task_entry);
2475 }
2476
2477 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2478 if (has_aborted() || _curr_region == nullptr) {
2479 return;
2480 }
2481
2482 // This means that we're already holding on to a region.
2483 assert(_finger != nullptr, "if region is not null, then the finger "
2484 "should not be null either");
2485
2486 // We might have restarted this task after an evacuation pause
2487 // which might have evacuated the region we're holding on to
2488 // underneath our feet. Let's read its limit again to make sure
2489 // that we do not iterate over a region of the heap that
2490 // contains garbage (update_region_limit() will also move
2491 // _finger to the start of the region if it is found empty).
2492 update_region_limit();
2493 // We will start from _finger not from the start of the region,
2494 // as we might be restarting this task after aborting half-way
2495 // through scanning this region. In this case, _finger points to
2496 // the address where we last found a marked object. If this is a
2497 // fresh region, _finger points to start().
2498 MemRegion mr = MemRegion(_finger, _region_limit);
2499
2500 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2501 "humongous regions should go around loop once only");
2502
2503 // Some special cases:
2504 // If the memory region is empty, we can just give up the region.
2505 // If the current region is humongous then we only need to check
2506 // the bitmap for the bit associated with the start of the object,
2507 // scan the object if it's live, and give up the region.
2508 // Otherwise, let's iterate over the bitmap of the part of the region
2509 // that is left.
2510 // If the iteration is successful, give up the region.
2511 if (mr.is_empty()) {
2512 giveup_current_region();
2513 abort_marking_if_regular_check_fail();
2514 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2515 if (_mark_bitmap->is_marked(mr.start())) {
2516 // The object is marked - apply the closure
2517 bitmap_closure.do_addr(mr.start());
2518 }
2519 // Even if this task aborted while scanning the humongous object
2520 // we can (and should) give up the current region.
2521 giveup_current_region();
2522 abort_marking_if_regular_check_fail();
2523 } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2524 giveup_current_region();
2525 abort_marking_if_regular_check_fail();
2526 } else {
2527 assert(has_aborted(), "currently the only way to do so");
2528 // The only way to abort the bitmap iteration is to return
2529 // false from the do_bit() method. However, inside the
2530 // do_bit() method we move the _finger to point to the
2531 // object currently being looked at. So, if we bail out, we
2532 // have definitely set _finger to something non-null.
2533 assert(_finger != nullptr, "invariant");
2534
2535 // Region iteration was actually aborted. So now _finger
2536 // points to the address of the object we last scanned. If we
2537 // leave it there, when we restart this task, we will rescan
2538 // the object. It is easy to avoid this. We move the finger by
2539 // enough to point to the next possible object header.
2540 assert(_finger < _region_limit, "invariant");
2541 HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2542 if (new_finger >= _region_limit) {
2543 giveup_current_region();
2544 } else {
2545 move_finger_to(new_finger);
2546 }
2547 }
2548 }
2549
2550 void G1CMTask::claim_new_region() {
2551 // Read the note on the claim_region() method on why it might
2552 // return null with potentially more regions available for
2553 // claiming and why we have to check out_of_regions() to determine
2554 // whether we're done or not.
2555 while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2556 // We are going to try to claim a new region. We should have
2557 // given up on the previous one.
2558 // Separated the asserts so that we know which one fires.
2559 assert(_curr_region == nullptr, "invariant");
2560 assert(_finger == nullptr, "invariant");
2561 assert(_region_limit == nullptr, "invariant");
2562 G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2563 if (claimed_region != nullptr) {
2564 // Yes, we managed to claim one
2565 setup_for_region(claimed_region);
2566 assert(_curr_region == claimed_region, "invariant");
2567 }
2568 // It is important to call the regular clock here. It might take
2569 // a while to claim a region if, for example, we hit a large
2570 // block of empty regions. So we need to call the regular clock
2571 // method once round the loop to make sure it's called
2572 // frequently enough.
2573 abort_marking_if_regular_check_fail();
2574 }
2575 }
2576
2577 void G1CMTask::attempt_stealing() {
2578 // We cannot check whether the global stack is empty, since other
2579 // tasks might be pushing objects to it concurrently.
2580 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2581 "only way to reach here");
2582 while (!has_aborted()) {
2583 G1TaskQueueEntry entry;
2584 if (_cm->try_stealing(_worker_id, entry)) {
2585 process_entry(entry, true /* stolen */);
2586
2587 // And since we're towards the end, let's totally drain the
2588 // local queue and global stack.
2589 drain_local_queue(false);
2590 drain_global_stack(false);
2591 } else {
2592 break;
2593 }
2594 }
2595 }
2596
2597 void G1CMTask::attempt_termination(bool is_serial) {
2598 // We cannot check whether the global stack is empty, since other
2599 // tasks might be concurrently pushing objects on it.
2600 // Separated the asserts so that we know which one fires.
2601 assert(_cm->out_of_regions(), "only way to reach here");
2602 assert(_task_queue->size() == 0, "only way to reach here");
2603 double termination_start_time_ms = os::elapsedTime() * 1000.0;
2604
2605 // The G1CMTask class also extends the TerminatorTerminator class,
2606 // hence its should_exit_termination() method will also decide
2607 // whether to exit the termination protocol or not.
2608 bool finished = (is_serial ||
2609 _cm->terminator()->offer_termination(this));
2610 _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2611
2612 if (finished) {
2613 // We're all done.
2614
2615 // We can now guarantee that the global stack is empty, since
2616 // all other tasks have finished. We separated the guarantees so
2617 // that, if a condition is false, we can immediately find out
2618 // which one.
2619 guarantee(_cm->out_of_regions(), "only way to reach here");
2620 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2621 guarantee(_task_queue->size() == 0, "only way to reach here");
2622 guarantee(!_cm->has_overflown(), "only way to reach here");
2623 guarantee(!has_aborted(), "should never happen if termination has completed");
2624 } else {
2625 // Apparently there's more work to do. Let's abort this task. We
2626 // will restart it and hopefully we can find more things to do.
2627 set_has_aborted();
2628 }
2629 }
2630
2631 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2632 if (_has_timed_out) {
2633 double diff_ms = elapsed_time_ms - _time_target_ms;
2634 // Keep statistics of how well we did with respect to hitting
2635 // our target only if we actually timed out (if we aborted for
2636 // other reasons, then the results might get skewed).
2637 _marking_step_diff_ms.add(diff_ms);
2638 }
2639
2640 if (!_cm->has_overflown()) {
2641 return;
2642 }
2643
2644 // This is the interesting one. We aborted because a global
2645 // overflow was raised. This means we have to restart the
2646 // marking phase and start iterating over regions. However, in
2647 // order to do this we have to make sure that all tasks stop
2648 // what they are doing and re-initialize in a safe manner. We
2649 // will achieve this with the use of two barrier sync points.
2650 if (!is_serial) {
2651 // We only need to enter the sync barrier if being called
2652 // from a parallel context
2653 _cm->enter_first_sync_barrier(_worker_id);
2654
2655 // When we exit this sync barrier we know that all tasks have
2656 // stopped doing marking work. So, it's now safe to
2657 // re-initialize our data structures.
2658 }
2659
2660 clear_region_fields();
2661 flush_mark_stats_cache();
2662
2663 if (!is_serial) {
2664 // If we're executing the concurrent phase of marking, reset the marking
2665 // state; otherwise the marking state is reset after reference processing,
2666 // during the remark pause.
2667 // If we reset here as a result of an overflow during the remark we will
2668 // see assertion failures from any subsequent set_concurrency_and_phase()
2669 // calls.
2670 if (_cm->concurrent() && _worker_id == 0) {
2671 // Worker 0 is responsible for clearing the global data structures because
2672 // of an overflow. During STW we should not clear the overflow flag (in
2673 // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2674 // method to abort the pause and restart concurrent marking.
2675 _cm->reset_marking_for_restart();
2676
2677 log_info(gc, marking)("Concurrent Mark reset for overflow");
2678 }
2679
2680 // ...and enter the second barrier.
2681 _cm->enter_second_sync_barrier(_worker_id);
2682 }
2683 }
2684
2685 /*****************************************************************************
2686
2687 The do_marking_step(time_target_ms, ...) method is the building
2688 block of the parallel marking framework. It can be called in parallel
2689 with other invocations of do_marking_step() on different tasks
2690 (but only one per task, obviously) and concurrently with the
2691 mutator threads, or during remark, hence it eliminates the need
2692 for two versions of the code. When called during remark, it will
2693 pick up from where the task left off during the concurrent marking
2694 phase. Interestingly, tasks are also claimable during evacuation
2695 pauses too, since do_marking_step() ensures that it aborts before
2696 it needs to yield.
2697
2698 The data structures that it uses to do marking work are the
2699 following:
2700
2701 (1) Marking Bitmap. If there are grey objects that appear only
2702 on the bitmap (this happens either when dealing with an overflow
2703 or when the concurrent start pause has simply marked the roots
2704 and didn't push them on the stack), then tasks claim heap
2705 regions whose bitmap they then scan to find grey objects. A
2706 global finger indicates where the end of the last claimed region
2707 is. A local finger indicates how far into the region a task has
2708 scanned. The two fingers are used to determine how to grey an
2709 object (i.e. whether simply marking it is OK, as it will be
2710 visited by a task in the future, or whether it needs to be also
2711 pushed on a stack).
2712
2713 (2) Local Queue. The local queue of the task which is accessed
2714 reasonably efficiently by the task. Other tasks can steal from
2715 it when they run out of work. Throughout the marking phase, a
2716 task attempts to keep its local queue short but not totally
2717 empty, so that entries are available for stealing by other
2718 tasks. Only when there is no more work, a task will totally
2719 drain its local queue.
2720
2721 (3) Global Mark Stack. This handles local queue overflow. During
2722 marking only sets of entries are moved between it and the local
2723 queues, as access to it requires a mutex and more fine-grain
2724 interaction with it which might cause contention. If it
2725 overflows, then the marking phase should restart and iterate
2726 over the bitmap to identify grey objects. Throughout the marking
2727 phase, tasks attempt to keep the global mark stack at a small
2728 length but not totally empty, so that entries are available for
2729 popping by other tasks. Only when there is no more work, tasks
2730 will totally drain the global mark stack.
2731
2732 (4) SATB Buffer Queue. This is where completed SATB buffers are
2733 made available. Buffers are regularly removed from this queue
2734 and scanned for roots, so that the queue doesn't get too
2735 long. During remark, all completed buffers are processed, as
2736 well as the filled in parts of any uncompleted buffers.
2737
2738 The do_marking_step() method tries to abort when the time target
2739 has been reached. There are a few other cases when the
2740 do_marking_step() method also aborts:
2741
2742 (1) When the marking phase has been aborted (after a Full GC).
2743
2744 (2) When a global overflow (on the global stack) has been
2745 triggered. Before the task aborts, it will actually sync up with
2746 the other tasks to ensure that all the marking data structures
2747 (local queues, stacks, fingers etc.) are re-initialized so that
2748 when do_marking_step() completes, the marking phase can
2749 immediately restart.
2750
2751 (3) When enough completed SATB buffers are available. The
2752 do_marking_step() method only tries to drain SATB buffers right
2753 at the beginning. So, if enough buffers are available, the
2754 marking step aborts and the SATB buffers are processed at
2755 the beginning of the next invocation.
2756
2757 (4) To yield. when we have to yield then we abort and yield
2758 right at the end of do_marking_step(). This saves us from a lot
2759 of hassle as, by yielding we might allow a Full GC. If this
2760 happens then objects will be compacted underneath our feet, the
2761 heap might shrink, etc. We save checking for this by just
2762 aborting and doing the yield right at the end.
2763
2764 From the above it follows that the do_marking_step() method should
2765 be called in a loop (or, otherwise, regularly) until it completes.
2766
2767 If a marking step completes without its has_aborted() flag being
2768 true, it means it has completed the current marking phase (and
2769 also all other marking tasks have done so and have all synced up).
2770
2771 A method called regular_clock_call() is invoked "regularly" (in
2772 sub ms intervals) throughout marking. It is this clock method that
2773 checks all the abort conditions which were mentioned above and
2774 decides when the task should abort. A work-based scheme is used to
2775 trigger this clock method: when the number of object words the
2776 marking phase has scanned or the number of references the marking
2777 phase has visited reach a given limit. Additional invocations to
2778 the method clock have been planted in a few other strategic places
2779 too. The initial reason for the clock method was to avoid calling
2780 cpu time gathering too regularly, as it is quite expensive. So,
2781 once it was in place, it was natural to piggy-back all the other
2782 conditions on it too and not constantly check them throughout the code.
2783
2784 If do_termination is true then do_marking_step will enter its
2785 termination protocol.
2786
2787 The value of is_serial must be true when do_marking_step is being
2788 called serially (i.e. by the VMThread) and do_marking_step should
2789 skip any synchronization in the termination and overflow code.
2790 Examples include the serial remark code and the serial reference
2791 processing closures.
2792
2793 The value of is_serial must be false when do_marking_step is
2794 being called by any of the worker threads.
2795 Examples include the concurrent marking code (CMMarkingTask),
2796 the MT remark code, and the MT reference processing closures.
2797
2798 *****************************************************************************/
2799
2800 void G1CMTask::do_marking_step(double time_target_ms,
2801 bool do_termination,
2802 bool is_serial) {
2803 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2804
2805 _start_cpu_time_ns = os::current_thread_cpu_time();
2806
2807 // If do_stealing is true then do_marking_step will attempt to
2808 // steal work from the other G1CMTasks. It only makes sense to
2809 // enable stealing when the termination protocol is enabled
2810 // and do_marking_step() is not being called serially.
2811 bool do_stealing = do_termination && !is_serial;
2812
2813 G1Predictions const& predictor = _g1h->policy()->predictor();
2814 double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2815 _time_target_ms = time_target_ms - diff_prediction_ms;
2816
2817 // set up the variables that are used in the work-based scheme to
2818 // call the regular clock method
2819 _words_scanned = 0;
2820 _refs_reached = 0;
2821 recalculate_limits();
2822
2823 // clear all flags
2824 clear_has_aborted();
2825 _has_timed_out = false;
2826 _draining_satb_buffers = false;
2827
2828 ++_calls;
2829
2830 // Set up the bitmap and oop closures. Anything that uses them is
2831 // eventually called from this method, so it is OK to allocate these
2832 // statically.
2833 G1CMBitMapClosure bitmap_closure(this, _cm);
2834 G1CMOopClosure cm_oop_closure(_g1h, this);
2835 set_cm_oop_closure(&cm_oop_closure);
2836
2837 if (_cm->has_overflown()) {
2838 // This can happen if the mark stack overflows during a GC pause
2839 // and this task, after a yield point, restarts. We have to abort
2840 // as we need to get into the overflow protocol which happens
2841 // right at the end of this task.
2842 set_has_aborted();
2843 }
2844
2845 // First drain any available SATB buffers. After this, we will not
2846 // look at SATB buffers before the next invocation of this method.
2847 // If enough completed SATB buffers are queued up, the regular clock
2848 // will abort this task so that it restarts.
2849 drain_satb_buffers();
2850 // ...then partially drain the local queue and the global stack
2851 drain_local_queue(true);
2852 drain_global_stack(true);
2853
2854 do {
2855 process_current_region(bitmap_closure);
2856 // At this point we have either completed iterating over the
2857 // region we were holding on to, or we have aborted.
2858
2859 // We then partially drain the local queue and the global stack.
2860 drain_local_queue(true);
2861 drain_global_stack(true);
2862
2863 claim_new_region();
2864
2865 assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2866 "at this point we should be out of regions");
2867 } while ( _curr_region != nullptr && !has_aborted());
2868
2869 // We cannot check whether the global stack is empty, since other
2870 // tasks might be pushing objects to it concurrently.
2871 assert(has_aborted() || _cm->out_of_regions(),
2872 "at this point we should be out of regions");
2873 // Try to reduce the number of available SATB buffers so that
2874 // remark has less work to do.
2875 drain_satb_buffers();
2876
2877 // Since we've done everything else, we can now totally drain the
2878 // local queue and global stack.
2879 drain_local_queue(false);
2880 drain_global_stack(false);
2881
2882 // Attempt at work stealing from other task's queues.
2883 if (do_stealing && !has_aborted()) {
2884 // We have not aborted. This means that we have finished all that
2885 // we could. Let's try to do some stealing...
2886 attempt_stealing();
2887 }
2888
2889 // We still haven't aborted. Now, let's try to get into the
2890 // termination protocol.
2891 if (do_termination && !has_aborted()) {
2892 attempt_termination(is_serial);
2893 }
2894
2895 // Mainly for debugging purposes to make sure that a pointer to the
2896 // closure which was statically allocated in this frame doesn't
2897 // escape it by accident.
2898 set_cm_oop_closure(nullptr);
2899 jlong end_cpu_time_ns = os::current_thread_cpu_time();
2900 double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2901 // Update the step history.
2902 _step_times_ms.add(elapsed_time_ms);
2903
2904 if (has_aborted()) {
2905 // The task was aborted for some reason.
2906 handle_abort(is_serial, elapsed_time_ms);
2907 }
2908 }
2909
2910 G1CMTask::G1CMTask(uint worker_id,
2911 G1ConcurrentMark* cm,
2912 G1CMTaskQueue* task_queue,
2913 G1RegionMarkStats* mark_stats) :
2914 _worker_id(worker_id),
2915 _g1h(G1CollectedHeap::heap()),
2916 _cm(cm),
2917 _mark_bitmap(nullptr),
2918 _task_queue(task_queue),
2919 _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
2920 _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
2921 _calls(0),
2922 _time_target_ms(0.0),
2923 _start_cpu_time_ns(0),
2924 _cm_oop_closure(nullptr),
2925 _curr_region(nullptr),
2926 _finger(nullptr),
2927 _region_limit(nullptr),
2928 _words_scanned(0),
2929 _words_scanned_limit(0),
2930 _real_words_scanned_limit(0),
2931 _refs_reached(0),
2932 _refs_reached_limit(0),
2933 _real_refs_reached_limit(0),
2934 _has_aborted(false),
2935 _has_timed_out(false),
2936 _draining_satb_buffers(false),
2937 _step_times_ms(),
2938 _elapsed_time_ms(0.0),
2939 _termination_time_ms(0.0),
2940 _marking_step_diff_ms()
2941 {
2942 guarantee(task_queue != nullptr, "invariant");
2943
2944 _marking_step_diff_ms.add(0.5);
2945 }
2946
2947 // These are formatting macros that are used below to ensure
2948 // consistent formatting. The *_H_* versions are used to format the
2949 // header for a particular value and they should be kept consistent
2950 // with the corresponding macro. Also note that most of the macros add
2951 // the necessary white space (as a prefix) which makes them a bit
2952 // easier to compose.
2953
2954 // All the output lines are prefixed with this string to be able to
2955 // identify them easily in a large log file.
2956 #define G1PPRL_LINE_PREFIX "###"
2957
2958 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
2959 #ifdef _LP64
2960 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
2961 #else // _LP64
2962 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
2963 #endif // _LP64
2964
2965 // For per-region info
2966 #define G1PPRL_TYPE_FORMAT " %-4s"
2967 #define G1PPRL_TYPE_H_FORMAT " %4s"
2968 #define G1PPRL_STATE_FORMAT " %-5s"
2969 #define G1PPRL_STATE_H_FORMAT " %5s"
2970 #define G1PPRL_BYTE_FORMAT " %9zu"
2971 #define G1PPRL_BYTE_H_FORMAT " %9s"
2972 #define G1PPRL_DOUBLE_FORMAT "%14.1f"
2973 #define G1PPRL_GCEFF_H_FORMAT " %14s"
2974 #define G1PPRL_GID_H_FORMAT " %9s"
2975 #define G1PPRL_GID_FORMAT " " UINT32_FORMAT_W(9)
2976 #define G1PPRL_LEN_FORMAT " " UINT32_FORMAT_W(14)
2977 #define G1PPRL_LEN_H_FORMAT " %14s"
2978 #define G1PPRL_GID_GCEFF_FORMAT " %14.1f"
2979 #define G1PPRL_GID_LIVENESS_FORMAT " %9.2f"
2980
2981 // For summary info
2982 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
2983 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": %zu"
2984 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
2985 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2986
2987 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2988 _total_used_bytes(0),
2989 _total_capacity_bytes(0),
2990 _total_live_bytes(0),
2991 _total_remset_bytes(0),
2992 _total_code_roots_bytes(0)
2993 {
2994 if (!log_is_enabled(Trace, gc, liveness)) {
2995 return;
2996 }
2997
2998 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2999 MemRegion reserved = g1h->reserved();
3000 double now = os::elapsedTime();
3001
3002 // Print the header of the output.
3003 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3004 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3005 G1PPRL_SUM_ADDR_FORMAT("reserved")
3006 G1PPRL_SUM_BYTE_FORMAT("region-size"),
3007 p2i(reserved.start()), p2i(reserved.end()),
3008 G1HeapRegion::GrainBytes);
3009 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3010 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3011 G1PPRL_TYPE_H_FORMAT
3012 G1PPRL_ADDR_BASE_H_FORMAT
3013 G1PPRL_BYTE_H_FORMAT
3014 G1PPRL_BYTE_H_FORMAT
3015 G1PPRL_STATE_H_FORMAT
3016 G1PPRL_BYTE_H_FORMAT
3017 G1PPRL_GID_H_FORMAT,
3018 "type", "address-range",
3019 "used", "live",
3020 "state", "code-roots",
3021 "group-id");
3022 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3023 G1PPRL_TYPE_H_FORMAT
3024 G1PPRL_ADDR_BASE_H_FORMAT
3025 G1PPRL_BYTE_H_FORMAT
3026 G1PPRL_BYTE_H_FORMAT
3027 G1PPRL_STATE_H_FORMAT
3028 G1PPRL_BYTE_H_FORMAT
3029 G1PPRL_GID_H_FORMAT,
3030 "", "",
3031 "(bytes)", "(bytes)",
3032 "", "(bytes)", "");
3033 }
3034
3035 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3036 if (!log_is_enabled(Trace, gc, liveness)) {
3037 return false;
3038 }
3039
3040 const char* type = r->get_type_str();
3041 HeapWord* bottom = r->bottom();
3042 HeapWord* end = r->end();
3043 size_t capacity_bytes = r->capacity();
3044 size_t used_bytes = r->used();
3045 size_t live_bytes = r->live_bytes();
3046 size_t remset_bytes = r->rem_set()->mem_size();
3047 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3048 const char* remset_type = r->rem_set()->get_short_state_str();
3049 uint cset_group_id = r->rem_set()->has_cset_group()
3050 ? r->rem_set()->cset_group_id()
3051 : G1CSetCandidateGroup::NoRemSetId;
3052
3053 _total_used_bytes += used_bytes;
3054 _total_capacity_bytes += capacity_bytes;
3055 _total_live_bytes += live_bytes;
3056 _total_remset_bytes += remset_bytes;
3057 _total_code_roots_bytes += code_roots_bytes;
3058
3059 // Print a line for this particular region.
3060 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3061 G1PPRL_TYPE_FORMAT
3062 G1PPRL_ADDR_BASE_FORMAT
3063 G1PPRL_BYTE_FORMAT
3064 G1PPRL_BYTE_FORMAT
3065 G1PPRL_STATE_FORMAT
3066 G1PPRL_BYTE_FORMAT
3067 G1PPRL_GID_FORMAT,
3068 type, p2i(bottom), p2i(end),
3069 used_bytes, live_bytes,
3070 remset_type, code_roots_bytes,
3071 cset_group_id);
3072
3073 return false;
3074 }
3075
3076 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3077 if (!log_is_enabled(Trace, gc, liveness)) {
3078 return;
3079 }
3080
3081 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3082 _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3083 // add static memory usages to remembered set sizes
3084 _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3085
3086 log_cset_candidate_groups();
3087
3088 // Print the footer of the output.
3089 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3090 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3091 " SUMMARY"
3092 G1PPRL_SUM_MB_FORMAT("capacity")
3093 G1PPRL_SUM_MB_PERC_FORMAT("used")
3094 G1PPRL_SUM_MB_PERC_FORMAT("live")
3095 G1PPRL_SUM_MB_FORMAT("remset")
3096 G1PPRL_SUM_MB_FORMAT("code-roots"),
3097 bytes_to_mb(_total_capacity_bytes),
3098 bytes_to_mb(_total_used_bytes),
3099 percent_of(_total_used_bytes, _total_capacity_bytes),
3100 bytes_to_mb(_total_live_bytes),
3101 percent_of(_total_live_bytes, _total_capacity_bytes),
3102 bytes_to_mb(_total_remset_bytes),
3103 bytes_to_mb(_total_code_roots_bytes));
3104 }
3105
3106 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3107 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3108 G1PPRL_GID_FORMAT
3109 G1PPRL_LEN_FORMAT
3110 G1PPRL_GID_GCEFF_FORMAT
3111 G1PPRL_GID_LIVENESS_FORMAT
3112 G1PPRL_BYTE_FORMAT
3113 G1PPRL_TYPE_H_FORMAT,
3114 group->group_id(),
3115 group->length(),
3116 group->length() > 0 ? group->gc_efficiency() : 0.0,
3117 group->length() > 0 ? group->liveness_percent() : 0.0,
3118 group->card_set()->mem_size(),
3119 type);
3120 _total_remset_bytes += group->card_set()->mem_size();
3121 }
3122
3123 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3124 for (G1CSetCandidateGroup* group : gl) {
3125 log_cset_candidate_group_add_total(group, type);
3126 }
3127 }
3128
3129 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3130 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3131 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3132 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3133 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3134 G1PPRL_GID_H_FORMAT
3135 G1PPRL_LEN_H_FORMAT
3136 G1PPRL_GCEFF_H_FORMAT
3137 G1PPRL_BYTE_H_FORMAT
3138 G1PPRL_BYTE_H_FORMAT
3139 G1PPRL_TYPE_H_FORMAT,
3140 "groud-id", "num-regions",
3141 "gc-eff", "liveness",
3142 "remset", "type");
3143
3144 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3145 G1PPRL_GID_H_FORMAT
3146 G1PPRL_LEN_H_FORMAT
3147 G1PPRL_GCEFF_H_FORMAT
3148 G1PPRL_BYTE_H_FORMAT
3149 G1PPRL_BYTE_H_FORMAT
3150 G1PPRL_TYPE_H_FORMAT,
3151 "", "",
3152 "(bytes/ms)", "%",
3153 "(bytes)", "");
3154
3155 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3156
3157 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3158
3159 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3160 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3161 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3162 }