1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectorState.inline.hpp"
34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
38 #include "gc/g1/g1ConcurrentRefine.hpp"
39 #include "gc/g1/g1HeapRegion.inline.hpp"
40 #include "gc/g1/g1HeapRegionManager.hpp"
41 #include "gc/g1/g1HeapRegionPrinter.hpp"
42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
44 #include "gc/g1/g1HeapVerifier.hpp"
45 #include "gc/g1/g1OopClosures.inline.hpp"
46 #include "gc/g1/g1Policy.hpp"
47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
48 #include "gc/g1/g1ThreadLocalData.hpp"
49 #include "gc/g1/g1Trace.hpp"
50 #include "gc/shared/gcId.hpp"
51 #include "gc/shared/gcTimer.hpp"
52 #include "gc/shared/gcTraceTime.inline.hpp"
53 #include "gc/shared/gcVMOperations.hpp"
54 #include "gc/shared/partialArraySplitter.inline.hpp"
55 #include "gc/shared/partialArrayState.hpp"
56 #include "gc/shared/partialArrayTaskStats.hpp"
57 #include "gc/shared/referencePolicy.hpp"
58 #include "gc/shared/suspendibleThreadSet.hpp"
59 #include "gc/shared/taskqueue.inline.hpp"
60 #include "gc/shared/taskTerminator.hpp"
61 #include "gc/shared/weakProcessor.inline.hpp"
62 #include "gc/shared/workerPolicy.hpp"
63 #include "jvm.h"
64 #include "logging/log.hpp"
65 #include "memory/allocation.hpp"
66 #include "memory/iterator.hpp"
67 #include "memory/metaspaceUtils.hpp"
68 #include "memory/resourceArea.hpp"
69 #include "memory/universe.hpp"
70 #include "nmt/memTracker.hpp"
71 #include "oops/access.inline.hpp"
72 #include "oops/oop.inline.hpp"
73 #include "oops/oopCast.inline.hpp"
74 #include "runtime/globals_extension.hpp"
75 #include "runtime/handles.inline.hpp"
76 #include "runtime/java.hpp"
77 #include "runtime/orderAccess.hpp"
78 #include "runtime/os.hpp"
79 #include "runtime/prefetch.inline.hpp"
80 #include "runtime/threads.hpp"
81 #include "utilities/align.hpp"
82 #include "utilities/checkedCast.hpp"
83 #include "utilities/formatBuffer.hpp"
84 #include "utilities/growableArray.hpp"
85 #include "utilities/powerOfTwo.hpp"
86
// Default-constructed closure is unusable until initialize() is called.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
88
// Constructs a closure immediately bound to the given (non-null) concurrent mark.
G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}
92
// Late-binds the closure to a concurrent mark instance. May only be called
// once, on a default-constructed closure.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
98
// Processes one marked object found during bitmap iteration. The address must
// lie between the task's local finger and the global finger. Returns false to
// abort the iteration when the owning task has aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
115
// Constructs an empty mark stack; backing memory is set up later in initialize().
G1CMMarkStack::G1CMMarkStack() :
  _chunk_allocator() {
  set_empty();
}
120
121 size_t G1CMMarkStack::capacity_alignment() {
122 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
123 }
124
// Sizes the global mark stack from the MarkStackSize/MarkStackSizeMax flags,
// rounds both up to whole chunks, clamps them, and writes the effective values
// back to the flags. Returns false if reserving the backing memory fails.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  // Number of G1TaskQueueEntry elements that fit into one chunk.
  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  // Convert capacities (in entries) into chunk counts, rounding up.
  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  // The chunk allocator doubles its buckets, so the initial chunk count must be
  // a power of two; the maximum must be at least the initial count.
  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Clamp the resulting capacities (in entries) to an int-representable range.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  // Publish the effective, rounded values back to the flags.
  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  // NOTE(review): the second value logged is the maximum capacity in entries,
  // not chunks, although the message wording suggests chunks for both — confirm.
  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
153
// Hands out the next unused chunk from the backing memory, or nullptr when at
// maximum capacity (or when growing is disallowed). May be called concurrently
// by multiple marking threads.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  // Cheap rejection before touching the shared counter.
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  // Claim a slot. The counter may race past _max_capacity when several threads
  // get here at once, hence the recheck below.
  size_t cur_idx = _size.fetch_then_add(1u);

  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  // Check, lock, recheck: if the bucket holding the claimed slot has not been
  // allocated yet, expand under the lock (unless growing is disabled).
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  size_t bucket_idx = get_bucket_index(cur_idx);
  // Placement-new the chunk into its reserved slot within the bucket.
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
186
// Zero-initializes the allocator; initialize() sets the real capacities and
// allocates the bucket table.
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }
196
197 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
198 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
199
200 _min_capacity = initial_capacity;
201 _max_capacity = max_capacity;
202 _num_buckets = get_bucket(_max_capacity) + 1;
203
204 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
205
206 for (size_t i = 0; i < _num_buckets; i++) {
207 _buckets[i].store_relaxed(nullptr);
208 }
209
210 size_t new_capacity = bucket_size(0);
211
212 if (!reserve(new_capacity)) {
213 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
214 return false;
215 }
216 return true;
217 }
218
219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
220 if (_capacity == _max_capacity) {
221 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
222 return false;
223 }
224
225 size_t old_capacity = _capacity;
226 desired_capacity = MIN2(desired_capacity, _max_capacity);
227
228 if (reserve(desired_capacity)) {
229 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
230 old_capacity, desired_capacity);
231 return true;
232 }
233 return false;
234 }
235
236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
237 size_t new_capacity = _capacity * 2;
238 return try_expand_to(new_capacity);
239 }
240
241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
242 if (_buckets == nullptr) {
243 return;
244 }
245
246 for (size_t i = 0; i < _num_buckets; i++) {
247 if (_buckets[i].load_relaxed() != nullptr) {
248 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
249 _buckets[i].store_relaxed(nullptr);
250 }
251 }
252
253 FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
254 }
255
// Reserves backing memory so that the allocator can hold new_capacity chunks,
// allocating every not-yet-backed bucket up to the one covering the last
// index. Buckets are published with a release store so that racing readers in
// allocate_new_chunk() (which use load_acquire) see fully initialized memory.
// Returns false if any bucket allocation fails; _capacity then reflects only
// the buckets actually reserved.
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    // Account the new memory before publishing the bucket pointer.
    _capacity += bucket_capacity;
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
290
// Best-effort doubling of the mark stack capacity; failure is tolerated
// (marking will restart instead).
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}
294
// Pushes elem onto the front of the singly-linked chunk list. Callers must
// hold the lock guarding 'list'; only the individual accesses are atomic.
void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
  elem->next = list->load_relaxed();
  list->store_relaxed(elem);
}
299
// Adds a filled chunk to the global chunk list under its lock, keeping the
// element count in sync.
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}
305
// Returns an emptied chunk to the free list under its lock for later reuse.
void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}
310
311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
312 TaskQueueEntryChunk* result = list->load_relaxed();
313 if (result != nullptr) {
314 list->store_relaxed(list->load_relaxed()->next);
315 }
316 return result;
317 }
318
// Takes a filled chunk from the global chunk list under its lock, keeping the
// element count in sync. Returns nullptr if the list is empty.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != nullptr) {
    _chunks_in_chunk_list--;
  }
  return result;
}
327
// Takes a reusable chunk from the free list under its lock, or nullptr if none.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
332
333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
334 // Get a new chunk.
335 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
336
337 if (new_chunk == nullptr) {
338 // Did not get a chunk from the free list. Allocate from backing memory.
339 new_chunk = _chunk_allocator.allocate_new_chunk();
340
341 if (new_chunk == nullptr) {
342 return false;
343 }
344 }
345
346 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
347
348 add_chunk_to_chunk_list(new_chunk);
349
350 return true;
351 }
352
353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
354 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
355
356 if (cur == nullptr) {
357 return false;
358 }
359
360 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
361
362 add_chunk_to_free_list(cur);
363 return true;
364 }
365
// Discards all queued chunks and resets the allocator's usage counter. The
// backing memory itself stays reserved for reuse.
void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _chunk_list.store_relaxed(nullptr);
  _free_list.store_relaxed(nullptr);
  _chunk_allocator.reset();
}
372
// Allocates space for up to max_regions root MemRegions; starts out empty.
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
  _root_regions(MemRegion::create_array(max_regions, mtGC)),
  _max_regions(max_regions),
  _num_regions(0),
  _num_claimed_regions(0) { }
378
// Frees the backing MemRegion array.
G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}
382
// Clears all registered and claimed regions. Only valid at the safepoint of a
// concurrent start GC.
void G1CMRootMemRegions::reset() {
  assert_at_safepoint();
  assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");

  _num_regions.store_relaxed(0);
  _num_claimed_regions.store_relaxed(0);
}
390
// Registers [start, end) as a root region to be scanned. Called at a
// safepoint, possibly by multiple workers, hence the atomic index bump.
void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
  assert_at_safepoint();
  uint idx = _num_regions.fetch_then_add(1u);
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
  _root_regions[idx].set_start(start);
  _root_regions[idx].set_end(end);
}
400
// Atomically claims the next unclaimed root region, or returns nullptr when
// everything has been claimed. Called concurrently by multiple workers.
const MemRegion* G1CMRootMemRegions::claim_next() {
  uint local_num_regions = num_regions();
  if (num_claimed_regions() >= local_num_regions) {
    return nullptr;
  }

  // The claim counter is bumped unconditionally, so racing threads can push it
  // past the number of registered regions; the bounds check filters the losers.
  uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
  if (claimed_index < local_num_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}
413
// True once every registered root region has been claimed for scanning.
bool G1CMRootMemRegions::work_completed() const {
  return num_remaining_regions() == 0;
}
417
418 uint G1CMRootMemRegions::num_remaining_regions() const {
419 uint total = num_regions();
420 uint claimed = num_claimed_regions();
421 return (total > claimed) ? total - claimed : 0;
422 }
423
424 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
425 uint local_num_root_regions = num_regions();
426 for (uint i = 0; i < local_num_root_regions; i++) {
427 if (_root_regions[i].equals(mr)) {
428 return true;
429 }
430 }
431 return false;
432 }
433
// Constructs the concurrent mark subsystem. Only cheap state is set up here;
// threads, task queues and per-region arrays are completed in
// fully_initialize(), and the mark bitmap is backed by the given storage.
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),
  _root_region_scan_aborted(false),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  // Per-region arrays sized for the maximum region count; the elements are
  // constructed later in fully_initialize().
  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
489
// Completes initialization deferred from the constructor: starts the marking
// thread and worker pool, sets up the global mark stack, creates the per-task
// structures and value-constructs the per-region arrays. Idempotent.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  // One task and one queue per potential marking thread.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Value-construct the raw per-region arrays allocated in the constructor.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
  ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};

  reset_at_marking_complete();
}
533
534 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
535 return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
536 }
537
538 bool G1ConcurrentMark::is_in_marking() const {
539 return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
540 }
541
// Whether the marking thread is currently in the rebuild-and-scrub phase.
// Requires full initialization (checked by cm_thread()).
bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
  return cm_thread()->is_in_rebuild_or_scrub();
}
545
// Whether the marking thread is currently resetting state for the next cycle.
// Requires full initialization (checked by cm_thread()).
bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
  return cm_thread()->is_in_reset_for_next_cycle();
}
549
// Accessor for the manager shared by all tasks' partial array splitters.
PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
  return _partial_array_state_manager;
}
553
// Accessor for the concurrent mark thread; only valid after fully_initialize().
G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
  assert(is_fully_initialized(), "must be");
  return _cm_thread;
}
558
// Resets all marking state ahead of a new marking cycle: the abort flag, the
// global and per-task state, the per-region statistics and the root regions.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  // Re-value-construct the rebuild tops and clear the per-region statistics.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
  for (uint i = 0; i < max_num_regions; i++) {
    _region_mark_stats[i].clear();
  }

  _root_region_scan_aborted.store_relaxed(false);
  _root_regions.reset();
}
579
580 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
581 uint region_idx = r->hrm_index();
582 for (uint j = 0; j < _max_num_tasks; ++j) {
583 _tasks[j]->clear_mark_stats_cache(region_idx);
584 }
585 _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
586 _region_mark_stats[region_idx].clear();
587 }
588
// Called at a safepoint when a humongous object starting in region r has been
// eagerly reclaimed: clears its mark bit and, if a mark/rebuild is in
// progress, wipes all statistics for every region the object spanned.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
606
// Resets global marking state so that marking can be (re)started, e.g. after
// a mark stack overflow. Also used when leaving the marking state entirely.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    // Discard partial per-region statistics gathered before the overflow;
    // marking starts over.
    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Restart the global finger at the bottom of the heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
627
// Records the number of tasks active in the upcoming phase and resizes the
// terminator and both overflow barriers to match.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}
638
// Sets the active task count and whether the coming phase runs concurrently
// (true) or during a STW pause such as remark (false).
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
652
#if TASKQUEUE_STATS
// Logs the accumulated task queue and partial-array statistics for all tasks,
// then resets them for the next cycle. Compiled in only with TASKQUEUE_STATS.
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "Concurrent Mark Partial Array");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
#endif
670
// Returns the global marking state to its idle defaults once marking is done.
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}
678
// Intentionally unreachable: the singleton G1ConcurrentMark lives for the
// whole VM lifetime. The frees above document ownership of the arrays.
G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
686
// Worker task that clears the concurrent mark bitmap in chunk_size() pieces,
// optionally yielding to safepoints (suspendible mode) and aborting early if
// the concurrent mark has been aborted.
class G1ClearBitMapTask : public WorkerTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    // True when clearing concurrently as part of an undo cycle, in which case
    // the per-region marking data is still valid and can limit the work.
    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
    }

    // Yield check (suspendible mode only); reports whether marking aborted.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clears the bitmap for one region in chunk-sized steps, checking for
    // abort between chunks. Returns true to stop the whole iteration.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }
};
791
// Clears the mark bitmap using the given workers. When may_yield is true the
// work is suspendible (concurrent); otherwise it must run at a safepoint. The
// worker count is capped by the number of chunk-sized work units.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
}
805
// Concurrently prepares for the next marking cycle: clears the bitmap
// (yielding as needed) and resets the partial array state manager. Must run
// entirely within the reset-for-next-cycle state.
void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(is_in_reset_for_next_cycle(), "invariant");

  clear_bitmap(_concurrent_workers, true);

  reset_partial_array_state_manager();

  // Should not have changed state yet (even if a Full GC interrupted us).
  guarantee(is_in_reset_for_next_cycle(), "invariant");
}
818
819 void G1ConcurrentMark::reset_partial_array_state_manager() {
820 for (uint i = 0; i < _max_num_tasks; ++i) {
821 _tasks[i]->unregister_partial_array_splitter();
822 }
823
824 partial_array_state_manager()->reset();
825
826 for (uint i = 0; i < _max_num_tasks; ++i) {
827 _tasks[i]->register_partial_array_splitter();
828 }
829 }
830
// STW variant of the bitmap clear, used by full collections. Temporarily
// activates all created workers for maximum throughput.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
  assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible the number of active workers are temporarily
  // increased to include all currently created workers.
  WithActiveWorkers update(workers, workers->created_workers());
  clear_bitmap(workers, false);
}
840
// Batched task run at the concurrent start safepoint; bundles the serial
// marking-state reset with the parallel note-start-of-mark work.
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
850
// Serial sub-task that resets the concurrent mark state (see do_work below).
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};
859
// Parallel sub-task that records, per region, the top-at-mark-start used as
// the marking boundary for this cycle.
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {

  class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;

  public:
    NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }

    bool do_heap_region(G1HeapRegion* r) override {
      // Only old/humongous regions that will survive this cycle keep a real
      // TAMS; everything else gets its TAMS reset.
      if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
        _cm->update_top_at_mark_start(r);
      } else {
        _cm->reset_top_at_mark_start(r);
      }
      return false;
    }
  } _region_cl;

  G1HeapRegionClaimer _claimer;
public:
  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }

  double worker_cost() const override {
    // The work done per region is very small, therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
    const double regions_per_thread = 1000;
    return _claimer.n_regions() / regions_per_thread;
  }

  void set_max_workers(uint max_workers) override {
    _claimer.set_n_workers(max_workers);
  }

  void do_work(uint worker_id) override {
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
  }
};
897
// Serial part of the pre-concurrent-start work: reset all marking state.
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
902
903 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
904 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
905 add_serial_task(new ResetMarkingStateTask(cm));
906 add_parallel_task(new NoteStartOfMarkTask());
907 };
908
// Runs the STW preparation for a concurrent start GC: starts the code cache
// marking cycle, verifies CLD claim marks are clear, executes the batched
// pre-start task and records the GC cause for tracing.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  _gc_tracer_cm->set_gc_cause(cause);
}
921
922 void G1ConcurrentMark::start_full_concurrent_cycle() {
923 // Start Concurrent Marking weak-reference discovery.
924 ReferenceProcessor* rp = _g1h->ref_processor_cm();
925 rp->start_discovery(false /* always_clear */);
926
927 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
928 // This is the start of the marking cycle, we're expected all
929 // threads to have SATB queues with active set to false.
930 satb_mq_set.set_active_all_threads(true, /* new active value */
931 false /* expected_active */);
932
933 // update_g1_committed() will be called at the end of an evac pause
934 // when marking is on. So, it's also called at the end of the
935 // concurrent start pause to update the heap end, if the heap expands
936 // during it. No need to call it here.
937
938 // Signal the thread to start work.
939 cm_thread()->start_full_cycle();
940 }
941
942 void G1ConcurrentMark::start_undo_concurrent_cycle() {
943 assert_at_safepoint_on_vm_thread();
944 // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
945 assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
946
947 abort_root_region_scan_at_safepoint();
948
949 // Signal the thread to start work.
950 cm_thread()->start_undo_cycle();
951 }
952
// Called when the concurrent cycle has finished; returns the concurrent mark
// thread to its idle state.
void G1ConcurrentMark::notify_concurrent_cycle_completed() {
  cm_thread()->set_idle();
}
956
// Stop the concurrent mark thread (VM shutdown). Only done when this
// G1ConcurrentMark instance completed initialization, i.e. the thread exists.
void G1ConcurrentMark::stop() {
  if (is_fully_initialized()) {
    cm_thread()->stop();
  }
}
962
963 /*
964 * Notice that in the next two methods, we actually leave the STS
965 * during the barrier sync and join it immediately afterwards. If we
966 * do not do this, the following deadlock can occur: one thread could
967 * be in the barrier sync code, waiting for the other thread to also
968 * sync up, whereas another one could be trying to yield, while also
969 * waiting for the other threads to sync up too.
970 *
971 * Note, however, that this code is also used during remark and in
972 * this case we should not attempt to leave / enter the STS, otherwise
973 * we'll either hit an assert (debug / fastdebug) or deadlock
974 * (product). So we should only leave / enter the STS if we are
975 * operating concurrently.
976 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
980 * barrier is one of the last things do_marking_step() does, and it
981 * doesn't manipulate any data structures afterwards.
982 */
983
984 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
985 bool barrier_aborted;
986 {
987 SuspendibleThreadSetLeaver sts_leave(concurrent());
988 barrier_aborted = !_first_overflow_barrier_sync.enter();
989 }
990
991 // at this point everyone should have synced up and not be doing any
992 // more work
993
994 if (barrier_aborted) {
995 // If the barrier aborted we ignore the overflow condition and
996 // just abort the whole marking phase as quickly as possible.
997 return;
998 }
999 }
1000
// Second rendezvous after overflow handling; worker_id is unused. See the
// block comment above for why we leave the STS while waiting.
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1007
1008 class G1CMConcurrentMarkingTask : public WorkerTask {
1009 G1ConcurrentMark* _cm;
1010
1011 public:
1012 void work(uint worker_id) {
1013 ResourceMark rm;
1014
1015 SuspendibleThreadSetJoiner sts_join;
1016
1017 assert(worker_id < _cm->active_tasks(), "invariant");
1018
1019 G1CMTask* task = _cm->task(worker_id);
1020 task->record_start_time();
1021 if (!_cm->has_aborted()) {
1022 do {
1023 task->do_marking_step(G1ConcMarkStepDurationMillis,
1024 true /* do_termination */,
1025 false /* is_serial*/);
1026
1027 _cm->do_yield_check();
1028 } while (!_cm->has_aborted() && task->has_aborted());
1029 }
1030 task->record_end_time();
1031 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1032 }
1033
1034 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1035 WorkerTask("Concurrent Mark"), _cm(cm) { }
1036
1037 ~G1CMConcurrentMarkingTask() { }
1038 };
1039
1040 uint G1ConcurrentMark::calc_active_marking_workers() {
1041 uint result = 0;
1042 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1043 result = _max_concurrent_workers;
1044 } else {
1045 result =
1046 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1047 1, /* Minimum workers */
1048 _num_concurrent_workers,
1049 Threads::number_of_non_daemon_threads());
1050 // Don't scale the result down by scale_concurrent_workers() because
1051 // that scaling has already gone into "_max_concurrent_workers".
1052 }
1053 assert(result > 0 && result <= _max_concurrent_workers,
1054 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1055 _max_concurrent_workers, result);
1056 return result;
1057 }
1058
// Lock-free query of whether root region scanning has been aborted.
bool G1ConcurrentMark::has_root_region_scan_aborted() const {
  return _root_region_scan_aborted.load_relaxed();
}
1062
#ifndef PRODUCT
// Debug-only check that root region scanning either ran to completion or was
// explicitly aborted.
void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
  assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
}
#endif
1068
1069 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1070 #ifdef ASSERT
1071 HeapWord* last = region->last();
1072 G1HeapRegion* hr = _g1h->heap_region_containing(last);
1073 assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1074 "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1075 assert(top_at_mark_start(hr) == region->start(),
1076 "MemRegion start should be equal to TAMS");
1077 #endif
1078
1079 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1080
1081 const uintx interval = PrefetchScanIntervalInBytes;
1082 HeapWord* curr = region->start();
1083 const HeapWord* end = region->end();
1084 while (curr < end) {
1085 Prefetch::read(curr, interval);
1086 oop obj = cast_to_oop(curr);
1087 size_t size = obj->oop_iterate_size(&cl);
1088 assert(size == obj->size(), "sanity");
1089 curr += size;
1090 }
1091 }
1092
1093 class G1CMRootRegionScanTask : public WorkerTask {
1094 G1ConcurrentMark* _cm;
1095 bool _should_yield;
1096
1097 public:
1098 G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1099 WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1100
1101 void work(uint worker_id) {
1102 SuspendibleThreadSetJoiner sts_join(_should_yield);
1103
1104 while (true) {
1105 if (_cm->has_root_region_scan_aborted()) {
1106 return;
1107 }
1108 G1CMRootMemRegions* root_regions = _cm->root_regions();
1109 const MemRegion* region = root_regions->claim_next();
1110 if (region == nullptr) {
1111 return;
1112 }
1113 _cm->scan_root_region(region, worker_id);
1114 if (_should_yield) {
1115 SuspendibleThreadSet::yield();
1116 // If we yielded, a GC may have processed all root regions,
1117 // so this loop will naturally exit on the next claim_next() call.
1118 // Same if a Full GC signalled abort of the concurrent mark.
1119 }
1120 }
1121 }
1122 };
1123
// Scan all pending root regions using the given workers, unless the work has
// already been completed or aborted. Returns whether any scanning work was
// actually started.
bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
  // We first check whether there is any work to do as we might have already aborted
  // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
  // We want to avoid spinning up the worker threads if that happened.
  // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
  //
  // Abort happens if a Full GC occurs right after starting the concurrent cycle or
  // a young gc doing the work.
  //
  // Concurrent gc threads enter an STS when starting the task, so they stop, then
  // continue after that safepoint.
  bool do_scan = !root_regions()->work_completed() && !has_root_region_scan_aborted();
  if (do_scan) {
    // Assign one worker to each root-region but subject to the max constraint.
    // The constraint is also important to avoid accesses beyond the allocated per-worker
    // marking helper data structures. We might get passed different WorkerThreads with
    // different number of threads (potential worker ids) than helper data structures when
    // completing this work during GC.
    const uint num_workers = MIN2(root_regions()->num_remaining_regions(),
                                  _max_concurrent_workers);
    assert(num_workers > 0, "no more remaining root regions to process");

    G1CMRootRegionScanTask task(this, concurrent);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), num_workers, root_regions()->num_remaining_regions());
    workers->run_task(&task, num_workers);
  }

  assert_root_region_scan_completed_or_aborted();

  return do_scan;
}
1156
// Concurrent-phase entry point for root region scanning; must run on the
// concurrent mark thread and yields to safepoints.
void G1ConcurrentMark::scan_root_regions_concurrently() {
  assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
  scan_root_regions(_concurrent_workers, true /* concurrent */);
}
1161
// Finish any outstanding root region scan work inside a safepoint using the
// GC workers. Returns whether any scanning work was performed.
bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
  assert_at_safepoint_on_vm_thread();
  return scan_root_regions(_g1h->workers(), false /* concurrent */);
}
1166
// Register the area [TAMS, top) of the given region for root region scanning.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1170
// Returns whether [TAMS, top) of the given region is currently registered as
// a root region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1174
// Signal abort of root region scanning from outside a safepoint.
void G1ConcurrentMark::abort_root_region_scan() {
  assert_not_at_safepoint();

  _root_region_scan_aborted.store_relaxed(true);
}
1180
// Signal abort of root region scanning from within a safepoint (VM thread).
void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
  assert_at_safepoint_on_vm_thread();

  _root_region_scan_aborted.store_relaxed(true);
}
1186
// Start timing/tracing for a new concurrent cycle and record the heap state
// before the cycle begins.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1194
// Number of mark cycles completed so far (lock-free read).
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1198
// Bookkeeping at the end of a concurrent cycle: record post-cycle heap state,
// bump the completed-cycle counter if marking actually finished, report an
// aborted cycle if any, and close out timing/tracing.
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1215
1216 void G1ConcurrentMark::mark_from_roots() {
1217 _restart_for_overflow.store_relaxed(false);
1218
1219 uint active_workers = calc_active_marking_workers();
1220
1221 // Setting active workers is not guaranteed since fewer
1222 // worker threads may currently exist and more may not be
1223 // available.
1224 active_workers = _concurrent_workers->set_active_workers(active_workers);
1225 log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1226
1227 _num_concurrent_workers = active_workers;
1228
1229 // Parallel task terminator is set in "set_concurrency_and_phase()"
1230 set_concurrency_and_phase(active_workers, true /* concurrent */);
1231
1232 G1CMConcurrentMarkingTask marking_task(this);
1233 _concurrent_workers->run_task(&marking_task);
1234 print_stats();
1235 }
1236
1237 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1238 static const char* location_strings[] = { "Remark Before",
1239 "Remark After",
1240 "Remark Overflow",
1241 "Cleanup Before",
1242 "Cleanup After" };
1243 return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1244 }
1245
// Optionally verify the heap during a Remark/Cleanup pause, guarded by
// VerifyDuringGC and the requested verification type. 'location' only
// affects log messages and whether the bitmap is checked.
void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
                                           VerifyLocation location) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  const char* caller = verify_location_string(location);

  if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(VerifyOption::G1UseConcMarking, buffer);

    // Only check bitmap in Remark, and not at After-Verification because the regions
    // already have their TAMS'es reset.
    if (location != VerifyLocation::RemarkAfter) {
      verifier->verify_bitmap_clear(true /* above_tams_only */);
    }
  }
}
1270
1271 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1272 G1CollectedHeap* _g1h;
1273 public:
1274 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1275
1276 bool do_object_b(oop obj) {
1277 return !_g1h->is_obj_dead(obj);
1278 }
1279 };
1280
1281 void G1ConcurrentMark::remark() {
1282 assert_at_safepoint_on_vm_thread();
1283
1284 // If a full collection has happened, we should not continue. However we might
1285 // have ended up here as the Remark VM operation has been scheduled already.
1286 if (has_aborted()) {
1287 return;
1288 }
1289
1290 G1Policy* policy = _g1h->policy();
1291 policy->record_pause_start_time();
1292
1293 double start = os::elapsedTime();
1294
1295 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1296
1297 {
1298 GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1299 finalize_marking();
1300 }
1301
1302 double mark_work_end = os::elapsedTime();
1303
1304 bool const mark_finished = !has_overflown();
1305 if (mark_finished) {
1306 weak_refs_work();
1307
1308 // Unload Klasses, String, Code Cache, etc.
1309 if (ClassUnloadingWithConcurrentMark) {
1310 G1CMIsAliveClosure is_alive(this);
1311 _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1312 }
1313
1314 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1315 // We're done with marking.
1316 // This is the end of the marking cycle, we're expected all
1317 // threads to have SATB queues with active set to true.
1318 satb_mq_set.set_active_all_threads(false, /* new active value */
1319 true /* expected_active */);
1320
1321 {
1322 GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1323 flush_all_task_caches();
1324 }
1325
1326 // All marking completed. Check bitmap now as we will start to reset TAMSes
1327 // in parallel below so that we can not do this in the After-Remark verification.
1328 _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1329
1330 {
1331 GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1332
1333 G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1334 uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1335 _g1h->workers()->active_workers());
1336 log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1337 _g1h->workers()->run_task(&cl, num_workers);
1338
1339 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1340 _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1341
1342 _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1343
1344 if (_needs_remembered_set_rebuild) {
1345 GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1346 _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1347 }
1348 }
1349
1350 if (log_is_enabled(Trace, gc, liveness)) {
1351 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1352 _g1h->heap_region_iterate(&cl);
1353 }
1354
1355 // Potentially, some empty-regions have been reclaimed; make this a
1356 // "collection" so that pending allocation can retry before attempting a
1357 // GC pause.
1358 _g1h->increment_total_collections();
1359
1360 // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1361 // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1362 // triggered, it likely means there are very few regular GCs, making resizing
1363 // based on gc heuristics less effective.
1364 if (_g1h->last_gc_was_periodic()) {
1365 _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1366 }
1367
1368 compute_new_sizes();
1369
1370 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1371
1372 assert(!restart_for_overflow(), "sanity");
1373 // Completely reset the marking state (except bitmaps) since marking completed.
1374 reset_at_marking_complete();
1375
1376 G1CollectedHeap::finish_codecache_marking_cycle();
1377
1378 {
1379 GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1380 G1ObjectCountIsAliveClosure is_alive(_g1h);
1381 _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1382 }
1383
1384 // Successfully completed marking, advance state.
1385 cm_thread()->set_full_cycle_rebuild_and_scrub();
1386 } else {
1387 // We overflowed. Restart concurrent marking.
1388 _restart_for_overflow.store_relaxed(true);
1389
1390 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1391
1392 // Clear the marking state because we will be restarting
1393 // marking due to overflowing the global mark stack.
1394 reset_marking_for_restart();
1395 }
1396
1397 // Statistics
1398 double now = os::elapsedTime();
1399 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1400 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1401 _remark_times.add((now - start) * 1000.0);
1402
1403 _g1h->update_perf_counter_cpu_time();
1404
1405 policy->record_concurrent_mark_remark_end();
1406
1407 return;
1408 }
1409
// Recompute metaspace and heap monitoring sizes after marking may have
// reclaimed regions.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1421
1422 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1423 G1CollectedHeap* _g1h;
1424
1425 public:
1426 G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1427
1428 bool do_heap_region(G1HeapRegion* r) override {
1429 // Update the remset tracking state from updating to complete
1430 // if remembered sets have been rebuilt.
1431 _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1432 return false;
1433 }
1434 };
1435
1436 void G1ConcurrentMark::cleanup() {
1437 assert_at_safepoint_on_vm_thread();
1438
1439 // If a full collection has happened, we shouldn't do this.
1440 if (has_aborted()) {
1441 return;
1442 }
1443
1444 G1Policy* policy = _g1h->policy();
1445 policy->record_pause_start_time();
1446
1447 double start = os::elapsedTime();
1448
1449 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1450
1451 if (needs_remembered_set_rebuild()) {
1452 // Update the remset tracking information as well as marking all regions
1453 // as fully parsable.
1454 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1455 G1UpdateRegionsAfterRebuild cl(_g1h);
1456 _g1h->heap_region_iterate(&cl);
1457 } else {
1458 log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1459 }
1460
1461 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1462
1463 // Local statistics
1464 _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1465
1466 {
1467 GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1468 policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1469 }
1470
1471 // Advance state.
1472 cm_thread()->set_full_cycle_reset_for_next_cycle();
1473 return;
1474 }
1475
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1477 // Uses the G1CMTask associated with a worker thread (for serial reference
1478 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1479 // trace referent objects.
1480 //
1481 // Using the G1CMTask and embedded local queues avoids having the worker
1482 // threads operating on the global mark stack. This reduces the risk
1483 // of overflowing the stack - which we would rather avoid at this late
1484 // state. Also using the tasks' local queues removes the potential
1485 // of the workers interfering with each other that could occur if
1486 // operating on the global stack.
1487
// See the comment block above for the rationale of this closure.
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  uint _ref_counter_limit; // Number of references to process between drains.
  uint _ref_counter;       // Count-down until the next drain.
  bool _is_serial;         // Serial reference processing (task 0 only).
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    // Once the global mark stack overflowed there is no point in doing
    // further work; remark will be restarted.
    if (_cm->has_overflown()) {
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false      /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1542
1543 // 'Drain' oop closure used by both serial and parallel reference processing.
1544 // Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
1546 // do_marking_step routine, with an unbelievably large timeout value,
1547 // to drain the marking data structures of the remaining entries
1548 // added by the 'keep alive' oop closure above.
1549
// See the comment block above for the rationale of this closure.
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial; // Serial reference processing (task 0 only).
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1585
// Proxy task connecting the reference processor to the concurrent mark
// closures above, for both single-threaded and multi-threaded processing.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h;
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In single-threaded mode only the G1CMTask for worker 0 may be used.
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1614
// Process discovered (weak) references and other weak roots at the end of
// marking. A mark stack overflow during reference processing is fatal.
void G1ConcurrentMark::weak_refs_work() {
  ResourceMark rm;

  {
    GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Prefer to grow the stack until the max capacity.
    _global_mark_stack.set_should_grow();

    // Parallel processing task executor.
    G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());

    // Process the weak references.
    const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
    _gc_tracer_cm->report_gc_reference_stats(stats);
    pt.print_all_references();

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(has_overflown() || _global_mark_stack.is_empty(),
           "Mark stack should be empty (unless it has overflown)");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive and the contents of the heap if the marking stack
    // overflowed while processing references. Exit the VM.
    fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
          "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
          "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
          _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
    return;
  }

  assert(_global_mark_stack.is_empty(), "Marking should have completed");

  {
    GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
    G1CMIsAliveClosure is_alive(this);
    WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
  }
}
1666
// Yield closure for reference precleaning: asks for an early return when
// marking has been aborted, and performs the regular concurrent yield check
// at fine-grain points.
class G1PrecleanYieldClosure : public YieldClosure {
  G1ConcurrentMark* _cm;

public:
  G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }

  virtual bool should_return() {
    return _cm->has_aborted();
  }

  virtual bool should_return_fine_grain() {
    _cm->do_yield_check();
    return _cm->has_aborted();
  }
};
1682
// Concurrently preclean discovered references ahead of the Remark pause,
// yielding to safepoints via the yield closure above.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  // Precleaning runs as a single marking "worker".
  set_concurrency_and_phase(1, true);

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1702
// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  G1CollectedHeap* _g1h; // Not used by this closure's methods.

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    oop const obj = cast_to_oop(entry);
    _task->make_reference_grey(obj);
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  // Process every entry in the given SATB buffer.
  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};
1728
// Thread closure used during Remark to flush each thread's partially filled
// SATB queue into the global queue set.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1SATBMarkQueueSet& _qset;

public:
  // Note: g1h and task are currently unused; the signature is kept for the
  // caller's benefit.
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _qset(G1BarrierSet::satb_mark_queue_set()) {}

  void do_thread(Thread* thread) {
    // Transfer any partial buffer to the qset for completed buffer processing.
    _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
  }
};
1741
// Worker task for the Remark pause: flushes per-thread SATB queues, then
// completely drains the marking data structures.
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      // Flush all per-thread SATB buffers into the global queue set.
      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    do {
      // Drain to completion; the huge duration effectively disables the
      // per-step time limit.
      task->do_marking_step(1000000000.0 /* something very large */,
                            true         /* do_termination       */,
                            false        /* is_serial            */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1772
// Finish marking during the Remark pause by running the parallel remark task,
// then verify that all SATB buffers have been consumed (unless marking
// overflowed, which forces a restart of concurrent marking).
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1803
1804 void G1ConcurrentMark::flush_all_task_caches() {
1805 size_t hits = 0;
1806 size_t misses = 0;
1807 for (uint i = 0; i < _max_num_tasks; i++) {
1808 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1809 hits += stats.first;
1810 misses += stats.second;
1811 }
1812 size_t sum = hits + misses;
1813 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1814 hits, misses, percent_of(hits, sum));
1815 }
1816
// Clear the portion of the mark bitmap that covers the given region.
// Must be called at a safepoint.
void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
  assert_at_safepoint();
  _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
}
1821
// Claim the region at the global finger by CASing the finger past it.
// Returns the claimed region, or null when either the heap is exhausted, the
// claimed region is empty (limit == bottom), or the CAS lost a race — in the
// latter cases the caller should call claim_region() again.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // CAS lost the race (or the slot had no region): re-read the finger
      // and retry from the new position.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  return nullptr;
}
1867
1868 #ifndef PRODUCT
// Debug-only functor asserting that a mark stack / task queue entry is a
// valid oop (or partial array state) and does not point into the (optional)
// collection set.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;   // Description of where the entry came from, for messages.
  int _info;            // Extra context (e.g. queue index), -1 if none.

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // Partial array entries only need to reference the heap.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1896
// Debug-only verification that no marking data structure (global mark stack,
// task queues, global and per-task fingers) references the collection set.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      // NOTE(review): with the '||' between the negations this guarantee only
      // fails when the region is in the collection set AND has an optional
      // cset index — weaker than the per-entry check in VerifyNoCSetOops.
      // Confirm that this is intentional.
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1939 #endif // PRODUCT
1940
1941 void G1ConcurrentMark::rebuild_and_scrub() {
1942 if (!needs_remembered_set_rebuild()) {
1943 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1944 }
1945
1946 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1947 }
1948
1949 void G1ConcurrentMark::print_stats() {
1950 if (!log_is_enabled(Debug, gc, stats)) {
1951 return;
1952 }
1953 log_debug(gc, stats)("---------------------------------------------------------------------");
1954 for (size_t i = 0; i < _num_active_tasks; ++i) {
1955 _tasks[i]->print_stats();
1956 log_debug(gc, stats)("---------------------------------------------------------------------");
1957 }
1958 }
1959
// Abort any in-progress concurrent marking because a Full GC is starting.
// Returns true if marking state was actually torn down, false if there was
// nothing to abort.
bool G1ConcurrentMark::concurrent_cycle_abort() {
  assert_at_safepoint_on_vm_thread();
  assert(_g1h->collector_state()->is_in_full_gc(), "must be");

  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. Since the root region
  // scan synchronized with the safepoint, just tell it to abort.
  // It will notice when the threads start up again later.
  abort_root_region_scan_at_safepoint();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
    return false;
  }

  reset_marking_for_restart();

  abort_marking_threads();

  // Throw away any SATB state accumulated so far and deactivate the barrier.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1997
// Tell all marking threads to abort: raise the abort flag and break them out
// of the two overflow barrier syncs they may be waiting on.
void G1ConcurrentMark::abort_marking_threads() {
  assert_root_region_scan_completed_or_aborted();
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
2004
2005 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2006 class CountCpuTimeThreadClosure : public ThreadClosure {
2007 public:
2008 jlong _total_cpu_time;
2009
2010 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2011
2012 void do_thread(Thread* t) {
2013 _total_cpu_time += os::thread_cpu_time(t);
2014 }
2015 } cl;
2016
2017 threads_do(&cl);
2018
2019 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2020 }
2021
2022 static void print_ms_time_info(const char* prefix, const char* name,
2023 NumberSeq& ns) {
2024 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2025 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2026 if (ns.num() > 0) {
2027 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2028 prefix, ns.sd(), ns.maximum());
2029 }
2030 }
2031
2032 void G1ConcurrentMark::print_summary_info() {
2033 Log(gc, marking) log;
2034 if (!log.is_trace()) {
2035 return;
2036 }
2037
2038 log.trace(" Concurrent marking:");
2039 if (!is_fully_initialized()) {
2040 log.trace(" has not been initialized yet");
2041 return;
2042 }
2043 print_ms_time_info(" ", "remarks", _remark_times);
2044 {
2045 print_ms_time_info(" ", "final marks", _remark_mark_times);
2046 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2047
2048 }
2049 print_ms_time_info(" ", "cleanups", _cleanup_times);
2050 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2051 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2052 log.trace(" Total stop_world time = %8.2f s.",
2053 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2054 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2055 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2056 }
2057
// Apply the closure to the concurrent mark thread and all concurrent worker
// threads. A no-op before the threads exist.
void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
  if (is_fully_initialized()) { // they are initialized late
    tc->do_thread(_cm_thread);
    _concurrent_workers->threads_do(tc);
  }
}
2064
// Print the mark bitmap's address and contents summary to the given stream.
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2069
2070 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2071 ReferenceProcessor* result = g1h->ref_processor_cm();
2072 assert(result != nullptr, "CM reference processor should not be null");
2073 return result;
2074 }
2075
// Marking oop closure: claims metadata strongly and uses the CM reference
// processor for reference discovery.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2081
// Start working on a freshly claimed region: point the local finger at its
// bottom and compute the scan limit via update_region_limit().
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
         "claim_region() should have filtered out null regions");
  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}
2089
// Recompute _region_limit from the current region's TAMS, handling regions
// that were emptied or refilled underneath us by an evacuation pause.
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    // The limit did not move backwards; nothing to adjust.
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2121
// Abandon the current region and reset the per-region task state.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2126
// Reset the per-region state so this task no longer holds a region.
void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = nullptr;
  _finger = nullptr;
  _region_limit = nullptr;
}
2134
2135 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2136 if (cm_oop_closure == nullptr) {
2137 assert(_cm_oop_closure != nullptr, "invariant");
2138 } else {
2139 assert(_cm_oop_closure == nullptr, "invariant");
2140 }
2141 _cm_oop_closure = cm_oop_closure;
2142 }
2143
// Fully reset this task for a new marking cycle, adopting the given bitmap
// and clearing counters, timers and the mark stats cache.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap = mark_bitmap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;

  _mark_stats_cache.reset();
}
2155
// Reset region state and empty the local queue so marking can be restarted.
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2162
2163 void G1CMTask::register_partial_array_splitter() {
2164
2165 ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2166 _cm->max_num_tasks(),
2167 ObjArrayMarkingStride);
2168 }
2169
// Destroy the splitter created by register_partial_array_splitter(); the
// explicit destructor call matches its placement-new construction.
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2173
2174 bool G1CMTask::should_exit_termination() {
2175 if (!regular_clock_call()) {
2176 return true;
2177 }
2178
2179 // This is called when we are in the termination protocol. We should
2180 // quit if, for some reason, this task wants to abort or the global
2181 // stack is not empty (this means that we can get work from it).
2182 return !_cm->mark_stack_empty() || has_aborted();
2183 }
2184
// Invoked when one of the scan counters hits its limit; runs the regular
// clock check, which may abort marking.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2191
// The "regular clock": periodically invoked during marking to decide whether
// the task may continue. Returns false when the current marking step must be
// aborted (overflow, full-gc abort, yield request, elapsed time quota, or
// pending SATB buffers).
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (4) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2245
2246 void G1CMTask::recalculate_limits() {
2247 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2248 _words_scanned_limit = _real_words_scanned_limit;
2249
2250 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2251 _refs_reached_limit = _real_refs_reached_limit;
2252 }
2253
// Pull both scan limits back to a quarter period past the last clock call so
// the next clock call happens sooner.
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2263
// Move up to one chunk's worth of entries from the local task queue to the
// global mark stack.
void G1CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  size_t n = 0;
  G1TaskQueueEntry task_entry;
  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
    buffer[n] = task_entry;
    ++n;
  }
  if (n < G1CMMarkStack::EntriesPerChunk) {
    // Terminate a partially filled chunk with a default (empty) entry.
    buffer[n] = G1TaskQueueEntry();
  }

  if (n > 0) {
    if (!_cm->mark_stack_push(buffer)) {
      // Could not push the chunk; flag this task as aborted.
      set_has_aborted();
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}
2288
// Move one chunk of entries from the global mark stack into the local task
// queue. Returns false if nothing could be popped from the global stack.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    if (task_entry.is_null()) {
      break; // Hit the sentinel that terminates a partially filled chunk.
    }
    assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}
2315
2316 void G1CMTask::drain_local_queue(bool partially) {
2317 if (has_aborted()) {
2318 return;
2319 }
2320
2321 // Decide what the target size is, depending whether we're going to
2322 // drain it partially (so that other tasks can steal if they run out
2323 // of things to do) or totally (at the very end).
2324 uint target_size;
2325 if (partially) {
2326 target_size = GCDrainStackTargetSize;
2327 } else {
2328 target_size = 0;
2329 }
2330
2331 if (_task_queue->size() > target_size) {
2332 G1TaskQueueEntry entry;
2333 bool ret = _task_queue->pop_local(entry);
2334 while (ret) {
2335 process_entry(entry, false /* stolen */);
2336 if (_task_queue->size() <= target_size || has_aborted()) {
2337 ret = false;
2338 } else {
2339 ret = _task_queue->pop_local(entry);
2340 }
2341 }
2342 }
2343 }
2344
// Begin chunked processing of a large object array: register it with the
// splitter, mark its klass, and scan the initial chunk. Returns the number of
// heap words to account as scanned for this step (chunk plus object header).
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = oop_cast<objArrayOop>(obj);
  size_t array_length = obj_array->length();

  // Enqueue the remaining chunks for (possibly other) tasks to claim.
  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  if (obj_array->is_refArray()) {
    return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
  } else {
    // Not a reference array: treat as a flat array (cast checks this).
    FlatArrayKlass* fak = FlatArrayKlass::cast(obj_array->klass());
    return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
  }
}
2368
// Claim and process one chunk of a partially processed object array.
// Returns the number of heap words scanned for the claimed chunk.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = oop_cast<objArrayOop>(state->source());

  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);

  if (obj->is_refArray()) {
    // Reference array: each element is one heap oop.
    return heap_word_size((claim._end - claim._start) * heapOopSize);
  } else {
    assert(obj->is_flatArray(), "Must be!");
    size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
    size_t nof_elements = claim._end - claim._start;
    return heap_word_size(nof_elements * element_byte_size);
  }
}
2388
// Drain the global mark stack, either down to a target size (partially) or
// until it is completely empty, interleaving local queue draining.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2419
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. this is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
// Claim and process completed SATB buffers until none remain or the task
// needs to abort.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    abort_marking_if_regular_check_fail();
  }

  // Can't assert qset is empty here, even if not aborted. If concurrent,
  // some other thread might be adding to the queue. If not concurrent,
  // some other thread might have won the race for the last buffer, but
  // has not yet decremented the count.

  _draining_satb_buffers = false;

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
2456
// Reset the cached mark statistics entry for the given region.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2460
// Evict all entries from this task's mark stats cache.
// Returns a (hits, misses) pair of cache access counters.
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2464
2465 void G1CMTask::print_stats() {
2466 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2467 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2468 _elapsed_time_ms, _termination_time_ms);
2469 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2470 _step_times_ms.num(),
2471 _step_times_ms.avg(),
2472 _step_times_ms.sd(),
2473 _step_times_ms.maximum(),
2474 _step_times_ms.sum());
2475 size_t const hits = _mark_stats_cache.hits();
2476 size_t const misses = _mark_stats_cache.misses();
2477 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2478 hits, misses, percent_of(hits, hits + misses));
2479 }
2480
// Try to steal one entry from another task's queue into task_entry.
// Returns true on success.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2484
// Scan (a part of) the currently held region by iterating its mark bitmap
// from the local finger to the region limit, giving the region up when it is
// finished or handling an aborted iteration by advancing the finger.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      // Stepping past the last object finished the region after all.
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2557
// Repeatedly try to claim a new region to scan until one is obtained, the
// task aborts, or no regions remain.
void G1CMTask::claim_new_region() {
  // Read the note on the claim_region() method on why it might
  // return null with potentially more regions available for
  // claiming and why we have to check out_of_regions() to determine
  // whether we're done or not.
  while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
    // We are going to try to claim a new region. We should have
    // given up on the previous one.
    // Separated the asserts so that we know which one fires.
    assert(_curr_region == nullptr, "invariant");
    assert(_finger == nullptr, "invariant");
    assert(_region_limit == nullptr, "invariant");
    G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
    if (claimed_region != nullptr) {
      // Yes, we managed to claim one
      setup_for_region(claimed_region);
      assert(_curr_region == claimed_region, "invariant");
    }
    // It is important to call the regular clock here. It might take
    // a while to claim a region if, for example, we hit a large
    // block of empty regions. So we need to call the regular clock
    // method once round the loop to make sure it's called
    // frequently enough.
    abort_marking_if_regular_check_fail();
  }
}
2584
2585 void G1CMTask::attempt_stealing() {
2586 // We cannot check whether the global stack is empty, since other
2587 // tasks might be pushing objects to it concurrently.
2588 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2589 "only way to reach here");
2590 while (!has_aborted()) {
2591 G1TaskQueueEntry entry;
2592 if (_cm->try_stealing(_worker_id, entry)) {
2593 process_entry(entry, true /* stolen */);
2594
2595 // And since we're towards the end, let's totally drain the
2596 // local queue and global stack.
2597 drain_local_queue(false);
2598 drain_global_stack(false);
2599 } else {
2600 break;
2601 }
2602 }
2603 }
2604
// Enter the termination protocol: decide whether marking is globally
// finished or whether this task must abort and be restarted because
// more work has appeared. Time spent here is accumulated in
// _termination_time_ms.
void G1CMTask::attempt_termination(bool is_serial) {
  // We cannot check whether the global stack is empty, since other
  // tasks might be concurrently pushing objects on it.
  // Separated the asserts so that we know which one fires.
  assert(_cm->out_of_regions(), "only way to reach here");
  assert(_task_queue->size() == 0, "only way to reach here");
  double termination_start_time_ms = os::elapsedTime() * 1000.0;

  // The G1CMTask class also extends the TerminatorTerminator class,
  // hence its should_exit_termination() method will also decide
  // whether to exit the termination protocol or not.
  // In the serial case there are no peers to synchronize with, so
  // termination trivially succeeds without entering the protocol.
  bool finished = (is_serial ||
                   _cm->terminator()->offer_termination(this));
  _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);

  if (finished) {
    // We're all done.

    // We can now guarantee that the global stack is empty, since
    // all other tasks have finished. We separated the guarantees so
    // that, if a condition is false, we can immediately find out
    // which one.
    guarantee(_cm->out_of_regions(), "only way to reach here");
    guarantee(_cm->mark_stack_empty(), "only way to reach here");
    guarantee(_task_queue->size() == 0, "only way to reach here");
    guarantee(!_cm->has_overflown(), "only way to reach here");
    guarantee(!has_aborted(), "should never happen if termination has completed");
  } else {
    // Apparently there's more work to do. Let's abort this task. We
    // will restart it and hopefully we can find more things to do.
    set_has_aborted();
  }
}
2638
// Post-abort processing for do_marking_step(): record timing statistics
// if the abort was a timeout, and — if a global mark-stack overflow was
// raised — coordinate the restart of the marking phase across all tasks
// using two sync barriers.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  // Aborts not caused by a global overflow need no re-initialization.
  if (!_cm->has_overflown()) {
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  // Reset the per-task region iteration state and publish this task's
  // cached per-region mark statistics.
  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
      // method to abort the pause and restart concurrent marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2692
2693 /*****************************************************************************
2694
2695 The do_marking_step(time_target_ms, ...) method is the building
2696 block of the parallel marking framework. It can be called in parallel
2697 with other invocations of do_marking_step() on different tasks
2698 (but only one per task, obviously) and concurrently with the
2699 mutator threads, or during remark, hence it eliminates the need
2700 for two versions of the code. When called during remark, it will
2701 pick up from where the task left off during the concurrent marking
2702 phase. Interestingly, tasks are also claimable during evacuation
2703 pauses too, since do_marking_step() ensures that it aborts before
2704 it needs to yield.
2705
2706 The data structures that it uses to do marking work are the
2707 following:
2708
2709 (1) Marking Bitmap. If there are grey objects that appear only
2710 on the bitmap (this happens either when dealing with an overflow
2711 or when the concurrent start pause has simply marked the roots
2712 and didn't push them on the stack), then tasks claim heap
2713 regions whose bitmap they then scan to find grey objects. A
2714 global finger indicates where the end of the last claimed region
2715 is. A local finger indicates how far into the region a task has
2716 scanned. The two fingers are used to determine how to grey an
2717 object (i.e. whether simply marking it is OK, as it will be
2718 visited by a task in the future, or whether it needs to be also
2719 pushed on a stack).
2720
2721 (2) Local Queue. The local queue of the task which is accessed
2722 reasonably efficiently by the task. Other tasks can steal from
2723 it when they run out of work. Throughout the marking phase, a
2724 task attempts to keep its local queue short but not totally
2725 empty, so that entries are available for stealing by other
2726 tasks. Only when there is no more work, a task will totally
2727 drain its local queue.
2728
2729 (3) Global Mark Stack. This handles local queue overflow. During
2730 marking only sets of entries are moved between it and the local
2731 queues, as access to it requires a mutex and more fine-grain
2732 interaction with it which might cause contention. If it
2733 overflows, then the marking phase should restart and iterate
2734 over the bitmap to identify grey objects. Throughout the marking
2735 phase, tasks attempt to keep the global mark stack at a small
2736 length but not totally empty, so that entries are available for
2737 popping by other tasks. Only when there is no more work, tasks
2738 will totally drain the global mark stack.
2739
2740 (4) SATB Buffer Queue. This is where completed SATB buffers are
2741 made available. Buffers are regularly removed from this queue
2742 and scanned for roots, so that the queue doesn't get too
2743 long. During remark, all completed buffers are processed, as
2744 well as the filled in parts of any uncompleted buffers.
2745
2746 The do_marking_step() method tries to abort when the time target
2747 has been reached. There are a few other cases when the
2748 do_marking_step() method also aborts:
2749
2750 (1) When the marking phase has been aborted (after a Full GC).
2751
2752 (2) When a global overflow (on the global stack) has been
2753 triggered. Before the task aborts, it will actually sync up with
2754 the other tasks to ensure that all the marking data structures
2755 (local queues, stacks, fingers etc.) are re-initialized so that
2756 when do_marking_step() completes, the marking phase can
2757 immediately restart.
2758
2759 (3) When enough completed SATB buffers are available. The
2760 do_marking_step() method only tries to drain SATB buffers right
2761 at the beginning. So, if enough buffers are available, the
2762 marking step aborts and the SATB buffers are processed at
2763 the beginning of the next invocation.
2764
    (4) To yield. When we have to yield, we abort and yield
    right at the end of do_marking_step(). This saves us from a lot
2767 of hassle as, by yielding we might allow a Full GC. If this
2768 happens then objects will be compacted underneath our feet, the
2769 heap might shrink, etc. We save checking for this by just
2770 aborting and doing the yield right at the end.
2771
2772 From the above it follows that the do_marking_step() method should
2773 be called in a loop (or, otherwise, regularly) until it completes.
2774
2775 If a marking step completes without its has_aborted() flag being
2776 true, it means it has completed the current marking phase (and
2777 also all other marking tasks have done so and have all synced up).
2778
2779 A method called regular_clock_call() is invoked "regularly" (in
2780 sub ms intervals) throughout marking. It is this clock method that
2781 checks all the abort conditions which were mentioned above and
2782 decides when the task should abort. A work-based scheme is used to
2783 trigger this clock method: when the number of object words the
2784 marking phase has scanned or the number of references the marking
2785 phase has visited reach a given limit. Additional invocations to
2786 the method clock have been planted in a few other strategic places
2787 too. The initial reason for the clock method was to avoid calling
2788 cpu time gathering too regularly, as it is quite expensive. So,
2789 once it was in place, it was natural to piggy-back all the other
2790 conditions on it too and not constantly check them throughout the code.
2791
2792 If do_termination is true then do_marking_step will enter its
2793 termination protocol.
2794
2795 The value of is_serial must be true when do_marking_step is being
2796 called serially (i.e. by the VMThread) and do_marking_step should
2797 skip any synchronization in the termination and overflow code.
2798 Examples include the serial remark code and the serial reference
2799 processing closures.
2800
2801 The value of is_serial must be false when do_marking_step is
2802 being called by any of the worker threads.
2803 Examples include the concurrent marking code (CMMarkingTask),
2804 the MT remark code, and the MT reference processing closures.
2805
2806 *****************************************************************************/
2807
// See the large comment above for the overall contract of this method.
// time_target_ms: soft CPU-time budget for this step (>= 1 ms).
// do_termination: whether to enter the termination protocol at the end.
// is_serial: true only when called by the VMThread; skips all
//            multi-task synchronization (stealing, barriers).
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the historical prediction error so that
  // the step tends to finish within the caller's budget.
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: scan the current region's bitmap, keep the queues
  // short, and claim the next region — until out of regions or aborted.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2917
// Construct a per-worker marking task. All region-iteration fields start
// null/zero; they are set up lazily by do_marking_step()/setup_for_region().
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the prediction history with a small nonzero value so the very
  // first do_marking_step() call has something to predict from.
  _marking_step_diff_ms.add(0.5);
}
2954
2955 // These are formatting macros that are used below to ensure
2956 // consistent formatting. The *_H_* versions are used to format the
2957 // header for a particular value and they should be kept consistent
2958 // with the corresponding macro. Also note that most of the macros add
2959 // the necessary white space (as a prefix) which makes them a bit
2960 // easier to compose.
2961
2962 // All the output lines are prefixed with this string to be able to
2963 // identify them easily in a large log file.
2964 #define G1PPRL_LINE_PREFIX "###"
2965
2966 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
2967 #ifdef _LP64
2968 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
2969 #else // _LP64
2970 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
2971 #endif // _LP64
2972
2973 // For per-region info
2974 #define G1PPRL_TYPE_FORMAT " %-4s"
2975 #define G1PPRL_TYPE_H_FORMAT " %4s"
2976 #define G1PPRL_STATE_FORMAT " %-5s"
2977 #define G1PPRL_STATE_H_FORMAT " %5s"
2978 #define G1PPRL_BYTE_FORMAT " %9zu"
2979 #define G1PPRL_BYTE_H_FORMAT " %9s"
2980 #define G1PPRL_DOUBLE_FORMAT "%14.1f"
2981 #define G1PPRL_GCEFF_H_FORMAT " %14s"
2982 #define G1PPRL_GID_H_FORMAT " %9s"
2983 #define G1PPRL_GID_FORMAT " " UINT32_FORMAT_W(9)
2984 #define G1PPRL_LEN_FORMAT " " UINT32_FORMAT_W(14)
2985 #define G1PPRL_LEN_H_FORMAT " %14s"
2986 #define G1PPRL_GID_GCEFF_FORMAT " %14.1f"
2987 #define G1PPRL_GID_LIVENESS_FORMAT " %9.2f"
2988
2989 // For summary info
2990 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
2991 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": %zu"
2992 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
2993 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2994
// Prints the header of the per-region liveness table. The per-region
// rows are emitted by do_heap_region() and the summary by the
// destructor. Everything goes to the gc+liveness=trace log and is a
// no-op when that log level is disabled.
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  // Nothing to do if the target log level is not enabled.
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column-name header line for the per-region table.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // Units header line, kept in sync with the columns above.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
3042
3043 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3044 if (!log_is_enabled(Trace, gc, liveness)) {
3045 return false;
3046 }
3047
3048 const char* type = r->get_type_str();
3049 HeapWord* bottom = r->bottom();
3050 HeapWord* end = r->end();
3051 size_t capacity_bytes = r->capacity();
3052 size_t used_bytes = r->used();
3053 size_t live_bytes = r->live_bytes();
3054 size_t remset_bytes = r->rem_set()->mem_size();
3055 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3056 const char* remset_type = r->rem_set()->get_short_state_str();
3057 uint cset_group_id = r->rem_set()->has_cset_group()
3058 ? r->rem_set()->cset_group_id()
3059 : G1CSetCandidateGroup::NoRemSetId;
3060
3061 _total_used_bytes += used_bytes;
3062 _total_capacity_bytes += capacity_bytes;
3063 _total_live_bytes += live_bytes;
3064 _total_remset_bytes += remset_bytes;
3065 _total_code_roots_bytes += code_roots_bytes;
3066
3067 // Print a line for this particular region.
3068 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3069 G1PPRL_TYPE_FORMAT
3070 G1PPRL_ADDR_BASE_FORMAT
3071 G1PPRL_BYTE_FORMAT
3072 G1PPRL_BYTE_FORMAT
3073 G1PPRL_STATE_FORMAT
3074 G1PPRL_BYTE_FORMAT
3075 G1PPRL_GID_FORMAT,
3076 type, p2i(bottom), p2i(end),
3077 used_bytes, live_bytes,
3078 remset_type, code_roots_bytes,
3079 cset_group_id);
3080
3081 return false;
3082 }
3083
// Prints the candidate-group table and the summary footer accumulated
// over all do_heap_region() calls. No-op when gc+liveness=trace is off.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  // Account for the free-list pool shared by all card sets.
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // add static memory usages to remembered set sizes
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  // Note: this also adds the candidate groups' card set sizes to
  // _total_remset_bytes, so it must run before the summary below.
  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          percent_of(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_live_bytes),
                          percent_of(_total_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_code_roots_bytes));
}
3113
3114 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3115 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3116 G1PPRL_GID_FORMAT
3117 G1PPRL_LEN_FORMAT
3118 G1PPRL_GID_GCEFF_FORMAT
3119 G1PPRL_GID_LIVENESS_FORMAT
3120 G1PPRL_BYTE_FORMAT
3121 G1PPRL_TYPE_H_FORMAT,
3122 group->group_id(),
3123 group->length(),
3124 group->length() > 0 ? group->gc_efficiency() : 0.0,
3125 group->length() > 0 ? group->liveness_percent() : 0.0,
3126 group->card_set()->mem_size(),
3127 type);
3128 _total_remset_bytes += group->card_set()->mem_size();
3129 }
3130
3131 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3132 for (G1CSetCandidateGroup* group : gl) {
3133 log_cset_candidate_group_add_total(group, type);
3134 }
3135 }
3136
3137 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3138 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3139 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3140 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3141 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3142 G1PPRL_GID_H_FORMAT
3143 G1PPRL_LEN_H_FORMAT
3144 G1PPRL_GCEFF_H_FORMAT
3145 G1PPRL_BYTE_H_FORMAT
3146 G1PPRL_BYTE_H_FORMAT
3147 G1PPRL_TYPE_H_FORMAT,
3148 "groud-id", "num-regions",
3149 "gc-eff", "liveness",
3150 "remset", "type");
3151
3152 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3153 G1PPRL_GID_H_FORMAT
3154 G1PPRL_LEN_H_FORMAT
3155 G1PPRL_GCEFF_H_FORMAT
3156 G1PPRL_BYTE_H_FORMAT
3157 G1PPRL_BYTE_H_FORMAT
3158 G1PPRL_TYPE_H_FORMAT,
3159 "", "",
3160 "(bytes/ms)", "%",
3161 "(bytes)", "");
3162
3163 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3164
3165 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3166
3167 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3168 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3169 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3170 }