1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectorState.inline.hpp"
34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
38 #include "gc/g1/g1ConcurrentRefine.hpp"
39 #include "gc/g1/g1HeapRegion.inline.hpp"
40 #include "gc/g1/g1HeapRegionManager.hpp"
41 #include "gc/g1/g1HeapRegionPrinter.hpp"
42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
44 #include "gc/g1/g1HeapVerifier.hpp"
45 #include "gc/g1/g1OopClosures.inline.hpp"
46 #include "gc/g1/g1Policy.hpp"
47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
48 #include "gc/g1/g1ThreadLocalData.hpp"
49 #include "gc/g1/g1Trace.hpp"
50 #include "gc/shared/gcId.hpp"
51 #include "gc/shared/gcTimer.hpp"
52 #include "gc/shared/gcTraceTime.inline.hpp"
53 #include "gc/shared/gcVMOperations.hpp"
54 #include "gc/shared/partialArraySplitter.inline.hpp"
55 #include "gc/shared/partialArrayState.hpp"
56 #include "gc/shared/partialArrayTaskStats.hpp"
57 #include "gc/shared/referencePolicy.hpp"
58 #include "gc/shared/suspendibleThreadSet.hpp"
59 #include "gc/shared/taskqueue.inline.hpp"
60 #include "gc/shared/taskTerminator.hpp"
61 #include "gc/shared/weakProcessor.inline.hpp"
62 #include "gc/shared/workerPolicy.hpp"
63 #include "jvm.h"
64 #include "logging/log.hpp"
65 #include "memory/allocation.hpp"
66 #include "memory/iterator.hpp"
67 #include "memory/metaspaceUtils.hpp"
68 #include "memory/resourceArea.hpp"
69 #include "memory/universe.hpp"
70 #include "nmt/memTracker.hpp"
71 #include "oops/access.inline.hpp"
72 #include "oops/oop.inline.hpp"
73 #include "oops/oopCast.inline.hpp"
74 #include "runtime/globals_extension.hpp"
75 #include "runtime/handles.inline.hpp"
76 #include "runtime/java.hpp"
77 #include "runtime/orderAccess.hpp"
78 #include "runtime/os.hpp"
79 #include "runtime/prefetch.inline.hpp"
80 #include "runtime/threads.hpp"
81 #include "utilities/align.hpp"
82 #include "utilities/checkedCast.hpp"
83 #include "utilities/formatBuffer.hpp"
84 #include "utilities/growableArray.hpp"
85 #include "utilities/powerOfTwo.hpp"
86
87 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
88
89 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
90 assert(cm != nullptr, "must be");
91 }
92
93 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
94 assert(cm != nullptr, "must be");
95 assert(_cm == nullptr, "double initialize");
96 _cm = cm;
97 }
98
99 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
100 assert(addr < _cm->finger(), "invariant");
101 assert(addr >= _task->finger(), "invariant");
102
103 // We move that task's local finger along.
104 _task->move_finger_to(addr);
105
106 _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
107 // we only partially drain the local queue and global stack
108 _task->drain_local_queue(true);
109 _task->drain_global_stack(true);
110
111 // if the has_aborted flag has been raised, we need to bail out of
112 // the iteration
113 return !_task->has_aborted();
114 }
115
116 G1CMMarkStack::G1CMMarkStack() :
117 _chunk_allocator() {
118 set_empty();
119 }
120
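// Returns the required alignment of mark stack capacities, in number of
// G1TaskQueueEntry elements. Using the least common multiple of the OS allocation
// granularity and the chunk size (converted from bytes to entries) ensures that an
// aligned capacity corresponds to a whole number of chunks and of allocation units.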
121 size_t G1CMMarkStack::capacity_alignment() {
122 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
123 }
124
125 bool G1CMMarkStack::initialize() {
126 guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
127
128 size_t initial_capacity = MarkStackSize;
129 size_t max_capacity = MarkStackSizeMax;
130
131 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
132
133 size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
134 size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
135
136 initial_num_chunks = round_up_power_of_2(initial_num_chunks);
137 max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
138
139 size_t limit = (INT_MAX - 1);
140 max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
141 initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
142
143 FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
144 FLAG_SET_ERGO(MarkStackSize, initial_capacity);
145
146 log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
147
148 log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu entries",
149 initial_num_chunks, max_capacity);
150
151 return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
152 }
153
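// Allocates a chunk directly from the allocator's backing memory (as opposed to reusing
// one from the free list). A slot index is claimed by atomically bumping _size; if the
// bucket backing that index has not been reserved yet, we either give up (preferring to
// restart marking) or, if growing is allowed, expand the allocator under
// G1MarkStackChunkList_lock, re-checking the bucket after acquiring the lock. The chunk
// is then constructed in place inside the bucket's backing array.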
154 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
155 if (_size.load_relaxed() >= _max_capacity) {
156 return nullptr;
157 }
158
159 size_t cur_idx = _size.fetch_then_add(1u);
160
161 if (cur_idx >= _max_capacity) {
162 return nullptr;
163 }
164
165 size_t bucket = get_bucket(cur_idx);
166 if (_buckets[bucket].load_acquire() == nullptr) {
167 if (!_should_grow) {
168 // Prefer to restart the CM.
169 return nullptr;
170 }
171
172 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
173 if (_buckets[bucket].load_acquire() == nullptr) {
174 size_t desired_capacity = bucket_size(bucket) * 2;
175 if (!try_expand_to(desired_capacity)) {
176 return nullptr;
177 }
178 }
179 }
180
181 size_t bucket_idx = get_bucket_index(cur_idx);
182 TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
183 result->next = nullptr;
184 return result;
185 }
186
187 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
188 _min_capacity(0),
189 _max_capacity(0),
190 _capacity(0),
191 _num_buckets(0),
192 _should_grow(false),
193 _buckets(nullptr),
194 _size(0)
195 { }
196
197 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
198 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
199
200 _min_capacity = initial_capacity;
201 _max_capacity = max_capacity;
202 _num_buckets = get_bucket(_max_capacity) + 1;
203
204 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
205
206 for (size_t i = 0; i < _num_buckets; i++) {
207 _buckets[i].store_relaxed(nullptr);
208 }
209
210 size_t new_capacity = bucket_size(0);
211
212 if (!reserve(new_capacity)) {
213 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
214 return false;
215 }
216 return true;
217 }
218
219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
220 if (_capacity == _max_capacity) {
221 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
222 return false;
223 }
224
225 size_t old_capacity = _capacity;
226 desired_capacity = MIN2(desired_capacity, _max_capacity);
227
228 if (reserve(desired_capacity)) {
229 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
230 old_capacity, desired_capacity);
231 return true;
232 }
233 return false;
234 }
235
236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
237 size_t new_capacity = _capacity * 2;
238 return try_expand_to(new_capacity);
239 }
240
241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
242 if (_buckets == nullptr) {
243 return;
244 }
245
246 for (size_t i = 0; i < _num_buckets; i++) {
247 if (_buckets[i].load_relaxed() != nullptr) {
248 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
249 _buckets[i].store_relaxed(nullptr);
250 }
251 }
252
253 FREE_C_HEAP_ARRAY(_buckets);
254 }
255
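// Reserves backing memory for all buckets needed to hold new_capacity chunks, starting
// at the first bucket beyond the currently reserved capacity. The last bucket is trimmed
// so that the total capacity never exceeds _max_capacity. Each bucket is published with a
// release store so that readers using load_acquire see fully initialized memory.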
256 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
257 assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
258
259 size_t highest_bucket = get_bucket(new_capacity - 1);
260 size_t i = get_bucket(_capacity);
261
262 // Allocate all buckets associated with indexes between the current capacity (_capacity)
263 // and the new capacity (new_capacity). This step ensures that there are no gaps in the
264 // array and that the capacity accurately reflects the reserved memory.
265 for (; i <= highest_bucket; i++) {
266 if (_buckets[i].load_acquire() != nullptr) {
267 continue; // Skip over already allocated buckets.
268 }
269
270 size_t bucket_capacity = bucket_size(i);
271
272 // Trim bucket size so that we do not exceed the _max_capacity.
273 bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
274 bucket_capacity :
275 _max_capacity - _capacity;
276
277
278 TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
279
280 if (bucket_base == nullptr) {
281 log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
282 bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
283 return false;
284 }
285 _capacity += bucket_capacity;
286 _buckets[i].release_store(bucket_base);
287 }
288 return true;
289 }
290
291 void G1CMMarkStack::expand() {
292 _chunk_allocator.try_expand();
293 }
294
295 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
296 elem->next = list->load_relaxed();
297 list->store_relaxed(elem);
298 }
299
300 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
301 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
302 add_chunk_to_list(&_chunk_list, elem);
303 _chunks_in_chunk_list++;
304 }
305
306 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
307 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
308 add_chunk_to_list(&_free_list, elem);
309 }
310
311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
312 TaskQueueEntryChunk* result = list->load_relaxed();
313 if (result != nullptr) {
314 list->store_relaxed(list->load_relaxed()->next);
315 }
316 return result;
317 }
318
319 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
320 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
321 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
322 if (result != nullptr) {
323 _chunks_in_chunk_list--;
324 }
325 return result;
326 }
327
328 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
329 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
330 return remove_chunk_from_list(&_free_list);
331 }
332
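// Pushes a buffer of EntriesPerChunk entries onto the overflow stack. A chunk is taken
// from the free list if available, otherwise freshly allocated from the chunk allocator;
// the caller's entries are copied into it and the chunk is linked onto the chunk list.
// Returns false if no chunk could be obtained, i.e. the stack is full and could not be
// grown. The matching par_pop_chunk() below copies a chunk's entries back out and
// recycles the chunk onto the free list.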
333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
334 // Get a new chunk.
335 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
336
337 if (new_chunk == nullptr) {
338 // Did not get a chunk from the free list. Allocate from backing memory.
339 new_chunk = _chunk_allocator.allocate_new_chunk();
340
341 if (new_chunk == nullptr) {
342 return false;
343 }
344 }
345
346 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
347
348 add_chunk_to_chunk_list(new_chunk);
349
350 return true;
351 }
352
353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
354 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
355
356 if (cur == nullptr) {
357 return false;
358 }
359
360 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
361
362 add_chunk_to_free_list(cur);
363 return true;
364 }
365
366 void G1CMMarkStack::set_empty() {
367 _chunks_in_chunk_list = 0;
368 _chunk_list.store_relaxed(nullptr);
369 _free_list.store_relaxed(nullptr);
370 _chunk_allocator.reset();
371 }
372
373 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
374 _root_regions(MemRegion::create_array(max_regions, mtGC)),
375 _max_regions(max_regions),
376 _num_regions(0),
377 _num_claimed_regions(0) { }
378
379 G1CMRootMemRegions::~G1CMRootMemRegions() {
380 MemRegion::destroy_array(_root_regions, _max_regions);
381 }
382
383 void G1CMRootMemRegions::reset() {
384 assert_at_safepoint();
385 assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
386
387 _num_regions.store_relaxed(0);
388 _num_claimed_regions.store_relaxed(0);
389 }
390
391 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
392 assert_at_safepoint();
393 uint idx = _num_regions.fetch_then_add(1u);
394 assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
395 assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less than or equal to "
396 "end (" PTR_FORMAT ")", p2i(start), p2i(end));
397 _root_regions[idx].set_start(start);
398 _root_regions[idx].set_end(end);
399 }
400
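// Claims the next unclaimed root MemRegion for scanning, or returns nullptr if all added
// regions have already been claimed. Claiming is lock-free: each claimer atomically bumps
// _num_claimed_regions and uses the previous value as its index.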
401 const MemRegion* G1CMRootMemRegions::claim_next() {
402 uint local_num_regions = num_regions();
403 if (num_claimed_regions() >= local_num_regions) {
404 return nullptr;
405 }
406
407 uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
408 if (claimed_index < local_num_regions) {
409 return &_root_regions[claimed_index];
410 }
411 return nullptr;
412 }
413
414 bool G1CMRootMemRegions::work_completed() const {
415 return num_remaining_regions() == 0;
416 }
417
418 uint G1CMRootMemRegions::num_remaining_regions() const {
419 uint total = num_regions();
420 uint claimed = num_claimed_regions();
421 return (total > claimed) ? total - claimed : 0;
422 }
423
424 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
425 uint local_num_root_regions = num_regions();
426 for (uint i = 0; i < local_num_root_regions; i++) {
427 if (_root_regions[i].equals(mr)) {
428 return true;
429 }
430 }
431 return false;
432 }
433
434 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
435 G1RegionToSpaceMapper* bitmap_storage) :
436 _cm_thread(nullptr),
437 _g1h(g1h),
438
439 _mark_bitmap(),
440
441 _heap(_g1h->reserved()),
442
443 _root_regions(_g1h->max_num_regions()),
444 _root_region_scan_aborted(false),
445
446 _global_mark_stack(),
447
448 _finger(nullptr), // _finger set in set_non_marking_state
449
450 _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
451 _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
452 _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
453 _tasks(nullptr), // _tasks set inside late_init()
454 _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
455 _terminator(_max_num_tasks, _task_queues),
456 _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
457
458 _first_overflow_barrier_sync(),
459 _second_overflow_barrier_sync(),
460
461 _completed_mark_cycles(0),
462 _has_overflown(false),
463 _concurrent(false),
464 _has_aborted(false),
465 _restart_for_overflow(false),
466 _gc_timer_cm(new ConcurrentGCTimer()),
467 _gc_tracer_cm(new G1OldTracer()),
468
469 // _verbose_level set below
470
471 _remark_times(),
472 _remark_mark_times(),
473 _remark_weak_ref_times(),
474 _cleanup_times(),
475
476 _concurrent_workers(nullptr),
477 _num_concurrent_workers(0),
478 _max_concurrent_workers(0),
479
480 _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
481 _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
482 _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
483 _needs_remembered_set_rebuild(false)
484 {
485 assert(G1CGC_lock != nullptr, "G1CGC_lock must be initialized");
486
487 _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
488 }
489
490 void G1ConcurrentMark::fully_initialize() {
491 if (is_fully_initialized()) {
492 return;
493 }
494
495 // Create & start ConcurrentMark thread.
496 _cm_thread = new G1ConcurrentMarkThread(this);
497 if (_cm_thread->osthread() == nullptr) {
498 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
499 }
500
501 log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
502 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
503
504 _max_concurrent_workers = ConcGCThreads;
505
506 _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
507 _concurrent_workers->initialize_workers();
508 _num_concurrent_workers = _concurrent_workers->active_workers();
509
510 if (!_global_mark_stack.initialize()) {
511 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
512 }
513
514 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
515
516 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
517 _num_active_tasks = _max_num_tasks;
518
519 for (uint i = 0; i < _max_num_tasks; ++i) {
520 G1CMTaskQueue* task_queue = new G1CMTaskQueue();
521 _task_queues->register_queue(i, task_queue);
522
523 _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
524 }
525
526 uint max_num_regions = _g1h->max_num_regions();
527 ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
528 ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
529 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
530
531 reset_at_marking_complete();
532 }
533
534 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
535 return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
536 }
537
538 bool G1ConcurrentMark::is_in_marking() const {
539 return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
540 }
541
542 bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
543 return cm_thread()->is_in_rebuild_or_scrub();
544 }
545
546 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
547 return cm_thread()->is_in_reset_for_next_cycle();
548 }
549
550 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
551 return _partial_array_state_manager;
552 }
553
554 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
555 assert(is_fully_initialized(), "must be");
556 return _cm_thread;
557 }
558
559 void G1ConcurrentMark::reset() {
560 _has_aborted.store_relaxed(false);
561
562 reset_marking_for_restart();
563
564 // Reset all tasks, since different phases will use different numbers of active
565 // threads. So, it's easiest to have all of them ready.
566 for (uint i = 0; i < _max_num_tasks; ++i) {
567 _tasks[i]->reset(mark_bitmap());
568 }
569
570 uint max_num_regions = _g1h->max_num_regions();
571 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
572 for (uint i = 0; i < max_num_regions; i++) {
573 _region_mark_stats[i].clear();
574 }
575
576 _root_region_scan_aborted.store_relaxed(false);
577 _root_regions.reset();
578 }
579
580 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
581 uint region_idx = r->hrm_index();
582 for (uint j = 0; j < _max_num_tasks; ++j) {
583 _tasks[j]->clear_mark_stats_cache(region_idx);
584 }
585 _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
586 _region_mark_stats[region_idx].clear();
587 }
588
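// Called when a humongous object has been eagerly reclaimed during a collection pause.
// The mark bit of the (starts humongous) object is cleared unconditionally; if marking or
// remembered set rebuilding is in progress, any marking statistics gathered so far are
// also cleared for all regions spanned by the object.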
589 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
590 assert_at_safepoint();
591 assert(r->is_starts_humongous(), "Got humongous continues region here");
592
593 // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
594 mark_bitmap()->clear(r->bottom());
595
596 if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
597 return;
598 }
599
600 // Clear any statistics about the region gathered so far.
601 _g1h->humongous_obj_regions_iterate(r,
602 [&] (G1HeapRegion* r) {
603 clear_statistics(r);
604 });
605 }
606
607 void G1ConcurrentMark::reset_marking_for_restart() {
608 _global_mark_stack.set_empty();
609
610 // Expand the marking stack, if we have to and if we can.
611 if (has_overflown()) {
612 _global_mark_stack.expand();
613
614 uint max_num_regions = _g1h->max_num_regions();
615 for (uint i = 0; i < max_num_regions; i++) {
616 _region_mark_stats[i].clear_during_overflow();
617 }
618 }
619
620 clear_has_overflown();
621 _finger.store_relaxed(_heap.start());
622
623 for (uint i = 0; i < _max_num_tasks; ++i) {
624 _tasks[i]->reset_for_restart();
625 }
626 }
627
628 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
629 assert(active_tasks <= _max_num_tasks, "we should not have more");
630
631 _num_active_tasks = active_tasks;
632 // Need to update the three data structures below according to the
633 // number of active threads for this phase.
634 _terminator.reset_for_reuse(active_tasks);
635 _first_overflow_barrier_sync.set_n_workers(active_tasks);
636 _second_overflow_barrier_sync.set_n_workers(active_tasks);
637 }
638
639 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
640 set_concurrency(active_tasks);
641
642 _concurrent.store_relaxed(concurrent);
643
644 if (!concurrent) {
645 // At this point we should be in a STW phase, and completed marking.
646 assert_at_safepoint_on_vm_thread();
647 assert(out_of_regions(),
648 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
649 p2i(finger()), p2i(_heap.end()));
650 }
651 }
652
653 #if TASKQUEUE_STATS
654 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
655
656 _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
657
658 auto get_pa_stats = [&](uint i) {
659 return _tasks[i]->partial_array_task_stats();
660 };
661
662 PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
663 "Concurrent Mark Partial Array");
664
665 for (uint i = 0; i < _max_num_tasks; ++i) {
666 get_pa_stats(i)->reset();
667 }
668 }
669 #endif
670
671 void G1ConcurrentMark::reset_at_marking_complete() {
672 TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
673 // We set the global marking state to some default values when we're
674 // not doing marking.
675 reset_marking_for_restart();
676 _num_active_tasks = 0;
677 }
678
679 G1ConcurrentMark::~G1ConcurrentMark() {
680 FREE_C_HEAP_ARRAY(_top_at_mark_starts);
681 FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
682 FREE_C_HEAP_ARRAY(_region_mark_stats);
683 // The G1ConcurrentMark instance is never freed.
684 ShouldNotReachHere();
685 }
686
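// Worker task that clears the concurrent marking bitmap in chunks of chunk_size() bytes.
// If created as suspendible, the workers join the SuspendibleThreadSet and yield to
// safepoints between chunks. During a concurrent undo cycle, regions known to contain no
// live objects are skipped entirely since their bitmap range is already clear.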
687 class G1ClearBitMapTask : public WorkerTask {
688 public:
689 static size_t chunk_size() { return M; }
690
691 private:
692 // Heap region closure used for clearing the _mark_bitmap.
693 class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
694 G1ConcurrentMark* _cm;
695 G1CMBitMap* _bitmap;
696 bool _suspendible; // If suspendible, do yield checks.
697
698 bool suspendible() {
699 return _suspendible;
700 }
701
702 bool is_clear_concurrent_undo() {
703 return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
704 }
705
706 bool has_aborted() {
707 if (suspendible()) {
708 _cm->do_yield_check();
709 return _cm->has_aborted();
710 }
711 return false;
712 }
713
714 HeapWord* region_clear_limit(G1HeapRegion* r) {
715 // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
716 // live_words data are current with respect to the _mark_bitmap. We use this information
717 // to only clear ranges of the bitmap that require clearing.
718 if (is_clear_concurrent_undo()) {
719 // No need to clear bitmaps for empty regions (which includes regions we
720 // did not mark through).
721 if (!_cm->contains_live_object(r->hrm_index())) {
722 assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
723 return r->bottom();
724 }
725 assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
726 }
727 return r->end();
728 }
729
730 public:
731 G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
732 G1HeapRegionClosure(),
733 _cm(cm),
734 _bitmap(cm->mark_bitmap()),
735 _suspendible(suspendible)
736 { }
737
738 virtual bool do_heap_region(G1HeapRegion* r) {
739 if (has_aborted()) {
740 return true;
741 }
742
743 HeapWord* cur = r->bottom();
744 HeapWord* const end = region_clear_limit(r);
745
746 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
747
748 while (cur < end) {
749
750 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
751 _bitmap->clear_range(mr);
752
753 cur += chunk_size_in_words;
754
755 // Repeat the asserts from before the start of the closure. We will do them
756 // as asserts here to minimize their overhead on the product. However, we
757 // will have them as guarantees at the beginning / end of the bitmap
758 // clearing to get some checking in the product.
759 assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
760
761 // Abort iteration if necessary.
762 if (has_aborted()) {
763 return true;
764 }
765 }
766 assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
767
768 _cm->reset_top_at_mark_start(r);
769
770 return false;
771 }
772 };
773
774 G1ClearBitmapHRClosure _cl;
775 G1HeapRegionClaimer _hr_claimer;
776 bool _suspendible; // If the task is suspendible, workers must join the STS.
777
778 public:
779 G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
780 WorkerTask("G1 Clear Bitmap"),
781 _cl(cm, suspendible),
782 _hr_claimer(n_workers),
783 _suspendible(suspendible)
784 { }
785
786 void work(uint worker_id) {
787 SuspendibleThreadSetJoiner sts_join(_suspendible);
788 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
789 }
790 };
791
792 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
793 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
794
795 size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
796 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
797
798 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
799
800 G1ClearBitMapTask cl(this, num_workers, may_yield);
801
802 log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
803 workers->run_task(&cl, num_workers);
804 }
805
806 void G1ConcurrentMark::cleanup_for_next_mark() {
807 // Make sure that the concurrent mark thread looks to still be in
808 // the current cycle.
809 guarantee(is_in_reset_for_next_cycle(), "invariant");
810
811 clear_bitmap(_concurrent_workers, true);
812
813 reset_partial_array_state_manager();
814
815 // Should not have changed state yet (even if a Full GC interrupted us).
816 guarantee(is_in_reset_for_next_cycle(), "invariant");
817 }
818
819 void G1ConcurrentMark::reset_partial_array_state_manager() {
820 for (uint i = 0; i < _max_num_tasks; ++i) {
821 _tasks[i]->unregister_partial_array_splitter();
822 }
823
824 partial_array_state_manager()->reset();
825
826 for (uint i = 0; i < _max_num_tasks; ++i) {
827 _tasks[i]->register_partial_array_splitter();
828 }
829 }
830
831 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
832 assert_at_safepoint_on_vm_thread();
833 // To avoid fragmentation, the full collection requesting to clear the bitmap
834 // might use fewer workers than are available. To ensure the bitmap is cleared
835 // as efficiently as possible, the number of active workers is temporarily
836 // increased to include all currently created workers.
837 WithActiveWorkers update(workers, workers->created_workers());
838 clear_bitmap(workers, false);
839 }
840
841 class G1PreConcurrentStartTask : public G1BatchedTask {
842 // Reset marking state.
843 class ResetMarkingStateTask;
844 // For each region note start of marking.
845 class NoteStartOfMarkTask;
846
847 public:
848 G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
849 };
850
851 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
852 G1ConcurrentMark* _cm;
853 public:
854 ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
855
856 double worker_cost() const override { return 1.0; }
857 void do_work(uint worker_id) override;
858 };
859
860 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
861
862 class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
863 G1ConcurrentMark* _cm;
864
865 public:
866 NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
867
868 bool do_heap_region(G1HeapRegion* r) override {
869 if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
870 _cm->update_top_at_mark_start(r);
871 } else {
872 _cm->reset_top_at_mark_start(r);
873 }
874 return false;
875 }
876 } _region_cl;
877
878 G1HeapRegionClaimer _claimer;
879 public:
880 NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
881
882 double worker_cost() const override {
883 // The work done per region is very small, therefore we choose this magic number to cap the number
884 // of threads used when there are few regions.
885 const double regions_per_thread = 1000;
886 return _claimer.n_regions() / regions_per_thread;
887 }
888
889 void set_max_workers(uint max_workers) override {
890 _claimer.set_n_workers(max_workers);
891 }
892
893 void do_work(uint worker_id) override {
894 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
895 }
896 };
897
898 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
899 // Reset marking state.
900 _cm->reset();
901 }
902
903 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
904 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
905 add_serial_task(new ResetMarkingStateTask(cm));
906 add_parallel_task(new NoteStartOfMarkTask());
907 }
908
909 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
910 assert_at_safepoint_on_vm_thread();
911
912 G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
913
914 ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
915
916 G1PreConcurrentStartTask cl(cause, this);
917 G1CollectedHeap::heap()->run_batch_task(&cl);
918
919 _gc_tracer_cm->set_gc_cause(cause);
920 }
921
922 void G1ConcurrentMark::start_full_concurrent_cycle() {
923 // Start Concurrent Marking weak-reference discovery.
924 ReferenceProcessor* rp = _g1h->ref_processor_cm();
925 rp->start_discovery(false /* always_clear */);
926
927 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
928 // This is the start of the marking cycle; we expect all
929 // threads to have SATB queues with active set to false.
930 satb_mq_set.set_active_all_threads(true, /* new active value */
931 false /* expected_active */);
932
933 // update_g1_committed() will be called at the end of an evac pause
934 // when marking is on. So, it's also called at the end of the
935 // concurrent start pause to update the heap end, if the heap expands
936 // during it. No need to call it here.
937
938 // Signal the thread to start work.
939 cm_thread()->start_full_cycle();
940 }
941
942 void G1ConcurrentMark::start_undo_concurrent_cycle() {
943 assert_at_safepoint_on_vm_thread();
944 // At this point this GC is no longer a concurrent start GC; we can only check for the young-only phase.
945 assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
946
947 abort_root_region_scan_at_safepoint();
948
949 // Signal the thread to start work.
950 cm_thread()->start_undo_cycle();
951 }
952
953 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
954 cm_thread()->set_idle();
955 }
956
957 void G1ConcurrentMark::stop() {
958 if (is_fully_initialized()) {
959 cm_thread()->stop();
960 }
961 }
962
963 /*
964 * Notice that in the next two methods, we actually leave the STS
965 * during the barrier sync and join it immediately afterwards. If we
966 * do not do this, the following deadlock can occur: one thread could
967 * be in the barrier sync code, waiting for the other thread to also
968 * sync up, whereas another one could be trying to yield, while also
969 * waiting for the other threads to sync up too.
970 *
971 * Note, however, that this code is also used during remark and in
972 * this case we should not attempt to leave / enter the STS, otherwise
973 * we'll either hit an assert (debug / fastdebug) or deadlock
974 * (product). So we should only leave / enter the STS if we are
975 * operating concurrently.
976 *
977 * Because the thread that does the sync barrier has left the STS, it
978 * is possible for it to be suspended while a Full GC or an evacuation
979 * pause occurs. This is actually safe, since entering the sync barrier
980 * is one of the last things do_marking_step() does, and it doesn't
981 * manipulate any data structures afterwards.
982 */
983
984 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
985 bool barrier_aborted;
986 {
987 SuspendibleThreadSetLeaver sts_leave(concurrent());
988 barrier_aborted = !_first_overflow_barrier_sync.enter();
989 }
990
991 // at this point everyone should have synced up and not be doing any
992 // more work
993
994 if (barrier_aborted) {
995 // If the barrier aborted we ignore the overflow condition and
996 // just abort the whole marking phase as quickly as possible.
997 return;
998 }
999 }
1000
1001 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1002 SuspendibleThreadSetLeaver sts_leave(concurrent());
1003 _second_overflow_barrier_sync.enter();
1004
1005 // at this point everything should be re-initialized and ready to go
1006 }
1007
1008 class G1CMConcurrentMarkingTask : public WorkerTask {
1009 G1ConcurrentMark* _cm;
1010
1011 public:
1012 void work(uint worker_id) {
1013 ResourceMark rm;
1014
1015 SuspendibleThreadSetJoiner sts_join;
1016
1017 assert(worker_id < _cm->active_tasks(), "invariant");
1018
1019 G1CMTask* task = _cm->task(worker_id);
1020 task->record_start_time();
1021 if (!_cm->has_aborted()) {
1022 do {
1023 task->do_marking_step(G1ConcMarkStepDurationMillis,
1024 true /* do_termination */,
1025 false /* is_serial*/);
1026
1027 _cm->do_yield_check();
1028 } while (!_cm->has_aborted() && task->has_aborted());
1029 }
1030 task->record_end_time();
1031 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1032 }
1033
1034 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1035 WorkerTask("Concurrent Mark"), _cm(cm) { }
1036
1037 ~G1CMConcurrentMarkingTask() { }
1038 };
1039
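// Determines how many concurrent marking workers to use for this marking cycle. If
// dynamic GC thread selection is disabled, or ConcGCThreads was set explicitly on the
// command line, all of _max_concurrent_workers are used; otherwise WorkerPolicy picks a
// default based on the previous worker count and the number of non-daemon Java threads.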
1040 uint G1ConcurrentMark::calc_active_marking_workers() {
1041 uint result = 0;
1042 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1043 result = _max_concurrent_workers;
1044 } else {
1045 result =
1046 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1047 1, /* Minimum workers */
1048 _num_concurrent_workers,
1049 Threads::number_of_non_daemon_threads());
1050 // Don't scale the result down by scale_concurrent_workers() because
1051 // that scaling has already gone into "_max_concurrent_workers".
1052 }
1053 assert(result > 0 && result <= _max_concurrent_workers,
1054 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1055 _max_concurrent_workers, result);
1056 return result;
1057 }
1058
1059 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1060 return _root_region_scan_aborted.load_relaxed();
1061 }
1062
1063 #ifndef PRODUCT
1064 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1065 assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1066 }
1067 #endif
1068
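// Scans a single root MemRegion. The asserts check that the region starts at the owning
// heap region's top-at-mark-start. Objects are iterated one after another, applying
// G1RootRegionScanClosure to their fields while prefetching ahead by
// PrefetchScanIntervalInBytes.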
1069 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1070 #ifdef ASSERT
1071 HeapWord* last = region->last();
1072 G1HeapRegion* hr = _g1h->heap_region_containing(last);
1073 assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1074 "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1075 assert(top_at_mark_start(hr) == region->start(),
1076 "MemRegion start should be equal to TAMS");
1077 #endif
1078
1079 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1080
1081 const uintx interval = PrefetchScanIntervalInBytes;
1082 HeapWord* curr = region->start();
1083 const HeapWord* end = region->end();
1084 while (curr < end) {
1085 Prefetch::read(curr, interval);
1086 oop obj = cast_to_oop(curr);
1087 size_t size = obj->oop_iterate_size(&cl);
1088 assert(size == obj->size(), "sanity");
1089 curr += size;
1090 }
1091 }
1092
1093 class G1CMRootRegionScanTask : public WorkerTask {
1094 G1ConcurrentMark* _cm;
1095 bool _should_yield;
1096
1097 public:
1098 G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1099 WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1100
1101 void work(uint worker_id) {
1102 SuspendibleThreadSetJoiner sts_join(_should_yield);
1103
1104 while (true) {
1105 if (_cm->has_root_region_scan_aborted()) {
1106 return;
1107 }
1108 G1CMRootMemRegions* root_regions = _cm->root_regions();
1109 const MemRegion* region = root_regions->claim_next();
1110 if (region == nullptr) {
1111 return;
1112 }
1113 _cm->scan_root_region(region, worker_id);
1114 if (_should_yield) {
1115 SuspendibleThreadSet::yield();
1116 // If we yielded, a GC may have processed all root regions,
1117 // so this loop will naturally exit on the next claim_next() call.
1118 // Same if a Full GC signalled abort of the concurrent mark.
1119 }
1120 }
1121 }
1122 };
1123
1124 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1125 // We first check whether there is any work to do as we might have already aborted
1126 // the concurrent cycle, or ran into a GC that did the actual work before we reached here.
1127 // We want to avoid spinning up the worker threads if that happened.
1128 // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1129 //
1130 // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1131 // a young gc doing the work.
1132 //
1133 // Concurrent gc threads enter an STS when starting the task, so they stop, then
1134 // continue after that safepoint.
1135 //
1136 // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1137 // consistent view of the value containing the number of remaining regions across the
1138 // usages below. The safepoint/gc may already be running and modifying it
1139 // while this code is still executing.
1140 uint num_remaining = root_regions()->num_remaining_regions();
1141 bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1142 if (do_scan) {
1143 // Assign one worker to each root-region but subject to the max constraint.
1144 // The constraint is also important to avoid accesses beyond the allocated per-worker
1145 // marking helper data structures. We might get passed different WorkerThreads with
1146 // different number of threads (potential worker ids) than helper data structures when
1147 // completing this work during GC.
1148 const uint num_workers = MIN2(num_remaining,
1149 _max_concurrent_workers);
1150 assert(num_workers > 0, "must use at least one worker since there are root regions remaining to process");
1151
1152 G1CMRootRegionScanTask task(this, concurrent);
1153 log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1154 task.name(), num_workers, num_remaining);
1155 workers->run_task(&task, num_workers);
1156 }
1157
1158 // At the end of this method, we can re-read num_remaining() in the assert: either
1159 // we got non-zero above and we processed all root regions (and it must be zero
1160 // after the worker task synchronization) or it had already been zero. We also
1161 // can't have started another concurrent cycle that could have set it to something else
1162 // while still in the concurrent cycle (if called concurrently).
1163 assert_root_region_scan_completed_or_aborted();
1164
1165 return do_scan;
1166 }
1167
1168 void G1ConcurrentMark::scan_root_regions_concurrently() {
1169 assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1170 scan_root_regions(_concurrent_workers, true /* concurrent */);
1171 }
1172
1173 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1174 assert_at_safepoint_on_vm_thread();
1175 return scan_root_regions(_g1h->workers(), false /* concurrent */);
1176 }
1177
1178 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1179 root_regions()->add(top_at_mark_start(r), r->top());
1180 }
1181
1182 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1183 return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1184 }
1185
1186 void G1ConcurrentMark::abort_root_region_scan() {
1187 assert_not_at_safepoint();
1188
1189 _root_region_scan_aborted.store_relaxed(true);
1190 }
1191
1192 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1193 assert_at_safepoint_on_vm_thread();
1194
1195 _root_region_scan_aborted.store_relaxed(true);
1196 }
1197
1198 void G1ConcurrentMark::concurrent_cycle_start() {
1199 _gc_timer_cm->register_gc_start();
1200
1201 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1202
1203 _g1h->trace_heap_before_gc(_gc_tracer_cm);
1204 }
1205
1206 uint G1ConcurrentMark::completed_mark_cycles() const {
1207 return _completed_mark_cycles.load_relaxed();
1208 }
1209
1210 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1211 _g1h->trace_heap_after_gc(_gc_tracer_cm);
1212
1213 if (mark_cycle_completed) {
1214 _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1215 }
1216
1217 if (has_aborted()) {
1218 log_info(gc, marking)("Concurrent Mark Abort");
1219 _gc_tracer_cm->report_concurrent_mode_failure();
1220 }
1221
1222 _gc_timer_cm->register_gc_end();
1223
1224 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1225 }
1226
1227 void G1ConcurrentMark::mark_from_roots() {
1228 _restart_for_overflow.store_relaxed(false);
1229
1230 uint active_workers = calc_active_marking_workers();
1231
1232 // Setting active workers is not guaranteed since fewer
1233 // worker threads may currently exist and more may not be
1234 // available.
1235 active_workers = _concurrent_workers->set_active_workers(active_workers);
1236 log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1237
1238 _num_concurrent_workers = active_workers;
1239
1240 // Parallel task terminator is set in "set_concurrency_and_phase()"
1241 set_concurrency_and_phase(active_workers, true /* concurrent */);
1242
1243 G1CMConcurrentMarkingTask marking_task(this);
1244 _concurrent_workers->run_task(&marking_task);
1245 print_stats();
1246 }
1247
1248 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1249 static const char* location_strings[] = { "Remark Before",
1250 "Remark After",
1251 "Remark Overflow",
1252 "Cleanup Before",
1253 "Cleanup After" };
1254 return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1255 }
1256
1257 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1258 VerifyLocation location) {
1259 G1HeapVerifier* verifier = _g1h->verifier();
1260
1261 verifier->verify_region_sets_optional();
1262
1263 const char* caller = verify_location_string(location);
1264
1265 if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1266 GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1267
1268 size_t const BufLen = 512;
1269 char buffer[BufLen];
1270
1271 jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1272 verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1273
1274 // Only check bitmap in Remark, and not at After-Verification because the regions
1275 // already have their TAMS'es reset.
1276 if (location != VerifyLocation::RemarkAfter) {
1277 verifier->verify_bitmap_clear(true /* above_tams_only */);
1278 }
1279 }
1280 }
1281
1282 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1283 G1CollectedHeap* _g1h;
1284 public:
1285 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1286
1287 bool do_object_b(oop obj) {
1288 return !_g1h->is_obj_dead(obj);
1289 }
1290 };
1291
1292 void G1ConcurrentMark::remark() {
1293 assert_at_safepoint_on_vm_thread();
1294
1295 // If a full collection has happened, we should not continue. However, we might
1296 // have ended up here because the Remark VM operation had already been scheduled.
1297 if (has_aborted()) {
1298 return;
1299 }
1300
1301 G1Policy* policy = _g1h->policy();
1302 policy->record_pause_start_time();
1303
1304 double start = os::elapsedTime();
1305
1306 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1307
1308 {
1309 GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1310 finalize_marking();
1311 }
1312
1313 double mark_work_end = os::elapsedTime();
1314
1315 bool const mark_finished = !has_overflown();
1316 if (mark_finished) {
1317 weak_refs_work();
1318
1319 // Unload Klasses, String, Code Cache, etc.
1320 if (ClassUnloadingWithConcurrentMark) {
1321 G1CMIsAliveClosure is_alive(this);
1322 _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1323 }
1324
1325 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1326 // We're done with marking.
1327 // This is the end of the marking cycle; we expect all
1328 // threads to have SATB queues with active set to true.
1329 satb_mq_set.set_active_all_threads(false, /* new active value */
1330 true /* expected_active */);
1331
1332 {
1333 GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1334 flush_all_task_caches();
1335 }
1336
1337 // All marking completed. Check the bitmap now, as we will start to reset TAMSes
1338 // in parallel below, which means we cannot do this check in the After-Remark verification.
1339 _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1340
1341 {
1342 GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1343
1344 G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1345 uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1346 _g1h->workers()->active_workers());
1347 log_debug(gc, ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1348 _g1h->workers()->run_task(&cl, num_workers);
1349
1350 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1351 _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1352
1353 _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1354
1355 if (_needs_remembered_set_rebuild) {
1356 GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1357 _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1358 }
1359 }
1360
1361 if (log_is_enabled(Trace, gc, liveness)) {
1362 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1363 _g1h->heap_region_iterate(&cl);
1364 }
1365
1366 // Potentially, some empty regions have been reclaimed; make this a
1367 // "collection" so that pending allocation can retry before attempting a
1368 // GC pause.
1369 _g1h->increment_total_collections();
1370
1371 // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1372 // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1373 // triggered, it likely means there are very few regular GCs, making resizing
1374 // based on gc heuristics less effective.
1375 if (_g1h->last_gc_was_periodic()) {
1376 _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1377 }
1378
1379 compute_new_sizes();
1380
1381 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1382
1383 assert(!restart_for_overflow(), "sanity");
1384 // Completely reset the marking state (except bitmaps) since marking completed.
1385 reset_at_marking_complete();
1386
1387 G1CollectedHeap::finish_codecache_marking_cycle();
1388
1389 {
1390 GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1391 G1ObjectCountIsAliveClosure is_alive(_g1h);
1392 _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1393 }
1394
1395 // Successfully completed marking, advance state.
1396 cm_thread()->set_full_cycle_rebuild_and_scrub();
1397 } else {
1398 // We overflowed. Restart concurrent marking.
1399 _restart_for_overflow.store_relaxed(true);
1400
1401 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1402
1403 // Clear the marking state because we will be restarting
1404 // marking due to overflowing the global mark stack.
1405 reset_marking_for_restart();
1406 }
1407
1408 // Statistics
1409 double now = os::elapsedTime();
1410 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1411 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1412 _remark_times.add((now - start) * 1000.0);
1413
1414 _g1h->update_perf_counter_cpu_time();
1415
1416 policy->record_concurrent_mark_remark_end();
1417
1418 return;
1419 }
1420
1421 void G1ConcurrentMark::compute_new_sizes() {
1422 MetaspaceGC::compute_new_size();
1423
1424 // Cleanup will have freed any regions completely full of garbage.
1425 // Update the soft reference policy with the new heap occupancy.
1426 Universe::heap()->update_capacity_and_used_at_gc();
1427
1428 // We reclaimed old regions so we should calculate the sizes to make
1429 // sure we update the old gen/space data.
1430 _g1h->monitoring_support()->update_sizes();
1431 }
1432
1433 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1434 G1CollectedHeap* _g1h;
1435
1436 public:
1437 G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1438
1439 bool do_heap_region(G1HeapRegion* r) override {
1440 // Update the remset tracking state from updating to complete
1441 // if remembered sets have been rebuilt.
1442 _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1443 return false;
1444 }
1445 };
1446
1447 void G1ConcurrentMark::cleanup() {
1448 assert_at_safepoint_on_vm_thread();
1449
1450 // If a full collection has happened, we shouldn't do this.
1451 if (has_aborted()) {
1452 return;
1453 }
1454
1455 G1Policy* policy = _g1h->policy();
1456 policy->record_pause_start_time();
1457
1458 double start = os::elapsedTime();
1459
1460 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1461
1462 if (needs_remembered_set_rebuild()) {
1463 // Update the remset tracking information as well as marking all regions
1464 // as fully parsable.
1465 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1466 G1UpdateRegionsAfterRebuild cl(_g1h);
1467 _g1h->heap_region_iterate(&cl);
1468 } else {
1469 log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1470 }
1471
1472 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1473
1474 // Local statistics
1475 _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1476
1477 {
1478 GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1479 policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1480 }
1481
1482 // Advance state.
1483 cm_thread()->set_full_cycle_reset_for_next_cycle();
1484 return;
1485 }
1486
1487 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1488 // Uses the G1CMTask associated with a worker thread (for serial reference
1489 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1490 // trace referent objects.
1491 //
1492 // Using the G1CMTask and embedded local queues avoids having the worker
1493 // threads operating on the global mark stack. This reduces the risk
1494 // of overflowing the stack - which we would rather avoid at this late
1495 // stage. Also, using the tasks' local queues removes the potential
1496 // for the workers to interfere with each other, which could occur if
1497 // they operated on the global stack.
1498
1499 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1500 G1ConcurrentMark* _cm;
1501 G1CMTask* _task;
1502 uint _ref_counter_limit;
1503 uint _ref_counter;
1504 bool _is_serial;
1505 public:
1506 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1507 _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1508 _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1509 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1510 }
1511
1512 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1513 virtual void do_oop( oop* p) { do_oop_work(p); }
1514
1515 template <class T> void do_oop_work(T* p) {
1516 if (_cm->has_overflown()) {
1517 return;
1518 }
1519 if (!_task->deal_with_reference(p)) {
1520 // We did not add anything to the mark bitmap (or mark stack), so there is
1521 // no point trying to drain it.
1522 return;
1523 }
1524 _ref_counter--;
1525
1526 if (_ref_counter == 0) {
1527 // We have dealt with _ref_counter_limit references, pushing them
1528 // and objects reachable from them on to the local stack (and
1529 // possibly the global stack). Call G1CMTask::do_marking_step() to
1530 // process these entries.
1531 //
1532 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1533 // there's nothing more to do (i.e. we're done with the entries that
1534 // were pushed as a result of the G1CMTask::deal_with_reference() calls
1535 // above) or we overflow.
1536 //
1537 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1538 // flag while there may still be some work to do. (See the comment at
1539 // the beginning of G1CMTask::do_marking_step() for those conditions -
1540 // one of which is reaching the specified time target.) It is only
1541 // when G1CMTask::do_marking_step() returns without setting the
1542 // has_aborted() flag that the marking step has completed.
1543 do {
1544 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1545 _task->do_marking_step(mark_step_duration_ms,
1546 false /* do_termination */,
1547 _is_serial);
1548 } while (_task->has_aborted() && !_cm->has_overflown());
1549 _ref_counter = _ref_counter_limit;
1550 }
1551 }
1552 };
1553
1554 // 'Drain' oop closure used by both serial and parallel reference processing.
1555 // Uses the G1CMTask associated with a given worker thread (for serial
1556 // reference processing the G1CMTask for worker 0 is used). Calls the
1557 // do_marking_step routine, with an unbelievably large timeout value,
1558 // to drain the marking data structures of the remaining entries
1559 // added by the 'keep alive' oop closure above.
1560
1561 class G1CMDrainMarkingStackClosure : public VoidClosure {
1562 G1ConcurrentMark* _cm;
1563 G1CMTask* _task;
1564 bool _is_serial;
1565 public:
1566 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1567 _cm(cm), _task(task), _is_serial(is_serial) {
1568 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1569 }
1570
1571 void do_void() {
1572 do {
1573 // We call G1CMTask::do_marking_step() to completely drain the local
1574 // and global marking stacks of entries pushed by the 'keep alive'
1575 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1576 //
1577 // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1578 // if there's nothing more to do (i.e. we've completely drained the
1579 // entries that were pushed as a result of applying the 'keep alive'
1580 // closure to the entries on the discovered ref lists) or we overflow
1581 // the global marking stack.
1582 //
1583 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1584 // flag while there may still be some work to do. (See the comment at
1585 // the beginning of G1CMTask::do_marking_step() for those conditions -
1586 // one of which is reaching the specified time target.) It is only
1587 // when G1CMTask::do_marking_step() returns without setting the
1588 // has_aborted() flag that the marking step has completed.
1589
1590 _task->do_marking_step(1000000000.0 /* something very large */,
1591 true /* do_termination */,
1592 _is_serial);
1593 } while (_task->has_aborted() && !_cm->has_overflown());
1594 }
1595 };
1596
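// Proxy task that plugs G1's concurrent mark closures (is-alive, keep-alive,
// enqueue and drain) into the shared reference processing framework. When
// reference processing runs single-threaded, the closures of task 0 are used
// in serial mode.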
1597 class G1CMRefProcProxyTask : public RefProcProxyTask {
1598 G1CollectedHeap& _g1h;
1599 G1ConcurrentMark& _cm;
1600
1601 public:
1602 G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1603 : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1604 _g1h(g1h),
1605 _cm(cm) {}
1606
1607 void work(uint worker_id) override {
1608 assert(worker_id < _max_workers, "sanity");
1609 G1CMIsAliveClosure is_alive(&_cm);
1610 uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1611 G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1612 BarrierEnqueueDiscoveredFieldClosure enqueue;
1613 G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1614 _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1615 }
1616
1617 void prepare_run_task_hook() override {
1618 // We need to reset the concurrency level before each
1619 // proxy task execution, so that the termination protocol
// and overflow handling in G1CMTask::do_marking_step() know
// how many workers to wait for.
1622 _cm.set_concurrency(_queue_count);
1623 }
1624 };
1625
1626 void G1ConcurrentMark::weak_refs_work() {
1627 ResourceMark rm;
1628
1629 {
1630 GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1631
1632 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1633
1634 // See the comment in G1CollectedHeap::ref_processing_init()
1635 // about how reference processing currently works in G1.
1636
1637 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1638
// Prefer to grow the stack up to the max capacity.
1640 _global_mark_stack.set_should_grow();
1641
1642 // Parallel processing task executor.
1643 G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1644 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1645
1646 // Process the weak references.
1647 const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1648 _gc_tracer_cm->report_gc_reference_stats(stats);
1649 pt.print_all_references();
1650
1651 // The do_oop work routines of the keep_alive and drain_marking_stack
1652 // oop closures will set the has_overflown flag if we overflow the
1653 // global marking stack.
1654
1655 assert(has_overflown() || _global_mark_stack.is_empty(),
1656 "Mark stack should be empty (unless it has overflown)");
1657 }
1658
1659 if (has_overflown()) {
// We cannot trust g1_is_alive and the contents of the heap if the marking stack
1661 // overflowed while processing references. Exit the VM.
1662 fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1663 "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1664 "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1665 _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1666 return;
1667 }
1668
1669 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1670
1671 {
1672 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1673 G1CMIsAliveClosure is_alive(this);
1674 WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1675 }
1676 }
1677
1678 class G1PrecleanYieldClosure : public YieldClosure {
1679 G1ConcurrentMark* _cm;
1680
1681 public:
1682 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1683
1684 virtual bool should_return() {
1685 return _cm->has_aborted();
1686 }
1687
1688 virtual bool should_return_fine_grain() {
1689 _cm->do_yield_check();
1690 return _cm->has_aborted();
1691 }
1692 };
1693
1694 void G1ConcurrentMark::preclean() {
1695 assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1696
1697 SuspendibleThreadSetJoiner joiner;
1698
1699 BarrierEnqueueDiscoveredFieldClosure enqueue;
1700
1701 set_concurrency_and_phase(1, true);
1702
1703 G1PrecleanYieldClosure yield_cl(this);
1704
1705 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1706 // Precleaning is single threaded. Temporarily disable MT discovery.
1707 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1708 rp->preclean_discovered_references(rp->is_alive_non_header(),
1709 &enqueue,
1710 &yield_cl,
1711 _gc_timer_cm);
1712 }
1713
1714 // Closure for marking entries in SATB buffers.
1715 class G1CMSATBBufferClosure : public SATBBufferClosure {
1716 private:
1717 G1CMTask* _task;
1718 G1CollectedHeap* _g1h;
1719
1720 // This is very similar to G1CMTask::deal_with_reference, but with
1721 // more relaxed requirements for the argument, so this must be more
1722 // circumspect about treating the argument as an object.
1723 void do_entry(void* entry) const {
1724 _task->increment_refs_reached();
1725 oop const obj = cast_to_oop(entry);
1726 _task->make_reference_grey(obj);
1727 }
1728
1729 public:
1730 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1731 : _task(task), _g1h(g1h) { }
1732
1733 virtual void do_buffer(void** buffer, size_t size) {
1734 for (size_t i = 0; i < size; ++i) {
1735 do_entry(buffer[i]);
1736 }
1737 }
1738 };
1739
1740 class G1RemarkThreadsClosure : public ThreadClosure {
1741 G1SATBMarkQueueSet& _qset;
1742
1743 public:
1744 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1745 _qset(G1BarrierSet::satb_mark_queue_set()) {}
1746
1747 void do_thread(Thread* thread) {
1748 // Transfer any partial buffer to the qset for completed buffer processing.
1749 _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1750 }
1751 };
1752
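// Parallel task run during remark: each worker first flushes the SATB buffers
// of the threads it visits, then runs do_marking_step() repeatedly until
// marking completes or the global mark stack overflows.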
1753 class G1CMRemarkTask : public WorkerTask {
1754 // For Threads::possibly_parallel_threads_do
1755 ThreadsClaimTokenScope _threads_claim_token_scope;
1756 G1ConcurrentMark* _cm;
1757 public:
1758 void work(uint worker_id) {
1759 G1CMTask* task = _cm->task(worker_id);
1760 task->record_start_time();
1761 {
1762 ResourceMark rm;
1763
1764 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1765 Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1766 }
1767
1768 do {
1769 task->do_marking_step(1000000000.0 /* something very large */,
1770 true /* do_termination */,
1771 false /* is_serial */);
1772 } while (task->has_aborted() && !_cm->has_overflown());
1773 // If we overflow, then we do not want to restart. We instead
1774 // want to abort remark and do concurrent marking again.
1775 task->record_end_time();
1776 }
1777
1778 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1779 WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1780 _cm->terminator()->reset_for_reuse(active_workers);
1781 }
1782 };
1783
1784 void G1ConcurrentMark::finalize_marking() {
1785 ResourceMark rm;
1786
1787 _g1h->ensure_parsability(false);
1788
1789 // this is remark, so we'll use up all active threads
1790 uint active_workers = _g1h->workers()->active_workers();
1791 set_concurrency_and_phase(active_workers, false /* concurrent */);
// Leave _parallel_marking_threads at its
1793 // value originally calculated in the G1ConcurrentMark
1794 // constructor and pass values of the active workers
1795 // through the task.
1796
1797 {
1798 G1CMRemarkTask remarkTask(this, active_workers);
1799 // We will start all available threads, even if we decide that the
1800 // active_workers will be fewer. The extra ones will just bail out
1801 // immediately.
1802 _g1h->workers()->run_task(&remarkTask);
1803 }
1804
1805 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1806 guarantee(has_overflown() ||
1807 satb_mq_set.completed_buffers_num() == 0,
1808 "Invariant: has_overflown = %s, num buffers = %zu",
1809 BOOL_TO_STR(has_overflown()),
1810 satb_mq_set.completed_buffers_num());
1811
1812 print_stats();
1813 }
1814
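// Flush the per-task region mark stats caches into the global statistics and
// log the combined cache hit/miss ratio.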
1815 void G1ConcurrentMark::flush_all_task_caches() {
1816 size_t hits = 0;
1817 size_t misses = 0;
1818 for (uint i = 0; i < _max_num_tasks; i++) {
1819 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1820 hits += stats.first;
1821 misses += stats.second;
1822 }
1823 size_t sum = hits + misses;
1824 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1825 hits, misses, percent_of(hits, sum));
1826 }
1827
1828 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1829 assert_at_safepoint();
1830 _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1831 }
1832
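// Claim the next region to scan for the given worker by atomically advancing
// the global finger over it. Returns null when the finger has reached the end
// of the heap, or when the claimed region has nothing to scan (TAMS at bottom),
// in which case the caller should simply retry.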
1833 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1834 // "Checkpoint" the finger.
1835 HeapWord* local_finger = finger();
1836
1837 while (local_finger < _heap.end()) {
1838 assert(_g1h->is_in_reserved(local_finger), "invariant");
1839
1840 G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1841 // Make sure that the reads below do not float before loading curr_region.
1842 OrderAccess::loadload();
// heap_region_containing_or_null() above may return null because we always
// claim up to the end of the heap. In this case, just jump to the next region.
1845 HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1846
1847 // Is the gap between reading the finger and doing the CAS too long?
1848 HeapWord* res = _finger.compare_exchange(local_finger, end);
1849 if (res == local_finger && curr_region != nullptr) {
1850 // We succeeded.
1851 HeapWord* bottom = curr_region->bottom();
1852 HeapWord* limit = top_at_mark_start(curr_region);
1853
1854 log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
// Notice that _finger == end cannot be guaranteed here since
// someone else might have moved the finger even further.
1857 assert(finger() >= end, "The finger should have moved forward");
1858
1859 if (limit > bottom) {
1860 return curr_region;
1861 } else {
1862 assert(limit == bottom,
1863 "The region limit should be at bottom");
1864 // We return null and the caller should try calling
1865 // claim_region() again.
1866 return nullptr;
1867 }
1868 } else {
1869 // Read the finger again.
1870 HeapWord* next_finger = finger();
1871 assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
1872 local_finger = next_finger;
1873 }
1874 }
1875
1876 return nullptr;
1877 }
1878
1879 #ifndef PRODUCT
1880 class VerifyNoCSetOops {
1881 G1CollectedHeap* _g1h;
1882 const char* _phase;
1883 int _info;
1884
1885 public:
1886 VerifyNoCSetOops(const char* phase, int info = -1) :
1887 _g1h(G1CollectedHeap::heap()),
1888 _phase(phase),
1889 _info(info)
1890 { }
1891
1892 void operator()(G1TaskQueueEntry task_entry) const {
1893 if (task_entry.is_partial_array_state()) {
1894 oop obj = task_entry.to_partial_array_state()->source();
1895 guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1896 return;
1897 }
1898 guarantee(oopDesc::is_oop(task_entry.to_oop()),
1899 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1900 p2i(task_entry.to_oop()), _phase, _info);
1901 G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1902 guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1903 "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1904 p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1905 }
1906 };
1907
1908 void G1ConcurrentMark::verify_no_collection_set_oops() {
1909 assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1910 "should be at a safepoint or initializing");
1911 if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1912 return;
1913 }
1914
1915 // Verify entries on the global mark stack
1916 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1917
1918 // Verify entries on the task queues
1919 for (uint i = 0; i < _max_num_tasks; ++i) {
1920 G1CMTaskQueue* queue = _task_queues->queue(i);
1921 queue->iterate(VerifyNoCSetOops("Queue", i));
1922 }
1923
1924 // Verify the global finger
1925 HeapWord* global_finger = finger();
1926 if (global_finger != nullptr && global_finger < _heap.end()) {
1927 // Since we always iterate over all regions, we might get a null G1HeapRegion
1928 // here.
1929 G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1930 guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1931 "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1932 p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1933 }
1934
1935 // Verify the task fingers
1936 assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1937 for (uint i = 0; i < _num_concurrent_workers; ++i) {
1938 G1CMTask* task = _tasks[i];
1939 HeapWord* task_finger = task->finger();
1940 if (task_finger != nullptr && task_finger < _heap.end()) {
1941 // See above note on the global finger verification.
1942 G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
1943 guarantee(r == nullptr || task_finger == r->bottom() ||
1944 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1945 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1946 p2i(task_finger), HR_FORMAT_PARAMS(r));
1947 }
1948 }
1949 }
1950 #endif // PRODUCT
1951
1952 void G1ConcurrentMark::rebuild_and_scrub() {
1953 if (!needs_remembered_set_rebuild()) {
1954 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1955 }
1956
1957 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1958 }
1959
1960 void G1ConcurrentMark::print_stats() {
1961 if (!log_is_enabled(Debug, gc, stats)) {
1962 return;
1963 }
1964 log_debug(gc, stats)("---------------------------------------------------------------------");
1965 for (size_t i = 0; i < _num_active_tasks; ++i) {
1966 _tasks[i]->print_stats();
1967 log_debug(gc, stats)("---------------------------------------------------------------------");
1968 }
1969 }
1970
1971 bool G1ConcurrentMark::concurrent_cycle_abort() {
1972 assert_at_safepoint_on_vm_thread();
1973 assert(_g1h->collector_state()->is_in_full_gc(), "must be");
1974
1975 // If we start the compaction before the CM threads finish
1976 // scanning the root regions we might trip them over as we'll
1977 // be moving objects / updating references. Since the root region
1978 // scan synchronized with the safepoint, just tell it to abort.
1979 // It will notice when the threads start up again later.
1980 abort_root_region_scan_at_safepoint();
1981
// If we haven't started a concurrent cycle there is no need to do anything; we might
// have aborted the marking because of shutting down though. In this case the marking
// might have already completed the abort (causing in_progress() below to
// return false), however this may still have left marking state, particularly in the
// shared marking bitmap, that must be cleaned up.
1987 // If there are multiple full gcs during shutdown we do this work repeatedly for
1988 // nothing, but this situation should be extremely rare (a full gc after shutdown
1989 // has been signalled is already rare), and this work should be negligible compared
1990 // to actual full gc work.
1991
1992 if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
1993 return false;
1994 }
1995
1996 reset_marking_for_restart();
1997
1998 abort_marking_threads();
1999
2000 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2001 satb_mq_set.abandon_partial_marking();
// This can be called either during or outside marking; we'll read
// the expected_active value from the SATB queue set.
2004 satb_mq_set.set_active_all_threads(false, /* new active value */
2005 satb_mq_set.is_active() /* expected_active */);
2006 return true;
2007 }
2008
2009 void G1ConcurrentMark::abort_marking_threads() {
2010 assert_root_region_scan_completed_or_aborted();
2011 _has_aborted.store_relaxed(true);
2012 _first_overflow_barrier_sync.abort();
2013 _second_overflow_barrier_sync.abort();
2014 }
2015
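// Total CPU time, in seconds, consumed so far by the marking threads, i.e. the
// concurrent mark thread and the concurrent workers visited by threads_do().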
2016 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2017 class CountCpuTimeThreadClosure : public ThreadClosure {
2018 public:
2019 jlong _total_cpu_time;
2020
2021 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2022
2023 void do_thread(Thread* t) {
2024 _total_cpu_time += os::thread_cpu_time(t);
2025 }
2026 } cl;
2027
2028 threads_do(&cl);
2029
2030 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2031 }
2032
2033 static void print_ms_time_info(const char* prefix, const char* name,
2034 NumberSeq& ns) {
2035 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2036 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2037 if (ns.num() > 0) {
2038 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2039 prefix, ns.sd(), ns.maximum());
2040 }
2041 }
2042
2043 void G1ConcurrentMark::print_summary_info() {
2044 Log(gc, marking) log;
2045 if (!log.is_trace()) {
2046 return;
2047 }
2048
2049 log.trace(" Concurrent marking:");
2050 if (!is_fully_initialized()) {
2051 log.trace(" has not been initialized yet");
2052 return;
2053 }
2054 print_ms_time_info(" ", "remarks", _remark_times);
2055 {
2056 print_ms_time_info(" ", "final marks", _remark_mark_times);
2057 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2058
2059 }
2060 print_ms_time_info(" ", "cleanups", _cleanup_times);
2061 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2062 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2063 log.trace(" Total stop_world time = %8.2f s.",
2064 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2065 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2066 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2067 }
2068
2069 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2070 if (is_fully_initialized()) { // they are initialized late
2071 tc->do_thread(_cm_thread);
2072 _concurrent_workers->threads_do(tc);
2073 }
2074 }
2075
2076 void G1ConcurrentMark::print_on(outputStream* st) const {
2077 st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2078 _mark_bitmap.print_on(st, " Bits: ");
2079 }
2080
2081 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2082 ReferenceProcessor* result = g1h->ref_processor_cm();
2083 assert(result != nullptr, "CM reference processor should not be null");
2084 return result;
2085 }
2086
2087 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2088 G1CMTask* task)
2089 : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2090 _g1h(g1h), _task(task)
2091 { }
2092
2093 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2094 assert(hr != nullptr,
2095 "claim_region() should have filtered out null regions");
2096 _curr_region = hr;
2097 _finger = hr->bottom();
2098 update_region_limit();
2099 }
2100
2101 void G1CMTask::update_region_limit() {
2102 G1HeapRegion* hr = _curr_region;
2103 HeapWord* bottom = hr->bottom();
2104 HeapWord* limit = _cm->top_at_mark_start(hr);
2105
2106 if (limit == bottom) {
2107 // The region was collected underneath our feet.
2108 // We set the finger to bottom to ensure that the bitmap
2109 // iteration that will follow this will not do anything.
2110 // (this is not a condition that holds when we set the region up,
2111 // as the region is not supposed to be empty in the first place)
2112 _finger = bottom;
2113 } else if (limit >= _region_limit) {
2114 assert(limit >= _finger, "peace of mind");
2115 } else {
2116 assert(limit < _region_limit, "only way to get here");
2117 // This can happen under some pretty unusual circumstances. An
2118 // evacuation pause empties the region underneath our feet (TAMS
2119 // at bottom). We then do some allocation in the region (TAMS
2120 // stays at bottom), followed by the region being used as a GC
2121 // alloc region (TAMS will move to top() and the objects
2122 // originally below it will be greyed). All objects now marked in
2123 // the region are explicitly greyed, if below the global finger,
// and in fact we do not need to scan anything else. So, we simply
2125 // set _finger to be limit to ensure that the bitmap iteration
2126 // doesn't do anything.
2127 _finger = limit;
2128 }
2129
2130 _region_limit = limit;
2131 }
2132
2133 void G1CMTask::giveup_current_region() {
2134 assert(_curr_region != nullptr, "invariant");
2135 clear_region_fields();
2136 }
2137
2138 void G1CMTask::clear_region_fields() {
2139 // Values for these three fields that indicate that we're not
2140 // holding on to a region.
2141 _curr_region = nullptr;
2142 _finger = nullptr;
2143 _region_limit = nullptr;
2144 }
2145
2146 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2147 if (cm_oop_closure == nullptr) {
2148 assert(_cm_oop_closure != nullptr, "invariant");
2149 } else {
2150 assert(_cm_oop_closure == nullptr, "invariant");
2151 }
2152 _cm_oop_closure = cm_oop_closure;
2153 }
2154
2155 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2156 guarantee(mark_bitmap != nullptr, "invariant");
2157 _mark_bitmap = mark_bitmap;
2158 clear_region_fields();
2159
2160 _calls = 0;
2161 _elapsed_time_ms = 0.0;
2162 _termination_time_ms = 0.0;
2163
2164 _mark_stats_cache.reset();
2165 }
2166
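// Discard any state left over from an aborted marking step (claimed region,
// local queue contents and task queue statistics) so the task can be restarted
// cleanly.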
2167 void G1CMTask::reset_for_restart() {
2168 clear_region_fields();
2169 _task_queue->set_empty();
2170 TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2171 TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2172 }
2173
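// (Re-)construct the partial array splitter in place so that it is bound to
// the concurrent mark's current partial array state manager.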
void G1CMTask::register_partial_array_splitter() {
  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks());
2178 }
2179
2180 void G1CMTask::unregister_partial_array_splitter() {
2181 _partial_array_splitter.~PartialArraySplitter();
2182 }
2183
2184 bool G1CMTask::should_exit_termination() {
2185 if (!regular_clock_call()) {
2186 return true;
2187 }
2188
2189 // This is called when we are in the termination protocol. We should
2190 // quit if, for some reason, this task wants to abort or the global
2191 // stack is not empty (this means that we can get work from it).
2192 return !_cm->mark_stack_empty() || has_aborted();
2193 }
2194
2195 void G1CMTask::reached_limit() {
2196 assert(_words_scanned >= _words_scanned_limit ||
2197 _refs_reached >= _refs_reached_limit ,
2198 "shouldn't have been called otherwise");
2199 abort_marking_if_regular_check_fail();
2200 }
2201
2202 bool G1CMTask::regular_clock_call() {
2203 if (has_aborted()) {
2204 return false;
2205 }
2206
2207 // First, we need to recalculate the words scanned and refs reached
2208 // limits for the next clock call.
2209 recalculate_limits();
2210
2211 // During the regular clock call we do the following
2212
2213 // (1) If an overflow has been flagged, then we abort.
2214 if (_cm->has_overflown()) {
2215 return false;
2216 }
2217
2218 // If we are not concurrent (i.e. we're doing remark) we don't need
2219 // to check anything else. The other steps are only needed during
2220 // the concurrent marking phase.
2221 if (!_cm->concurrent()) {
2222 return true;
2223 }
2224
2225 // (2) If marking has been aborted for Full GC, then we also abort.
2226 if (_cm->has_aborted()) {
2227 return false;
2228 }
2229
// (3) We check whether we should yield. If we have to, then we abort.
2231 if (SuspendibleThreadSet::should_yield()) {
2232 // We should yield. To do this we abort the task. The caller is
2233 // responsible for yielding.
2234 return false;
2235 }
2236
// (4) We check whether we've reached our time quota. If we have,
2238 // then we abort.
2239 double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2240 if (elapsed_time_ms > _time_target_ms) {
2241 _has_timed_out = true;
2242 return false;
2243 }
2244
// (5) Finally, we check whether there are enough completed SATB
// buffers available for processing. If there are, we abort.
2247 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2248 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2249 // we do need to process SATB buffers, we'll abort and restart
2250 // the marking task to do so
2251 return false;
2252 }
2253 return true;
2254 }
2255
2256 void G1CMTask::recalculate_limits() {
2257 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2258 _words_scanned_limit = _real_words_scanned_limit;
2259
2260 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2261 _refs_reached_limit = _real_refs_reached_limit;
2262 }
2263
2264 void G1CMTask::decrease_limits() {
2265 // This is called when we believe that we're going to do an infrequent
2266 // operation which will increase the per byte scanned cost (i.e. move
2267 // entries to/from the global stack). It basically tries to decrease the
2268 // scanning limit so that the clock is called earlier.
2269
2270 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2271 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2272 }
2273
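// Move up to one chunk (G1CMMarkStack::EntriesPerChunk entries) from the local
// task queue to the global mark stack, null-terminating a partially filled
// chunk. Sets the abort flag if the push fails because the global stack is full.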
2274 void G1CMTask::move_entries_to_global_stack() {
2275 // Local array where we'll store the entries that will be popped
2276 // from the local queue.
2277 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2278
2279 size_t n = 0;
2280 G1TaskQueueEntry task_entry;
2281 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2282 buffer[n] = task_entry;
2283 ++n;
2284 }
2285 if (n < G1CMMarkStack::EntriesPerChunk) {
2286 buffer[n] = G1TaskQueueEntry();
2287 }
2288
2289 if (n > 0) {
2290 if (!_cm->mark_stack_push(buffer)) {
2291 set_has_aborted();
2292 }
2293 }
2294
2295 // This operation was quite expensive, so decrease the limits.
2296 decrease_limits();
2297 }
2298
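// Pop one chunk from the global mark stack and push its entries onto the local
// task queue. Returns false if the global stack was empty.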
2299 bool G1CMTask::get_entries_from_global_stack() {
2300 // Local array where we'll store the entries that will be popped
2301 // from the global stack.
2302 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2303
2304 if (!_cm->mark_stack_pop(buffer)) {
2305 return false;
2306 }
2307
2308 // We did actually pop at least one entry.
2309 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2310 G1TaskQueueEntry task_entry = buffer[i];
2311 if (task_entry.is_null()) {
2312 break;
2313 }
2314 assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2315 bool success = _task_queue->push(task_entry);
2316 // We only call this when the local queue is empty or under a
2317 // given target limit. So, we do not expect this push to fail.
2318 assert(success, "invariant");
2319 }
2320
2321 // This operation was quite expensive, so decrease the limits
2322 decrease_limits();
2323 return true;
2324 }
2325
2326 void G1CMTask::drain_local_queue(bool partially) {
2327 if (has_aborted()) {
2328 return;
2329 }
2330
// Decide what the target size is, depending on whether we're going to
2332 // drain it partially (so that other tasks can steal if they run out
2333 // of things to do) or totally (at the very end).
2334 uint target_size;
2335 if (partially) {
2336 target_size = GCDrainStackTargetSize;
2337 } else {
2338 target_size = 0;
2339 }
2340
2341 if (_task_queue->size() > target_size) {
2342 G1TaskQueueEntry entry;
2343 bool ret = _task_queue->pop_local(entry);
2344 while (ret) {
2345 process_entry(entry, false /* stolen */);
2346 if (_task_queue->size() <= target_size || has_aborted()) {
2347 ret = false;
2348 } else {
2349 ret = _task_queue->pop_local(entry);
2350 }
2351 }
2352 }
2353 }
2354
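// Start chunked processing of a large object array: mark the array's klass,
// publish partial array tasks for the remaining chunks and scan the initial
// chunk. Returns the number of words (object header plus initial chunk)
// attributed to this step.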
2355 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2356 assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2357
2358 // Mark klass metadata
2359 process_klass(obj->klass());
2360
2361 size_t array_length = obj->length();
2362 size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2363
2364 process_array_chunk(obj, 0, initial_chunk_size);
2365
2366 // Include object header size
2367 if (obj->is_refArray()) {
2368 return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2369 } else {
2370 FlatArrayKlass* fak = FlatArrayKlass::cast(obj->klass());
2371 return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
2372 }
2373 }
2374
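// Process one claimed chunk of a previously pushed large array and return the
// number of heap words covered by that chunk.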
2375 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2376 PartialArrayState* state = task.to_partial_array_state();
2377 // Access state before release by claim().
2378 objArrayOop obj = oop_cast<objArrayOop>(state->source());
2379
2380 PartialArraySplitter::Claim claim =
2381 _partial_array_splitter.claim(state, _task_queue, stolen);
2382
2383 process_array_chunk(obj, claim._start, claim._end);
2384
2385 if (obj->is_refArray()) {
2386 return heap_word_size((claim._end - claim._start) * heapOopSize);
2387 } else {
2388 assert(obj->is_flatArray(), "Must be!");
2389 size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
2390 size_t nof_elements = claim._end - claim._start;
2391 return heap_word_size(nof_elements * element_byte_size);
2392 }
2393 }
2394
2395 void G1CMTask::drain_global_stack(bool partially) {
2396 if (has_aborted()) {
2397 return;
2398 }
2399
2400 // We have a policy to drain the local queue before we attempt to
2401 // drain the global stack.
2402 assert(partially || _task_queue->size() == 0, "invariant");
2403
// Decide what the target size is, depending on whether we're going to
2405 // drain it partially (so that other tasks can steal if they run out
2406 // of things to do) or totally (at the very end).
// Notice that when draining the global mark stack partially, due to the raciness
2408 // of the mark stack size update we might in fact drop below the target. But,
2409 // this is not a problem.
2410 // In case of total draining, we simply process until the global mark stack is
2411 // totally empty, disregarding the size counter.
2412 if (partially) {
2413 size_t const target_size = _cm->partial_mark_stack_size_target();
2414 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2415 if (get_entries_from_global_stack()) {
2416 drain_local_queue(partially);
2417 }
2418 }
2419 } else {
2420 while (!has_aborted() && get_entries_from_global_stack()) {
2421 drain_local_queue(partially);
2422 }
2423 }
2424 }
2425
2426 // SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
2428 // replicated. We should really get rid of the single-threaded version
2429 // of the code to simplify things.
2430 void G1CMTask::drain_satb_buffers() {
2431 if (has_aborted()) {
2432 return;
2433 }
2434
2435 // We set this so that the regular clock knows that we're in the
2436 // middle of draining buffers and doesn't set the abort flag when it
2437 // notices that SATB buffers are available for draining. It'd be
// very counterproductive if it did that. :-)
2439 _draining_satb_buffers = true;
2440
2441 G1CMSATBBufferClosure satb_cl(this, _g1h);
2442 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2443
2444 // This keeps claiming and applying the closure to completed buffers
2445 // until we run out of buffers or we need to abort.
2446 while (!has_aborted() &&
2447 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2448 abort_marking_if_regular_check_fail();
2449 }
2450
2451 // Can't assert qset is empty here, even if not aborted. If concurrent,
2452 // some other thread might be adding to the queue. If not concurrent,
2453 // some other thread might have won the race for the last buffer, but
2454 // has not yet decremented the count.
2455
2456 _draining_satb_buffers = false;
2457
// Again, this was a potentially expensive operation, so decrease the
// limits to get the regular clock call early.
2460 decrease_limits();
2461 }
2462
2463 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2464 _mark_stats_cache.reset(region_idx);
2465 }
2466
2467 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2468 return _mark_stats_cache.evict_all();
2469 }
2470
2471 void G1CMTask::print_stats() {
2472 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2473 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2474 _elapsed_time_ms, _termination_time_ms);
2475 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2476 _step_times_ms.num(),
2477 _step_times_ms.avg(),
2478 _step_times_ms.sd(),
2479 _step_times_ms.maximum(),
2480 _step_times_ms.sum());
2481 size_t const hits = _mark_stats_cache.hits();
2482 size_t const misses = _mark_stats_cache.misses();
2483 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2484 hits, misses, percent_of(hits, hits + misses));
2485 }
2486
2487 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2488 return _task_queues->steal(worker_id, task_entry);
2489 }
2490
2491 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2492 if (has_aborted() || _curr_region == nullptr) {
2493 return;
2494 }
2495
2496 // This means that we're already holding on to a region.
2497 assert(_finger != nullptr, "if region is not null, then the finger "
2498 "should not be null either");
2499
2500 // We might have restarted this task after an evacuation pause
2501 // which might have evacuated the region we're holding on to
2502 // underneath our feet. Let's read its limit again to make sure
2503 // that we do not iterate over a region of the heap that
2504 // contains garbage (update_region_limit() will also move
2505 // _finger to the start of the region if it is found empty).
2506 update_region_limit();
2507 // We will start from _finger not from the start of the region,
2508 // as we might be restarting this task after aborting half-way
2509 // through scanning this region. In this case, _finger points to
2510 // the address where we last found a marked object. If this is a
2511 // fresh region, _finger points to start().
2512 MemRegion mr = MemRegion(_finger, _region_limit);
2513
2514 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2515 "humongous regions should go around loop once only");
2516
2517 // Some special cases:
2518 // If the memory region is empty, we can just give up the region.
2519 // If the current region is humongous then we only need to check
2520 // the bitmap for the bit associated with the start of the object,
2521 // scan the object if it's live, and give up the region.
2522 // Otherwise, let's iterate over the bitmap of the part of the region
2523 // that is left.
2524 // If the iteration is successful, give up the region.
2525 if (mr.is_empty()) {
2526 giveup_current_region();
2527 abort_marking_if_regular_check_fail();
2528 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2529 if (_mark_bitmap->is_marked(mr.start())) {
2530 // The object is marked - apply the closure
2531 bitmap_closure.do_addr(mr.start());
2532 }
2533 // Even if this task aborted while scanning the humongous object
2534 // we can (and should) give up the current region.
2535 giveup_current_region();
2536 abort_marking_if_regular_check_fail();
2537 } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2538 giveup_current_region();
2539 abort_marking_if_regular_check_fail();
2540 } else {
2541 assert(has_aborted(), "currently the only way to do so");
2542 // The only way to abort the bitmap iteration is to return
2543 // false from the do_bit() method. However, inside the
2544 // do_bit() method we move the _finger to point to the
2545 // object currently being looked at. So, if we bail out, we
2546 // have definitely set _finger to something non-null.
2547 assert(_finger != nullptr, "invariant");
2548
2549 // Region iteration was actually aborted. So now _finger
2550 // points to the address of the object we last scanned. If we
2551 // leave it there, when we restart this task, we will rescan
2552 // the object. It is easy to avoid this. We move the finger by
2553 // enough to point to the next possible object header.
2554 assert(_finger < _region_limit, "invariant");
2555 HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2556 if (new_finger >= _region_limit) {
2557 giveup_current_region();
2558 } else {
2559 move_finger_to(new_finger);
2560 }
2561 }
2562 }
2563
2564 void G1CMTask::claim_new_region() {
2565 // Read the note on the claim_region() method on why it might
2566 // return null with potentially more regions available for
2567 // claiming and why we have to check out_of_regions() to determine
2568 // whether we're done or not.
2569 while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2570 // We are going to try to claim a new region. We should have
2571 // given up on the previous one.
2572 // Separated the asserts so that we know which one fires.
2573 assert(_curr_region == nullptr, "invariant");
2574 assert(_finger == nullptr, "invariant");
2575 assert(_region_limit == nullptr, "invariant");
2576 G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2577 if (claimed_region != nullptr) {
2578 // Yes, we managed to claim one
2579 setup_for_region(claimed_region);
2580 assert(_curr_region == claimed_region, "invariant");
2581 }
2582 // It is important to call the regular clock here. It might take
2583 // a while to claim a region if, for example, we hit a large
2584 // block of empty regions. So we need to call the regular clock
2585 // method once round the loop to make sure it's called
2586 // frequently enough.
2587 abort_marking_if_regular_check_fail();
2588 }
2589 }
2590
2591 void G1CMTask::attempt_stealing() {
2592 // We cannot check whether the global stack is empty, since other
2593 // tasks might be pushing objects to it concurrently.
2594 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2595 "only way to reach here");
2596 while (!has_aborted()) {
2597 G1TaskQueueEntry entry;
2598 if (_cm->try_stealing(_worker_id, entry)) {
2599 process_entry(entry, true /* stolen */);
2600
2601 // And since we're towards the end, let's totally drain the
2602 // local queue and global stack.
2603 drain_local_queue(false);
2604 drain_global_stack(false);
2605 } else {
2606 break;
2607 }
2608 }
2609 }
2610
2611 void G1CMTask::attempt_termination(bool is_serial) {
2612 // We cannot check whether the global stack is empty, since other
2613 // tasks might be concurrently pushing objects on it.
2614 // Separated the asserts so that we know which one fires.
2615 assert(_cm->out_of_regions(), "only way to reach here");
2616 assert(_task_queue->size() == 0, "only way to reach here");
2617 double termination_start_time_ms = os::elapsedTime() * 1000.0;
2618
2619 // The G1CMTask class also extends the TerminatorTerminator class,
2620 // hence its should_exit_termination() method will also decide
2621 // whether to exit the termination protocol or not.
2622 bool finished = (is_serial ||
2623 _cm->terminator()->offer_termination(this));
2624 _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2625
2626 if (finished) {
2627 // We're all done.
2628
2629 // We can now guarantee that the global stack is empty, since
2630 // all other tasks have finished. We separated the guarantees so
2631 // that, if a condition is false, we can immediately find out
2632 // which one.
2633 guarantee(_cm->out_of_regions(), "only way to reach here");
2634 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2635 guarantee(_task_queue->size() == 0, "only way to reach here");
2636 guarantee(!_cm->has_overflown(), "only way to reach here");
2637 guarantee(!has_aborted(), "should never happen if termination has completed");
2638 } else {
2639 // Apparently there's more work to do. Let's abort this task. We
2640 // will restart it and hopefully we can find more things to do.
2641 set_has_aborted();
2642 }
2643 }
2644
2645 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2646 if (_has_timed_out) {
2647 double diff_ms = elapsed_time_ms - _time_target_ms;
2648 // Keep statistics of how well we did with respect to hitting
2649 // our target only if we actually timed out (if we aborted for
2650 // other reasons, then the results might get skewed).
2651 _marking_step_diff_ms.add(diff_ms);
2652 }
2653
2654 if (!_cm->has_overflown()) {
2655 return;
2656 }
2657
2658 // This is the interesting one. We aborted because a global
2659 // overflow was raised. This means we have to restart the
2660 // marking phase and start iterating over regions. However, in
2661 // order to do this we have to make sure that all tasks stop
2662 // what they are doing and re-initialize in a safe manner. We
2663 // will achieve this with the use of two barrier sync points.
2664 if (!is_serial) {
2665 // We only need to enter the sync barrier if being called
2666 // from a parallel context
2667 _cm->enter_first_sync_barrier(_worker_id);
2668
2669 // When we exit this sync barrier we know that all tasks have
2670 // stopped doing marking work. So, it's now safe to
2671 // re-initialize our data structures.
2672 }
2673
2674 clear_region_fields();
2675 flush_mark_stats_cache();
2676
2677 if (!is_serial) {
2678 // If we're executing the concurrent phase of marking, reset the marking
2679 // state; otherwise the marking state is reset after reference processing,
2680 // during the remark pause.
2681 // If we reset here as a result of an overflow during the remark we will
2682 // see assertion failures from any subsequent set_concurrency_and_phase()
2683 // calls.
2684 if (_cm->concurrent() && _worker_id == 0) {
2685 // Worker 0 is responsible for clearing the global data structures because
2686 // of an overflow. During STW we should not clear the overflow flag (in
// G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
// this method to abort the pause and restart concurrent marking.
2689 _cm->reset_marking_for_restart();
2690
2691 log_info(gc, marking)("Concurrent Mark reset for overflow");
2692 }
2693
2694 // ...and enter the second barrier.
2695 _cm->enter_second_sync_barrier(_worker_id);
2696 }
2697 }
2698
2699 /*****************************************************************************
2700
2701 The do_marking_step(time_target_ms, ...) method is the building
2702 block of the parallel marking framework. It can be called in parallel
2703 with other invocations of do_marking_step() on different tasks
2704 (but only one per task, obviously) and concurrently with the
2705 mutator threads, or during remark, hence it eliminates the need
2706 for two versions of the code. When called during remark, it will
2707 pick up from where the task left off during the concurrent marking
phase. Interestingly, tasks are also claimable during evacuation
pauses, since do_marking_step() ensures that it aborts before
2710 it needs to yield.
2711
2712 The data structures that it uses to do marking work are the
2713 following:
2714
2715 (1) Marking Bitmap. If there are grey objects that appear only
2716 on the bitmap (this happens either when dealing with an overflow
2717 or when the concurrent start pause has simply marked the roots
2718 and didn't push them on the stack), then tasks claim heap
2719 regions whose bitmap they then scan to find grey objects. A
2720 global finger indicates where the end of the last claimed region
2721 is. A local finger indicates how far into the region a task has
2722 scanned. The two fingers are used to determine how to grey an
2723 object (i.e. whether simply marking it is OK, as it will be
2724 visited by a task in the future, or whether it needs to be also
2725 pushed on a stack).
2726
2727 (2) Local Queue. The local queue of the task which is accessed
2728 reasonably efficiently by the task. Other tasks can steal from
2729 it when they run out of work. Throughout the marking phase, a
2730 task attempts to keep its local queue short but not totally
2731 empty, so that entries are available for stealing by other
2732 tasks. Only when there is no more work, a task will totally
2733 drain its local queue.
2734
2735 (3) Global Mark Stack. This handles local queue overflow. During
2736 marking only sets of entries are moved between it and the local
2737 queues, as access to it requires a mutex and more fine-grain
2738 interaction with it which might cause contention. If it
2739 overflows, then the marking phase should restart and iterate
2740 over the bitmap to identify grey objects. Throughout the marking
2741 phase, tasks attempt to keep the global mark stack at a small
2742 length but not totally empty, so that entries are available for
2743 popping by other tasks. Only when there is no more work, tasks
2744 will totally drain the global mark stack.
2745
2746 (4) SATB Buffer Queue. This is where completed SATB buffers are
2747 made available. Buffers are regularly removed from this queue
2748 and scanned for roots, so that the queue doesn't get too
2749 long. During remark, all completed buffers are processed, as
2750 well as the filled in parts of any uncompleted buffers.
2751
2752 The do_marking_step() method tries to abort when the time target
2753 has been reached. There are a few other cases when the
2754 do_marking_step() method also aborts:
2755
2756 (1) When the marking phase has been aborted (after a Full GC).
2757
2758 (2) When a global overflow (on the global stack) has been
2759 triggered. Before the task aborts, it will actually sync up with
2760 the other tasks to ensure that all the marking data structures
2761 (local queues, stacks, fingers etc.) are re-initialized so that
2762 when do_marking_step() completes, the marking phase can
2763 immediately restart.
2764
2765 (3) When enough completed SATB buffers are available. The
2766 do_marking_step() method only tries to drain SATB buffers right
2767 at the beginning. So, if enough buffers are available, the
2768 marking step aborts and the SATB buffers are processed at
2769 the beginning of the next invocation.
2770
(4) To yield. When we have to yield we abort and do the yield
right at the end of do_marking_step(). This saves us from a lot
of hassle as, by yielding, we might allow a Full GC. If this
2774 happens then objects will be compacted underneath our feet, the
2775 heap might shrink, etc. We save checking for this by just
2776 aborting and doing the yield right at the end.
2777
2778 From the above it follows that the do_marking_step() method should
2779 be called in a loop (or, otherwise, regularly) until it completes.
2780
2781 If a marking step completes without its has_aborted() flag being
2782 true, it means it has completed the current marking phase (and
2783 also all other marking tasks have done so and have all synced up).
2784
2785 A method called regular_clock_call() is invoked "regularly" (in
2786 sub ms intervals) throughout marking. It is this clock method that
2787 checks all the abort conditions which were mentioned above and
2788 decides when the task should abort. A work-based scheme is used to
trigger this clock method: when the number of object words the
marking phase has scanned or the number of references the marking
phase has visited reaches a given limit. Additional invocations of
the clock method have been planted in a few other strategic places
too. The initial reason for the clock method was to avoid gathering
CPU time too regularly, as it is quite expensive. So,
2795 once it was in place, it was natural to piggy-back all the other
2796 conditions on it too and not constantly check them throughout the code.
2797
2798 If do_termination is true then do_marking_step will enter its
2799 termination protocol.
2800
2801 The value of is_serial must be true when do_marking_step is being
2802 called serially (i.e. by the VMThread) and do_marking_step should
2803 skip any synchronization in the termination and overflow code.
2804 Examples include the serial remark code and the serial reference
2805 processing closures.
2806
2807 The value of is_serial must be false when do_marking_step is
2808 being called by any of the worker threads.
2809 Examples include the concurrent marking code (CMMarkingTask),
2810 the MT remark code, and the MT reference processing closures.
2811
2812 *****************************************************************************/
2813
2814 void G1CMTask::do_marking_step(double time_target_ms,
2815 bool do_termination,
2816 bool is_serial) {
2817 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2818
2819 _start_cpu_time_ns = os::current_thread_cpu_time();
2820
2821 // If do_stealing is true then do_marking_step will attempt to
2822 // steal work from the other G1CMTasks. It only makes sense to
2823 // enable stealing when the termination protocol is enabled
2824 // and do_marking_step() is not being called serially.
2825 bool do_stealing = do_termination && !is_serial;
2826
2827 G1Predictions const& predictor = _g1h->policy()->predictor();
2828 double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2829 _time_target_ms = time_target_ms - diff_prediction_ms;
2830
2831 // set up the variables that are used in the work-based scheme to
2832 // call the regular clock method
2833 _words_scanned = 0;
2834 _refs_reached = 0;
2835 recalculate_limits();
2836
2837 // clear all flags
2838 clear_has_aborted();
2839 _has_timed_out = false;
2840 _draining_satb_buffers = false;
2841
2842 ++_calls;
2843
2844 // Set up the bitmap and oop closures. Anything that uses them is
2845 // eventually called from this method, so it is OK to allocate these
2846 // statically.
2847 G1CMBitMapClosure bitmap_closure(this, _cm);
2848 G1CMOopClosure cm_oop_closure(_g1h, this);
2849 set_cm_oop_closure(&cm_oop_closure);
2850
2851 if (_cm->has_overflown()) {
2852 // This can happen if the mark stack overflows during a GC pause
2853 // and this task, after a yield point, restarts. We have to abort
2854 // as we need to get into the overflow protocol which happens
2855 // right at the end of this task.
2856 set_has_aborted();
2857 }
2858
2859 // First drain any available SATB buffers. After this, we will not
2860 // look at SATB buffers before the next invocation of this method.
2861 // If enough completed SATB buffers are queued up, the regular clock
2862 // will abort this task so that it restarts.
2863 drain_satb_buffers();
2864 // ...then partially drain the local queue and the global stack
2865 drain_local_queue(true);
2866 drain_global_stack(true);
2867
2868 do {
2869 process_current_region(bitmap_closure);
2870 // At this point we have either completed iterating over the
2871 // region we were holding on to, or we have aborted.
2872
2873 // We then partially drain the local queue and the global stack.
2874 drain_local_queue(true);
2875 drain_global_stack(true);
2876
2877 claim_new_region();
2878
2879 assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2880 "at this point we should be out of regions");
2881 } while ( _curr_region != nullptr && !has_aborted());
2882
2883 // We cannot check whether the global stack is empty, since other
2884 // tasks might be pushing objects to it concurrently.
2885 assert(has_aborted() || _cm->out_of_regions(),
2886 "at this point we should be out of regions");
2887 // Try to reduce the number of available SATB buffers so that
2888 // remark has less work to do.
2889 drain_satb_buffers();
2890
2891 // Since we've done everything else, we can now totally drain the
2892 // local queue and global stack.
2893 drain_local_queue(false);
2894 drain_global_stack(false);
2895
2896 // Attempt at work stealing from other task's queues.
2897 if (do_stealing && !has_aborted()) {
2898 // We have not aborted. This means that we have finished all that
2899 // we could. Let's try to do some stealing...
2900 attempt_stealing();
2901 }
2902
2903 // We still haven't aborted. Now, let's try to get into the
2904 // termination protocol.
2905 if (do_termination && !has_aborted()) {
2906 attempt_termination(is_serial);
2907 }
2908
2909 // Mainly for debugging purposes to make sure that a pointer to the
2910 // closure which was statically allocated in this frame doesn't
2911 // escape it by accident.
2912 set_cm_oop_closure(nullptr);
2913 jlong end_cpu_time_ns = os::current_thread_cpu_time();
2914 double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2915 // Update the step history.
2916 _step_times_ms.add(elapsed_time_ms);
2917
2918 if (has_aborted()) {
2919 // The task was aborted for some reason.
2920 handle_abort(is_serial, elapsed_time_ms);
2921 }
2922 }
2923
2924 G1CMTask::G1CMTask(uint worker_id,
2925 G1ConcurrentMark* cm,
2926 G1CMTaskQueue* task_queue,
2927 G1RegionMarkStats* mark_stats) :
2928 _worker_id(worker_id),
2929 _g1h(G1CollectedHeap::heap()),
2930 _cm(cm),
2931 _mark_bitmap(nullptr),
2932 _task_queue(task_queue),
2933 _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
2934 _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
2935 _calls(0),
2936 _time_target_ms(0.0),
2937 _start_cpu_time_ns(0),
2938 _cm_oop_closure(nullptr),
2939 _curr_region(nullptr),
2940 _finger(nullptr),
2941 _region_limit(nullptr),
2942 _words_scanned(0),
2943 _words_scanned_limit(0),
2944 _real_words_scanned_limit(0),
2945 _refs_reached(0),
2946 _refs_reached_limit(0),
2947 _real_refs_reached_limit(0),
2948 _has_aborted(false),
2949 _has_timed_out(false),
2950 _draining_satb_buffers(false),
2951 _step_times_ms(),
2952 _elapsed_time_ms(0.0),
2953 _termination_time_ms(0.0),
2954 _marking_step_diff_ms()
2955 {
2956 guarantee(task_queue != nullptr, "invariant");
2957
2958 _marking_step_diff_ms.add(0.5);
2959 }

// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_STATE_FORMAT " %-5s"
#define G1PPRL_STATE_H_FORMAT " %5s"
#define G1PPRL_BYTE_FORMAT " %9zu"
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT "%14.1f"
#define G1PPRL_GCEFF_H_FORMAT " %14s"
#define G1PPRL_GID_H_FORMAT " %9s"
#define G1PPRL_GID_FORMAT " " UINT32_FORMAT_W(9)
#define G1PPRL_LEN_FORMAT " " UINT32_FORMAT_W(14)
#define G1PPRL_LEN_H_FORMAT " %14s"
#define G1PPRL_GID_GCEFF_FORMAT " %14.1f"
#define G1PPRL_GID_LIVENESS_FORMAT " %9.2f"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": %zu"
#define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"

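// Prints per-region liveness information at Trace level for gc+liveness:
// one line per region with its type, address range, used bytes, live bytes,
// remembered set state, code roots size and collection set group id,
// followed by a heap-wide summary emitted from the destructor.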
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}

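// Called for every heap region during iteration: accumulates the totals for
// the summary and prints one line per region. Always returns false so that
// the iteration visits all regions.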
bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return false;
  }

  const char* type = r->get_type_str();
  HeapWord* bottom = r->bottom();
  HeapWord* end = r->end();
  size_t capacity_bytes = r->capacity();
  size_t used_bytes = r->used();
  size_t live_bytes = r->live_bytes();
  size_t remset_bytes = r->rem_set()->mem_size();
  size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
  const char* remset_type = r->rem_set()->get_short_state_str();
  uint cset_group_id = r->rem_set()->has_cset_group()
                         ? r->rem_set()->cset_group_id()
                         : G1CSetCandidateGroup::NoRemSetId;

  _total_used_bytes += used_bytes;
  _total_capacity_bytes += capacity_bytes;
  _total_live_bytes += live_bytes;
  _total_remset_bytes += remset_bytes;
  _total_code_roots_bytes += code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_FORMAT
                          G1PPRL_ADDR_BASE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_STATE_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_GID_FORMAT,
                          type, p2i(bottom), p2i(end),
                          used_bytes, live_bytes,
                          remset_type, code_roots_bytes,
                          cset_group_id);

  return false;
}

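// The destructor emits the heap-wide summary, adding the card set free list
// pool and the static remembered set overhead that are not attributed to any
// particular region.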
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // Add static memory usage to the remembered set sizes.
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          percent_of(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_live_bytes),
                          percent_of(_total_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_code_roots_bytes));
}

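// Logs a single line for the given collection set candidate group and adds
// the memory used by its card set to the remembered set total.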
void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_GID_FORMAT
                          G1PPRL_LEN_FORMAT
                          G1PPRL_GID_GCEFF_FORMAT
                          G1PPRL_GID_LIVENESS_FORMAT
                          G1PPRL_BYTE_FORMAT
                          G1PPRL_TYPE_H_FORMAT,
                          group->group_id(),
                          group->length(),
                          group->length() > 0 ? group->gc_efficiency() : 0.0,
                          group->length() > 0 ? group->liveness_percent() : 0.0,
                          group->card_set()->mem_size(),
                          type);
  _total_remset_bytes += group->card_set()->mem_size();
}

void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
  for (G1CSetCandidateGroup* group : gl) {
    log_cset_candidate_group_add_total(group, type);
  }
}

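// Logs the table of collection set candidate groups: the group of young
// regions first, followed by the groups selected from marking and the
// retained groups.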
void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_GID_H_FORMAT
                          G1PPRL_LEN_H_FORMAT
                          G1PPRL_GCEFF_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_TYPE_H_FORMAT,
                          "group-id", "num-regions",
                          "gc-eff", "liveness",
                          "remset", "type");

  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_GID_H_FORMAT
                          G1PPRL_LEN_H_FORMAT
                          G1PPRL_GCEFF_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_TYPE_H_FORMAT,
                          "", "",
                          "(bytes/ms)", "%",
                          "(bytes)", "");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");

  G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
  log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
  log_cset_candidate_grouplist(candidates->retained_groups(), "R");
}