1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectorState.inline.hpp"
34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
38 #include "gc/g1/g1ConcurrentRefine.hpp"
39 #include "gc/g1/g1HeapRegion.inline.hpp"
40 #include "gc/g1/g1HeapRegionManager.hpp"
41 #include "gc/g1/g1HeapRegionPrinter.hpp"
42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
44 #include "gc/g1/g1HeapVerifier.hpp"
45 #include "gc/g1/g1OopClosures.inline.hpp"
46 #include "gc/g1/g1Policy.hpp"
47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
48 #include "gc/g1/g1ThreadLocalData.hpp"
49 #include "gc/g1/g1Trace.hpp"
50 #include "gc/shared/gcId.hpp"
51 #include "gc/shared/gcTimer.hpp"
52 #include "gc/shared/gcTraceTime.inline.hpp"
53 #include "gc/shared/gcVMOperations.hpp"
54 #include "gc/shared/partialArraySplitter.inline.hpp"
55 #include "gc/shared/partialArrayState.hpp"
56 #include "gc/shared/partialArrayTaskStats.hpp"
57 #include "gc/shared/referencePolicy.hpp"
58 #include "gc/shared/suspendibleThreadSet.hpp"
59 #include "gc/shared/taskqueue.inline.hpp"
60 #include "gc/shared/taskTerminator.hpp"
61 #include "gc/shared/weakProcessor.inline.hpp"
62 #include "gc/shared/workerPolicy.hpp"
63 #include "jvm.h"
64 #include "logging/log.hpp"
65 #include "memory/allocation.hpp"
66 #include "memory/iterator.hpp"
67 #include "memory/metaspaceUtils.hpp"
68 #include "memory/resourceArea.hpp"
69 #include "memory/universe.hpp"
70 #include "nmt/memTracker.hpp"
71 #include "oops/access.inline.hpp"
72 #include "oops/oop.inline.hpp"
73 #include "runtime/globals_extension.hpp"
74 #include "runtime/handles.inline.hpp"
75 #include "runtime/java.hpp"
76 #include "runtime/orderAccess.hpp"
77 #include "runtime/os.hpp"
78 #include "runtime/prefetch.inline.hpp"
79 #include "runtime/threads.hpp"
80 #include "utilities/align.hpp"
81 #include "utilities/checkedCast.hpp"
82 #include "utilities/formatBuffer.hpp"
83 #include "utilities/growableArray.hpp"
84 #include "utilities/powerOfTwo.hpp"
85
// Default-constructed closure has no G1ConcurrentMark yet; it must be set up
// via initialize() before use.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }

G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}

// Late initialization for the default-constructed case. May be called only
// once; asserts guard against double initialization.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
97
// Process one marked address found while iterating the mark bitmap: push the
// object at addr through the owning task, then partially drain the task's
// local queue and the global stack to bound memory use. Returns false to
// abort the bitmap iteration when the task has aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  // The task's local finger and the global finger bracket the addresses this
  // task may currently process.
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
114
// The mark stack starts out empty; backing memory is set up later in
// initialize().
G1CMMarkStack::G1CMMarkStack() :
  _chunk_allocator() {
  set_empty();
}
119
// Alignment (in number of G1TaskQueueEntry elements) that mark stack
// capacities must honor so whole chunks map onto whole allocation granules.
size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}
123
// Set up the global mark stack from the MarkStackSize/MarkStackSizeMax flags.
// Flag values are in G1TaskQueueEntry elements; internally the stack is
// managed in chunks. The effective (rounded) values are written back to the
// flags ergonomically.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  // Number of entries per chunk.
  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  // The chunk allocator requires a power-of-2 initial chunk count; the maximum
  // must be at least as large as the rounded-up initial count.
  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Clamp the (entry-denominated) capacities to an int-sized range.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  // NOTE(review): the first value is a chunk count, the second ("maximum") is
  // in entries — confirm this mixed-unit message is intended.
  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
152
// Allocate a chunk from backing memory by atomically bumping the global _size
// index. The fast path is lock-free; if the target bucket is not yet
// committed, expansion happens under G1MarkStackChunkList_lock with a
// double-checked pattern. Returns null when at capacity or expansion is
// disallowed/failed.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  // Cheap early-out so we do not bump _size far past the limit.
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  size_t cur_idx = _size.fetch_then_add(1u);

  // Re-check: another thread may have claimed the last slot in between.
  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    // Double-checked locking: only one thread commits the missing bucket.
    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  size_t bucket_idx = get_bucket_index(cur_idx);
  // Placement-new the chunk into its reserved slot within the bucket.
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
185
// All-zero allocator; real capacities are established in initialize().
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }
195
196 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
197 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
198
199 _min_capacity = initial_capacity;
200 _max_capacity = max_capacity;
201 _num_buckets = get_bucket(_max_capacity) + 1;
202
203 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
204
205 for (size_t i = 0; i < _num_buckets; i++) {
206 _buckets[i].store_relaxed(nullptr);
207 }
208
209 size_t new_capacity = bucket_size(0);
210
211 if (!reserve(new_capacity)) {
212 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
213 return false;
214 }
215 return true;
216 }
217
218 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
219 if (_capacity == _max_capacity) {
220 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
221 return false;
222 }
223
224 size_t old_capacity = _capacity;
225 desired_capacity = MIN2(desired_capacity, _max_capacity);
226
227 if (reserve(desired_capacity)) {
228 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
229 old_capacity, desired_capacity);
230 return true;
231 }
232 return false;
233 }
234
235 bool G1CMMarkStack::ChunkAllocator::try_expand() {
236 size_t new_capacity = _capacity * 2;
237 return try_expand_to(new_capacity);
238 }
239
240 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
241 if (_buckets == nullptr) {
242 return;
243 }
244
245 for (size_t i = 0; i < _num_buckets; i++) {
246 if (_buckets[i].load_relaxed() != nullptr) {
247 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
248 _buckets[i].store_relaxed(nullptr);
249 }
250 }
251
252 FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
253 }
254
// Commit backing memory so the allocator can hold new_capacity chunks in
// total, allocating every missing bucket between the current and the target
// capacity. Returns false if any OS allocation fails (earlier buckets stay
// committed and _capacity reflects them).
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    _capacity += bucket_capacity;
    // Publish with release semantics to pair with the acquire loads of
    // lock-free readers in allocate_new_chunk().
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
289
// Try to double the backing capacity of the global mark stack (best effort).
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}
293
// Push elem onto the given singly-linked list. Callers must hold the lock
// protecting that list; the relaxed accesses rely on it.
void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
  elem->next = list->load_relaxed();
  list->store_relaxed(elem);
}
298
// Add a filled chunk to the list of chunks holding live mark stack entries,
// keeping the chunk count in sync under the list lock.
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}
304
// Return an emptied chunk to the free list for later reuse.
void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}
309
310 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
311 TaskQueueEntryChunk* result = list->load_relaxed();
312 if (result != nullptr) {
313 list->store_relaxed(list->load_relaxed()->next);
314 }
315 return result;
316 }
317
318 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
319 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
320 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
321 if (result != nullptr) {
322 _chunks_in_chunk_list--;
323 }
324 return result;
325 }
326
// Take a reusable chunk from the free list, or null if none is available.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
331
332 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
333 // Get a new chunk.
334 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
335
336 if (new_chunk == nullptr) {
337 // Did not get a chunk from the free list. Allocate from backing memory.
338 new_chunk = _chunk_allocator.allocate_new_chunk();
339
340 if (new_chunk == nullptr) {
341 return false;
342 }
343 }
344
345 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
346
347 add_chunk_to_chunk_list(new_chunk);
348
349 return true;
350 }
351
352 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
353 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
354
355 if (cur == nullptr) {
356 return false;
357 }
358
359 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
360
361 add_chunk_to_free_list(cur);
362 return true;
363 }
364
// Drop all content of the mark stack. Chunk memory stays committed; only the
// lists and the allocator's bump index are reset.
void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _chunk_list.store_relaxed(nullptr);
  _free_list.store_relaxed(nullptr);
  _chunk_allocator.reset();
}
371
// Fixed-size array of root MemRegions, filled during the concurrent start
// pause and claimed by scanning threads afterwards.
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
  _root_regions(MemRegion::create_array(max_regions, mtGC)),
  _max_regions(max_regions),
  _num_regions(0),
  _num_claimed_regions(0) { }

G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}
381
// Forget all recorded root regions; only valid at the concurrent start
// safepoint.
void G1CMRootMemRegions::reset() {
  assert_at_safepoint();
  assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");

  _num_regions.store_relaxed(0);
  _num_claimed_regions.store_relaxed(0);
}
389
// Record [start, end) as a root region to scan. Multiple workers may call
// this concurrently; slots are claimed with an atomic bump of _num_regions.
// The capacity bound is checked only in debug builds.
void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
  assert_at_safepoint();
  uint idx = _num_regions.fetch_then_add(1u);
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
  _root_regions[idx].set_start(start);
  _root_regions[idx].set_end(end);
}
399
// Claim the next unclaimed root region, or return null when all regions have
// been handed out. Safe to call from multiple threads.
const MemRegion* G1CMRootMemRegions::claim_next() {
  uint local_num_regions = num_regions();
  // Quick exit without bumping the claim counter.
  if (num_claimed_regions() >= local_num_regions) {
    return nullptr;
  }

  uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
  // The claim counter may overshoot when several threads race past the check
  // above; only indices within bounds yield a region.
  if (claimed_index < local_num_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}
412
// True when every recorded root region has been claimed for scanning.
bool G1CMRootMemRegions::work_completed() const {
  return num_remaining_regions() == 0;
}
416
417 uint G1CMRootMemRegions::num_remaining_regions() const {
418 uint total = num_regions();
419 uint claimed = num_claimed_regions();
420 return (total > claimed) ? total - claimed : 0;
421 }
422
423 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
424 uint local_num_root_regions = num_regions();
425 for (uint i = 0; i < local_num_root_regions; i++) {
426 if (_root_regions[i].equals(mr)) {
427 return true;
428 }
429 }
430 return false;
431 }
432
// Construct the concurrent mark subsystem in its lightweight state. The
// heavy-weight parts (marking thread, worker threads, task objects, mark
// stack memory) are created later in fully_initialize(). Note that the
// per-region stat arrays are only raw C-heap allocations here; their elements
// are placement-constructed in fully_initialize().
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),
  _root_region_scan_aborted(false),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
488
// Create the heavy-weight parts of the subsystem: the concurrent mark thread,
// the concurrent worker threads, the global mark stack memory, and the
// per-worker G1CMTask objects. Idempotent: returns immediately if already
// fully initialized.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Placement-construct the elements of the raw arrays allocated in the
  // constructor; value-initialization zeroes them.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
  ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};

  reset_at_marking_complete();
}
532
533 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
534 return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
535 }
536
537 bool G1ConcurrentMark::is_in_marking() const {
538 return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
539 }
540
// Phase query; requires fully_initialize() to have run (cm_thread() asserts).
bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
  return cm_thread()->is_in_rebuild_or_scrub();
}

// Phase query; requires fully_initialize() to have run (cm_thread() asserts).
bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
  return cm_thread()->is_in_reset_for_next_cycle();
}

// Shared manager for partial-array scanning state, created in the constructor.
PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
  return _partial_array_state_manager;
}

// Accessor for the concurrent mark thread; only valid after fully_initialize().
G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
  assert(is_fully_initialized(), "must be");
  return _cm_thread;
}
557
// Reset the global marking state for a new marking cycle: clear the abort
// flag, restart-related state, all tasks, and all per-region statistics.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  // Re-run the placement array constructor to null all TARS, then clear the
  // per-region mark stats.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
  for (uint i = 0; i < max_num_regions; i++) {
    _region_mark_stats[i].clear();
  }

  _root_region_scan_aborted.store_relaxed(false);
  _root_regions.reset();
}
578
579 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
580 uint region_idx = r->hrm_index();
581 for (uint j = 0; j < _max_num_tasks; ++j) {
582 _tasks[j]->clear_mark_stats_cache(region_idx);
583 }
584 _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
585 _region_mark_stats[region_idx].clear();
586 }
587
// Called when a humongous object is eagerly reclaimed during a pause: unmark
// the object and, if a mark/rebuild is in progress, drop any statistics
// gathered for all regions spanned by the object.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
605
// Prepare the marking state for being restarted (e.g. after a mark stack
// overflow): empty the global stack, possibly grow it, rewind the global
// finger, and reset all tasks.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    // Statistics gathered before the overflow are stale; partially clear them.
    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Restart scanning from the bottom of the heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
626
// Set the number of tasks active in the upcoming phase and size the
// termination/synchronization structures accordingly.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}
637
// Like set_concurrency(), but also records whether the coming phase runs
// concurrently or inside a STW pause (remark).
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
651
652 #if TASKQUEUE_STATS
// Log the accumulated task queue and partial-array statistics for all tasks,
// then reset them for the next cycle. Only compiled when TASKQUEUE_STATS.
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "Concurrent Mark Partial Array");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
668 #endif
669
// Return the global marking state to its idle (not-marking) configuration.
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}
677
// The destructor must never run: the singleton lives for the whole VM
// lifetime. ShouldNotReachHere() enforces that in debug builds; the frees
// above it only keep static analysis happy.
G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
685
// Worker task that clears (parts of) the mark bitmap. Each worker claims
// regions and clears them in chunk_size() steps so that suspendible workers
// can yield to safepoints between chunks.
class G1ClearBitMapTask : public WorkerTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    // True when clearing concurrently as part of an undo cycle; enables the
    // clear-range optimization in region_clear_limit().
    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
    }

    // Yield check for suspendible workers; reports whether marking aborted.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clear the bitmap for one region, chunk by chunk, yielding between
    // chunks when suspendible. Returns true to abort the region iteration.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }
};
790
// Clear the whole mark bitmap using the given workers. When may_yield, the
// work is suspendible and may be interrupted by safepoints; otherwise it must
// run inside a safepoint.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  // Size the worker count by the number of bitmap chunks to clear.
  size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
}
804
// Concurrent cleanup at the end of a cycle: clear the bitmap (yieldingly) and
// recycle the partial-array state so the next cycle starts fresh.
void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(is_in_reset_for_next_cycle(), "invariant");

  clear_bitmap(_concurrent_workers, true);

  reset_partial_array_state_manager();

  // Should not have changed state yet (even if a Full GC interrupted us).
  guarantee(is_in_reset_for_next_cycle(), "invariant");
}
817
// Recycle the shared partial-array state manager. All task splitters must be
// unregistered before the manager is reset, then re-registered afterwards —
// the ordering matters.
void G1ConcurrentMark::reset_partial_array_state_manager() {
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->unregister_partial_array_splitter();
  }

  partial_array_state_manager()->reset();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->register_partial_array_splitter();
  }
}
829
// STW (non-yielding) bitmap clear, used by full collections.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
  assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible the number of active workers are temporarily
  // increased to include all currently created workers.
  WithActiveWorkers update(workers, workers->created_workers());
  clear_bitmap(workers, false);
}
839
// Batched task run at the concurrent start pause. Combines the serial
// marking-state reset with parallel per-region note-start-of-mark work.
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
849
// Serial sub-task resetting the global marking state; the worker_cost of 1.0
// reflects that it runs on a single worker.
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};
858
// Parallel sub-task that sets or resets top-at-mark-start (TAMS) for every
// region at the start of marking.
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {

  class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;

  public:
    NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }

    bool do_heap_region(G1HeapRegion* r) override {
      // Only regions that survive this GC untouched need marking through;
      // everything else gets its TAMS reset.
      if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
        _cm->update_top_at_mark_start(r);
      } else {
        _cm->reset_top_at_mark_start(r);
      }
      return false;
    }
  } _region_cl;

  G1HeapRegionClaimer _claimer;
public:
  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }

  double worker_cost() const override {
    // The work done per region is very small, therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
    const double regions_per_thread = 1000;
    return _claimer.n_regions() / regions_per_thread;
  }

  void set_max_workers(uint max_workers) override {
    _claimer.set_n_workers(max_workers);
  }

  void do_work(uint worker_id) override {
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
  }
};
896
// Runs once, on a single worker (serial sub-task).
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
901
902 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
903 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
904 add_serial_task(new ResetMarkingStateTask(cm));
905 add_parallel_task(new NoteStartOfMarkTask());
906 };
907
// Executed at the concurrent start safepoint: prepare the marking state and
// per-region TAMS before the concurrent phase begins.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  // Marking claims CLDs strongly; none may still be claimed from before.
  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  _gc_tracer_cm->set_gc_cause(cause);
}
920
// Kicks off a full concurrent marking cycle: enables concurrent reference
// discovery, activates SATB queues for all threads, and signals the
// concurrent mark thread to begin its work.
void G1ConcurrentMark::start_full_concurrent_cycle() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  rp->start_discovery(false /* always_clear */);

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // concurrent start pause to update the heap end, if the heap expands
  // during it. No need to call it here.

  // Signal the thread to start work.
  cm_thread()->start_full_cycle();
}
940
// Starts an "undo" cycle that rolls back a concurrent start that turned out to
// be unnecessary. Runs at a safepoint; aborts root region scanning and signals
// the concurrent mark thread to perform the undo work.
void G1ConcurrentMark::start_undo_concurrent_cycle() {
  assert_at_safepoint_on_vm_thread();
  // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
  assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");

  abort_root_region_scan_at_safepoint();

  // Signal the thread to start work.
  cm_thread()->start_undo_cycle();
}
951
// Marks the concurrent mark thread as idle once a concurrent cycle has finished.
void G1ConcurrentMark::notify_concurrent_cycle_completed() {
  cm_thread()->set_idle();
}
955
// Stops the concurrent mark thread at VM shutdown. Guarded by
// is_fully_initialized() since initialization may have failed early.
void G1ConcurrentMark::stop() {
  if (is_fully_initialized()) {
    cm_thread()->stop();
  }
}
961
962 /*
963 * Notice that in the next two methods, we actually leave the STS
964 * during the barrier sync and join it immediately afterwards. If we
965 * do not do this, the following deadlock can occur: one thread could
966 * be in the barrier sync code, waiting for the other thread to also
967 * sync up, whereas another one could be trying to yield, while also
968 * waiting for the other threads to sync up too.
969 *
970 * Note, however, that this code is also used during remark and in
971 * this case we should not attempt to leave / enter the STS, otherwise
972 * we'll either hit an assert (debug / fastdebug) or deadlock
973 * (product). So we should only leave / enter the STS if we are
974 * operating concurrently.
975 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
981 */
982
// First rendezvous of all marking workers after a mark-stack overflow.
// See the comment block above: the STS is left only for the duration of the
// barrier sync, and only when running concurrently. worker_id is unused.
void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    // Leave the STS only while blocked in the barrier; the scope is deliberate.
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}
999
// Second rendezvous after overflow handling: workers wait here until the
// marking state has been re-initialized. worker_id is unused; STS is left
// only while waiting (and only when running concurrently).
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1006
// Worker task driving the concurrent marking phase: each worker repeatedly runs
// bounded marking steps for its G1CMTask, yielding to safepoints in between,
// until either the task finishes or marking as a whole is aborted.
class G1CMConcurrentMarkingTask : public WorkerTask {
  G1ConcurrentMark* _cm;

public:
  void work(uint worker_id) {
    ResourceMark rm;

    // Join the STS so safepoints can suspend this worker cleanly.
    SuspendibleThreadSetJoiner sts_join;

    assert(worker_id < _cm->active_tasks(), "invariant");

    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        // Bounded step so we can check for yield/abort regularly.
        task->do_marking_step(G1ConcMarkStepDurationMillis,
                              true /* do_termination */,
                              false /* is_serial*/);

        _cm->do_yield_check();
        // A task-level abort (e.g. time-out) restarts the step unless the
        // whole marking has been aborted.
      } while (!_cm->has_aborted() && task->has_aborted());
    }
    task->record_end_time();
    guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
  }

  G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
    WorkerTask("Concurrent Mark"), _cm(cm) { }

  ~G1CMConcurrentMarkingTask() { }
};
1038
1039 uint G1ConcurrentMark::calc_active_marking_workers() {
1040 uint result = 0;
1041 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1042 result = _max_concurrent_workers;
1043 } else {
1044 result =
1045 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1046 1, /* Minimum workers */
1047 _num_concurrent_workers,
1048 Threads::number_of_non_daemon_threads());
1049 // Don't scale the result down by scale_concurrent_workers() because
1050 // that scaling has already gone into "_max_concurrent_workers".
1051 }
1052 assert(result > 0 && result <= _max_concurrent_workers,
1053 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1054 _max_concurrent_workers, result);
1055 return result;
1056 }
1057
// Whether root region scanning has been told to abort (e.g. by an undo cycle;
// see start_undo_concurrent_cycle()). Relaxed load; racing readers tolerate
// slightly stale values.
bool G1ConcurrentMark::has_root_region_scan_aborted() const {
  return _root_region_scan_aborted.load_relaxed();
}
1061
#ifndef PRODUCT
// Debug-only invariant check: after (attempting) the root region scan, either
// all work units were processed or the scan was aborted.
void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
  assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
}
#endif
1067
// Scans one root MemRegion [TAMS, top) by iterating the oops of each object in
// it with G1RootRegionScanClosure, marking everything reachable from those
// objects. The region is walked object by object using object sizes.
void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
#ifdef ASSERT
  HeapWord* last = region->last();
  G1HeapRegion* hr = _g1h->heap_region_containing(last);
  assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
         "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
  assert(top_at_mark_start(hr) == region->start(),
         "MemRegion start should be equal to TAMS");
#endif

  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = region->start();
  const HeapWord* end = region->end();
  while (curr < end) {
    // Prefetch ahead to hide memory latency while walking objects linearly.
    Prefetch::read(curr, interval);
    oop obj = cast_to_oop(curr);
    size_t size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}
1091
// Worker task that drains the list of root regions: each worker claims regions
// one at a time and scans them, optionally yielding to safepoints between
// regions (only when running concurrently).
class G1CMRootRegionScanTask : public WorkerTask {
  G1ConcurrentMark* _cm;
  bool _should_yield;  // True when run concurrently; false inside a safepoint.

public:
  G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
    WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_should_yield);

    while (true) {
      // Stop immediately if the scan has been aborted (e.g. undo cycle).
      if (_cm->has_root_region_scan_aborted()) {
        return;
      }
      G1CMRootMemRegions* root_regions = _cm->root_regions();
      const MemRegion* region = root_regions->claim_next();
      if (region == nullptr) {
        // No more work to claim.
        return;
      }
      _cm->scan_root_region(region, worker_id);
      if (_should_yield) {
        SuspendibleThreadSet::yield();
        // If we yielded, a GC may have processed all root regions,
        // so this loop will naturally exit on the next claim_next() call.
        // Same if a Full GC signalled abort of the concurrent mark.
      }
    }
  }
};
1122
// Runs the root region scan with the given workers, either concurrently (with
// yielding) or inside a safepoint. Returns true iff any scanning work was
// actually started.
bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
  // We first check whether there is any work to do as we might have already aborted
  // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
  // We want to avoid spinning up the worker threads if that happened.
  // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
  //
  // Abort happens if a Full GC occurs right after starting the concurrent cycle or
  // a young gc doing the work.
  //
  // Concurrent gc threads enter an STS when starting the task, so they stop, then
  // continue after that safepoint.
  bool do_scan = !root_regions()->work_completed() && !has_root_region_scan_aborted();
  if (do_scan) {
    // Assign one worker to each root-region but subject to the max constraint.
    // The constraint is also important to avoid accesses beyond the allocated per-worker
    // marking helper data structures. We might get passed different WorkerThreads with
    // different number of threads (potential worker ids) than helper data structures when
    // completing this work during GC.
    const uint num_workers = MIN2(root_regions()->num_remaining_regions(),
                                  _max_concurrent_workers);
    assert(num_workers > 0, "no more remaining root regions to process");

    G1CMRootRegionScanTask task(this, concurrent);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), num_workers, root_regions()->num_remaining_regions());
    workers->run_task(&task, num_workers);
  }

  // Either way, by now the scan must be complete or aborted.
  assert_root_region_scan_completed_or_aborted();

  return do_scan;
}
1155
// Concurrent-phase entry point for root region scanning; runs on the
// concurrent mark thread with yielding enabled.
void G1ConcurrentMark::scan_root_regions_concurrently() {
  assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
  scan_root_regions(_concurrent_workers, true /* concurrent */);
}
1160
// Finishes any outstanding root region scanning inside a safepoint, using the
// (non-yielding) GC worker threads. Returns true iff work was performed.
bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
  assert_at_safepoint_on_vm_thread();
  return scan_root_regions(_g1h->workers(), false /* concurrent */);
}
1165
// Registers region r's [TAMS, top) range as a root region to be scanned.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1169
// Whether region r's [TAMS, top) range is currently registered as a root region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1173
// Signals the root region scan to abort; variant for callers running outside
// a safepoint.
void G1ConcurrentMark::abort_root_region_scan() {
  assert_not_at_safepoint();

  _root_region_scan_aborted.store_relaxed(true);
}
1179
// Signals the root region scan to abort; variant for callers on the VM thread
// inside a safepoint (e.g. start_undo_concurrent_cycle()).
void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
  assert_at_safepoint_on_vm_thread();

  _root_region_scan_aborted.store_relaxed(true);
}
1185
// Bookkeeping at the start of a concurrent cycle: start the timer, open the
// tracer event, and snapshot the heap for before/after comparison.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1193
// Number of fully completed mark cycles so far. Relaxed load; used by readers
// that only need a monotonically advancing counter.
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1197
// Bookkeeping at the end of a concurrent cycle: heap snapshot, completed-cycle
// counter update, abort reporting, and timer/tracer shutdown.
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    // Only count cycles that actually finished marking (not undo/aborted ones).
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1214
// Runs the concurrent "mark from roots" phase: determines the worker count,
// sets up the per-phase concurrency state, and executes the marking task on
// the concurrent worker threads.
void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow.store_relaxed(false);

  uint active_workers = calc_active_marking_workers();

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->set_active_workers(active_workers);
  log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());

  _num_concurrent_workers = active_workers;

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}
1235
1236 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1237 static const char* location_strings[] = { "Remark Before",
1238 "Remark After",
1239 "Remark Overflow",
1240 "Cleanup Before",
1241 "Cleanup After" };
1242 return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1243 }
1244
// Optional heap verification hook used at the named points of the Remark and
// Cleanup pauses. Always verifies the region sets; full heap/bitmap
// verification only when VerifyDuringGC requests the given type.
void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
                                           VerifyLocation location) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  const char* caller = verify_location_string(location);

  if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(VerifyOption::G1UseConcMarking, buffer);

    // Only check bitmap in Remark, and not at After-Verification because the regions
    // already have their TAMS'es reset.
    if (location != VerifyLocation::RemarkAfter) {
      verifier->verify_bitmap_clear(true /* above_tams_only */);
    }
  }
}
1269
// Liveness predicate used for the post-marking object count report: an object
// is considered alive iff the heap does not consider it dead.
class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
  G1CollectedHeap* _g1h;
public:
  G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}

  bool do_object_b(oop obj) {
    return !_g1h->is_obj_dead(obj);
  }
};
1279
// The Remark pause: finalizes concurrent marking inside a safepoint. If
// marking finished (no mark-stack overflow) it processes weak references,
// unloads classes/code, deactivates SATB queues, selects regions for
// remembered set rebuild, and advances the cycle state; on overflow it resets
// state and requests a marking restart.
void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);

  {
    // Drain SATB buffers and finish all outstanding marking work.
    GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work();

    // Unload Klasses, String, Code Cache, etc.
    if (ClassUnloadingWithConcurrentMark) {
      G1CMIsAliveClosure is_alive(this);
      _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
    }

    SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
      flush_all_task_caches();
    }

    // All marking completed. Check bitmap now as we will start to reset TAMSes
    // in parallel below so that we can not do this in the After-Remark verification.
    _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);

    {
      GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);

      G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
      uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
                                    _g1h->workers()->active_workers());
      log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
      _g1h->workers()->run_task(&cl, num_workers);

      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_committed_regions(), cl.total_selected_for_rebuild());

      // The rebuild phase is needed only if any region was selected.
      _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);

      if (_needs_remembered_set_rebuild) {
        GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
        _g1h->policy()->candidates()->set_candidates_from_marking(selected);
      }
    }

    if (log_is_enabled(Trace, gc, liveness)) {
      G1PrintRegionLivenessInfoClosure cl("Post-Marking");
      _g1h->heap_region_iterate(&cl);
    }

    // Potentially, some empty-regions have been reclaimed; make this a
    // "collection" so that pending allocation can retry before attempting a
    // GC pause.
    _g1h->increment_total_collections();

    // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
    // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
    // triggered, it likely means there are very few regular GCs, making resizing
    // based on gc heuristics less effective.
    if (_g1h->last_gc_was_periodic()) {
      _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state (except bitmaps) since marking completed.
    reset_at_marking_complete();

    G1CollectedHeap::finish_codecache_marking_cycle();

    {
      GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
      G1ObjectCountIsAliveClosure is_alive(_g1h);
      _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
    }

    // Successfully completed marking, advance state.
    cm_thread()->set_full_cycle_rebuild_and_scrub();
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow.store_relaxed(true);

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  _g1h->update_perf_counter_cpu_time();

  policy->record_concurrent_mark_remark_end();

  return;
}
1408
// Recomputes heap-related sizing after marking: metaspace capacity, soft
// reference policy inputs, and monitoring counters.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1420
// Region closure run at the Cleanup pause: transitions each region's
// remembered set tracking state from "updating" to "complete" after the
// remembered set rebuild phase has finished.
class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
  G1CollectedHeap* _g1h;

public:
  G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  bool do_heap_region(G1HeapRegion* r) override {
    // Update the remset tracking state from updating to complete
    // if remembered sets have been rebuilt.
    _g1h->policy()->remset_tracker()->update_after_rebuild(r);
    return false;  // Never abort the iteration.
  }
};
1434
1435 void G1ConcurrentMark::cleanup() {
1436 assert_at_safepoint_on_vm_thread();
1437
1438 // If a full collection has happened, we shouldn't do this.
1439 if (has_aborted()) {
1440 return;
1441 }
1442
1443 G1Policy* policy = _g1h->policy();
1444 policy->record_pause_start_time();
1445
1446 double start = os::elapsedTime();
1447
1448 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1449
1450 if (needs_remembered_set_rebuild()) {
1451 // Update the remset tracking information as well as marking all regions
1452 // as fully parsable.
1453 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1454 G1UpdateRegionsAfterRebuild cl(_g1h);
1455 _g1h->heap_region_iterate(&cl);
1456 } else {
1457 log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1458 }
1459
1460 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1461
1462 // Local statistics
1463 _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1464
1465 {
1466 GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1467 policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1468 }
1469
1470 // Advance state.
1471 cm_thread()->set_full_cycle_reset_for_next_cycle();
1472 return;
1473 }
1474
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1476 // Uses the G1CMTask associated with a worker thread (for serial reference
1477 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1478 // trace referent objects.
1479 //
1480 // Using the G1CMTask and embedded local queues avoids having the worker
1481 // threads operating on the global mark stack. This reduces the risk
1482 // of overflowing the stack - which we would rather avoid at this late
1483 // state. Also using the tasks' local queues removes the potential
1484 // of the workers interfering with each other that could occur if
1485 // operating on the global stack.
1486
// Keep-alive closure for reference processing: marks each referent via the
// worker's G1CMTask and, after every G1RefProcDrainInterval references,
// partially drains the task's marking stacks to keep them from growing.
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  uint _ref_counter_limit;  // Drain interval (G1RefProcDrainInterval).
  uint _ref_counter;        // Countdown until the next drain.
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    // Once the global mark stack has overflowed there is no point in more work.
    if (_cm->has_overflown()) {
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1541
1542 // 'Drain' oop closure used by both serial and parallel reference processing.
1543 // Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
1545 // do_marking_step routine, with an unbelievably large timeout value,
1546 // to drain the marking data structures of the remaining entries
1547 // added by the 'keep alive' oop closure above.
1548
// Complete-GC closure for reference processing: fully drains the marking
// stacks of entries pushed by the keep-alive closure above, using an
// effectively unbounded marking step with termination.
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1584
// Proxy task adapting G1 concurrent mark closures to the shared reference
// processing framework; each worker builds its is-alive/keep-alive/drain
// closures around the corresponding G1CMTask.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h;
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In single-threaded mode all work runs on G1CMTask 0.
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1613
// Processes discovered (weak) references and other weak roots during the
// Remark pause. A mark-stack overflow during reference processing is fatal
// because liveness information can no longer be trusted.
void G1ConcurrentMark::weak_refs_work() {
  ResourceMark rm;

  {
    GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Prefer to grow the stack until the max capacity.
    _global_mark_stack.set_should_grow();

    // Parallel processing task executor.
    G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());

    // Process the weak references.
    const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
    _gc_tracer_cm->report_gc_reference_stats(stats);
    pt.print_all_references();

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(has_overflown() || _global_mark_stack.is_empty(),
           "Mark stack should be empty (unless it has overflown)");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive and the contents of the heap if the marking stack
    // overflowed while processing references. Exit the VM.
    fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
          "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
          "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
          _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
    return;
  }

  assert(_global_mark_stack.is_empty(), "Marking should have completed");

  {
    // Process the remaining weak oop storages (e.g. JNI weak refs).
    GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
    G1CMIsAliveClosure is_alive(this);
    WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
  }
}
1665
// Yield closure for reference precleaning: allows the preclean loop to bail
// out when marking has been aborted, and performs the regular concurrent
// yield check at fine-grained points.
class G1PrecleanYieldClosure : public YieldClosure {
  G1ConcurrentMark* _cm;

public:
  G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }

  virtual bool should_return() {
    return _cm->has_aborted();
  }

  virtual bool should_return_fine_grain() {
    // Yield to a pending safepoint first, then re-check for abort.
    _cm->do_yield_check();
    return _cm->has_aborted();
  }
};
1681
// Concurrently precleans discovered references to reduce Remark pause work.
// Runs single-threaded (MT discovery temporarily disabled) while joined to
// the suspendible thread set so it can yield to safepoints.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  // Single-threaded phase: concurrency level 1.
  set_concurrency_and_phase(1, true);

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1701
// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  // NOTE(review): _g1h is stored but not referenced in the code visible here —
  // possibly retained for debugging or historical reasons; confirm before removing.
  G1CollectedHeap* _g1h;

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    oop const obj = cast_to_oop(entry);
    _task->make_reference_grey(obj);
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  // Marks every entry of a completed SATB buffer.
  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};
1727
// Thread closure used during Remark: flushes each thread's partially filled
// SATB queue into the qset so its entries are handled by completed-buffer
// processing. The g1h/task constructor arguments are currently unused.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1SATBMarkQueueSet& _qset;

public:
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _qset(G1BarrierSet::satb_mark_queue_set()) {}

  void do_thread(Thread* thread) {
    // Transfer any partial buffer to the qset for completed buffer processing.
    _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
  }
};
1740
// Parallel worker task for the Remark pause: each worker first flushes the
// SATB queues of the Java threads it claims, then runs its G1CMTask to
// completion (unless the global mark stack overflows).
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      // Flush partial SATB buffers of all threads so their entries get processed.
      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    do {
      // Unbounded step with termination: finish all remaining marking work.
      task->do_marking_step(1000000000.0 /* something very large */,
                            true /* do_termination */,
                            false /* is_serial */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1771
// Completes marking during the remark pause: runs the parallel remark task
// over all workers and verifies that, barring overflow, no completed SATB
// buffers remain afterwards.
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  // Make TLABs parsable before workers may walk the heap.
  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at it's
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  // After a successful (non-overflowing) remark all SATB buffers must have
  // been consumed by the marking tasks.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1802
1803 void G1ConcurrentMark::flush_all_task_caches() {
1804 size_t hits = 0;
1805 size_t misses = 0;
1806 for (uint i = 0; i < _max_num_tasks; i++) {
1807 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1808 hits += stats.first;
1809 misses += stats.second;
1810 }
1811 size_t sum = hits + misses;
1812 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1813 hits, misses, percent_of(hits, sum));
1814 }
1815
1816 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1817 assert_at_safepoint();
1818 _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1819 }
1820
// Claims the next region for marking by atomically advancing the global
// finger past it. Returns the claimed region, or null either when the
// finger has reached the end of the heap, or when the claimed region has
// nothing to scan (TAMS at bottom) — in the latter case the caller should
// simply retry.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // CAS lost (or region was null). Read the finger again and retry
      // from wherever another worker moved it.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  return nullptr;
}
1866
1867 #ifndef PRODUCT
// Debug-only functor applied to mark stack / task queue entries to verify
// that no entry references an object in the (optional) collection set.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;   // Which data structure is being verified (for messages).
  int _info;            // Extra info, e.g. queue index; -1 if not applicable.

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // Partial array states only need their source object to be in the heap.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1895
// Debug-only verification that neither the global mark stack, the task
// queues, nor any of the marking fingers reference the (optional)
// collection set. Only meaningful while marking or rebuilding is active.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1938 #endif // PRODUCT
1939
1940 void G1ConcurrentMark::rebuild_and_scrub() {
1941 if (!needs_remembered_set_rebuild()) {
1942 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1943 }
1944
1945 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1946 }
1947
1948 void G1ConcurrentMark::print_stats() {
1949 if (!log_is_enabled(Debug, gc, stats)) {
1950 return;
1951 }
1952 log_debug(gc, stats)("---------------------------------------------------------------------");
1953 for (size_t i = 0; i < _num_active_tasks; ++i) {
1954 _tasks[i]->print_stats();
1955 log_debug(gc, stats)("---------------------------------------------------------------------");
1956 }
1957 }
1958
// Aborts a concurrent marking cycle at the start of a full gc. Returns
// true if there was marking state to tear down, false if there was nothing
// to do (not initialized, or no cycle in progress and not shutting down).
bool G1ConcurrentMark::concurrent_cycle_abort() {
  assert_at_safepoint_on_vm_thread();
  assert(_g1h->collector_state()->is_in_full_gc(), "must be");

  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. Since the root region
  // scan synchronized with the safepoint, just tell it to abort.
  // It will notice when the threads start up again later.
  abort_root_region_scan_at_safepoint();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
    return false;
  }

  // Reset marking data structures so a future cycle starts clean.
  reset_marking_for_restart();

  abort_marking_threads();

  // Discard any pending SATB work and deactivate SATB queues for all threads.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1996
// Signals all marking threads to abort: raises the abort flag and breaks
// both overflow barrier syncs so no worker stays blocked waiting on them.
void G1ConcurrentMark::abort_marking_threads() {
  assert_root_region_scan_completed_or_aborted();
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
2003
2004 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2005 class CountCpuTimeThreadClosure : public ThreadClosure {
2006 public:
2007 jlong _total_cpu_time;
2008
2009 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2010
2011 void do_thread(Thread* t) {
2012 _total_cpu_time += os::thread_cpu_time(t);
2013 }
2014 } cl;
2015
2016 threads_do(&cl);
2017
2018 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2019 }
2020
// Helper that logs summary statistics for one timing sequence (count,
// total, average; plus std. dev. and max when any samples exist).
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}
2030
2031 void G1ConcurrentMark::print_summary_info() {
2032 Log(gc, marking) log;
2033 if (!log.is_trace()) {
2034 return;
2035 }
2036
2037 log.trace(" Concurrent marking:");
2038 if (!is_fully_initialized()) {
2039 log.trace(" has not been initialized yet");
2040 return;
2041 }
2042 print_ms_time_info(" ", "remarks", _remark_times);
2043 {
2044 print_ms_time_info(" ", "final marks", _remark_mark_times);
2045 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2046
2047 }
2048 print_ms_time_info(" ", "cleanups", _cleanup_times);
2049 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2050 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2051 log.trace(" Total stop_world time = %8.2f s.",
2052 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2053 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2054 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2055 }
2056
2057 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2058 if (is_fully_initialized()) { // they are initialized late
2059 tc->do_thread(_cm_thread);
2060 _concurrent_workers->threads_do(tc);
2061 }
2062 }
2063
// Prints the mark bitmap's address and contents to the given stream.
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2068
2069 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2070 ReferenceProcessor* result = g1h->ref_processor_cm();
2071 assert(result != nullptr, "CM reference processor should not be null");
2072 return result;
2073 }
2074
// Oop iteration closure used by marking; claims metadata strongly and
// discovers references via the concurrent-marking reference processor.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2080
// Makes the given (freshly claimed) region the task's current region,
// positioning the local finger at its bottom and computing the scan limit.
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
         "claim_region() should have filtered out null regions");
  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}
2088
// Recomputes the scan limit (TAMS) of the current region, handling the
// cases where the region changed underneath us since we claimed it.
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2120
// Releases the task's current region without finishing it; another claim
// (or restart) will pick up any remaining work.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2125
void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = nullptr;
  _finger = nullptr;
  _region_limit = nullptr;
}
2133
2134 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2135 if (cm_oop_closure == nullptr) {
2136 assert(_cm_oop_closure != nullptr, "invariant");
2137 } else {
2138 assert(_cm_oop_closure == nullptr, "invariant");
2139 }
2140 _cm_oop_closure = cm_oop_closure;
2141 }
2142
// Resets the task's per-cycle state (bitmap, region fields, timing and
// statistics counters) in preparation for a new marking cycle.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap = mark_bitmap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;

  _mark_stats_cache.reset();
}
2154
// Resets region state and empties the local queue before marking is
// restarted (e.g. after an overflow).
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2161
// (Re-)constructs the partial array splitter in place via placement new;
// paired with the explicit destructor call in
// unregister_partial_array_splitter().
void G1CMTask::register_partial_array_splitter() {
  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks(),
                                                        ObjArrayMarkingStride);
}
2168
// Explicitly destroys the splitter constructed by
// register_partial_array_splitter().
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2172
2173 bool G1CMTask::should_exit_termination() {
2174 if (!regular_clock_call()) {
2175 return true;
2176 }
2177
2178 // This is called when we are in the termination protocol. We should
2179 // quit if, for some reason, this task wants to abort or the global
2180 // stack is not empty (this means that we can get work from it).
2181 return !_cm->mark_stack_empty() || has_aborted();
2182 }
2183
// Invoked when the words-scanned or refs-reached counter crosses its
// limit; triggers the regular clock to decide whether to keep going.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2190
// Periodic bookkeeping during marking. Returns false when the task should
// abort its current marking step: overflow, full-gc abort, yield request,
// expired time quota, or pending SATB buffers (the last four only while
// marking concurrently).
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (3) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (4) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2244
// Sets the next clock-call thresholds one full period beyond the current
// progress counters.
void G1CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit = _real_words_scanned_limit;

  _real_refs_reached_limit = _refs_reached + refs_reached_period;
  _refs_reached_limit = _real_refs_reached_limit;
}
2252
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  // Pull both limits back by 3/4 of a period so the next clock call
  // happens after only a quarter of the usual work.
  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2262
2263 void G1CMTask::move_entries_to_global_stack() {
2264 // Local array where we'll store the entries that will be popped
2265 // from the local queue.
2266 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2267
2268 size_t n = 0;
2269 G1TaskQueueEntry task_entry;
2270 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2271 buffer[n] = task_entry;
2272 ++n;
2273 }
2274 if (n < G1CMMarkStack::EntriesPerChunk) {
2275 buffer[n] = G1TaskQueueEntry();
2276 }
2277
2278 if (n > 0) {
2279 if (!_cm->mark_stack_push(buffer)) {
2280 set_has_aborted();
2281 }
2282 }
2283
2284 // This operation was quite expensive, so decrease the limits.
2285 decrease_limits();
2286 }
2287
2288 bool G1CMTask::get_entries_from_global_stack() {
2289 // Local array where we'll store the entries that will be popped
2290 // from the global stack.
2291 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2292
2293 if (!_cm->mark_stack_pop(buffer)) {
2294 return false;
2295 }
2296
2297 // We did actually pop at least one entry.
2298 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2299 G1TaskQueueEntry task_entry = buffer[i];
2300 if (task_entry.is_null()) {
2301 break;
2302 }
2303 assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2304 bool success = _task_queue->push(task_entry);
2305 // We only call this when the local queue is empty or under a
2306 // given target limit. So, we do not expect this push to fail.
2307 assert(success, "invariant");
2308 }
2309
2310 // This operation was quite expensive, so decrease the limits
2311 decrease_limits();
2312 return true;
2313 }
2314
2315 void G1CMTask::drain_local_queue(bool partially) {
2316 if (has_aborted()) {
2317 return;
2318 }
2319
2320 // Decide what the target size is, depending whether we're going to
2321 // drain it partially (so that other tasks can steal if they run out
2322 // of things to do) or totally (at the very end).
2323 uint target_size;
2324 if (partially) {
2325 target_size = GCDrainStackTargetSize;
2326 } else {
2327 target_size = 0;
2328 }
2329
2330 if (_task_queue->size() > target_size) {
2331 G1TaskQueueEntry entry;
2332 bool ret = _task_queue->pop_local(entry);
2333 while (ret) {
2334 process_entry(entry, false /* stolen */);
2335 if (_task_queue->size() <= target_size || has_aborted()) {
2336 ret = false;
2337 } else {
2338 ret = _task_queue->pop_local(entry);
2339 }
2340 }
2341 }
2342 }
2343
// Begins sliced processing of a large object array: registers the array
// with the partial array splitter (which enqueues further chunks), marks
// the array's klass, and processes the initial chunk itself. Returns the
// number of words considered scanned for this step.
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = objArrayOop(obj);
  size_t array_length = obj_array->length();

  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
}
2362
// Processes one chunk of a partially-processed object array described by
// the given task queue entry. Returns the number of words scanned,
// derived from the number of array elements in the claimed chunk.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = objArrayOop(state->source());

  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);
  return heap_word_size((claim._end - claim._start) * heapOopSize);
}
2374
// Drains the global mark stack by repeatedly pulling chunks into the
// local queue and processing them, either partially (down to a target
// size) or totally.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2405
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. this is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
// Claims and processes completed SATB buffers until none remain or the
// task must abort.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    abort_marking_if_regular_check_fail();
  }

  // Can't assert qset is empty here, even if not aborted. If concurrent,
  // some other thread might be adding to the queue. If not concurrent,
  // some other thread might have won the race for the last buffer, but
  // has not yet decremented the count.

  _draining_satb_buffers = false;

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
2442
// Resets the cached mark statistics entry for the given region.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2446
// Evicts all cached mark statistics, returning this cache's (hits, misses)
// counts for aggregation.
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2450
2451 void G1CMTask::print_stats() {
2452 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2453 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2454 _elapsed_time_ms, _termination_time_ms);
2455 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2456 _step_times_ms.num(),
2457 _step_times_ms.avg(),
2458 _step_times_ms.sd(),
2459 _step_times_ms.maximum(),
2460 _step_times_ms.sum());
2461 size_t const hits = _mark_stats_cache.hits();
2462 size_t const misses = _mark_stats_cache.misses();
2463 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2464 hits, misses, percent_of(hits, hits + misses));
2465 }
2466
// Attempts to steal an entry from some other worker's task queue on behalf
// of the given worker. Returns true and fills task_entry on success.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2470
// Scans (part of) the task's current region by iterating its mark bitmap,
// giving the region up when the scan completes or recording the resume
// point in _finger when the scan is aborted part-way.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2543
// Repeatedly attempts to claim a region from the global finger until one
// is obtained, the task aborts, or the heap runs out of regions.
void G1CMTask::claim_new_region() {
  // Read the note on the claim_region() method on why it might
  // return null with potentially more regions available for
  // claiming and why we have to check out_of_regions() to determine
  // whether we're done or not.
  while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
    // We are going to try to claim a new region. We should have
    // given up on the previous one.
    // Separated the asserts so that we know which one fires.
    assert(_curr_region == nullptr, "invariant");
    assert(_finger == nullptr, "invariant");
    assert(_region_limit == nullptr, "invariant");
    G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
    if (claimed_region != nullptr) {
      // Yes, we managed to claim one
      setup_for_region(claimed_region);
      assert(_curr_region == claimed_region, "invariant");
    }
    // It is important to call the regular clock here. It might take
    // a while to claim a region if, for example, we hit a large
    // block of empty regions. So we need to call the regular clock
    // method once round the loop to make sure it's called
    // frequently enough.
    abort_marking_if_regular_check_fail();
  }
}
2570
2571 void G1CMTask::attempt_stealing() {
2572 // We cannot check whether the global stack is empty, since other
2573 // tasks might be pushing objects to it concurrently.
2574 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2575 "only way to reach here");
2576 while (!has_aborted()) {
2577 G1TaskQueueEntry entry;
2578 if (_cm->try_stealing(_worker_id, entry)) {
2579 process_entry(entry, true /* stolen */);
2580
2581 // And since we're towards the end, let's totally drain the
2582 // local queue and global stack.
2583 drain_local_queue(false);
2584 drain_global_stack(false);
2585 } else {
2586 break;
2587 }
2588 }
2589 }
2590
2591 void G1CMTask::attempt_termination(bool is_serial) {
2592 // We cannot check whether the global stack is empty, since other
2593 // tasks might be concurrently pushing objects on it.
2594 // Separated the asserts so that we know which one fires.
2595 assert(_cm->out_of_regions(), "only way to reach here");
2596 assert(_task_queue->size() == 0, "only way to reach here");
2597 double termination_start_time_ms = os::elapsedTime() * 1000.0;
2598
2599 // The G1CMTask class also extends the TerminatorTerminator class,
2600 // hence its should_exit_termination() method will also decide
2601 // whether to exit the termination protocol or not.
2602 bool finished = (is_serial ||
2603 _cm->terminator()->offer_termination(this));
2604 _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2605
2606 if (finished) {
2607 // We're all done.
2608
2609 // We can now guarantee that the global stack is empty, since
2610 // all other tasks have finished. We separated the guarantees so
2611 // that, if a condition is false, we can immediately find out
2612 // which one.
2613 guarantee(_cm->out_of_regions(), "only way to reach here");
2614 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2615 guarantee(_task_queue->size() == 0, "only way to reach here");
2616 guarantee(!_cm->has_overflown(), "only way to reach here");
2617 guarantee(!has_aborted(), "should never happen if termination has completed");
2618 } else {
2619 // Apparently there's more work to do. Let's abort this task. We
2620 // will restart it and hopefully we can find more things to do.
2621 set_has_aborted();
2622 }
2623 }
2624
// Post-abort bookkeeping for do_marking_step(): records timing statistics
// when the step timed out and, when the abort was caused by a global
// mark-stack overflow, runs the two-barrier protocol that lets all tasks
// safely re-initialize before marking restarts.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  if (!_cm->has_overflown()) {
    // Aborted for a reason other than overflow (time-out, yield, ...):
    // nothing further needs to be coordinated here.
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  // Reset this task's region fingers and flush its per-region mark
  // statistics before marking restarts.
  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
      // method to abort the pause and restart concurrent marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2678
2679 /*****************************************************************************
2680
2681 The do_marking_step(time_target_ms, ...) method is the building
2682 block of the parallel marking framework. It can be called in parallel
2683 with other invocations of do_marking_step() on different tasks
2684 (but only one per task, obviously) and concurrently with the
2685 mutator threads, or during remark, hence it eliminates the need
2686 for two versions of the code. When called during remark, it will
2687 pick up from where the task left off during the concurrent marking
2688 phase. Interestingly, tasks are also claimable during evacuation
2689 pauses too, since do_marking_step() ensures that it aborts before
2690 it needs to yield.
2691
2692 The data structures that it uses to do marking work are the
2693 following:
2694
2695 (1) Marking Bitmap. If there are grey objects that appear only
2696 on the bitmap (this happens either when dealing with an overflow
2697 or when the concurrent start pause has simply marked the roots
2698 and didn't push them on the stack), then tasks claim heap
2699 regions whose bitmap they then scan to find grey objects. A
2700 global finger indicates where the end of the last claimed region
2701 is. A local finger indicates how far into the region a task has
2702 scanned. The two fingers are used to determine how to grey an
2703 object (i.e. whether simply marking it is OK, as it will be
2704 visited by a task in the future, or whether it needs to be also
2705 pushed on a stack).
2706
2707 (2) Local Queue. The local queue of the task which is accessed
2708 reasonably efficiently by the task. Other tasks can steal from
2709 it when they run out of work. Throughout the marking phase, a
2710 task attempts to keep its local queue short but not totally
2711 empty, so that entries are available for stealing by other
2712 tasks. Only when there is no more work, a task will totally
2713 drain its local queue.
2714
2715 (3) Global Mark Stack. This handles local queue overflow. During
2716 marking only sets of entries are moved between it and the local
2717 queues, as access to it requires a mutex and more fine-grain
2718 interaction with it which might cause contention. If it
2719 overflows, then the marking phase should restart and iterate
2720 over the bitmap to identify grey objects. Throughout the marking
2721 phase, tasks attempt to keep the global mark stack at a small
2722 length but not totally empty, so that entries are available for
2723 popping by other tasks. Only when there is no more work, tasks
2724 will totally drain the global mark stack.
2725
2726 (4) SATB Buffer Queue. This is where completed SATB buffers are
2727 made available. Buffers are regularly removed from this queue
2728 and scanned for roots, so that the queue doesn't get too
2729 long. During remark, all completed buffers are processed, as
2730 well as the filled in parts of any uncompleted buffers.
2731
2732 The do_marking_step() method tries to abort when the time target
2733 has been reached. There are a few other cases when the
2734 do_marking_step() method also aborts:
2735
2736 (1) When the marking phase has been aborted (after a Full GC).
2737
2738 (2) When a global overflow (on the global stack) has been
2739 triggered. Before the task aborts, it will actually sync up with
2740 the other tasks to ensure that all the marking data structures
2741 (local queues, stacks, fingers etc.) are re-initialized so that
2742 when do_marking_step() completes, the marking phase can
2743 immediately restart.
2744
2745 (3) When enough completed SATB buffers are available. The
2746 do_marking_step() method only tries to drain SATB buffers right
2747 at the beginning. So, if enough buffers are available, the
2748 marking step aborts and the SATB buffers are processed at
2749 the beginning of the next invocation.
2750
(4) To yield. When we have to yield, we abort and yield
2752 right at the end of do_marking_step(). This saves us from a lot
2753 of hassle as, by yielding we might allow a Full GC. If this
2754 happens then objects will be compacted underneath our feet, the
2755 heap might shrink, etc. We save checking for this by just
2756 aborting and doing the yield right at the end.
2757
2758 From the above it follows that the do_marking_step() method should
2759 be called in a loop (or, otherwise, regularly) until it completes.
2760
2761 If a marking step completes without its has_aborted() flag being
2762 true, it means it has completed the current marking phase (and
2763 also all other marking tasks have done so and have all synced up).
2764
2765 A method called regular_clock_call() is invoked "regularly" (in
2766 sub ms intervals) throughout marking. It is this clock method that
2767 checks all the abort conditions which were mentioned above and
2768 decides when the task should abort. A work-based scheme is used to
2769 trigger this clock method: when the number of object words the
2770 marking phase has scanned or the number of references the marking
2771 phase has visited reach a given limit. Additional invocations to
2772 the method clock have been planted in a few other strategic places
2773 too. The initial reason for the clock method was to avoid calling
2774 cpu time gathering too regularly, as it is quite expensive. So,
2775 once it was in place, it was natural to piggy-back all the other
2776 conditions on it too and not constantly check them throughout the code.
2777
2778 If do_termination is true then do_marking_step will enter its
2779 termination protocol.
2780
2781 The value of is_serial must be true when do_marking_step is being
2782 called serially (i.e. by the VMThread) and do_marking_step should
2783 skip any synchronization in the termination and overflow code.
2784 Examples include the serial remark code and the serial reference
2785 processing closures.
2786
2787 The value of is_serial must be false when do_marking_step is
2788 being called by any of the worker threads.
2789 Examples include the concurrent marking code (CMMarkingTask),
2790 the MT remark code, and the MT reference processing closures.
2791
2792 *****************************************************************************/
2793
// Performs one bounded step of marking work for this task; see the large
// comment block above for the full protocol description. time_target_ms
// bounds the intended duration of the step, do_termination enables the
// termination protocol, and is_serial must be true only when called from a
// single-threaded context (it skips barrier/terminator synchronization).
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  // Step duration is measured in thread CPU time; the matching read is at
  // the end of this method.
  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the predicted amount we typically overshoot
  // it (tracked in _marking_step_diff_ms, updated in handle_abort()).
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: scan the current region, keep the queues short, and
  // claim the next region, until we run out of regions or abort.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2903
// Constructs a marking task for the given worker. All per-step state
// (fingers, counters, flags) starts out cleared; it is (re)initialized by
// do_marking_step().
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the time-target overshoot statistic with a small non-zero value
  // (presumably an initial estimate so the first prediction in
  // do_marking_step() is non-trivial — TODO confirm).
  _marking_step_diff_ms.add(0.5);
}
2940
// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

// Address-range column; the header width differs by pointer size.
#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            " %-4s"
#define G1PPRL_TYPE_H_FORMAT          " %4s"
#define G1PPRL_STATE_FORMAT           " %-5s"
#define G1PPRL_STATE_H_FORMAT         " %5s"
#define G1PPRL_BYTE_FORMAT            " %9zu"
#define G1PPRL_BYTE_H_FORMAT          " %9s"
#define G1PPRL_DOUBLE_FORMAT          "%14.1f"
// Columns for the collection set candidate group table.
#define G1PPRL_GCEFF_H_FORMAT         " %14s"
#define G1PPRL_GID_H_FORMAT           " %9s"
#define G1PPRL_GID_FORMAT             " " UINT32_FORMAT_W(9)
#define G1PPRL_LEN_FORMAT             " " UINT32_FORMAT_W(14)
#define G1PPRL_LEN_H_FORMAT           " %14s"
#define G1PPRL_GID_GCEFF_FORMAT       " %14.1f"
#define G1PPRL_GID_LIVENESS_FORMAT    " %9.2f"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2980
// Prints the header of the per-region liveness table for the given marking
// phase. The totals initialized here are accumulated by do_heap_region()
// and reported by the destructor.
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  // All output is gated on the gc+liveness trace log level; skip the
  // header entirely when it is disabled.
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column titles for the per-region table.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // Second header line: the units for each column.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
3028
3029 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3030 if (!log_is_enabled(Trace, gc, liveness)) {
3031 return false;
3032 }
3033
3034 const char* type = r->get_type_str();
3035 HeapWord* bottom = r->bottom();
3036 HeapWord* end = r->end();
3037 size_t capacity_bytes = r->capacity();
3038 size_t used_bytes = r->used();
3039 size_t live_bytes = r->live_bytes();
3040 size_t remset_bytes = r->rem_set()->mem_size();
3041 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3042 const char* remset_type = r->rem_set()->get_short_state_str();
3043 uint cset_group_id = r->rem_set()->has_cset_group()
3044 ? r->rem_set()->cset_group_id()
3045 : G1CSetCandidateGroup::NoRemSetId;
3046
3047 _total_used_bytes += used_bytes;
3048 _total_capacity_bytes += capacity_bytes;
3049 _total_live_bytes += live_bytes;
3050 _total_remset_bytes += remset_bytes;
3051 _total_code_roots_bytes += code_roots_bytes;
3052
3053 // Print a line for this particular region.
3054 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3055 G1PPRL_TYPE_FORMAT
3056 G1PPRL_ADDR_BASE_FORMAT
3057 G1PPRL_BYTE_FORMAT
3058 G1PPRL_BYTE_FORMAT
3059 G1PPRL_STATE_FORMAT
3060 G1PPRL_BYTE_FORMAT
3061 G1PPRL_GID_FORMAT,
3062 type, p2i(bottom), p2i(end),
3063 used_bytes, live_bytes,
3064 remset_type, code_roots_bytes,
3065 cset_group_id);
3066
3067 return false;
3068 }
3069
// Prints the candidate group table and the summary footer with the totals
// accumulated by do_heap_region().
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  // Account for the heap-wide card set free list pool as part of the
  // remembered set footprint.
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // add static memory usages to remembered set sizes
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  // Print the per-group candidate table; this also adds each group's card
  // set size to _total_remset_bytes before the summary below is printed.
  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          percent_of(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_live_bytes),
                          percent_of(_total_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_code_roots_bytes));
}
3099
3100 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3101 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3102 G1PPRL_GID_FORMAT
3103 G1PPRL_LEN_FORMAT
3104 G1PPRL_GID_GCEFF_FORMAT
3105 G1PPRL_GID_LIVENESS_FORMAT
3106 G1PPRL_BYTE_FORMAT
3107 G1PPRL_TYPE_H_FORMAT,
3108 group->group_id(),
3109 group->length(),
3110 group->length() > 0 ? group->gc_efficiency() : 0.0,
3111 group->length() > 0 ? group->liveness_percent() : 0.0,
3112 group->card_set()->mem_size(),
3113 type);
3114 _total_remset_bytes += group->card_set()->mem_size();
3115 }
3116
3117 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3118 for (G1CSetCandidateGroup* group : gl) {
3119 log_cset_candidate_group_add_total(group, type);
3120 }
3121 }
3122
3123 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3124 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3125 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3126 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3127 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3128 G1PPRL_GID_H_FORMAT
3129 G1PPRL_LEN_H_FORMAT
3130 G1PPRL_GCEFF_H_FORMAT
3131 G1PPRL_BYTE_H_FORMAT
3132 G1PPRL_BYTE_H_FORMAT
3133 G1PPRL_TYPE_H_FORMAT,
3134 "groud-id", "num-regions",
3135 "gc-eff", "liveness",
3136 "remset", "type");
3137
3138 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3139 G1PPRL_GID_H_FORMAT
3140 G1PPRL_LEN_H_FORMAT
3141 G1PPRL_GCEFF_H_FORMAT
3142 G1PPRL_BYTE_H_FORMAT
3143 G1PPRL_BYTE_H_FORMAT
3144 G1PPRL_TYPE_H_FORMAT,
3145 "", "",
3146 "(bytes/ms)", "%",
3147 "(bytes)", "");
3148
3149 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3150
3151 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3152
3153 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3154 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3155 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3156 }