1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectionSetChooser.hpp"
34 #include "gc/g1/g1CollectorState.hpp"
35 #include "gc/g1/g1ConcurrentMark.inline.hpp"
36 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
37 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
38 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
39 #include "gc/g1/g1ConcurrentRefine.hpp"
40 #include "gc/g1/g1HeapRegion.inline.hpp"
41 #include "gc/g1/g1HeapRegionManager.hpp"
42 #include "gc/g1/g1HeapRegionPrinter.hpp"
43 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
44 #include "gc/g1/g1HeapRegionSet.inline.hpp"
45 #include "gc/g1/g1HeapVerifier.hpp"
46 #include "gc/g1/g1OopClosures.inline.hpp"
47 #include "gc/g1/g1Policy.hpp"
48 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
49 #include "gc/g1/g1ThreadLocalData.hpp"
50 #include "gc/g1/g1Trace.hpp"
51 #include "gc/shared/gcId.hpp"
52 #include "gc/shared/gcTimer.hpp"
53 #include "gc/shared/gcTraceTime.inline.hpp"
54 #include "gc/shared/gcVMOperations.hpp"
55 #include "gc/shared/partialArraySplitter.inline.hpp"
56 #include "gc/shared/partialArrayState.hpp"
57 #include "gc/shared/partialArrayTaskStats.hpp"
58 #include "gc/shared/referencePolicy.hpp"
59 #include "gc/shared/suspendibleThreadSet.hpp"
60 #include "gc/shared/taskqueue.inline.hpp"
61 #include "gc/shared/taskTerminator.hpp"
62 #include "gc/shared/weakProcessor.inline.hpp"
63 #include "gc/shared/workerPolicy.hpp"
64 #include "jvm.h"
65 #include "logging/log.hpp"
66 #include "memory/allocation.hpp"
67 #include "memory/iterator.hpp"
68 #include "memory/metaspaceUtils.hpp"
69 #include "memory/resourceArea.hpp"
70 #include "memory/universe.hpp"
71 #include "nmt/memTracker.hpp"
72 #include "oops/access.inline.hpp"
73 #include "oops/oop.inline.hpp"
74 #include "runtime/globals_extension.hpp"
75 #include "runtime/handles.inline.hpp"
76 #include "runtime/java.hpp"
77 #include "runtime/orderAccess.hpp"
78 #include "runtime/os.hpp"
79 #include "runtime/prefetch.inline.hpp"
80 #include "runtime/threads.hpp"
81 #include "utilities/align.hpp"
82 #include "utilities/checkedCast.hpp"
83 #include "utilities/formatBuffer.hpp"
84 #include "utilities/growableArray.hpp"
85 #include "utilities/powerOfTwo.hpp"
86
// Default construction leaves the closure unusable until initialize() is
// called with the concurrent mark instance.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
88
// Construct a closure bound to the given (non-null) concurrent mark instance.
G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}
92
// Late binding of the concurrent mark instance for a default-constructed
// closure. May be called at most once.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
98
// Process one marked object found during bitmap iteration at the given
// address. Returns false to abort the iteration if the task has aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  // The address must lie between the task's local finger (already claimed
  // work) and the global finger (work not yet handed out).
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
115
// Construct an empty mark stack; backing memory is reserved later by
// initialize().
G1CMMarkStack::G1CMMarkStack() :
  _chunk_allocator() {
  set_empty();
}
120
// Alignment, in number of task queue entries, that mark stack capacities must
// honor so chunks tile both the OS allocation granularity and the chunk size.
size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}
124
// Size the global mark stack from the MarkStackSize/MarkStackSizeMax flags,
// round the results to chunk and allocation granularity, write the adjusted
// values back ergonomically, and reserve the initial backing memory.
// Returns false if the initial reservation fails.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  // Number of entries that fit into one chunk.
  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  // The allocator's bucket scheme requires a power-of-2 initial chunk count;
  // the maximum must be at least as large as the initial value.
  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Cap capacities so they fit in an int-ranged flag value.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  // Publish the rounded values back to the flags.
  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
153
// Allocate a new chunk from the backing memory, growing the bucket array
// if needed (and allowed). Returns null if at maximum capacity, if growing
// is disabled, or if expansion fails. Safe to call concurrently; only the
// expansion path takes a lock.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  // Cheap racy pre-check to avoid bumping _size past the capacity limit in
  // the common full case.
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  // Claim a unique chunk index.
  size_t cur_idx = _size.fetch_then_add(1u);

  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    // Double-checked locking: re-test the bucket under the lock before
    // expanding, since another thread may have expanded already.
    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  // Placement-new the chunk into its slot within the bucket.
  size_t bucket_idx = get_bucket_index(cur_idx);
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
186
// Construct an empty allocator; all state is set up in initialize().
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }
196
// Set up the bucket directory sized for max_capacity chunks and reserve the
// first bucket (initial_capacity chunks). Returns false if the initial
// reservation fails.
bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
  // Bucket sizing assumes the initial capacity is a power of two.
  guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");

  _min_capacity = initial_capacity;
  _max_capacity = max_capacity;
  _num_buckets = get_bucket(_max_capacity) + 1;

  _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);

  for (size_t i = 0; i < _num_buckets; i++) {
    _buckets[i].store_relaxed(nullptr);
  }

  // Reserve backing memory for the first bucket only; further buckets are
  // reserved on demand.
  size_t new_capacity = bucket_size(0);

  if (!reserve(new_capacity)) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  return true;
}
218
219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
220 if (_capacity == _max_capacity) {
221 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
222 return false;
223 }
224
225 size_t old_capacity = _capacity;
226 desired_capacity = MIN2(desired_capacity, _max_capacity);
227
228 if (reserve(desired_capacity)) {
229 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
230 old_capacity, desired_capacity);
231 return true;
232 }
233 return false;
234 }
235
236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
237 size_t new_capacity = _capacity * 2;
238 return try_expand_to(new_capacity);
239 }
240
241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
242 if (_buckets == nullptr) {
243 return;
244 }
245
246 for (size_t i = 0; i < _num_buckets; i++) {
247 if (_buckets[i].load_relaxed() != nullptr) {
248 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
249 _buckets[i].store_relaxed(nullptr);
250 }
251 }
252
253 FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
254 }
255
// Reserve backing memory so that at least new_capacity chunks are available,
// allocating every bucket between the current capacity and the new one.
// Returns false if any bucket allocation fails; buckets allocated so far are
// kept. Callers serialize via G1MarkStackChunkList_lock (or run single
// threaded during initialization).
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    _capacity += bucket_capacity;
    // release_store pairs with the load_acquire in allocate_new_chunk() so
    // readers that see the bucket pointer also see the initialized memory.
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
290
// Best-effort doubling of the mark stack backing memory; failure is ignored
// here and surfaces later as allocation failure (mark stack overflow).
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}
294
// Push elem onto the front of the given singly-linked chunk list.
// Not thread-safe by itself; callers hold the respective list lock.
void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
  elem->next = list->load_relaxed();
  list->store_relaxed(elem);
}
299
// Push a filled chunk onto the chunk list under its lock and keep the
// element count in sync.
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}
305
// Return an empty chunk to the free list under its lock for later reuse.
void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}
310
311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
312 TaskQueueEntryChunk* result = list->load_relaxed();
313 if (result != nullptr) {
314 list->store_relaxed(list->load_relaxed()->next);
315 }
316 return result;
317 }
318
// Pop a filled chunk from the chunk list under its lock, keeping the element
// count in sync. Returns null if the list is empty.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != nullptr) {
    _chunks_in_chunk_list--;
  }
  return result;
}
327
// Pop a reusable chunk from the free list under its lock, or null if empty.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
332
333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
334 // Get a new chunk.
335 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
336
337 if (new_chunk == nullptr) {
338 // Did not get a chunk from the free list. Allocate from backing memory.
339 new_chunk = _chunk_allocator.allocate_new_chunk();
340
341 if (new_chunk == nullptr) {
342 return false;
343 }
344 }
345
346 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
347
348 add_chunk_to_chunk_list(new_chunk);
349
350 return true;
351 }
352
353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
354 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
355
356 if (cur == nullptr) {
357 return false;
358 }
359
360 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
361
362 add_chunk_to_free_list(cur);
363 return true;
364 }
365
// Discard all contents: both lists are reset and the allocator's chunk
// counter is cleared. Backing memory is retained for reuse.
void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _chunk_list.store_relaxed(nullptr);
  _free_list.store_relaxed(nullptr);
  _chunk_allocator.reset();
}
372
// Construct storage for up to max_regions root MemRegions to be scanned at
// the start of a marking cycle.
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
  _root_regions(MemRegion::create_array(max_regions, mtGC)),
  _max_regions(max_regions),
  _num_root_regions(0),
  _claimed_root_regions(0),
  _scan_in_progress(false),
  _should_abort(false) { }
380
// Release the backing MemRegion array.
G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}
384
// Forget all previously added root regions.
void G1CMRootMemRegions::reset() {
  _num_root_regions.store_relaxed(0);
}
388
389 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
390 assert_at_safepoint();
391 size_t idx = _num_root_regions.fetch_then_add(1u);
392 assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %zu", _max_regions);
393 assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
394 "end (" PTR_FORMAT ")", p2i(start), p2i(end));
395 _root_regions[idx].set_start(start);
396 _root_regions[idx].set_end(end);
397 }
398
// Arm the claim machinery for a new root region scan. Scan is only "in
// progress" if there is at least one region to scan.
void G1CMRootMemRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  _scan_in_progress.store_relaxed(num_root_regions() > 0);

  _claimed_root_regions.store_relaxed(0);
  _should_abort.store_relaxed(false);
}
407
// Claim the next unclaimed root region for scanning, or return null when
// there is no more work or the scan should be aborted. Safe to call from
// multiple workers concurrently.
const MemRegion* G1CMRootMemRegions::claim_next() {
  if (_should_abort.load_relaxed()) {
    // If someone has set the should_abort flag, we return null to
    // force the caller to bail out of their loop.
    return nullptr;
  }

  // Cheap racy pre-check before bumping the claim counter.
  uint local_num_root_regions = num_root_regions();
  if (_claimed_root_regions.load_relaxed() >= local_num_root_regions) {
    return nullptr;
  }

  // The fetch_then_add makes the claim itself race-free; the index may still
  // overshoot when several threads race past the pre-check.
  size_t claimed_index = _claimed_root_regions.fetch_then_add(1u);
  if (claimed_index < local_num_root_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}
426
// Number of root regions added since the last reset().
uint G1CMRootMemRegions::num_root_regions() const {
  return (uint)_num_root_regions.load_relaxed();
}
430
431 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
432 uint local_num_root_regions = num_root_regions();
433 for (uint i = 0; i < local_num_root_regions; i++) {
434 if (_root_regions[i].equals(mr)) {
435 return true;
436 }
437 }
438 return false;
439 }
440
// Mark the scan as finished and wake everybody blocked in
// wait_until_scan_finished().
void G1CMRootMemRegions::notify_scan_done() {
  MutexLocker x(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress.store_relaxed(false);
  G1RootRegionScan_lock->notify_all();
}
446
// Abandon the scan; waiters are released exactly as for a completed scan.
void G1CMRootMemRegions::cancel_scan() {
  notify_scan_done();
}
450
// Complete the root region scan. Unless aborted, every region must have
// been claimed by now.
void G1CMRootMemRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  if (!_should_abort.load_relaxed()) {
    assert(_claimed_root_regions.load_relaxed() >= num_root_regions(),
           "we should have claimed all root regions, claimed %zu, length = %u",
           _claimed_root_regions.load_relaxed(), num_root_regions());
  }

  notify_scan_done();
}
462
// Block until the root region scan completes (or is cancelled). Returns true
// if the caller actually had to consider waiting, false if no scan was in
// progress to begin with.
bool G1CMRootMemRegions::wait_until_scan_finished() {
  // Fast path: nothing to wait for.
  if (!scan_in_progress()) {
    return false;
  }

  {
    MonitorLocker ml(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Re-check under the lock; notify_scan_done() clears the flag while
    // holding it, so no wakeup can be missed.
    while (scan_in_progress()) {
      ml.wait();
    }
  }
  return true;
}
476
// Construct the concurrent mark subsystem. Only cheap state is set up here;
// thread creation, task allocation and array initialization happen later in
// fully_initialize().
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  // Per-region arrays; the elements are placement-constructed in
  // fully_initialize().
  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
531
// Finish initialization deferred from the constructor: create the marking
// thread and worker threads, allocate the mark stack, tasks, task queues and
// per-region arrays. Idempotent.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Placement-construct the per-region arrays that were raw-allocated in the
  // constructor; value-initialization zeroes/nulls every element.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
  ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};

  reset_at_marking_complete();
}
575
576 bool G1ConcurrentMark::in_progress() const {
577 return is_fully_initialized() ? _cm_thread->in_progress() : false;
578 }
579
// Accessor for the shared partial array state manager used by all tasks.
PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
  return _partial_array_state_manager;
}
583
// Accessor for the concurrent mark thread; only valid after
// fully_initialize().
G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
  assert(is_fully_initialized(), "must be");
  return _cm_thread;
}
588
// Reset all global and per-task marking state for the start of a new
// marking cycle.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  // Re-value-initialize the rebuild tops and clear all per-region statistics.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
  for (uint i = 0; i < max_num_regions; i++) {
    _region_mark_stats[i].clear();
  }

  _root_regions.reset();
}
608
609 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
610 uint region_idx = r->hrm_index();
611 for (uint j = 0; j < _max_num_tasks; ++j) {
612 _tasks[j]->clear_mark_stats_cache(region_idx);
613 }
614 _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
615 _region_mark_stats[region_idx].clear();
616 }
617
// Called when a humongous object is eagerly reclaimed during a pause:
// unmark it and, if a mark/rebuild is in progress, forget any statistics
// gathered for the regions it spanned.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
635
// Reset marking state so marking can restart (after an overflow) or so the
// subsystem is in a clean non-marking state.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    // Statistics gathered before the overflow are stale; reset the parts that
    // must be recomputed.
    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Restart claiming from the bottom of the heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
656
// Set the number of tasks that will be active in the next marking phase and
// resize the terminator and both overflow barriers accordingly.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}
667
// Like set_concurrency(), additionally recording whether the upcoming phase
// runs concurrently or inside a safepoint (remark).
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
681
#if TASKQUEUE_STATS
// Log the accumulated task queue and partial array statistics of all tasks,
// then reset them. Compiled in only when TASKQUEUE_STATS is enabled.
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("G1ConcurrentMark Oop Queue");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "G1ConcurrentMark Partial Array Task Stats");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
#endif
699
// Put the global marking state into its quiescent, non-marking configuration.
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}
707
// Destructor kept for completeness; the instance lives for the lifetime of
// the VM and destruction is intentionally a fatal error.
G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
715
// Worker task that clears the concurrent mark bitmap in parallel, in
// fixed-size chunks. When suspendible, workers join the SuspendibleThreadSet
// and yield-check between chunks so the clearing can be interrupted.
class G1ClearBitMapTask : public WorkerTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->in_undo_mark();
    }

    // Yield-check (if suspendible) and report whether marking was aborted.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clear the bitmap for one region, chunk by chunk, yield-checking after
    // each chunk. Returns true (abort region iteration) if marking aborted.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->in_progress(), "invariant");
        assert(!suspendible() || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }
};
821
// Clear the mark bitmap using the given workers. If may_yield, the clearing
// is suspendible and may be interrupted; otherwise it must run at a
// safepoint. The worker count is capped by the number of work chunks.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  // Estimate the bitmap bytes to clear from the committed heap size, then
  // derive the number of chunk-sized work units.
  size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
}
835
836 void G1ConcurrentMark::cleanup_for_next_mark() {
837 // Make sure that the concurrent mark thread looks to still be in
838 // the current cycle.
839 guarantee(is_fully_initialized(), "should be initializd");
840 guarantee(in_progress(), "invariant");
841
842 // We are finishing up the current cycle by clearing the next
843 // marking bitmap and getting it ready for the next cycle. During
844 // this time no other cycle can start. So, let's make sure that this
845 // is the case.
846 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
847
848 clear_bitmap(_concurrent_workers, true);
849
850 reset_partial_array_state_manager();
851
852 // Repeat the asserts from above.
853 guarantee(is_fully_initialized(), "should be initializd");
854 guarantee(in_progress(), "invariant");
855 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
856 }
857
858 void G1ConcurrentMark::reset_partial_array_state_manager() {
859 for (uint i = 0; i < _max_num_tasks; ++i) {
860 _tasks[i]->unregister_partial_array_splitter();
861 }
862
863 partial_array_state_manager()->reset();
864
865 for (uint i = 0; i < _max_num_tasks; ++i) {
866 _tasks[i]->register_partial_array_splitter();
867 }
868 }
869
// Non-yielding bitmap clear for use inside a safepoint (e.g. full GC).
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
  assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible the number of active workers are temporarily
  // increased to include all currently created workers.
  WithActiveWorkers update(workers, workers->created_workers());
  clear_bitmap(workers, false);
}
879
// Batched task run at a safepoint before a concurrent start pause: resets
// global marking state and records top-at-mark-start for every region.
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
889
// Serial sub-task that resets the global marking state; cheap, so a fixed
// worker cost of 1 is reported.
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};
898
// Parallel sub-task that records the top-at-mark-start for every region:
// regions that will be marked through keep their current top, all others are
// reset.
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {

  class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;

  public:
    NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }

    bool do_heap_region(G1HeapRegion* r) override {
      // Only old/humongous regions that survive the pause unmoved are marked
      // through; everything else gets its TAMS reset.
      if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
        _cm->update_top_at_mark_start(r);
      } else {
        _cm->reset_top_at_mark_start(r);
      }
      return false;
    }
  } _region_cl;

  G1HeapRegionClaimer _claimer;
public:
  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }

  double worker_cost() const override {
    // The work done per region is very small, therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
    const double regions_per_thread = 1000;
    return _claimer.n_regions() / regions_per_thread;
  }

  void set_max_workers(uint max_workers) override {
    _claimer.set_n_workers(max_workers);
  }

  void do_work(uint worker_id) override {
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
  }
};
936
// Serial sub-task body; worker_id is unused as this task is registered with
// add_serial_task().
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
941
942 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
943 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
944 add_serial_task(new ResetMarkingStateTask(cm));
945 add_parallel_task(new NoteStartOfMarkTask());
946 };
947
// Safepoint work performed before a concurrent mark cycle begins: start the
// code cache marking cycle, then reset marking state and note the start of
// marking for every region via G1PreConcurrentStartTask.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  // Sanity check: class loader data claim marks must all be cleared before
  // marking starts.
  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  // Record the cause that triggered this marking cycle for tracing.
  _gc_tracer_cm->set_gc_cause(cause);
}
960
// Kick off a full concurrent marking cycle: enable reference discovery and
// SATB queues, prepare root region scanning and wake the marking thread.
void G1ConcurrentMark::start_full_concurrent_cycle() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  rp->start_discovery(false /* always_clear */);

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // concurrent start pause to update the heap end, if the heap expands
  // during it. No need to call it here.

  // Signal the thread to start work.
  cm_thread()->start_full_mark();
}
982
// Start an "undo" cycle: cancel any pending root region scan and signal the
// marking thread to undo the concurrent start work instead of marking.
void G1ConcurrentMark::start_undo_concurrent_cycle() {
  root_regions()->cancel_scan();

  // Signal the thread to start work.
  cm_thread()->start_undo_mark();
}
989
// Mark the concurrent mark thread idle again after a cycle has completed.
void G1ConcurrentMark::notify_concurrent_cycle_completed() {
  cm_thread()->set_idle();
}
993
994 void G1ConcurrentMark::stop() {
995 if (is_fully_initialized()) {
996 cm_thread()->stop();
997 }
998 }
999
1000 /*
1001 * Notice that in the next two methods, we actually leave the STS
1002 * during the barrier sync and join it immediately afterwards. If we
1003 * do not do this, the following deadlock can occur: one thread could
1004 * be in the barrier sync code, waiting for the other thread to also
1005 * sync up, whereas another one could be trying to yield, while also
1006 * waiting for the other threads to sync up too.
1007 *
1008 * Note, however, that this code is also used during remark and in
1009 * this case we should not attempt to leave / enter the STS, otherwise
1010 * we'll either hit an assert (debug / fastdebug) or deadlock
1011 * (product). So we should only leave / enter the STS if we are
1012 * operating concurrently.
1013 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
1019 */
1020
// First rendezvous for all marking tasks when restarting after a mark stack
// overflow. See the comment block above about leaving/re-joining the STS.
// worker_id is currently unused.
void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    // Only leave the STS when operating concurrently; during remark we must
    // stay joined (see comment above).
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}
1037
// Second rendezvous for all marking tasks during an overflow restart.
// worker_id is currently unused.
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1044
1045 class G1CMConcurrentMarkingTask : public WorkerTask {
1046 G1ConcurrentMark* _cm;
1047
1048 public:
1049 void work(uint worker_id) {
1050 ResourceMark rm;
1051
1052 SuspendibleThreadSetJoiner sts_join;
1053
1054 assert(worker_id < _cm->active_tasks(), "invariant");
1055
1056 G1CMTask* task = _cm->task(worker_id);
1057 task->record_start_time();
1058 if (!_cm->has_aborted()) {
1059 do {
1060 task->do_marking_step(G1ConcMarkStepDurationMillis,
1061 true /* do_termination */,
1062 false /* is_serial*/);
1063
1064 _cm->do_yield_check();
1065 } while (!_cm->has_aborted() && task->has_aborted());
1066 }
1067 task->record_end_time();
1068 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1069 }
1070
1071 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1072 WorkerTask("Concurrent Mark"), _cm(cm) { }
1073
1074 ~G1CMConcurrentMarkingTask() { }
1075 };
1076
// Compute the number of worker threads to use for concurrent marking.
// Uses the full _max_concurrent_workers unless dynamic GC thread sizing is
// enabled and ConcGCThreads was left at its default.
uint G1ConcurrentMark::calc_active_marking_workers() {
  uint result = 0;
  if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
    result = _max_concurrent_workers;
  } else {
    result =
      WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
                                                1, /* Minimum workers */
                                                _num_concurrent_workers,
                                                Threads::number_of_non_daemon_threads());
    // Don't scale the result down by scale_concurrent_workers() because
    // that scaling has already gone into "_max_concurrent_workers".
  }
  assert(result > 0 && result <= _max_concurrent_workers,
         "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
         _max_concurrent_workers, result);
  return result;
}
1095
// Scan a single root MemRegion by walking its objects linearly and applying
// G1RootRegionScanClosure to each object's oops. The region is expected to
// start at the containing heap region's TAMS (asserted below).
void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
#ifdef ASSERT
  HeapWord* last = region->last();
  G1HeapRegion* hr = _g1h->heap_region_containing(last);
  assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
         "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
  assert(top_at_mark_start(hr) == region->start(),
         "MemRegion start should be equal to TAMS");
#endif

  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  // Prefetch ahead while walking objects to hide memory latency.
  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = region->start();
  const HeapWord* end = region->end();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = cast_to_oop(curr);
    size_t size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}
1119
1120 class G1CMRootRegionScanTask : public WorkerTask {
1121 G1ConcurrentMark* _cm;
1122 public:
1123 G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1124 WorkerTask("G1 Root Region Scan"), _cm(cm) { }
1125
1126 void work(uint worker_id) {
1127 G1CMRootMemRegions* root_regions = _cm->root_regions();
1128 const MemRegion* region = root_regions->claim_next();
1129 while (region != nullptr) {
1130 _cm->scan_root_region(region, worker_id);
1131 region = root_regions->claim_next();
1132 }
1133 }
1134 };
1135
// Concurrently scan all root regions, assigning at most one worker per root
// region (capped by _max_concurrent_workers).
void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    // Assign one worker to each root-region but subject to the max constraint.
    const uint num_workers = MIN2(root_regions()->num_root_regions(),
                                  _max_concurrent_workers);

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), num_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, num_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}
1158
// Block until root region scanning has completed; forwards the result of
// G1CMRootMemRegions::wait_until_scan_finished().
bool G1ConcurrentMark::wait_until_root_region_scan_finished() {
  return root_regions()->wait_until_scan_finished();
}
1162
// Register the [TAMS, top) part of the given region as a root region.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1166
// Whether the [TAMS, top) part of the given region is registered as a root region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1170
// Abort any in-progress root region scan and wait for it to finish.
void G1ConcurrentMark::root_region_scan_abort_and_wait() {
  root_regions()->abort();
  root_regions()->wait_until_scan_finished();
}
1175
// Register the start of a concurrent cycle with the timer and tracer and
// record the pre-GC heap state.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1183
// Number of completed mark cycles so far; incremented in concurrent_cycle_end().
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1187
// Finish a concurrent cycle: record the post-GC heap state, bump the
// completed-cycle counter if marking finished, report an abort if one
// happened, and close out the timer/tracer.
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->collector_state()->set_clear_bitmap_in_progress(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1206
// Run the main concurrent marking phase: size the worker set, set up the
// termination protocol, then run G1CMConcurrentMarkingTask on the workers.
void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow.store_relaxed(false);

  uint active_workers = calc_active_marking_workers();

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->set_active_workers(active_workers);
  log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());

  _num_concurrent_workers = active_workers;

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}
1227
// Map a VerifyLocation value to a printable string. The entries are indexed
// by the enum's underlying value, so their order must match the
// VerifyLocation enumerators.
const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
  static const char* location_strings[] = { "Remark Before",
                                            "Remark After",
                                            "Remark Overflow",
                                            "Cleanup Before",
                                            "Cleanup After" };
  return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
}
1236
// Run optional heap verification during a marking-related pause. Always
// verifies the region sets; full verification and bitmap checking only run
// when VerifyDuringGC is enabled for the given verification type.
void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
                                           VerifyLocation location) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  const char* caller = verify_location_string(location);

  if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(VerifyOption::G1UseConcMarking, buffer);

    // Only check bitmap in Remark, and not at After-Verification because the regions
    // already have their TAMS'es reset.
    if (location != VerifyLocation::RemarkAfter) {
      verifier->verify_bitmap_clear(true /* above_tams_only */);
    }
  }
}
1261
// Liveness predicate used for object count reporting after remark (see
// report_object_count_after_gc in remark()): an object is alive if the heap
// does not consider it dead.
class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
  G1CollectedHeap* _g1h;
public:
  G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}

  bool do_object_b(oop obj) {
    return !_g1h->is_obj_dead(obj);
  }
};
1271
// The Remark pause. Finalizes concurrent marking; if the global mark stack
// did not overflow, processes weak references, unloads classes/code, flushes
// per-task state, selects regions for remembered set rebuild and reclaims
// empty regions. On overflow, concurrent marking is restarted instead.
void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work();

    // Unload Klasses, String, Code Cache, etc.
    if (ClassUnloadingWithConcurrentMark) {
      G1CMIsAliveClosure is_alive(this);
      _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
    }

    SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
      flush_all_task_caches();
    }

    // All marking completed. Check bitmap now as we will start to reset TAMSes
    // in parallel below so that we can not do this in the After-Remark verification.
    _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);

    {
      GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);

      G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
      uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
                                    _g1h->workers()->active_workers());
      log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
      _g1h->workers()->run_task(&cl, num_workers);

      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_committed_regions(), cl.total_selected_for_rebuild());

      _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);

      if (_needs_remembered_set_rebuild) {
        // Prune rebuild candidates based on G1HeapWastePercent.
        // Improves rebuild time in addition to remembered set memory usage.
        G1CollectionSetChooser::build(_g1h->workers(), _g1h->num_committed_regions(), _g1h->policy()->candidates());
      }
    }

    if (log_is_enabled(Trace, gc, liveness)) {
      G1PrintRegionLivenessInfoClosure cl("Post-Marking");
      _g1h->heap_region_iterate(&cl);
    }

    // Potentially, some empty-regions have been reclaimed; make this a
    // "collection" so that pending allocation can retry before attempting a
    // GC pause.
    _g1h->increment_total_collections();

    // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
    // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
    // triggered, it likely means there are very few regular GCs, making resizing
    // based on gc heuristics less effective.
    if (_g1h->last_gc_was_periodic()) {
      _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state (except bitmaps) since marking completed.
    reset_at_marking_complete();

    G1CollectedHeap::finish_codecache_marking_cycle();

    {
      GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
      G1ObjectCountIsAliveClosure is_alive(_g1h);
      _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
    }
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow.store_relaxed(true);

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  _g1h->update_perf_counter_cpu_time();

  policy->record_concurrent_mark_remark_end();
}
1396
// Recompute metaspace and heap monitoring sizes after marking has reclaimed
// regions.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1408
// Region closure advancing each region's remembered set tracking state
// once the remembered set rebuild has finished.
class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
  G1CollectedHeap* _g1h;

public:
  G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  bool do_heap_region(G1HeapRegion* r) override {
    // Update the remset tracking state from updating to complete
    // if remembered sets have been rebuilt.
    _g1h->policy()->remset_tracker()->update_after_rebuild(r);
    // Never abort the iteration.
    return false;
  }
};
1422
// The Cleanup pause. Finalizes remembered set tracking state after the
// concurrent rebuild phase (if one was needed) and records statistics.
void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);

  if (needs_remembered_set_rebuild()) {
    // Update the remset tracking information as well as marking all regions
    // as fully parsable.
    GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
    G1UpdateRegionsAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  } else {
    log_debug(gc, phases)("No Remembered Sets to update after rebuild");
  }

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);

  // Local statistics
  _cleanup_times.add((os::elapsedTime() - start) * 1000.0);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
    policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
  }
}
1458
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1460 // Uses the G1CMTask associated with a worker thread (for serial reference
1461 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1462 // trace referent objects.
1463 //
1464 // Using the G1CMTask and embedded local queues avoids having the worker
1465 // threads operating on the global mark stack. This reduces the risk
1466 // of overflowing the stack - which we would rather avoid at this late
1467 // state. Also using the tasks' local queues removes the potential
1468 // of the workers interfering with each other that could occur if
1469 // operating on the global stack.
1470
// See the file-level comment above for the rationale of this closure.
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  // Drain the task's queues after every _ref_counter_limit references.
  uint _ref_counter_limit;
  uint _ref_counter;
  bool _is_serial;
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    // If the global mark stack already overflowed, further marking work is
    // pointless; the phase will be restarted or fail.
    if (_cm->has_overflown()) {
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false      /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1525
1526 // 'Drain' oop closure used by both serial and parallel reference processing.
1527 // Uses the G1CMTask associated with a given worker thread (for serial
1528 // reference processing the G1CMtask for worker 0 is used). Calls the
1529 // do_marking_step routine, with an unbelievably large timeout value,
1530 // to drain the marking data structures of the remaining entries
1531 // added by the 'keep alive' oop closure above.
1532
// See the file-level comment above for the rationale of this closure.
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial;
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1568
// Proxy task that hands reference processing work to the marking tasks.
// Supports both the single-threaded and multi-threaded reference processing
// models; in the single-threaded model, task 0 is used for all work.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h;
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In single-threaded mode everything runs on marking task 0.
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1597
// Process weak references and other weak roots discovered during marking.
// A global mark stack overflow during this phase is fatal, because the
// liveness information can no longer be trusted afterwards.
void G1ConcurrentMark::weak_refs_work() {
  ResourceMark rm;

  {
    GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Prefer to grow the stack until the max capacity.
    _global_mark_stack.set_should_grow();

    // Parallel processing task executor.
    G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());

    // Process the weak references.
    const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
    _gc_tracer_cm->report_gc_reference_stats(stats);
    pt.print_all_references();

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(has_overflown() || _global_mark_stack.is_empty(),
           "Mark stack should be empty (unless it has overflown)");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive and the contents of the heap if the marking stack
    // overflowed while processing references. Exit the VM.
    fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
          "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
          "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
          _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
    return;
  }

  assert(_global_mark_stack.is_empty(), "Marking should have completed");

  {
    GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
    G1CMIsAliveClosure is_alive(this);
    WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
  }
}
1649
// Yield closure letting reference precleaning abort when marking aborts,
// and additionally perform a yield check at fine granularity.
class G1PrecleanYieldClosure : public YieldClosure {
  G1ConcurrentMark* _cm;

public:
  G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }

  virtual bool should_return() {
    return _cm->has_aborted();
  }

  virtual bool should_return_fine_grain() {
    // Give safepoints a chance before checking for abort.
    _cm->do_yield_check();
    return _cm->has_aborted();
  }
};
1665
// Concurrently preclean discovered references to reduce work done during
// the remark pause. Runs single-threaded.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  // Single-threaded, so concurrency level 1.
  set_concurrency_and_phase(1, true);

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1685
// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  G1CollectedHeap* _g1h; // NOTE(review): unused in the code visible here — confirm whether still needed.

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    oop const obj = cast_to_oop(entry);
    _task->make_reference_grey(obj);
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  // Apply do_entry to every entry of the given buffer.
  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};
1711
// Thread closure flushing each thread's partial SATB buffer into the global
// queue set so it can be handled as a completed buffer during remark.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1SATBMarkQueueSet& _qset;

public:
  // Both parameters are unused here; the qset is obtained from G1BarrierSet.
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _qset(G1BarrierSet::satb_mark_queue_set()) {}

  void do_thread(Thread* thread) {
    // Transfer any partial buffer to the qset for completed buffer processing.
    _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
  }
};
1724
// Worker task for the remark pause. Each worker first flushes thread-local
// SATB buffers, then drains the marking data structures to completion
// (or until the global mark stack overflows).
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    do {
      task->do_marking_step(1000000000.0 /* something very large */,
                            true         /* do_termination       */,
                            false        /* is_serial            */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    // Reset the termination protocol for the given number of workers.
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1755
// Finish marking during the remark pause: drain remaining SATB buffers and
// marking stacks using all active worker threads.
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  // Unless we overflowed, all SATB buffers must have been processed by now.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1786
1787 void G1ConcurrentMark::flush_all_task_caches() {
1788 size_t hits = 0;
1789 size_t misses = 0;
1790 for (uint i = 0; i < _max_num_tasks; i++) {
1791 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1792 hits += stats.first;
1793 misses += stats.second;
1794 }
1795 size_t sum = hits + misses;
1796 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1797 hits, misses, percent_of(hits, sum));
1798 }
1799
1800 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1801 assert_at_safepoint();
1802 _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1803 }
1804
// Lock-free claiming of the next region to scan: atomically advance the
// global finger past a region with a CAS. Returns the claimed region, or
// null when the CAS lost (caller retries), the claimed region has nothing
// to scan (TAMS == bottom), or the finger reached the end of the heap.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        // There is something to scan between bottom and TAMS.
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // CAS lost (or region was null). Read the finger again and retry from
      // the new position.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  return nullptr;
}
1850
1851 #ifndef PRODUCT
// Debug-only functor: verifies that a mark stack / task queue entry is a
// valid oop (or an in-heap partial array) and does not point into the
// (optional) collection set.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;  // Name of the data structure being verified, for messages.
  int _info;           // Extra context (e.g. queue index); -1 when not applicable.

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // Partial arrays are only checked for being inside the heap.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1879
// Debug-only verification that no marking data structure (global mark stack,
// task queues, global and per-task fingers) refers into the collection set.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    // No marking data to verify outside of a marking/rebuild cycle.
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1922 #endif // PRODUCT
1923
1924 void G1ConcurrentMark::rebuild_and_scrub() {
1925 if (!needs_remembered_set_rebuild()) {
1926 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1927 }
1928
1929 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1930 }
1931
// Print per-task marking statistics, separated by rule lines. No-op unless
// debug-level gc+stats logging is enabled.
void G1ConcurrentMark::print_stats() {
  if (!log_is_enabled(Debug, gc, stats)) {
    return;
  }
  log_debug(gc, stats)("---------------------------------------------------------------------");
  for (size_t i = 0; i < _num_active_tasks; ++i) {
    _tasks[i]->print_stats();
    log_debug(gc, stats)("---------------------------------------------------------------------");
  }
}
1942
// Abort a concurrent marking cycle (called by Full GC). Returns true if an
// in-progress cycle was actually aborted, i.e. marking state was reset,
// marking threads were told to abort and SATB queues were abandoned and
// deactivated.
bool G1ConcurrentMark::concurrent_cycle_abort() {
  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. So let's wait until
  // they are done. By telling them to abort, they should complete
  // early.
  root_region_scan_abort_and_wait();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->in_progress() && !cm_thread()->should_terminate())) {
    return false;
  }

  // Clear marking state so a future cycle can start from scratch.
  reset_marking_for_restart();

  abort_marking_threads();

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // Drop all pending SATB entries; they are meaningless after an abort.
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1977
// Signal all marking tasks to abort and release any threads parked at the
// two overflow barrier syncs so they can observe the abort promptly.
void G1ConcurrentMark::abort_marking_threads() {
  assert(!_root_regions.scan_in_progress(), "still doing root region scan");
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
1984
1985 double G1ConcurrentMark::worker_threads_cpu_time_s() {
1986 class CountCpuTimeThreadClosure : public ThreadClosure {
1987 public:
1988 jlong _total_cpu_time;
1989
1990 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
1991
1992 void do_thread(Thread* t) {
1993 _total_cpu_time += os::thread_cpu_time(t);
1994 }
1995 } cl;
1996
1997 threads_do(&cl);
1998
1999 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2000 }
2001
2002 static void print_ms_time_info(const char* prefix, const char* name,
2003 NumberSeq& ns) {
2004 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2005 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2006 if (ns.num() > 0) {
2007 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2008 prefix, ns.sd(), ns.maximum());
2009 }
2010 }
2011
// Trace-level summary of the concurrent marking time statistics: remark
// (split into final-mark and weak-reference parts), cleanup, total
// stop-the-world and total concurrent times.
void G1ConcurrentMark::print_summary_info() {
  Log(gc, marking) log;
  if (!log.is_trace()) {
    return;
  }

  log.trace(" Concurrent marking:");
  if (!is_fully_initialized()) {
    // No statistics exist before the first cycle initializes the state.
    log.trace(" has not been initialized yet");
    return;
  }
  print_ms_time_info(" ", "remarks", _remark_times);
  {
    // Sub-phases of remark.
    print_ms_time_info(" ", "final marks", _remark_mark_times);
    print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info(" ", "cleanups", _cleanup_times);
  log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
            _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
  log.trace(" Total stop_world time = %8.2f s.",
            (_remark_times.sum() + _cleanup_times.sum())/1000.0);
  log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
            cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
}
2037
2038 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2039 if (is_fully_initialized()) { // they are initialized late
2040 tc->do_thread(_cm_thread);
2041 _concurrent_workers->threads_do(tc);
2042 }
2043 }
2044
// Print the address and contents summary of the marking bitmap (debugging).
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2049
2050 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2051 ReferenceProcessor* result = g1h->ref_processor_cm();
2052 assert(result != nullptr, "CM reference processor should not be null");
2053 return result;
2054 }
2055
// Marking oop closure: claims metadata strongly and discovers references via
// the concurrent-marking reference processor.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2061
// Make the given (non-null) region the task's current region: position the
// task finger at its bottom and compute the scan limit (TAMS).
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
         "claim_region() should have filtered out null regions");
  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}
2069
// Recompute _region_limit (and possibly _finger) for the current region from
// its TAMS. Needed because an evacuation pause may have changed the region
// underneath us since we claimed it.
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2101
// Stop working on the current region and forget the per-region state.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2106
// Reset the per-region scanning state; all three fields being null means the
// task currently holds no region.
void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = nullptr;
  _finger = nullptr;
  _region_limit = nullptr;
}
2114
2115 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2116 if (cm_oop_closure == nullptr) {
2117 assert(_cm_oop_closure != nullptr, "invariant");
2118 } else {
2119 assert(_cm_oop_closure == nullptr, "invariant");
2120 }
2121 _cm_oop_closure = cm_oop_closure;
2122 }
2123
// Reset the task's state for a new marking cycle against the given bitmap:
// drop region state, zero timing/call counters and the mark stats cache.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap = mark_bitmap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;

  _mark_stats_cache.reset();
}
2135
// Prepare the task for a marking restart (after overflow): drop region state,
// empty the local queue and (in builds with task queue stats) reset counters.
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2142
// (Re-)construct the embedded PartialArraySplitter in place via placement
// new, binding it to the current cycle's partial array state manager. Paired
// with the explicit destructor call in unregister_partial_array_splitter().
void G1CMTask::register_partial_array_splitter() {

  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks(),
                                                        ObjArrayMarkingStride);
}
2149
// Explicitly destroy the embedded splitter constructed by placement new in
// register_partial_array_splitter().
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2153
2154 bool G1CMTask::should_exit_termination() {
2155 if (!regular_clock_call()) {
2156 return true;
2157 }
2158
2159 // This is called when we are in the termination protocol. We should
2160 // quit if, for some reason, this task wants to abort or the global
2161 // stack is not empty (this means that we can get work from it).
2162 return !_cm->mark_stack_empty() || has_aborted();
2163 }
2164
// Called when one of the scanning limits is hit: run the regular clock and
// abort the marking step if it says so.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2171
// The periodic "clock" check performed during marking. Returns false when
// the current marking step should be aborted: on mark stack overflow, Full
// GC abort, a pending yield request, an exceeded time quota, or available
// completed SATB buffers. Returns true to continue marking.
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (3) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (4) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2225
2226 void G1CMTask::recalculate_limits() {
2227 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2228 _words_scanned_limit = _real_words_scanned_limit;
2229
2230 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2231 _refs_reached_limit = _real_refs_reached_limit;
2232 }
2233
// Pull the scanning limits forward (to 1/4 of a period past the last clock
// call) so that the next clock call happens sooner.
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2243
// Move up to one chunk's worth of entries from the local task queue onto the
// global mark stack. A partially-filled chunk is terminated with a null
// entry. Sets the abort flag if the global stack push fails (overflow).
void G1CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  size_t n = 0;
  G1TaskQueueEntry task_entry;
  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
    buffer[n] = task_entry;
    ++n;
  }
  if (n < G1CMMarkStack::EntriesPerChunk) {
    // Null-terminate the chunk so readers know where the valid entries end.
    buffer[n] = G1TaskQueueEntry();
  }

  if (n > 0) {
    if (!_cm->mark_stack_push(buffer)) {
      // Global mark stack overflowed; marking has to be restarted.
      set_has_aborted();
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}
2268
// Pop one chunk from the global mark stack and push its (null-terminated)
// entries onto the local task queue. Returns false if the global stack was
// empty.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    if (task_entry.is_null()) {
      // Null entry terminates a partially-filled chunk.
      break;
    }
    assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}
2295
2296 void G1CMTask::drain_local_queue(bool partially) {
2297 if (has_aborted()) {
2298 return;
2299 }
2300
2301 // Decide what the target size is, depending whether we're going to
2302 // drain it partially (so that other tasks can steal if they run out
2303 // of things to do) or totally (at the very end).
2304 uint target_size;
2305 if (partially) {
2306 target_size = GCDrainStackTargetSize;
2307 } else {
2308 target_size = 0;
2309 }
2310
2311 if (_task_queue->size() > target_size) {
2312 G1TaskQueueEntry entry;
2313 bool ret = _task_queue->pop_local(entry);
2314 while (ret) {
2315 process_entry(entry, false /* stolen */);
2316 if (_task_queue->size() <= target_size || has_aborted()) {
2317 ret = false;
2318 } else {
2319 ret = _task_queue->pop_local(entry);
2320 }
2321 }
2322 }
2323 }
2324
// Begin sliced processing of a large objArray: register it with the splitter
// (which may enqueue partial array states for the remainder), mark its klass
// if metadata processing is required, and scan the initial chunk. Returns
// the number of words accounted as scanned (initial chunk plus header).
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = objArrayOop(obj);
  size_t array_length = obj_array->length();

  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
}
2343
// Process one chunk of a partially-scanned objArray referenced by the given
// task queue entry. Returns the number of words scanned for accounting.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = objArrayOop(state->source());

  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);
  return heap_word_size((claim._end - claim._start) * heapOopSize);
}
2355
// Drain the global mark stack (via chunks moved to the local queue) either
// down to a partial target size or completely.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2386
2387 // SATB Queue has several assumptions on whether to call the par or
2388 // non-par versions of the methods. this is why some of the code is
2389 // replicated. We should really get rid of the single-threaded version
2390 // of the code to simplify things.
// Claim and process completed SATB buffers until none remain or the task
// has to abort. Guarded by _draining_satb_buffers so the regular clock does
// not immediately abort us for the very buffers we are processing.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    abort_marking_if_regular_check_fail();
  }

  // Can't assert qset is empty here, even if not aborted. If concurrent,
  // some other thread might be adding to the queue. If not concurrent,
  // some other thread might have won the race for the last buffer, but
  // has not yet decremented the count.

  _draining_satb_buffers = false;

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
2423
// Drop the cached mark statistics entry for the given region.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2427
// Flush all cached mark statistics to the global table; returns the cache's
// (hits, misses) counters.
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2431
// Debug logging of this task's marking statistics: call count, elapsed and
// termination time, per-step time distribution and mark stats cache ratio.
void G1CMTask::print_stats() {
  log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
  log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                       _elapsed_time_ms, _termination_time_ms);
  log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
                       _step_times_ms.num(),
                       _step_times_ms.avg(),
                       _step_times_ms.sd(),
                       _step_times_ms.maximum(),
                       _step_times_ms.sum());
  size_t const hits = _mark_stats_cache.hits();
  size_t const misses = _mark_stats_cache.misses();
  log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
                       hits, misses, percent_of(hits, hits + misses));
}
2447
// Try to steal a single entry from some other worker's task queue on behalf
// of the given worker; returns true and fills task_entry on success.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2451
// Scan (the remainder of) the task's current region by iterating the mark
// bitmap from the task finger to the region limit, handling the empty and
// humongous special cases, and giving up the region when done or moving the
// finger forward when the iteration aborts mid-region.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      // The last object reached the region limit; we are done with it.
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2524
// Keep trying to claim a fresh region for this task until one is obtained,
// the task aborts, or there are no regions left to claim.
void G1CMTask::claim_new_region() {
  // Read the note on the claim_region() method on why it might
  // return null with potentially more regions available for
  // claiming and why we have to check out_of_regions() to determine
  // whether we're done or not.
  while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
    // We are going to try to claim a new region. We should have
    // given up on the previous one.
    // Separated the asserts so that we know which one fires.
    assert(_curr_region == nullptr, "invariant");
    assert(_finger == nullptr, "invariant");
    assert(_region_limit == nullptr, "invariant");
    G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
    if (claimed_region != nullptr) {
      // Yes, we managed to claim one
      setup_for_region(claimed_region);
      assert(_curr_region == claimed_region, "invariant");
    }
    // It is important to call the regular clock here. It might take
    // a while to claim a region if, for example, we hit a large
    // block of empty regions. So we need to call the regular clock
    // method once round the loop to make sure it's called
    // frequently enough.
    abort_marking_if_regular_check_fail();
  }
}
2551
2552 void G1CMTask::attempt_stealing() {
2553 // We cannot check whether the global stack is empty, since other
2554 // tasks might be pushing objects to it concurrently.
2555 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2556 "only way to reach here");
2557 while (!has_aborted()) {
2558 G1TaskQueueEntry entry;
2559 if (_cm->try_stealing(_worker_id, entry)) {
2560 process_entry(entry, true /* stolen */);
2561
2562 // And since we're towards the end, let's totally drain the
2563 // local queue and global stack.
2564 drain_local_queue(false);
2565 drain_global_stack(false);
2566 } else {
2567 break;
2568 }
2569 }
2570 }
2571
// Enter the termination protocol. If termination completes, all marking data
// structures are verified empty; otherwise the task aborts itself so the
// marking step is restarted to pick up the remaining work.
void G1CMTask::attempt_termination(bool is_serial) {
  // We cannot check whether the global stack is empty, since other
  // tasks might be concurrently pushing objects on it.
  // Separated the asserts so that we know which one fires.
  assert(_cm->out_of_regions(), "only way to reach here");
  assert(_task_queue->size() == 0, "only way to reach here");
  double termination_start_time_ms = os::elapsedTime() * 1000.0;

  // The G1CMTask class also extends the TerminatorTerminator class,
  // hence its should_exit_termination() method will also decide
  // whether to exit the termination protocol or not.
  bool finished = (is_serial ||
                   _cm->terminator()->offer_termination(this));
  _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);

  if (finished) {
    // We're all done.

    // We can now guarantee that the global stack is empty, since
    // all other tasks have finished. We separated the guarantees so
    // that, if a condition is false, we can immediately find out
    // which one.
    guarantee(_cm->out_of_regions(), "only way to reach here");
    guarantee(_cm->mark_stack_empty(), "only way to reach here");
    guarantee(_task_queue->size() == 0, "only way to reach here");
    guarantee(!_cm->has_overflown(), "only way to reach here");
    guarantee(!has_aborted(), "should never happen if termination has completed");
  } else {
    // Apparently there's more work to do. Let's abort this task. We
    // will restart it and hopefully we can find more things to do.
    set_has_aborted();
  }
}
2605
// Post-processing after do_marking_step() has aborted. Records timing
// statistics when the abort was a time-out and, when the abort was caused
// by a global mark stack overflow, coordinates with the other tasks (via
// two sync barriers) to safely re-initialize the marking data structures.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  // Nothing more to do unless the abort was caused by a global overflow.
  if (!_cm->has_overflown()) {
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
      // this method to abort the pause and restart concurrent marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2659
2660 /*****************************************************************************
2661
2662 The do_marking_step(time_target_ms, ...) method is the building
2663 block of the parallel marking framework. It can be called in parallel
2664 with other invocations of do_marking_step() on different tasks
2665 (but only one per task, obviously) and concurrently with the
2666 mutator threads, or during remark, hence it eliminates the need
2667 for two versions of the code. When called during remark, it will
2668 pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.
2672
2673 The data structures that it uses to do marking work are the
2674 following:
2675
2676 (1) Marking Bitmap. If there are grey objects that appear only
2677 on the bitmap (this happens either when dealing with an overflow
2678 or when the concurrent start pause has simply marked the roots
2679 and didn't push them on the stack), then tasks claim heap
2680 regions whose bitmap they then scan to find grey objects. A
2681 global finger indicates where the end of the last claimed region
2682 is. A local finger indicates how far into the region a task has
2683 scanned. The two fingers are used to determine how to grey an
2684 object (i.e. whether simply marking it is OK, as it will be
2685 visited by a task in the future, or whether it needs to be also
2686 pushed on a stack).
2687
2688 (2) Local Queue. The local queue of the task which is accessed
2689 reasonably efficiently by the task. Other tasks can steal from
2690 it when they run out of work. Throughout the marking phase, a
2691 task attempts to keep its local queue short but not totally
2692 empty, so that entries are available for stealing by other
2693 tasks. Only when there is no more work, a task will totally
2694 drain its local queue.
2695
2696 (3) Global Mark Stack. This handles local queue overflow. During
2697 marking only sets of entries are moved between it and the local
2698 queues, as access to it requires a mutex and more fine-grain
2699 interaction with it which might cause contention. If it
2700 overflows, then the marking phase should restart and iterate
2701 over the bitmap to identify grey objects. Throughout the marking
2702 phase, tasks attempt to keep the global mark stack at a small
2703 length but not totally empty, so that entries are available for
2704 popping by other tasks. Only when there is no more work, tasks
2705 will totally drain the global mark stack.
2706
2707 (4) SATB Buffer Queue. This is where completed SATB buffers are
2708 made available. Buffers are regularly removed from this queue
2709 and scanned for roots, so that the queue doesn't get too
2710 long. During remark, all completed buffers are processed, as
2711 well as the filled in parts of any uncompleted buffers.
2712
2713 The do_marking_step() method tries to abort when the time target
2714 has been reached. There are a few other cases when the
2715 do_marking_step() method also aborts:
2716
2717 (1) When the marking phase has been aborted (after a Full GC).
2718
2719 (2) When a global overflow (on the global stack) has been
2720 triggered. Before the task aborts, it will actually sync up with
2721 the other tasks to ensure that all the marking data structures
2722 (local queues, stacks, fingers etc.) are re-initialized so that
2723 when do_marking_step() completes, the marking phase can
2724 immediately restart.
2725
2726 (3) When enough completed SATB buffers are available. The
2727 do_marking_step() method only tries to drain SATB buffers right
2728 at the beginning. So, if enough buffers are available, the
2729 marking step aborts and the SATB buffers are processed at
2730 the beginning of the next invocation.
2731
    (4) To yield. When we have to yield, we abort and perform the
    yield right at the end of do_marking_step(). This saves us a lot
    of hassle because, by yielding, we might allow a Full GC. If this
    happens then objects will be compacted underneath our feet, the
    heap might shrink, etc. We avoid checking for this by just
    aborting and doing the yield right at the end.
2738
2739 From the above it follows that the do_marking_step() method should
2740 be called in a loop (or, otherwise, regularly) until it completes.
2741
2742 If a marking step completes without its has_aborted() flag being
2743 true, it means it has completed the current marking phase (and
2744 also all other marking tasks have done so and have all synced up).
2745
    A method called regular_clock_call() is invoked "regularly" (in
    sub ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reaches a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid gathering
    CPU time too frequently, as doing so is quite expensive. So, once
    it was in place, it was natural to piggy-back all the other
    conditions on it too and not constantly check them throughout the code.
2758
2759 If do_termination is true then do_marking_step will enter its
2760 termination protocol.
2761
2762 The value of is_serial must be true when do_marking_step is being
2763 called serially (i.e. by the VMThread) and do_marking_step should
2764 skip any synchronization in the termination and overflow code.
2765 Examples include the serial remark code and the serial reference
2766 processing closures.
2767
2768 The value of is_serial must be false when do_marking_step is
2769 being called by any of the worker threads.
2770 Examples include the concurrent marking code (CMMarkingTask),
2771 the MT remark code, and the MT reference processing closures.
2772
2773 *****************************************************************************/
2774
// The core marking step; see the large comment block above for the full
// description of the protocol. Runs until it either finishes marking
// (together with all the other tasks) or aborts for one of the reasons
// listed above (time-out, overflow, SATB pressure, yield request).
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the predicted overshoot, based on how far
  // off target previous steps were.
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main loop: scan the current region (if any), partially drain the
  // queues, and claim a new region, until we run out of regions or abort.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2884
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the step-time-difference predictor with a small non-zero value
  // so the very first prediction in do_marking_step() is reasonable.
  _marking_step_diff_ms.add(0.5);
}
2921
// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

// Address ranges print as "start-end"; the header width therefore
// depends on pointer size (wider on LP64).
#define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_STATE_FORMAT " %-5s"
#define G1PPRL_STATE_H_FORMAT " %5s"
#define G1PPRL_BYTE_FORMAT " %9zu"
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT "%14.1f"
#define G1PPRL_GCEFF_H_FORMAT " %14s"
#define G1PPRL_GID_H_FORMAT " %9s"
#define G1PPRL_GID_FORMAT " " UINT32_FORMAT_W(9)
#define G1PPRL_LEN_FORMAT " " UINT32_FORMAT_W(14)
#define G1PPRL_LEN_H_FORMAT " %14s"
#define G1PPRL_GID_GCEFF_FORMAT " %14.1f"
#define G1PPRL_GID_LIVENESS_FORMAT " %9.2f"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": %zu"
#define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2961
// Prints the header of the per-region liveness table (one row per region is
// then emitted by do_heap_region(), the footer by the destructor). All
// output goes to the gc+liveness Trace log; if that is disabled this
// closure does nothing.
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column names...
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // ...and their units, on a second header line.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
3009
3010 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3011 if (!log_is_enabled(Trace, gc, liveness)) {
3012 return false;
3013 }
3014
3015 const char* type = r->get_type_str();
3016 HeapWord* bottom = r->bottom();
3017 HeapWord* end = r->end();
3018 size_t capacity_bytes = r->capacity();
3019 size_t used_bytes = r->used();
3020 size_t live_bytes = r->live_bytes();
3021 size_t remset_bytes = r->rem_set()->mem_size();
3022 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3023 const char* remset_type = r->rem_set()->get_short_state_str();
3024 uint cset_group_id = r->rem_set()->has_cset_group()
3025 ? r->rem_set()->cset_group_id()
3026 : G1CSetCandidateGroup::NoRemSetId;
3027
3028 _total_used_bytes += used_bytes;
3029 _total_capacity_bytes += capacity_bytes;
3030 _total_live_bytes += live_bytes;
3031 _total_remset_bytes += remset_bytes;
3032 _total_code_roots_bytes += code_roots_bytes;
3033
3034 // Print a line for this particular region.
3035 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3036 G1PPRL_TYPE_FORMAT
3037 G1PPRL_ADDR_BASE_FORMAT
3038 G1PPRL_BYTE_FORMAT
3039 G1PPRL_BYTE_FORMAT
3040 G1PPRL_STATE_FORMAT
3041 G1PPRL_BYTE_FORMAT
3042 G1PPRL_GID_FORMAT,
3043 type, p2i(bottom), p2i(end),
3044 used_bytes, live_bytes,
3045 remset_type, code_roots_bytes,
3046 cset_group_id);
3047
3048 return false;
3049 }
3050
// Prints the candidate-group table and the summary footer. Must run after
// all do_heap_region() calls, since it reports the accumulated totals.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // add static memory usages to remembered set sizes
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  // Note: this also adds the groups' card set sizes to _total_remset_bytes,
  // so it must be called before the summary below is printed.
  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          percent_of(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_live_bytes),
                          percent_of(_total_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_code_roots_bytes));
}
3080
3081 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3082 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3083 G1PPRL_GID_FORMAT
3084 G1PPRL_LEN_FORMAT
3085 G1PPRL_GID_GCEFF_FORMAT
3086 G1PPRL_GID_LIVENESS_FORMAT
3087 G1PPRL_BYTE_FORMAT
3088 G1PPRL_TYPE_H_FORMAT,
3089 group->group_id(),
3090 group->length(),
3091 group->length() > 0 ? group->gc_efficiency() : 0.0,
3092 group->length() > 0 ? group->liveness_percent() : 0.0,
3093 group->card_set()->mem_size(),
3094 type);
3095 _total_remset_bytes += group->card_set()->mem_size();
3096 }
3097
3098 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3099 for (G1CSetCandidateGroup* group : gl) {
3100 log_cset_candidate_group_add_total(group, type);
3101 }
3102 }
3103
3104 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3105 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3106 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3107 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3108 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3109 G1PPRL_GID_H_FORMAT
3110 G1PPRL_LEN_H_FORMAT
3111 G1PPRL_GCEFF_H_FORMAT
3112 G1PPRL_BYTE_H_FORMAT
3113 G1PPRL_BYTE_H_FORMAT
3114 G1PPRL_TYPE_H_FORMAT,
3115 "groud-id", "num-regions",
3116 "gc-eff", "liveness",
3117 "remset", "type");
3118
3119 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3120 G1PPRL_GID_H_FORMAT
3121 G1PPRL_LEN_H_FORMAT
3122 G1PPRL_GCEFF_H_FORMAT
3123 G1PPRL_BYTE_H_FORMAT
3124 G1PPRL_BYTE_H_FORMAT
3125 G1PPRL_TYPE_H_FORMAT,
3126 "", "",
3127 "(bytes/ms)", "%",
3128 "(bytes)", "");
3129
3130 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3131
3132 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3133
3134 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3135 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3136 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3137 }