1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectorState.inline.hpp"
34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
38 #include "gc/g1/g1ConcurrentRefine.hpp"
39 #include "gc/g1/g1HeapRegion.inline.hpp"
40 #include "gc/g1/g1HeapRegionManager.hpp"
41 #include "gc/g1/g1HeapRegionPrinter.hpp"
42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
44 #include "gc/g1/g1HeapVerifier.hpp"
45 #include "gc/g1/g1OopClosures.inline.hpp"
46 #include "gc/g1/g1Policy.hpp"
47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
48 #include "gc/g1/g1ThreadLocalData.hpp"
49 #include "gc/g1/g1Trace.hpp"
50 #include "gc/shared/gcId.hpp"
51 #include "gc/shared/gcTimer.hpp"
52 #include "gc/shared/gcTraceTime.inline.hpp"
53 #include "gc/shared/gcVMOperations.hpp"
54 #include "gc/shared/partialArraySplitter.inline.hpp"
55 #include "gc/shared/partialArrayState.hpp"
56 #include "gc/shared/partialArrayTaskStats.hpp"
57 #include "gc/shared/referencePolicy.hpp"
58 #include "gc/shared/suspendibleThreadSet.hpp"
59 #include "gc/shared/taskqueue.inline.hpp"
60 #include "gc/shared/taskTerminator.hpp"
61 #include "gc/shared/weakProcessor.inline.hpp"
62 #include "gc/shared/workerPolicy.hpp"
63 #include "jvm.h"
64 #include "logging/log.hpp"
65 #include "memory/allocation.hpp"
66 #include "memory/iterator.hpp"
67 #include "memory/metaspaceUtils.hpp"
68 #include "memory/resourceArea.hpp"
69 #include "memory/universe.hpp"
70 #include "nmt/memTracker.hpp"
71 #include "oops/access.inline.hpp"
72 #include "oops/oop.inline.hpp"
73 #include "oops/oopCast.inline.hpp"
74 #include "runtime/globals_extension.hpp"
75 #include "runtime/handles.inline.hpp"
76 #include "runtime/java.hpp"
77 #include "runtime/orderAccess.hpp"
78 #include "runtime/os.hpp"
79 #include "runtime/prefetch.inline.hpp"
80 #include "runtime/threads.hpp"
81 #include "utilities/align.hpp"
82 #include "utilities/checkedCast.hpp"
83 #include "utilities/formatBuffer.hpp"
84 #include "utilities/growableArray.hpp"
85 #include "utilities/powerOfTwo.hpp"
86
87 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
88
89 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
90 assert(cm != nullptr, "must be");
91 }
92
93 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
94 assert(cm != nullptr, "must be");
95 assert(_cm == nullptr, "double initialize");
96 _cm = cm;
97 }
98
99 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
100 assert(addr < _cm->finger(), "invariant");
101 assert(addr >= _task->finger(), "invariant");
102
103 // We move that task's local finger along.
104 _task->move_finger_to(addr);
105
106 _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
107 // we only partially drain the local queue and global stack
108 _task->drain_local_queue(true);
109 _task->drain_global_stack(true);
110
111 // if the has_aborted flag has been raised, we need to bail out of
112 // the iteration
113 return !_task->has_aborted();
114 }
115
116 G1CMMarkStack::G1CMMarkStack() :
117 _chunk_allocator() {
118 set_empty();
119 }
120
121 size_t G1CMMarkStack::capacity_alignment() {
122 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
123 }
124
125 bool G1CMMarkStack::initialize() {
126 guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
127
128 size_t initial_capacity = MarkStackSize;
129 size_t max_capacity = MarkStackSizeMax;
130
131 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
132
133 size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
134 size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
135
136 initial_num_chunks = round_up_power_of_2(initial_num_chunks);
137 max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
138
139 size_t limit = (INT_MAX - 1);
140 max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
141 initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
142
143 FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
144 FLAG_SET_ERGO(MarkStackSize, initial_capacity);
145
146 log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
147
148 log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
149 initial_num_chunks, max_capacity);
150
151 return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
152 }
153
154 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
155 if (_size.load_relaxed() >= _max_capacity) {
156 return nullptr;
157 }
158
159 size_t cur_idx = _size.fetch_then_add(1u);
160
161 if (cur_idx >= _max_capacity) {
162 return nullptr;
163 }
164
165 size_t bucket = get_bucket(cur_idx);
166 if (_buckets[bucket].load_acquire() == nullptr) {
167 if (!_should_grow) {
168 // Prefer to restart the CM.
169 return nullptr;
170 }
171
172 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
173 if (_buckets[bucket].load_acquire() == nullptr) {
174 size_t desired_capacity = bucket_size(bucket) * 2;
175 if (!try_expand_to(desired_capacity)) {
176 return nullptr;
177 }
178 }
179 }
180
181 size_t bucket_idx = get_bucket_index(cur_idx);
182 TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
183 result->next = nullptr;
184 return result;
185 }
186
187 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
188 _min_capacity(0),
189 _max_capacity(0),
190 _capacity(0),
191 _num_buckets(0),
192 _should_grow(false),
193 _buckets(nullptr),
194 _size(0)
195 { }
196
197 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
198 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
199
200 _min_capacity = initial_capacity;
201 _max_capacity = max_capacity;
202 _num_buckets = get_bucket(_max_capacity) + 1;
203
204 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
205
206 for (size_t i = 0; i < _num_buckets; i++) {
207 _buckets[i].store_relaxed(nullptr);
208 }
209
210 size_t new_capacity = bucket_size(0);
211
212 if (!reserve(new_capacity)) {
213 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
214 return false;
215 }
216 return true;
217 }
218
219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
220 if (_capacity == _max_capacity) {
221 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
222 return false;
223 }
224
225 size_t old_capacity = _capacity;
226 desired_capacity = MIN2(desired_capacity, _max_capacity);
227
228 if (reserve(desired_capacity)) {
229 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
230 old_capacity, desired_capacity);
231 return true;
232 }
233 return false;
234 }
235
236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
237 size_t new_capacity = _capacity * 2;
238 return try_expand_to(new_capacity);
239 }
240
241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
242 if (_buckets == nullptr) {
243 return;
244 }
245
246 for (size_t i = 0; i < _num_buckets; i++) {
247 if (_buckets[i].load_relaxed() != nullptr) {
248 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
249 _buckets[i].store_relaxed(nullptr);
250 }
251 }
252
253 FREE_C_HEAP_ARRAY(_buckets);
254 }
255
256 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
257 assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
258
259 size_t highest_bucket = get_bucket(new_capacity - 1);
260 size_t i = get_bucket(_capacity);
261
262 // Allocate all buckets associated with indexes between the current capacity (_capacity)
263 // and the new capacity (new_capacity). This step ensures that there are no gaps in the
264 // array and that the capacity accurately reflects the reserved memory.
265 for (; i <= highest_bucket; i++) {
266 if (_buckets[i].load_acquire() != nullptr) {
267 continue; // Skip over already allocated buckets.
268 }
269
270 size_t bucket_capacity = bucket_size(i);
271
272 // Trim bucket size so that we do not exceed the _max_capacity.
273 bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
274 bucket_capacity :
275 _max_capacity - _capacity;
276
277
278 TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
279
280 if (bucket_base == nullptr) {
281 log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
282 bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
283 return false;
284 }
285 _capacity += bucket_capacity;
286 _buckets[i].release_store(bucket_base);
287 }
288 return true;
289 }
290
291 void G1CMMarkStack::expand() {
292 _chunk_allocator.try_expand();
293 }
294
295 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
296 elem->next = list->load_relaxed();
297 list->store_relaxed(elem);
298 }
299
300 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
301 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
302 add_chunk_to_list(&_chunk_list, elem);
303 _chunks_in_chunk_list++;
304 }
305
306 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
307 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
308 add_chunk_to_list(&_free_list, elem);
309 }
310
311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
312 TaskQueueEntryChunk* result = list->load_relaxed();
313 if (result != nullptr) {
314 list->store_relaxed(list->load_relaxed()->next);
315 }
316 return result;
317 }
318
319 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
320 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
321 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
322 if (result != nullptr) {
323 _chunks_in_chunk_list--;
324 }
325 return result;
326 }
327
328 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
329 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
330 return remove_chunk_from_list(&_free_list);
331 }
332
333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
334 // Get a new chunk.
335 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
336
337 if (new_chunk == nullptr) {
338 // Did not get a chunk from the free list. Allocate from backing memory.
339 new_chunk = _chunk_allocator.allocate_new_chunk();
340
341 if (new_chunk == nullptr) {
342 return false;
343 }
344 }
345
346 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
347
348 add_chunk_to_chunk_list(new_chunk);
349
350 return true;
351 }
352
353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
354 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
355
356 if (cur == nullptr) {
357 return false;
358 }
359
360 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
361
362 add_chunk_to_free_list(cur);
363 return true;
364 }
365
366 void G1CMMarkStack::set_empty() {
367 _chunks_in_chunk_list = 0;
368 _chunk_list.store_relaxed(nullptr);
369 _free_list.store_relaxed(nullptr);
370 _chunk_allocator.reset();
371 }
372
373 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
374 _root_regions(MemRegion::create_array(max_regions, mtGC)),
375 _max_regions(max_regions),
376 _num_regions(0),
377 _num_claimed_regions(0) { }
378
379 G1CMRootMemRegions::~G1CMRootMemRegions() {
380 MemRegion::destroy_array(_root_regions, _max_regions);
381 }
382
383 void G1CMRootMemRegions::reset() {
384 assert_at_safepoint();
385 assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
386
387 _num_regions.store_relaxed(0);
388 _num_claimed_regions.store_relaxed(0);
389 }
390
391 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
392 assert_at_safepoint();
393 uint idx = _num_regions.fetch_then_add(1u);
394 assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
395 assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
396 "end (" PTR_FORMAT ")", p2i(start), p2i(end));
397 _root_regions[idx].set_start(start);
398 _root_regions[idx].set_end(end);
399 }
400
401 const MemRegion* G1CMRootMemRegions::claim_next() {
402 uint local_num_regions = num_regions();
403 if (num_claimed_regions() >= local_num_regions) {
404 return nullptr;
405 }
406
407 uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
408 if (claimed_index < local_num_regions) {
409 return &_root_regions[claimed_index];
410 }
411 return nullptr;
412 }
413
414 bool G1CMRootMemRegions::work_completed() const {
415 return num_remaining_regions() == 0;
416 }
417
418 uint G1CMRootMemRegions::num_remaining_regions() const {
419 uint total = num_regions();
420 uint claimed = num_claimed_regions();
421 return (total > claimed) ? total - claimed : 0;
422 }
423
424 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
425 uint local_num_root_regions = num_regions();
426 for (uint i = 0; i < local_num_root_regions; i++) {
427 if (_root_regions[i].equals(mr)) {
428 return true;
429 }
430 }
431 return false;
432 }
433
434 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
435 G1RegionToSpaceMapper* bitmap_storage) :
436 _cm_thread(nullptr),
437 _g1h(g1h),
438
439 _mark_bitmap(),
440
441 _heap(_g1h->reserved()),
442
443 _root_regions(_g1h->max_num_regions()),
444 _root_region_scan_aborted(false),
445
446 _global_mark_stack(),
447
448 _finger(nullptr), // _finger set in set_non_marking_state
449
450 _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
451 _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
452 _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
453 _tasks(nullptr), // _tasks set inside late_init()
454 _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
455 _terminator(_max_num_tasks, _task_queues),
456 _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
457
458 _first_overflow_barrier_sync(),
459 _second_overflow_barrier_sync(),
460
461 _completed_mark_cycles(0),
462 _has_overflown(false),
463 _concurrent(false),
464 _has_aborted(false),
465 _restart_for_overflow(false),
466 _gc_timer_cm(new ConcurrentGCTimer()),
467 _gc_tracer_cm(new G1OldTracer()),
468
469 // _verbose_level set below
470
471 _remark_times(),
472 _remark_mark_times(),
473 _remark_weak_ref_times(),
474 _cleanup_times(),
475
476 _concurrent_workers(nullptr),
477 _num_concurrent_workers(0),
478 _max_concurrent_workers(0),
479
480 _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
481 _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
482 _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
483 _needs_remembered_set_rebuild(false)
484 {
485 assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
486
487 _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
488 }
489
490 void G1ConcurrentMark::fully_initialize() {
491 if (is_fully_initialized()) {
492 return;
493 }
494
495 // Create & start ConcurrentMark thread.
496 _cm_thread = new G1ConcurrentMarkThread(this);
497 if (_cm_thread->osthread() == nullptr) {
498 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
499 }
500
501 log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
502 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
503
504 _max_concurrent_workers = ConcGCThreads;
505
506 _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
507 _concurrent_workers->initialize_workers();
508 _num_concurrent_workers = _concurrent_workers->active_workers();
509
510 if (!_global_mark_stack.initialize()) {
511 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
512 }
513
514 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
515
516 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
517 _num_active_tasks = _max_num_tasks;
518
519 for (uint i = 0; i < _max_num_tasks; ++i) {
520 G1CMTaskQueue* task_queue = new G1CMTaskQueue();
521 _task_queues->register_queue(i, task_queue);
522
523 _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
524 }
525
526 uint max_num_regions = _g1h->max_num_regions();
527 ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
528 for (uint i = 0; i < max_num_regions; i++) {
529 ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>(_g1h->bottom_addr_for_region(i));
530 }
531 // Contrary to TAMS, the default value of _top_at_rebuild_starts needs to be null.
532 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
533
534 reset_at_marking_complete();
535 }
536
537 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
538 return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
539 }
540
541 bool G1ConcurrentMark::is_in_marking() const {
542 return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
543 }
544
545 bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
546 return cm_thread()->is_in_rebuild_or_scrub();
547 }
548
549 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
550 return cm_thread()->is_in_reset_for_next_cycle();
551 }
552
553 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
554 return _partial_array_state_manager;
555 }
556
557 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
558 assert(is_fully_initialized(), "must be");
559 return _cm_thread;
560 }
561
562 void G1ConcurrentMark::reset() {
563 _has_aborted.store_relaxed(false);
564
565 reset_marking_for_restart();
566
567 // Reset all tasks, since different phases will use different number of active
568 // threads. So, it's easiest to have all of them ready.
569 for (uint i = 0; i < _max_num_tasks; ++i) {
570 _tasks[i]->reset(mark_bitmap());
571 }
572
573 uint max_num_regions = _g1h->max_num_regions();
574 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
575 for (uint i = 0; i < max_num_regions; i++) {
576 _region_mark_stats[i].clear();
577 }
578
579 _root_region_scan_aborted.store_relaxed(false);
580 _root_regions.reset();
581 }
582
583 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
584 uint region_idx = r->hrm_index();
585 for (uint j = 0; j < _max_num_tasks; ++j) {
586 _tasks[j]->clear_mark_stats_cache(region_idx);
587 }
588 _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
589 _region_mark_stats[region_idx].clear();
590 }
591
592 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
593 assert_at_safepoint();
594 assert(r->is_starts_humongous(), "Got humongous continues region here");
595
596 // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
597 mark_bitmap()->clear(r->bottom());
598
599 if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
600 return;
601 }
602
603 // Clear any statistics about the region gathered so far.
604 _g1h->humongous_obj_regions_iterate(r,
605 [&] (G1HeapRegion* r) {
606 clear_statistics(r);
607 });
608 }
609
610 void G1ConcurrentMark::reset_marking_for_restart() {
611 _global_mark_stack.set_empty();
612
613 // Expand the marking stack, if we have to and if we can.
614 if (has_overflown()) {
615 _global_mark_stack.expand();
616
617 uint max_num_regions = _g1h->max_num_regions();
618 for (uint i = 0; i < max_num_regions; i++) {
619 _region_mark_stats[i].clear_during_overflow();
620 }
621 }
622
623 clear_has_overflown();
624 _finger.store_relaxed(_heap.start());
625
626 for (uint i = 0; i < _max_num_tasks; ++i) {
627 _tasks[i]->reset_for_restart();
628 }
629 }
630
631 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
632 assert(active_tasks <= _max_num_tasks, "we should not have more");
633
634 _num_active_tasks = active_tasks;
635 // Need to update the three data structures below according to the
636 // number of active threads for this phase.
637 _terminator.reset_for_reuse(active_tasks);
638 _first_overflow_barrier_sync.set_n_workers(active_tasks);
639 _second_overflow_barrier_sync.set_n_workers(active_tasks);
640 }
641
642 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
643 set_concurrency(active_tasks);
644
645 _concurrent.store_relaxed(concurrent);
646
647 if (!concurrent) {
648 // At this point we should be in a STW phase, and completed marking.
649 assert_at_safepoint_on_vm_thread();
650 assert(out_of_regions(),
651 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
652 p2i(finger()), p2i(_heap.end()));
653 }
654 }
655
656 #if TASKQUEUE_STATS
657 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
658
659 _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
660
661 auto get_pa_stats = [&](uint i) {
662 return _tasks[i]->partial_array_task_stats();
663 };
664
665 PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
666 "Concurrent Mark Partial Array");
667
668 for (uint i = 0; i < _max_num_tasks; ++i) {
669 get_pa_stats(i)->reset();
670 }
671 }
672 #endif
673
674 void G1ConcurrentMark::reset_at_marking_complete() {
675 TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
676 // We set the global marking state to some default values when we're
677 // not doing marking.
678 reset_marking_for_restart();
679 _num_active_tasks = 0;
680 }
681
682 G1ConcurrentMark::~G1ConcurrentMark() {
683 FREE_C_HEAP_ARRAY(_top_at_mark_starts);
684 FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
685 FREE_C_HEAP_ARRAY(_region_mark_stats);
686 // The G1ConcurrentMark instance is never freed.
687 ShouldNotReachHere();
688 }
689
690 class G1ClearBitMapTask : public WorkerTask {
691 public:
692 static size_t chunk_size() { return M; }
693
694 private:
695 // Heap region closure used for clearing the _mark_bitmap.
696 class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
697 G1ConcurrentMark* _cm;
698 G1CMBitMap* _bitmap;
699 bool _suspendible; // If suspendible, do yield checks.
700
701 bool suspendible() {
702 return _suspendible;
703 }
704
705 bool is_clear_concurrent_undo() {
706 return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
707 }
708
709 bool has_aborted() {
710 if (suspendible()) {
711 _cm->do_yield_check();
712 return _cm->has_aborted();
713 }
714 return false;
715 }
716
717 HeapWord* region_clear_limit(G1HeapRegion* r) {
718 // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
719 // live_words data are current wrt to the _mark_bitmap. We use this information
720 // to only clear ranges of the bitmap that require clearing.
721 if (is_clear_concurrent_undo()) {
722 // No need to clear bitmaps for empty regions (which includes regions we
723 // did not mark through).
724 if (!_cm->contains_live_object(r->hrm_index())) {
725 assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
726 return r->bottom();
727 }
728 assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
729 }
730 return r->end();
731 }
732
733 public:
734 G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
735 G1HeapRegionClosure(),
736 _cm(cm),
737 _bitmap(cm->mark_bitmap()),
738 _suspendible(suspendible)
739 { }
740
741 virtual bool do_heap_region(G1HeapRegion* r) {
742 if (has_aborted()) {
743 return true;
744 }
745
746 HeapWord* cur = r->bottom();
747 HeapWord* const end = region_clear_limit(r);
748
749 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
750
751 while (cur < end) {
752
753 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
754 _bitmap->clear_range(mr);
755
756 cur += chunk_size_in_words;
757
758 // Repeat the asserts from before the start of the closure. We will do them
759 // as asserts here to minimize their overhead on the product. However, we
760 // will have them as guarantees at the beginning / end of the bitmap
761 // clearing to get some checking in the product.
762 assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
763
764 // Abort iteration if necessary.
765 if (has_aborted()) {
766 return true;
767 }
768 }
769 assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
770
771 _cm->reset_top_at_mark_start(r);
772
773 return false;
774 }
775 };
776
777 G1ClearBitmapHRClosure _cl;
778 G1HeapRegionClaimer _hr_claimer;
779 bool _suspendible; // If the task is suspendible, workers must join the STS.
780
781 public:
782 G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
783 WorkerTask("G1 Clear Bitmap"),
784 _cl(cm, suspendible),
785 _hr_claimer(n_workers),
786 _suspendible(suspendible)
787 { }
788
789 void work(uint worker_id) {
790 SuspendibleThreadSetJoiner sts_join(_suspendible);
791 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
792 }
793 };
794
795 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
796 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
797
798 size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
799 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
800
801 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
802
803 G1ClearBitMapTask cl(this, num_workers, may_yield);
804
805 log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
806 workers->run_task(&cl, num_workers);
807 }
808
809 void G1ConcurrentMark::cleanup_for_next_mark() {
810 // Make sure that the concurrent mark thread looks to still be in
811 // the current cycle.
812 guarantee(is_in_reset_for_next_cycle(), "invariant");
813
814 clear_bitmap(_concurrent_workers, true);
815
816 reset_partial_array_state_manager();
817
818 // Should not have changed state yet (even if a Full GC interrupted us).
819 guarantee(is_in_reset_for_next_cycle(), "invariant");
820 }
821
822 void G1ConcurrentMark::reset_partial_array_state_manager() {
823 for (uint i = 0; i < _max_num_tasks; ++i) {
824 _tasks[i]->unregister_partial_array_splitter();
825 }
826
827 partial_array_state_manager()->reset();
828
829 for (uint i = 0; i < _max_num_tasks; ++i) {
830 _tasks[i]->register_partial_array_splitter();
831 }
832 }
833
834 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
835 assert_at_safepoint_on_vm_thread();
836 // To avoid fragmentation the full collection requesting to clear the bitmap
837 // might use fewer workers than available. To ensure the bitmap is cleared
838 // as efficiently as possible the number of active workers are temporarily
839 // increased to include all currently created workers.
840 WithActiveWorkers update(workers, workers->created_workers());
841 clear_bitmap(workers, false);
842 }
843
844 class G1PreConcurrentStartTask : public G1BatchedTask {
845 // Reset marking state.
846 class ResetMarkingStateTask;
847 // For each region note start of marking.
848 class NoteStartOfMarkTask;
849
850 public:
851 G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
852 };
853
854 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
855 G1ConcurrentMark* _cm;
856 public:
857 ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
858
859 double worker_cost() const override { return 1.0; }
860 void do_work(uint worker_id) override;
861 };
862
863 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
864
865 class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
866 G1ConcurrentMark* _cm;
867
868 public:
869 NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
870
871 bool do_heap_region(G1HeapRegion* r) override {
872 if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
873 _cm->update_top_at_mark_start(r);
874 } else {
875 _cm->reset_top_at_mark_start(r);
876 }
877 return false;
878 }
879 } _region_cl;
880
881 G1HeapRegionClaimer _claimer;
882 public:
883 NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
884
885 double worker_cost() const override {
886 // The work done per region is very small, therefore we choose this magic number to cap the number
887 // of threads used when there are few regions.
888 const double regions_per_thread = 1000;
889 return _claimer.n_regions() / regions_per_thread;
890 }
891
892 void set_max_workers(uint max_workers) override {
893 _claimer.set_n_workers(max_workers);
894 }
895
896 void do_work(uint worker_id) override {
897 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
898 }
899 };
900
901 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
902 // Reset marking state.
903 _cm->reset();
904 }
905
906 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
907 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
908 add_serial_task(new ResetMarkingStateTask(cm));
909 add_parallel_task(new NoteStartOfMarkTask());
910 };
911
912 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
913 assert_at_safepoint_on_vm_thread();
914
915 G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
916
917 ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
918
919 G1PreConcurrentStartTask cl(cause, this);
920 G1CollectedHeap::heap()->run_batch_task(&cl);
921
922 _gc_tracer_cm->set_gc_cause(cause);
923 }
924
925 void G1ConcurrentMark::start_full_concurrent_cycle() {
926 // Start Concurrent Marking weak-reference discovery.
927 ReferenceProcessor* rp = _g1h->ref_processor_cm();
928 rp->start_discovery(false /* always_clear */);
929
930 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
931 // This is the start of the marking cycle, we're expected all
932 // threads to have SATB queues with active set to false.
933 satb_mq_set.set_active_all_threads(true, /* new active value */
934 false /* expected_active */);
935
936 // update_g1_committed() will be called at the end of an evac pause
937 // when marking is on. So, it's also called at the end of the
938 // concurrent start pause to update the heap end, if the heap expands
939 // during it. No need to call it here.
940
941 // Signal the thread to start work.
942 cm_thread()->start_full_cycle();
943 }
944
945 void G1ConcurrentMark::start_undo_concurrent_cycle() {
946 assert_at_safepoint_on_vm_thread();
947 // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
948 assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
949
950 abort_root_region_scan_at_safepoint();
951
952 // Signal the thread to start work.
953 cm_thread()->start_undo_cycle();
954 }
955
956 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
957 cm_thread()->set_idle();
958 }
959
960 void G1ConcurrentMark::stop() {
961 if (is_fully_initialized()) {
962 cm_thread()->stop();
963 }
964 }
965
966 /*
967 * Notice that in the next two methods, we actually leave the STS
968 * during the barrier sync and join it immediately afterwards. If we
969 * do not do this, the following deadlock can occur: one thread could
970 * be in the barrier sync code, waiting for the other thread to also
971 * sync up, whereas another one could be trying to yield, while also
972 * waiting for the other threads to sync up too.
973 *
974 * Note, however, that this code is also used during remark and in
975 * this case we should not attempt to leave / enter the STS, otherwise
976 * we'll either hit an assert (debug / fastdebug) or deadlock
977 * (product). So we should only leave / enter the STS if we are
978 * operating concurrently.
979 *
980 * Because the thread that does the sync barrier has left the STS, it
981 * is possible to be suspended for a Full GC or an evacuation pause
982 * could occur. This is actually safe, since the entering the sync
983 * barrier is one of the last things do_marking_step() does, and it
984 * doesn't manipulate any data structures afterwards.
985 */
986
987 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
988 bool barrier_aborted;
989 {
990 SuspendibleThreadSetLeaver sts_leave(concurrent());
991 barrier_aborted = !_first_overflow_barrier_sync.enter();
992 }
993
994 // at this point everyone should have synced up and not be doing any
995 // more work
996
997 if (barrier_aborted) {
998 // If the barrier aborted we ignore the overflow condition and
999 // just abort the whole marking phase as quickly as possible.
1000 return;
1001 }
1002 }
1003
1004 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1005 SuspendibleThreadSetLeaver sts_leave(concurrent());
1006 _second_overflow_barrier_sync.enter();
1007
1008 // at this point everything should be re-initialized and ready to go
1009 }
1010
1011 class G1CMConcurrentMarkingTask : public WorkerTask {
1012 G1ConcurrentMark* _cm;
1013
1014 public:
1015 void work(uint worker_id) {
1016 ResourceMark rm;
1017
1018 SuspendibleThreadSetJoiner sts_join;
1019
1020 assert(worker_id < _cm->active_tasks(), "invariant");
1021
1022 G1CMTask* task = _cm->task(worker_id);
1023 task->record_start_time();
1024 if (!_cm->has_aborted()) {
1025 do {
1026 task->do_marking_step(G1ConcMarkStepDurationMillis,
1027 true /* do_termination */,
1028 false /* is_serial*/);
1029
1030 _cm->do_yield_check();
1031 } while (!_cm->has_aborted() && task->has_aborted());
1032 }
1033 task->record_end_time();
1034 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1035 }
1036
1037 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1038 WorkerTask("Concurrent Mark"), _cm(cm) { }
1039
1040 ~G1CMConcurrentMarkingTask() { }
1041 };
1042
1043 uint G1ConcurrentMark::calc_active_marking_workers() {
1044 uint result = 0;
1045 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1046 result = _max_concurrent_workers;
1047 } else {
1048 result =
1049 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1050 1, /* Minimum workers */
1051 _num_concurrent_workers,
1052 Threads::number_of_non_daemon_threads());
1053 // Don't scale the result down by scale_concurrent_workers() because
1054 // that scaling has already gone into "_max_concurrent_workers".
1055 }
1056 assert(result > 0 && result <= _max_concurrent_workers,
1057 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1058 _max_concurrent_workers, result);
1059 return result;
1060 }
1061
1062 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1063 return _root_region_scan_aborted.load_relaxed();
1064 }
1065
1066 #ifndef PRODUCT
1067 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1068 assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1069 }
1070 #endif
1071
1072 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1073 #ifdef ASSERT
1074 HeapWord* last = region->last();
1075 G1HeapRegion* hr = _g1h->heap_region_containing(last);
1076 assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1077 "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1078 assert(top_at_mark_start(hr) == region->start(),
1079 "MemRegion start should be equal to TAMS");
1080 #endif
1081
1082 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1083
1084 const uintx interval = PrefetchScanIntervalInBytes;
1085 HeapWord* curr = region->start();
1086 const HeapWord* end = region->end();
1087 while (curr < end) {
1088 Prefetch::read(curr, interval);
1089 oop obj = cast_to_oop(curr);
1090 size_t size = obj->oop_iterate_size(&cl);
1091 assert(size == obj->size(), "sanity");
1092 curr += size;
1093 }
1094 }
1095
1096 class G1CMRootRegionScanTask : public WorkerTask {
1097 G1ConcurrentMark* _cm;
1098 bool _should_yield;
1099
1100 public:
1101 G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1102 WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1103
1104 void work(uint worker_id) {
1105 SuspendibleThreadSetJoiner sts_join(_should_yield);
1106
1107 while (true) {
1108 if (_cm->has_root_region_scan_aborted()) {
1109 return;
1110 }
1111 G1CMRootMemRegions* root_regions = _cm->root_regions();
1112 const MemRegion* region = root_regions->claim_next();
1113 if (region == nullptr) {
1114 return;
1115 }
1116 _cm->scan_root_region(region, worker_id);
1117 if (_should_yield) {
1118 SuspendibleThreadSet::yield();
1119 // If we yielded, a GC may have processed all root regions,
1120 // so this loop will naturally exit on the next claim_next() call.
1121 // Same if a Full GC signalled abort of the concurrent mark.
1122 }
1123 }
1124 }
1125 };
1126
1127 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1128 // We first check whether there is any work to do as we might have already aborted
1129 // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
1130 // We want to avoid spinning up the worker threads if that happened.
1131 // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1132 //
1133 // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1134 // a young gc doing the work.
1135 //
1136 // Concurrent gc threads enter an STS when starting the task, so they stop, then
1137 // continue after that safepoint.
1138 //
1139 // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1140 // consistent view of the value containing the number of remaining regions across the
1141 // usages below. The safepoint/gc may already be running and modifying it
1142 // while this code is still executing.
1143 uint num_remaining = root_regions()->num_remaining_regions();
1144 bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1145 if (do_scan) {
1146 // Assign one worker to each root-region but subject to the max constraint.
1147 // The constraint is also important to avoid accesses beyond the allocated per-worker
1148 // marking helper data structures. We might get passed different WorkerThreads with
1149 // different number of threads (potential worker ids) than helper data structures when
1150 // completing this work during GC.
1151 const uint num_workers = MIN2(num_remaining,
1152 _max_concurrent_workers);
1153
1154 G1CMRootRegionScanTask task(this, concurrent);
1155 log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1156 task.name(), num_workers, num_remaining);
1157 workers->run_task(&task, num_workers);
1158 }
1159
1160 // At the end of this method, we can re-read num_remaining() in the assert: either
1161 // we got non-zero above and we processed all root regions (and it must be zero
1162 // after the worker task synchronization) or it had already been zero. We also
1163 // can't have started another concurrent cycle that could have set it to something else
1164 // while still in the concurrent cycle (if called concurrently).
1165 assert_root_region_scan_completed_or_aborted();
1166
1167 return do_scan;
1168 }
1169
1170 void G1ConcurrentMark::scan_root_regions_concurrently() {
1171 assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1172 scan_root_regions(_concurrent_workers, true /* concurrent */);
1173 }
1174
1175 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1176 assert_at_safepoint_on_vm_thread();
1177 return scan_root_regions(_g1h->workers(), false /* concurrent */);
1178 }
1179
1180 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1181 root_regions()->add(top_at_mark_start(r), r->top());
1182 }
1183
1184 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1185 return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1186 }
1187
1188 void G1ConcurrentMark::abort_root_region_scan() {
1189 assert_not_at_safepoint();
1190
1191 _root_region_scan_aborted.store_relaxed(true);
1192 }
1193
1194 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1195 assert_at_safepoint_on_vm_thread();
1196
1197 _root_region_scan_aborted.store_relaxed(true);
1198 }
1199
1200 void G1ConcurrentMark::concurrent_cycle_start() {
1201 _gc_timer_cm->register_gc_start();
1202
1203 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1204
1205 _g1h->trace_heap_before_gc(_gc_tracer_cm);
1206 }
1207
1208 uint G1ConcurrentMark::completed_mark_cycles() const {
1209 return _completed_mark_cycles.load_relaxed();
1210 }
1211
1212 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1213 _g1h->trace_heap_after_gc(_gc_tracer_cm);
1214
1215 if (mark_cycle_completed) {
1216 _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1217 }
1218
1219 if (has_aborted()) {
1220 log_info(gc, marking)("Concurrent Mark Abort");
1221 _gc_tracer_cm->report_concurrent_mode_failure();
1222 }
1223
1224 _gc_timer_cm->register_gc_end();
1225
1226 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1227 }
1228
1229 void G1ConcurrentMark::mark_from_roots() {
1230 _restart_for_overflow.store_relaxed(false);
1231
1232 uint active_workers = calc_active_marking_workers();
1233
1234 // Setting active workers is not guaranteed since fewer
1235 // worker threads may currently exist and more may not be
1236 // available.
1237 active_workers = _concurrent_workers->set_active_workers(active_workers);
1238 log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1239
1240 _num_concurrent_workers = active_workers;
1241
1242 // Parallel task terminator is set in "set_concurrency_and_phase()"
1243 set_concurrency_and_phase(active_workers, true /* concurrent */);
1244
1245 G1CMConcurrentMarkingTask marking_task(this);
1246 _concurrent_workers->run_task(&marking_task);
1247 print_stats();
1248 }
1249
1250 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1251 static const char* location_strings[] = { "Remark Before",
1252 "Remark After",
1253 "Remark Overflow",
1254 "Cleanup Before",
1255 "Cleanup After" };
1256 return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1257 }
1258
1259 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1260 VerifyLocation location) {
1261 G1HeapVerifier* verifier = _g1h->verifier();
1262
1263 verifier->verify_region_sets_optional();
1264
1265 const char* caller = verify_location_string(location);
1266
1267 if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1268 GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1269
1270 size_t const BufLen = 512;
1271 char buffer[BufLen];
1272
1273 jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1274 verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1275
1276 // Only check bitmap in Remark, and not at After-Verification because the regions
1277 // already have their TAMS'es reset.
1278 if (location != VerifyLocation::RemarkAfter) {
1279 verifier->verify_bitmap_clear(true /* above_tams_only */);
1280 }
1281 }
1282 }
1283
1284 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1285 G1CollectedHeap* _g1h;
1286 public:
1287 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1288
1289 bool do_object_b(oop obj) {
1290 return !_g1h->is_obj_dead(obj);
1291 }
1292 };
1293
1294 void G1ConcurrentMark::remark() {
1295 assert_at_safepoint_on_vm_thread();
1296
1297 // If a full collection has happened, we should not continue. However we might
1298 // have ended up here as the Remark VM operation has been scheduled already.
1299 if (has_aborted()) {
1300 return;
1301 }
1302
1303 G1Policy* policy = _g1h->policy();
1304 policy->record_pause_start_time();
1305
1306 double start = os::elapsedTime();
1307
1308 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1309
1310 {
1311 GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1312 finalize_marking();
1313 }
1314
1315 double mark_work_end = os::elapsedTime();
1316
1317 bool const mark_finished = !has_overflown();
1318 if (mark_finished) {
1319 weak_refs_work();
1320
1321 // Unload Klasses, String, Code Cache, etc.
1322 if (ClassUnloadingWithConcurrentMark) {
1323 G1CMIsAliveClosure is_alive(this);
1324 _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1325 }
1326
1327 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1328 // We're done with marking.
1329 // This is the end of the marking cycle, we're expected all
1330 // threads to have SATB queues with active set to true.
1331 satb_mq_set.set_active_all_threads(false, /* new active value */
1332 true /* expected_active */);
1333
1334 {
1335 GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1336 flush_all_task_caches();
1337 }
1338
1339 // All marking completed. Check bitmap now as we will start to reset TAMSes
1340 // in parallel below so that we can not do this in the After-Remark verification.
1341 _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1342
1343 {
1344 GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1345
1346 G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1347 uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1348 _g1h->workers()->active_workers());
1349 log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1350 _g1h->workers()->run_task(&cl, num_workers);
1351
1352 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1353 _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1354
1355 _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1356
1357 if (_needs_remembered_set_rebuild) {
1358 GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1359 _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1360 }
1361 }
1362
1363 if (log_is_enabled(Trace, gc, liveness)) {
1364 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1365 _g1h->heap_region_iterate(&cl);
1366 }
1367
1368 // Potentially, some empty-regions have been reclaimed; make this a
1369 // "collection" so that pending allocation can retry before attempting a
1370 // GC pause.
1371 _g1h->increment_total_collections();
1372
1373 // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1374 // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1375 // triggered, it likely means there are very few regular GCs, making resizing
1376 // based on gc heuristics less effective.
1377 if (_g1h->last_gc_was_periodic()) {
1378 _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1379 }
1380
1381 compute_new_sizes();
1382
1383 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1384
1385 assert(!restart_for_overflow(), "sanity");
1386 // Completely reset the marking state (except bitmaps) since marking completed.
1387 reset_at_marking_complete();
1388
1389 G1CollectedHeap::finish_codecache_marking_cycle();
1390
1391 {
1392 GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1393 G1ObjectCountIsAliveClosure is_alive(_g1h);
1394 _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1395 }
1396
1397 // Successfully completed marking, advance state.
1398 cm_thread()->set_full_cycle_rebuild_and_scrub();
1399 } else {
1400 // We overflowed. Restart concurrent marking.
1401 _restart_for_overflow.store_relaxed(true);
1402
1403 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1404
1405 // Clear the marking state because we will be restarting
1406 // marking due to overflowing the global mark stack.
1407 reset_marking_for_restart();
1408 }
1409
1410 // Statistics
1411 double now = os::elapsedTime();
1412 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1413 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1414 _remark_times.add((now - start) * 1000.0);
1415
1416 _g1h->update_perf_counter_cpu_time();
1417
1418 policy->record_concurrent_mark_remark_end();
1419
1420 return;
1421 }
1422
1423 void G1ConcurrentMark::compute_new_sizes() {
1424 MetaspaceGC::compute_new_size();
1425
1426 // Cleanup will have freed any regions completely full of garbage.
1427 // Update the soft reference policy with the new heap occupancy.
1428 Universe::heap()->update_capacity_and_used_at_gc();
1429
1430 // We reclaimed old regions so we should calculate the sizes to make
1431 // sure we update the old gen/space data.
1432 _g1h->monitoring_support()->update_sizes();
1433 }
1434
1435 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1436 G1CollectedHeap* _g1h;
1437
1438 public:
1439 G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1440
1441 bool do_heap_region(G1HeapRegion* r) override {
1442 // Update the remset tracking state from updating to complete
1443 // if remembered sets have been rebuilt.
1444 _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1445 return false;
1446 }
1447 };
1448
1449 void G1ConcurrentMark::cleanup() {
1450 assert_at_safepoint_on_vm_thread();
1451
1452 // If a full collection has happened, we shouldn't do this.
1453 if (has_aborted()) {
1454 return;
1455 }
1456
1457 G1Policy* policy = _g1h->policy();
1458 policy->record_pause_start_time();
1459
1460 double start = os::elapsedTime();
1461
1462 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1463
1464 if (needs_remembered_set_rebuild()) {
1465 // Update the remset tracking information as well as marking all regions
1466 // as fully parsable.
1467 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1468 G1UpdateRegionsAfterRebuild cl(_g1h);
1469 _g1h->heap_region_iterate(&cl);
1470 } else {
1471 log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1472 }
1473
1474 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1475
1476 // Local statistics
1477 _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1478
1479 {
1480 GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1481 policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1482 }
1483
1484 // Advance state.
1485 cm_thread()->set_full_cycle_reset_for_next_cycle();
1486 return;
1487 }
1488
1489 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1490 // Uses the G1CMTask associated with a worker thread (for serial reference
1491 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1492 // trace referent objects.
1493 //
1494 // Using the G1CMTask and embedded local queues avoids having the worker
1495 // threads operating on the global mark stack. This reduces the risk
1496 // of overflowing the stack - which we would rather avoid at this late
1497 // state. Also using the tasks' local queues removes the potential
1498 // of the workers interfering with each other that could occur if
1499 // operating on the global stack.
1500
1501 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1502 G1ConcurrentMark* _cm;
1503 G1CMTask* _task;
1504 uint _ref_counter_limit;
1505 uint _ref_counter;
1506 bool _is_serial;
1507 public:
1508 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1509 _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1510 _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1511 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1512 }
1513
1514 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1515 virtual void do_oop( oop* p) { do_oop_work(p); }
1516
1517 template <class T> void do_oop_work(T* p) {
1518 if (_cm->has_overflown()) {
1519 return;
1520 }
1521 if (!_task->deal_with_reference(p)) {
1522 // We did not add anything to the mark bitmap (or mark stack), so there is
1523 // no point trying to drain it.
1524 return;
1525 }
1526 _ref_counter--;
1527
1528 if (_ref_counter == 0) {
1529 // We have dealt with _ref_counter_limit references, pushing them
1530 // and objects reachable from them on to the local stack (and
1531 // possibly the global stack). Call G1CMTask::do_marking_step() to
1532 // process these entries.
1533 //
1534 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1535 // there's nothing more to do (i.e. we're done with the entries that
1536 // were pushed as a result of the G1CMTask::deal_with_reference() calls
1537 // above) or we overflow.
1538 //
1539 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1540 // flag while there may still be some work to do. (See the comment at
1541 // the beginning of G1CMTask::do_marking_step() for those conditions -
1542 // one of which is reaching the specified time target.) It is only
1543 // when G1CMTask::do_marking_step() returns without setting the
1544 // has_aborted() flag that the marking step has completed.
1545 do {
1546 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1547 _task->do_marking_step(mark_step_duration_ms,
1548 false /* do_termination */,
1549 _is_serial);
1550 } while (_task->has_aborted() && !_cm->has_overflown());
1551 _ref_counter = _ref_counter_limit;
1552 }
1553 }
1554 };
1555
1556 // 'Drain' oop closure used by both serial and parallel reference processing.
1557 // Uses the G1CMTask associated with a given worker thread (for serial
1558 // reference processing the G1CMtask for worker 0 is used). Calls the
1559 // do_marking_step routine, with an unbelievably large timeout value,
1560 // to drain the marking data structures of the remaining entries
1561 // added by the 'keep alive' oop closure above.
1562
1563 class G1CMDrainMarkingStackClosure : public VoidClosure {
1564 G1ConcurrentMark* _cm;
1565 G1CMTask* _task;
1566 bool _is_serial;
1567 public:
1568 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1569 _cm(cm), _task(task), _is_serial(is_serial) {
1570 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1571 }
1572
1573 void do_void() {
1574 do {
1575 // We call G1CMTask::do_marking_step() to completely drain the local
1576 // and global marking stacks of entries pushed by the 'keep alive'
1577 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1578 //
1579 // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1580 // if there's nothing more to do (i.e. we've completely drained the
1581 // entries that were pushed as a result of applying the 'keep alive'
1582 // closure to the entries on the discovered ref lists) or we overflow
1583 // the global marking stack.
1584 //
1585 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1586 // flag while there may still be some work to do. (See the comment at
1587 // the beginning of G1CMTask::do_marking_step() for those conditions -
1588 // one of which is reaching the specified time target.) It is only
1589 // when G1CMTask::do_marking_step() returns without setting the
1590 // has_aborted() flag that the marking step has completed.
1591
1592 _task->do_marking_step(1000000000.0 /* something very large */,
1593 true /* do_termination */,
1594 _is_serial);
1595 } while (_task->has_aborted() && !_cm->has_overflown());
1596 }
1597 };
1598
1599 class G1CMRefProcProxyTask : public RefProcProxyTask {
1600 G1CollectedHeap& _g1h;
1601 G1ConcurrentMark& _cm;
1602
1603 public:
1604 G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1605 : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1606 _g1h(g1h),
1607 _cm(cm) {}
1608
1609 void work(uint worker_id) override {
1610 assert(worker_id < _max_workers, "sanity");
1611 G1CMIsAliveClosure is_alive(&_cm);
1612 uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1613 G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1614 BarrierEnqueueDiscoveredFieldClosure enqueue;
1615 G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1616 _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1617 }
1618
1619 void prepare_run_task_hook() override {
1620 // We need to reset the concurrency level before each
1621 // proxy task execution, so that the termination protocol
1622 // and overflow handling in G1CMTask::do_marking_step() knows
1623 // how many workers to wait for.
1624 _cm.set_concurrency(_queue_count);
1625 }
1626 };
1627
1628 void G1ConcurrentMark::weak_refs_work() {
1629 ResourceMark rm;
1630
1631 {
1632 GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1633
1634 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1635
1636 // See the comment in G1CollectedHeap::ref_processing_init()
1637 // about how reference processing currently works in G1.
1638
1639 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1640
1641 // Prefer to grow the stack until the max capacity.
1642 _global_mark_stack.set_should_grow();
1643
1644 // Parallel processing task executor.
1645 G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1646 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1647
1648 // Process the weak references.
1649 const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1650 _gc_tracer_cm->report_gc_reference_stats(stats);
1651 pt.print_all_references();
1652
1653 // The do_oop work routines of the keep_alive and drain_marking_stack
1654 // oop closures will set the has_overflown flag if we overflow the
1655 // global marking stack.
1656
1657 assert(has_overflown() || _global_mark_stack.is_empty(),
1658 "Mark stack should be empty (unless it has overflown)");
1659 }
1660
1661 if (has_overflown()) {
1662 // We can not trust g1_is_alive and the contents of the heap if the marking stack
1663 // overflowed while processing references. Exit the VM.
1664 fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1665 "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1666 "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1667 _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1668 return;
1669 }
1670
1671 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1672
1673 {
1674 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1675 G1CMIsAliveClosure is_alive(this);
1676 WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1677 }
1678 }
1679
1680 class G1PrecleanYieldClosure : public YieldClosure {
1681 G1ConcurrentMark* _cm;
1682
1683 public:
1684 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1685
1686 virtual bool should_return() {
1687 return _cm->has_aborted();
1688 }
1689
1690 virtual bool should_return_fine_grain() {
1691 _cm->do_yield_check();
1692 return _cm->has_aborted();
1693 }
1694 };
1695
1696 void G1ConcurrentMark::preclean() {
1697 assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1698
1699 SuspendibleThreadSetJoiner joiner;
1700
1701 BarrierEnqueueDiscoveredFieldClosure enqueue;
1702
1703 set_concurrency_and_phase(1, true);
1704
1705 G1PrecleanYieldClosure yield_cl(this);
1706
1707 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1708 // Precleaning is single threaded. Temporarily disable MT discovery.
1709 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1710 rp->preclean_discovered_references(rp->is_alive_non_header(),
1711 &enqueue,
1712 &yield_cl,
1713 _gc_timer_cm);
1714 }
1715
1716 // Closure for marking entries in SATB buffers.
1717 class G1CMSATBBufferClosure : public SATBBufferClosure {
1718 private:
1719 G1CMTask* _task;
1720 G1CollectedHeap* _g1h;
1721
1722 // This is very similar to G1CMTask::deal_with_reference, but with
1723 // more relaxed requirements for the argument, so this must be more
1724 // circumspect about treating the argument as an object.
1725 void do_entry(void* entry) const {
1726 _task->increment_refs_reached();
1727 oop const obj = cast_to_oop(entry);
1728 _task->make_reference_grey(obj);
1729 }
1730
1731 public:
1732 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1733 : _task(task), _g1h(g1h) { }
1734
1735 virtual void do_buffer(void** buffer, size_t size) {
1736 for (size_t i = 0; i < size; ++i) {
1737 do_entry(buffer[i]);
1738 }
1739 }
1740 };
1741
1742 class G1RemarkThreadsClosure : public ThreadClosure {
1743 G1SATBMarkQueueSet& _qset;
1744
1745 public:
1746 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1747 _qset(G1BarrierSet::satb_mark_queue_set()) {}
1748
1749 void do_thread(Thread* thread) {
1750 // Transfer any partial buffer to the qset for completed buffer processing.
1751 _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1752 }
1753 };
1754
1755 class G1CMRemarkTask : public WorkerTask {
1756 // For Threads::possibly_parallel_threads_do
1757 ThreadsClaimTokenScope _threads_claim_token_scope;
1758 G1ConcurrentMark* _cm;
1759 public:
1760 void work(uint worker_id) {
1761 G1CMTask* task = _cm->task(worker_id);
1762 task->record_start_time();
1763 {
1764 ResourceMark rm;
1765
1766 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1767 Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1768 }
1769
1770 do {
1771 task->do_marking_step(1000000000.0 /* something very large */,
1772 true /* do_termination */,
1773 false /* is_serial */);
1774 } while (task->has_aborted() && !_cm->has_overflown());
1775 // If we overflow, then we do not want to restart. We instead
1776 // want to abort remark and do concurrent marking again.
1777 task->record_end_time();
1778 }
1779
1780 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1781 WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1782 _cm->terminator()->reset_for_reuse(active_workers);
1783 }
1784 };
1785
1786 void G1ConcurrentMark::finalize_marking() {
1787 ResourceMark rm;
1788
1789 _g1h->ensure_parsability(false);
1790
1791 // this is remark, so we'll use up all active threads
1792 uint active_workers = _g1h->workers()->active_workers();
1793 set_concurrency_and_phase(active_workers, false /* concurrent */);
1794 // Leave _parallel_marking_threads at it's
1795 // value originally calculated in the G1ConcurrentMark
1796 // constructor and pass values of the active workers
1797 // through the task.
1798
1799 {
1800 G1CMRemarkTask remarkTask(this, active_workers);
1801 // We will start all available threads, even if we decide that the
1802 // active_workers will be fewer. The extra ones will just bail out
1803 // immediately.
1804 _g1h->workers()->run_task(&remarkTask);
1805 }
1806
1807 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1808 guarantee(has_overflown() ||
1809 satb_mq_set.completed_buffers_num() == 0,
1810 "Invariant: has_overflown = %s, num buffers = %zu",
1811 BOOL_TO_STR(has_overflown()),
1812 satb_mq_set.completed_buffers_num());
1813
1814 print_stats();
1815 }
1816
1817 void G1ConcurrentMark::flush_all_task_caches() {
1818 size_t hits = 0;
1819 size_t misses = 0;
1820 for (uint i = 0; i < _max_num_tasks; i++) {
1821 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1822 hits += stats.first;
1823 misses += stats.second;
1824 }
1825 size_t sum = hits + misses;
1826 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1827 hits, misses, percent_of(hits, sum));
1828 }
1829
1830 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1831 assert_at_safepoint();
1832 _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1833 }
1834
1835 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1836 // "Checkpoint" the finger.
1837 HeapWord* local_finger = finger();
1838
1839 while (local_finger < _heap.end()) {
1840 assert(_g1h->is_in_reserved(local_finger), "invariant");
1841
1842 G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1843 // Make sure that the reads below do not float before loading curr_region.
1844 OrderAccess::loadload();
1845 // Above heap_region_containing may return null as we always scan claim
1846 // until the end of the heap. In this case, just jump to the next region.
1847 HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1848
1849 // Is the gap between reading the finger and doing the CAS too long?
1850 HeapWord* res = _finger.compare_exchange(local_finger, end);
1851 if (res == local_finger && curr_region != nullptr) {
1852 // We succeeded.
1853 HeapWord* bottom = curr_region->bottom();
1854 HeapWord* limit = top_at_mark_start(curr_region);
1855
1856 log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1857 // Notice that _finger == end cannot be guaranteed here since,
1858 // someone else might have moved the finger even further.
1859 assert(finger() >= end, "The finger should have moved forward");
1860
1861 if (limit > bottom) {
1862 return curr_region;
1863 } else {
1864 assert(limit == bottom,
1865 "The region limit should be at bottom");
1866 // We return null and the caller should try calling
1867 // claim_region() again.
1868 return nullptr;
1869 }
1870 } else {
1871 // Read the finger again.
1872 HeapWord* next_finger = finger();
1873 assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
1874 local_finger = next_finger;
1875 }
1876 }
1877
1878 return nullptr;
1879 }
1880
1881 #ifndef PRODUCT
1882 class VerifyNoCSetOops {
1883 G1CollectedHeap* _g1h;
1884 const char* _phase;
1885 int _info;
1886
1887 public:
1888 VerifyNoCSetOops(const char* phase, int info = -1) :
1889 _g1h(G1CollectedHeap::heap()),
1890 _phase(phase),
1891 _info(info)
1892 { }
1893
1894 void operator()(G1TaskQueueEntry task_entry) const {
1895 if (task_entry.is_partial_array_state()) {
1896 oop obj = task_entry.to_partial_array_state()->source();
1897 guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1898 return;
1899 }
1900 guarantee(oopDesc::is_oop(task_entry.to_oop()),
1901 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1902 p2i(task_entry.to_oop()), _phase, _info);
1903 G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1904 guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1905 "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1906 p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1907 }
1908 };
1909
1910 void G1ConcurrentMark::verify_no_collection_set_oops() {
1911 assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1912 "should be at a safepoint or initializing");
1913 if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1914 return;
1915 }
1916
1917 // Verify entries on the global mark stack
1918 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1919
1920 // Verify entries on the task queues
1921 for (uint i = 0; i < _max_num_tasks; ++i) {
1922 G1CMTaskQueue* queue = _task_queues->queue(i);
1923 queue->iterate(VerifyNoCSetOops("Queue", i));
1924 }
1925
1926 // Verify the global finger
1927 HeapWord* global_finger = finger();
1928 if (global_finger != nullptr && global_finger < _heap.end()) {
1929 // Since we always iterate over all regions, we might get a null G1HeapRegion
1930 // here.
1931 G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1932 guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1933 "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1934 p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1935 }
1936
1937 // Verify the task fingers
1938 assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1939 for (uint i = 0; i < _num_concurrent_workers; ++i) {
1940 G1CMTask* task = _tasks[i];
1941 HeapWord* task_finger = task->finger();
1942 if (task_finger != nullptr && task_finger < _heap.end()) {
1943 // See above note on the global finger verification.
1944 G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
1945 guarantee(r == nullptr || task_finger == r->bottom() ||
1946 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1947 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1948 p2i(task_finger), HR_FORMAT_PARAMS(r));
1949 }
1950 }
1951 }
1952 #endif // PRODUCT
1953
1954 void G1ConcurrentMark::rebuild_and_scrub() {
1955 if (!needs_remembered_set_rebuild()) {
1956 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1957 }
1958
1959 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1960 }
1961
1962 void G1ConcurrentMark::print_stats() {
1963 if (!log_is_enabled(Debug, gc, stats)) {
1964 return;
1965 }
1966 log_debug(gc, stats)("---------------------------------------------------------------------");
1967 for (size_t i = 0; i < _num_active_tasks; ++i) {
1968 _tasks[i]->print_stats();
1969 log_debug(gc, stats)("---------------------------------------------------------------------");
1970 }
1971 }
1972
1973 bool G1ConcurrentMark::concurrent_cycle_abort() {
1974 assert_at_safepoint_on_vm_thread();
1975 assert(_g1h->collector_state()->is_in_full_gc(), "must be");
1976
1977 // If we start the compaction before the CM threads finish
1978 // scanning the root regions we might trip them over as we'll
1979 // be moving objects / updating references. Since the root region
1980 // scan synchronized with the safepoint, just tell it to abort.
1981 // It will notice when the threads start up again later.
1982 abort_root_region_scan_at_safepoint();
1983
1984 // We haven't started a concurrent cycle no need to do anything; we might have
1985 // aborted the marking because of shutting down though. In this case the marking
1986 // might have already completed the abort (leading to in_progress() below to
1987 // return false), however this still left marking state particularly in the
1988 // shared marking bitmap that must be cleaned up.
1989 // If there are multiple full gcs during shutdown we do this work repeatedly for
1990 // nothing, but this situation should be extremely rare (a full gc after shutdown
1991 // has been signalled is already rare), and this work should be negligible compared
1992 // to actual full gc work.
1993
1994 if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
1995 return false;
1996 }
1997
1998 reset_marking_for_restart();
1999
2000 abort_marking_threads();
2001
2002 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2003 satb_mq_set.abandon_partial_marking();
2004 // This can be called either during or outside marking, we'll read
2005 // the expected_active value from the SATB queue set.
2006 satb_mq_set.set_active_all_threads(false, /* new active value */
2007 satb_mq_set.is_active() /* expected_active */);
2008 return true;
2009 }
2010
2011 void G1ConcurrentMark::abort_marking_threads() {
2012 assert_root_region_scan_completed_or_aborted();
2013 _has_aborted.store_relaxed(true);
2014 _first_overflow_barrier_sync.abort();
2015 _second_overflow_barrier_sync.abort();
2016 }
2017
2018 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2019 class CountCpuTimeThreadClosure : public ThreadClosure {
2020 public:
2021 jlong _total_cpu_time;
2022
2023 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2024
2025 void do_thread(Thread* t) {
2026 _total_cpu_time += os::thread_cpu_time(t);
2027 }
2028 } cl;
2029
2030 threads_do(&cl);
2031
2032 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2033 }
2034
2035 static void print_ms_time_info(const char* prefix, const char* name,
2036 NumberSeq& ns) {
2037 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2038 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2039 if (ns.num() > 0) {
2040 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2041 prefix, ns.sd(), ns.maximum());
2042 }
2043 }
2044
2045 void G1ConcurrentMark::print_summary_info() {
2046 Log(gc, marking) log;
2047 if (!log.is_trace()) {
2048 return;
2049 }
2050
2051 log.trace(" Concurrent marking:");
2052 if (!is_fully_initialized()) {
2053 log.trace(" has not been initialized yet");
2054 return;
2055 }
2056 print_ms_time_info(" ", "remarks", _remark_times);
2057 {
2058 print_ms_time_info(" ", "final marks", _remark_mark_times);
2059 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2060
2061 }
2062 print_ms_time_info(" ", "cleanups", _cleanup_times);
2063 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2064 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2065 log.trace(" Total stop_world time = %8.2f s.",
2066 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2067 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2068 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2069 }
2070
2071 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2072 if (is_fully_initialized()) { // they are initialized late
2073 tc->do_thread(_cm_thread);
2074 _concurrent_workers->threads_do(tc);
2075 }
2076 }
2077
2078 void G1ConcurrentMark::print_on(outputStream* st) const {
2079 st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2080 _mark_bitmap.print_on(st, " Bits: ");
2081 }
2082
2083 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2084 ReferenceProcessor* result = g1h->ref_processor_cm();
2085 assert(result != nullptr, "CM reference processor should not be null");
2086 return result;
2087 }
2088
2089 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2090 G1CMTask* task)
2091 : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2092 _g1h(g1h), _task(task)
2093 { }
2094
2095 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2096 assert(hr != nullptr,
2097 "claim_region() should have filtered out null regions");
2098 _curr_region = hr;
2099 _finger = hr->bottom();
2100 update_region_limit();
2101 }
2102
2103 void G1CMTask::update_region_limit() {
2104 G1HeapRegion* hr = _curr_region;
2105 HeapWord* bottom = hr->bottom();
2106 HeapWord* limit = _cm->top_at_mark_start(hr);
2107
2108 if (limit == bottom) {
2109 // The region was collected underneath our feet.
2110 // We set the finger to bottom to ensure that the bitmap
2111 // iteration that will follow this will not do anything.
2112 // (this is not a condition that holds when we set the region up,
2113 // as the region is not supposed to be empty in the first place)
2114 _finger = bottom;
2115 } else if (limit >= _region_limit) {
2116 assert(limit >= _finger, "peace of mind");
2117 } else {
2118 assert(limit < _region_limit, "only way to get here");
2119 // This can happen under some pretty unusual circumstances. An
2120 // evacuation pause empties the region underneath our feet (TAMS
2121 // at bottom). We then do some allocation in the region (TAMS
2122 // stays at bottom), followed by the region being used as a GC
2123 // alloc region (TAMS will move to top() and the objects
2124 // originally below it will be greyed). All objects now marked in
2125 // the region are explicitly greyed, if below the global finger,
2126 // and we do not need in fact to scan anything else. So, we simply
2127 // set _finger to be limit to ensure that the bitmap iteration
2128 // doesn't do anything.
2129 _finger = limit;
2130 }
2131
2132 _region_limit = limit;
2133 }
2134
2135 void G1CMTask::giveup_current_region() {
2136 assert(_curr_region != nullptr, "invariant");
2137 clear_region_fields();
2138 }
2139
2140 void G1CMTask::clear_region_fields() {
2141 // Values for these three fields that indicate that we're not
2142 // holding on to a region.
2143 _curr_region = nullptr;
2144 _finger = nullptr;
2145 _region_limit = nullptr;
2146 }
2147
2148 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2149 if (cm_oop_closure == nullptr) {
2150 assert(_cm_oop_closure != nullptr, "invariant");
2151 } else {
2152 assert(_cm_oop_closure == nullptr, "invariant");
2153 }
2154 _cm_oop_closure = cm_oop_closure;
2155 }
2156
2157 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2158 guarantee(mark_bitmap != nullptr, "invariant");
2159 _mark_bitmap = mark_bitmap;
2160 clear_region_fields();
2161
2162 _calls = 0;
2163 _elapsed_time_ms = 0.0;
2164 _termination_time_ms = 0.0;
2165
2166 _mark_stats_cache.reset();
2167 }
2168
2169 void G1CMTask::reset_for_restart() {
2170 clear_region_fields();
2171 _task_queue->set_empty();
2172 TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2173 TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2174 }
2175
2176 void G1CMTask::register_partial_array_splitter() {
2177
2178 ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2179 _cm->max_num_tasks());
2180 }
2181
2182 void G1CMTask::unregister_partial_array_splitter() {
2183 _partial_array_splitter.~PartialArraySplitter();
2184 }
2185
2186 bool G1CMTask::should_exit_termination() {
2187 if (!regular_clock_call()) {
2188 return true;
2189 }
2190
2191 // This is called when we are in the termination protocol. We should
2192 // quit if, for some reason, this task wants to abort or the global
2193 // stack is not empty (this means that we can get work from it).
2194 return !_cm->mark_stack_empty() || has_aborted();
2195 }
2196
2197 void G1CMTask::reached_limit() {
2198 assert(_words_scanned >= _words_scanned_limit ||
2199 _refs_reached >= _refs_reached_limit ,
2200 "shouldn't have been called otherwise");
2201 abort_marking_if_regular_check_fail();
2202 }
2203
2204 bool G1CMTask::regular_clock_call() {
2205 if (has_aborted()) {
2206 return false;
2207 }
2208
2209 // First, we need to recalculate the words scanned and refs reached
2210 // limits for the next clock call.
2211 recalculate_limits();
2212
2213 // During the regular clock call we do the following
2214
2215 // (1) If an overflow has been flagged, then we abort.
2216 if (_cm->has_overflown()) {
2217 return false;
2218 }
2219
2220 // If we are not concurrent (i.e. we're doing remark) we don't need
2221 // to check anything else. The other steps are only needed during
2222 // the concurrent marking phase.
2223 if (!_cm->concurrent()) {
2224 return true;
2225 }
2226
2227 // (2) If marking has been aborted for Full GC, then we also abort.
2228 if (_cm->has_aborted()) {
2229 return false;
2230 }
2231
2232 // (4) We check whether we should yield. If we have to, then we abort.
2233 if (SuspendibleThreadSet::should_yield()) {
2234 // We should yield. To do this we abort the task. The caller is
2235 // responsible for yielding.
2236 return false;
2237 }
2238
2239 // (5) We check whether we've reached our time quota. If we have,
2240 // then we abort.
2241 double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2242 if (elapsed_time_ms > _time_target_ms) {
2243 _has_timed_out = true;
2244 return false;
2245 }
2246
2247 // (6) Finally, we check whether there are enough completed STAB
2248 // buffers available for processing. If there are, we abort.
2249 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2250 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2251 // we do need to process SATB buffers, we'll abort and restart
2252 // the marking task to do so
2253 return false;
2254 }
2255 return true;
2256 }
2257
2258 void G1CMTask::recalculate_limits() {
2259 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2260 _words_scanned_limit = _real_words_scanned_limit;
2261
2262 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2263 _refs_reached_limit = _real_refs_reached_limit;
2264 }
2265
2266 void G1CMTask::decrease_limits() {
2267 // This is called when we believe that we're going to do an infrequent
2268 // operation which will increase the per byte scanned cost (i.e. move
2269 // entries to/from the global stack). It basically tries to decrease the
2270 // scanning limit so that the clock is called earlier.
2271
2272 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2273 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2274 }
2275
2276 void G1CMTask::move_entries_to_global_stack() {
2277 // Local array where we'll store the entries that will be popped
2278 // from the local queue.
2279 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2280
2281 size_t n = 0;
2282 G1TaskQueueEntry task_entry;
2283 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2284 buffer[n] = task_entry;
2285 ++n;
2286 }
2287 if (n < G1CMMarkStack::EntriesPerChunk) {
2288 buffer[n] = G1TaskQueueEntry();
2289 }
2290
2291 if (n > 0) {
2292 if (!_cm->mark_stack_push(buffer)) {
2293 set_has_aborted();
2294 }
2295 }
2296
2297 // This operation was quite expensive, so decrease the limits.
2298 decrease_limits();
2299 }
2300
2301 bool G1CMTask::get_entries_from_global_stack() {
2302 // Local array where we'll store the entries that will be popped
2303 // from the global stack.
2304 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2305
2306 if (!_cm->mark_stack_pop(buffer)) {
2307 return false;
2308 }
2309
2310 // We did actually pop at least one entry.
2311 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2312 G1TaskQueueEntry task_entry = buffer[i];
2313 if (task_entry.is_null()) {
2314 break;
2315 }
2316 assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2317 bool success = _task_queue->push(task_entry);
2318 // We only call this when the local queue is empty or under a
2319 // given target limit. So, we do not expect this push to fail.
2320 assert(success, "invariant");
2321 }
2322
2323 // This operation was quite expensive, so decrease the limits
2324 decrease_limits();
2325 return true;
2326 }
2327
2328 void G1CMTask::drain_local_queue(bool partially) {
2329 if (has_aborted()) {
2330 return;
2331 }
2332
2333 // Decide what the target size is, depending whether we're going to
2334 // drain it partially (so that other tasks can steal if they run out
2335 // of things to do) or totally (at the very end).
2336 uint target_size;
2337 if (partially) {
2338 target_size = GCDrainStackTargetSize;
2339 } else {
2340 target_size = 0;
2341 }
2342
2343 if (_task_queue->size() > target_size) {
2344 G1TaskQueueEntry entry;
2345 bool ret = _task_queue->pop_local(entry);
2346 while (ret) {
2347 process_entry(entry, false /* stolen */);
2348 if (_task_queue->size() <= target_size || has_aborted()) {
2349 ret = false;
2350 } else {
2351 ret = _task_queue->pop_local(entry);
2352 }
2353 }
2354 }
2355 }
2356
2357 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2358 assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2359
2360 // Mark klass metadata
2361 process_klass(obj->klass());
2362
2363 size_t array_length = obj->length();
2364 size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2365
2366 process_array_chunk(obj, 0, initial_chunk_size);
2367
2368 // Include object header size
2369 if (obj->is_refArray()) {
2370 return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2371 } else {
2372 FlatArrayKlass* fak = FlatArrayKlass::cast(obj->klass());
2373 return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
2374 }
2375 }
2376
2377 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2378 PartialArrayState* state = task.to_partial_array_state();
2379 // Access state before release by claim().
2380 objArrayOop obj = oop_cast<objArrayOop>(state->source());
2381
2382 PartialArraySplitter::Claim claim =
2383 _partial_array_splitter.claim(state, _task_queue, stolen);
2384
2385 process_array_chunk(obj, claim._start, claim._end);
2386
2387 if (obj->is_refArray()) {
2388 return heap_word_size((claim._end - claim._start) * heapOopSize);
2389 } else {
2390 assert(obj->is_flatArray(), "Must be!");
2391 size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
2392 size_t nof_elements = claim._end - claim._start;
2393 return heap_word_size(nof_elements * element_byte_size);
2394 }
2395 }
2396
2397 void G1CMTask::drain_global_stack(bool partially) {
2398 if (has_aborted()) {
2399 return;
2400 }
2401
2402 // We have a policy to drain the local queue before we attempt to
2403 // drain the global stack.
2404 assert(partially || _task_queue->size() == 0, "invariant");
2405
2406 // Decide what the target size is, depending whether we're going to
2407 // drain it partially (so that other tasks can steal if they run out
2408 // of things to do) or totally (at the very end).
2409 // Notice that when draining the global mark stack partially, due to the racyness
2410 // of the mark stack size update we might in fact drop below the target. But,
2411 // this is not a problem.
2412 // In case of total draining, we simply process until the global mark stack is
2413 // totally empty, disregarding the size counter.
2414 if (partially) {
2415 size_t const target_size = _cm->partial_mark_stack_size_target();
2416 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2417 if (get_entries_from_global_stack()) {
2418 drain_local_queue(partially);
2419 }
2420 }
2421 } else {
2422 while (!has_aborted() && get_entries_from_global_stack()) {
2423 drain_local_queue(partially);
2424 }
2425 }
2426 }
2427
2428 // SATB Queue has several assumptions on whether to call the par or
2429 // non-par versions of the methods. this is why some of the code is
2430 // replicated. We should really get rid of the single-threaded version
2431 // of the code to simplify things.
2432 void G1CMTask::drain_satb_buffers() {
2433 if (has_aborted()) {
2434 return;
2435 }
2436
2437 // We set this so that the regular clock knows that we're in the
2438 // middle of draining buffers and doesn't set the abort flag when it
2439 // notices that SATB buffers are available for draining. It'd be
2440 // very counter productive if it did that. :-)
2441 _draining_satb_buffers = true;
2442
2443 G1CMSATBBufferClosure satb_cl(this, _g1h);
2444 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2445
2446 // This keeps claiming and applying the closure to completed buffers
2447 // until we run out of buffers or we need to abort.
2448 while (!has_aborted() &&
2449 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2450 abort_marking_if_regular_check_fail();
2451 }
2452
2453 // Can't assert qset is empty here, even if not aborted. If concurrent,
2454 // some other thread might be adding to the queue. If not concurrent,
2455 // some other thread might have won the race for the last buffer, but
2456 // has not yet decremented the count.
2457
2458 _draining_satb_buffers = false;
2459
2460 // again, this was a potentially expensive operation, decrease the
2461 // limits to get the regular clock call early
2462 decrease_limits();
2463 }
2464
2465 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2466 _mark_stats_cache.reset(region_idx);
2467 }
2468
2469 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2470 return _mark_stats_cache.evict_all();
2471 }
2472
2473 void G1CMTask::print_stats() {
2474 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2475 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2476 _elapsed_time_ms, _termination_time_ms);
2477 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2478 _step_times_ms.num(),
2479 _step_times_ms.avg(),
2480 _step_times_ms.sd(),
2481 _step_times_ms.maximum(),
2482 _step_times_ms.sum());
2483 size_t const hits = _mark_stats_cache.hits();
2484 size_t const misses = _mark_stats_cache.misses();
2485 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2486 hits, misses, percent_of(hits, hits + misses));
2487 }
2488
2489 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2490 return _task_queues->steal(worker_id, task_entry);
2491 }
2492
2493 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2494 if (has_aborted() || _curr_region == nullptr) {
2495 return;
2496 }
2497
2498 // This means that we're already holding on to a region.
2499 assert(_finger != nullptr, "if region is not null, then the finger "
2500 "should not be null either");
2501
2502 // We might have restarted this task after an evacuation pause
2503 // which might have evacuated the region we're holding on to
2504 // underneath our feet. Let's read its limit again to make sure
2505 // that we do not iterate over a region of the heap that
2506 // contains garbage (update_region_limit() will also move
2507 // _finger to the start of the region if it is found empty).
2508 update_region_limit();
2509 // We will start from _finger not from the start of the region,
2510 // as we might be restarting this task after aborting half-way
2511 // through scanning this region. In this case, _finger points to
2512 // the address where we last found a marked object. If this is a
2513 // fresh region, _finger points to start().
2514 MemRegion mr = MemRegion(_finger, _region_limit);
2515
2516 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2517 "humongous regions should go around loop once only");
2518
2519 // Some special cases:
2520 // If the memory region is empty, we can just give up the region.
2521 // If the current region is humongous then we only need to check
2522 // the bitmap for the bit associated with the start of the object,
2523 // scan the object if it's live, and give up the region.
2524 // Otherwise, let's iterate over the bitmap of the part of the region
2525 // that is left.
2526 // If the iteration is successful, give up the region.
2527 if (mr.is_empty()) {
2528 giveup_current_region();
2529 abort_marking_if_regular_check_fail();
2530 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2531 if (_mark_bitmap->is_marked(mr.start())) {
2532 // The object is marked - apply the closure
2533 bitmap_closure.do_addr(mr.start());
2534 }
2535 // Even if this task aborted while scanning the humongous object
2536 // we can (and should) give up the current region.
2537 giveup_current_region();
2538 abort_marking_if_regular_check_fail();
2539 } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2540 giveup_current_region();
2541 abort_marking_if_regular_check_fail();
2542 } else {
2543 assert(has_aborted(), "currently the only way to do so");
2544 // The only way to abort the bitmap iteration is to return
2545 // false from the do_bit() method. However, inside the
2546 // do_bit() method we move the _finger to point to the
2547 // object currently being looked at. So, if we bail out, we
2548 // have definitely set _finger to something non-null.
2549 assert(_finger != nullptr, "invariant");
2550
2551 // Region iteration was actually aborted. So now _finger
2552 // points to the address of the object we last scanned. If we
2553 // leave it there, when we restart this task, we will rescan
2554 // the object. It is easy to avoid this. We move the finger by
2555 // enough to point to the next possible object header.
2556 assert(_finger < _region_limit, "invariant");
2557 HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2558 if (new_finger >= _region_limit) {
2559 giveup_current_region();
2560 } else {
2561 move_finger_to(new_finger);
2562 }
2563 }
2564 }
2565
2566 void G1CMTask::claim_new_region() {
2567 // Read the note on the claim_region() method on why it might
2568 // return null with potentially more regions available for
2569 // claiming and why we have to check out_of_regions() to determine
2570 // whether we're done or not.
2571 while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2572 // We are going to try to claim a new region. We should have
2573 // given up on the previous one.
2574 // Separated the asserts so that we know which one fires.
2575 assert(_curr_region == nullptr, "invariant");
2576 assert(_finger == nullptr, "invariant");
2577 assert(_region_limit == nullptr, "invariant");
2578 G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2579 if (claimed_region != nullptr) {
2580 // Yes, we managed to claim one
2581 setup_for_region(claimed_region);
2582 assert(_curr_region == claimed_region, "invariant");
2583 }
2584 // It is important to call the regular clock here. It might take
2585 // a while to claim a region if, for example, we hit a large
2586 // block of empty regions. So we need to call the regular clock
2587 // method once round the loop to make sure it's called
2588 // frequently enough.
2589 abort_marking_if_regular_check_fail();
2590 }
2591 }
2592
2593 void G1CMTask::attempt_stealing() {
2594 // We cannot check whether the global stack is empty, since other
2595 // tasks might be pushing objects to it concurrently.
2596 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2597 "only way to reach here");
2598 while (!has_aborted()) {
2599 G1TaskQueueEntry entry;
2600 if (_cm->try_stealing(_worker_id, entry)) {
2601 process_entry(entry, true /* stolen */);
2602
2603 // And since we're towards the end, let's totally drain the
2604 // local queue and global stack.
2605 drain_local_queue(false);
2606 drain_global_stack(false);
2607 } else {
2608 break;
2609 }
2610 }
2611 }
2612
2613 void G1CMTask::attempt_termination(bool is_serial) {
2614 // We cannot check whether the global stack is empty, since other
2615 // tasks might be concurrently pushing objects on it.
2616 // Separated the asserts so that we know which one fires.
2617 assert(_cm->out_of_regions(), "only way to reach here");
2618 assert(_task_queue->size() == 0, "only way to reach here");
2619 double termination_start_time_ms = os::elapsedTime() * 1000.0;
2620
2621 // The G1CMTask class also extends the TerminatorTerminator class,
2622 // hence its should_exit_termination() method will also decide
2623 // whether to exit the termination protocol or not.
2624 bool finished = (is_serial ||
2625 _cm->terminator()->offer_termination(this));
2626 _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2627
2628 if (finished) {
2629 // We're all done.
2630
2631 // We can now guarantee that the global stack is empty, since
2632 // all other tasks have finished. We separated the guarantees so
2633 // that, if a condition is false, we can immediately find out
2634 // which one.
2635 guarantee(_cm->out_of_regions(), "only way to reach here");
2636 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2637 guarantee(_task_queue->size() == 0, "only way to reach here");
2638 guarantee(!_cm->has_overflown(), "only way to reach here");
2639 guarantee(!has_aborted(), "should never happen if termination has completed");
2640 } else {
2641 // Apparently there's more work to do. Let's abort this task. We
2642 // will restart it and hopefully we can find more things to do.
2643 set_has_aborted();
2644 }
2645 }
2646
2647 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2648 if (_has_timed_out) {
2649 double diff_ms = elapsed_time_ms - _time_target_ms;
2650 // Keep statistics of how well we did with respect to hitting
2651 // our target only if we actually timed out (if we aborted for
2652 // other reasons, then the results might get skewed).
2653 _marking_step_diff_ms.add(diff_ms);
2654 }
2655
2656 if (!_cm->has_overflown()) {
2657 return;
2658 }
2659
2660 // This is the interesting one. We aborted because a global
2661 // overflow was raised. This means we have to restart the
2662 // marking phase and start iterating over regions. However, in
2663 // order to do this we have to make sure that all tasks stop
2664 // what they are doing and re-initialize in a safe manner. We
2665 // will achieve this with the use of two barrier sync points.
2666 if (!is_serial) {
2667 // We only need to enter the sync barrier if being called
2668 // from a parallel context
2669 _cm->enter_first_sync_barrier(_worker_id);
2670
2671 // When we exit this sync barrier we know that all tasks have
2672 // stopped doing marking work. So, it's now safe to
2673 // re-initialize our data structures.
2674 }
2675
2676 clear_region_fields();
2677 flush_mark_stats_cache();
2678
2679 if (!is_serial) {
2680 // If we're executing the concurrent phase of marking, reset the marking
2681 // state; otherwise the marking state is reset after reference processing,
2682 // during the remark pause.
2683 // If we reset here as a result of an overflow during the remark we will
2684 // see assertion failures from any subsequent set_concurrency_and_phase()
2685 // calls.
2686 if (_cm->concurrent() && _worker_id == 0) {
2687 // Worker 0 is responsible for clearing the global data structures because
2688 // of an overflow. During STW we should not clear the overflow flag (in
2689 // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2690 // method to abort the pause and restart concurrent marking.
2691 _cm->reset_marking_for_restart();
2692
2693 log_info(gc, marking)("Concurrent Mark reset for overflow");
2694 }
2695
2696 // ...and enter the second barrier.
2697 _cm->enter_second_sync_barrier(_worker_id);
2698 }
2699 }
2700
2701 /*****************************************************************************
2702
2703 The do_marking_step(time_target_ms, ...) method is the building
2704 block of the parallel marking framework. It can be called in parallel
2705 with other invocations of do_marking_step() on different tasks
2706 (but only one per task, obviously) and concurrently with the
2707 mutator threads, or during remark, hence it eliminates the need
2708 for two versions of the code. When called during remark, it will
2709 pick up from where the task left off during the concurrent marking
2710 phase. Interestingly, tasks are also claimable during evacuation
2711 pauses too, since do_marking_step() ensures that it aborts before
2712 it needs to yield.
2713
2714 The data structures that it uses to do marking work are the
2715 following:
2716
2717 (1) Marking Bitmap. If there are grey objects that appear only
2718 on the bitmap (this happens either when dealing with an overflow
2719 or when the concurrent start pause has simply marked the roots
2720 and didn't push them on the stack), then tasks claim heap
2721 regions whose bitmap they then scan to find grey objects. A
2722 global finger indicates where the end of the last claimed region
2723 is. A local finger indicates how far into the region a task has
2724 scanned. The two fingers are used to determine how to grey an
2725 object (i.e. whether simply marking it is OK, as it will be
2726 visited by a task in the future, or whether it needs to be also
2727 pushed on a stack).
2728
2729 (2) Local Queue. The local queue of the task which is accessed
2730 reasonably efficiently by the task. Other tasks can steal from
2731 it when they run out of work. Throughout the marking phase, a
2732 task attempts to keep its local queue short but not totally
2733 empty, so that entries are available for stealing by other
2734 tasks. Only when there is no more work, a task will totally
2735 drain its local queue.
2736
2737 (3) Global Mark Stack. This handles local queue overflow. During
2738 marking only sets of entries are moved between it and the local
2739 queues, as access to it requires a mutex and more fine-grain
2740 interaction with it which might cause contention. If it
2741 overflows, then the marking phase should restart and iterate
2742 over the bitmap to identify grey objects. Throughout the marking
2743 phase, tasks attempt to keep the global mark stack at a small
2744 length but not totally empty, so that entries are available for
2745 popping by other tasks. Only when there is no more work, tasks
2746 will totally drain the global mark stack.
2747
2748 (4) SATB Buffer Queue. This is where completed SATB buffers are
2749 made available. Buffers are regularly removed from this queue
2750 and scanned for roots, so that the queue doesn't get too
2751 long. During remark, all completed buffers are processed, as
2752 well as the filled in parts of any uncompleted buffers.
2753
2754 The do_marking_step() method tries to abort when the time target
2755 has been reached. There are a few other cases when the
2756 do_marking_step() method also aborts:
2757
2758 (1) When the marking phase has been aborted (after a Full GC).
2759
2760 (2) When a global overflow (on the global stack) has been
2761 triggered. Before the task aborts, it will actually sync up with
2762 the other tasks to ensure that all the marking data structures
2763 (local queues, stacks, fingers etc.) are re-initialized so that
2764 when do_marking_step() completes, the marking phase can
2765 immediately restart.
2766
2767 (3) When enough completed SATB buffers are available. The
2768 do_marking_step() method only tries to drain SATB buffers right
2769 at the beginning. So, if enough buffers are available, the
2770 marking step aborts and the SATB buffers are processed at
2771 the beginning of the next invocation.
2772
2773 (4) To yield. when we have to yield then we abort and yield
2774 right at the end of do_marking_step(). This saves us from a lot
2775 of hassle as, by yielding we might allow a Full GC. If this
2776 happens then objects will be compacted underneath our feet, the
2777 heap might shrink, etc. We save checking for this by just
2778 aborting and doing the yield right at the end.
2779
2780 From the above it follows that the do_marking_step() method should
2781 be called in a loop (or, otherwise, regularly) until it completes.
2782
2783 If a marking step completes without its has_aborted() flag being
2784 true, it means it has completed the current marking phase (and
2785 also all other marking tasks have done so and have all synced up).
2786
2787 A method called regular_clock_call() is invoked "regularly" (in
2788 sub ms intervals) throughout marking. It is this clock method that
2789 checks all the abort conditions which were mentioned above and
2790 decides when the task should abort. A work-based scheme is used to
2791 trigger this clock method: when the number of object words the
2792 marking phase has scanned or the number of references the marking
2793 phase has visited reach a given limit. Additional invocations to
2794 the method clock have been planted in a few other strategic places
2795 too. The initial reason for the clock method was to avoid calling
2796 cpu time gathering too regularly, as it is quite expensive. So,
2797 once it was in place, it was natural to piggy-back all the other
2798 conditions on it too and not constantly check them throughout the code.
2799
2800 If do_termination is true then do_marking_step will enter its
2801 termination protocol.
2802
2803 The value of is_serial must be true when do_marking_step is being
2804 called serially (i.e. by the VMThread) and do_marking_step should
2805 skip any synchronization in the termination and overflow code.
2806 Examples include the serial remark code and the serial reference
2807 processing closures.
2808
2809 The value of is_serial must be false when do_marking_step is
2810 being called by any of the worker threads.
2811 Examples include the concurrent marking code (CMMarkingTask),
2812 the MT remark code, and the MT reference processing closures.
2813
2814 *****************************************************************************/
2815
2816 void G1CMTask::do_marking_step(double time_target_ms,
2817 bool do_termination,
2818 bool is_serial) {
2819 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2820
2821 _start_cpu_time_ns = os::current_thread_cpu_time();
2822
2823 // If do_stealing is true then do_marking_step will attempt to
2824 // steal work from the other G1CMTasks. It only makes sense to
2825 // enable stealing when the termination protocol is enabled
2826 // and do_marking_step() is not being called serially.
2827 bool do_stealing = do_termination && !is_serial;
2828
2829 G1Predictions const& predictor = _g1h->policy()->predictor();
2830 double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2831 _time_target_ms = time_target_ms - diff_prediction_ms;
2832
2833 // set up the variables that are used in the work-based scheme to
2834 // call the regular clock method
2835 _words_scanned = 0;
2836 _refs_reached = 0;
2837 recalculate_limits();
2838
2839 // clear all flags
2840 clear_has_aborted();
2841 _has_timed_out = false;
2842 _draining_satb_buffers = false;
2843
2844 ++_calls;
2845
2846 // Set up the bitmap and oop closures. Anything that uses them is
2847 // eventually called from this method, so it is OK to allocate these
2848 // statically.
2849 G1CMBitMapClosure bitmap_closure(this, _cm);
2850 G1CMOopClosure cm_oop_closure(_g1h, this);
2851 set_cm_oop_closure(&cm_oop_closure);
2852
2853 if (_cm->has_overflown()) {
2854 // This can happen if the mark stack overflows during a GC pause
2855 // and this task, after a yield point, restarts. We have to abort
2856 // as we need to get into the overflow protocol which happens
2857 // right at the end of this task.
2858 set_has_aborted();
2859 }
2860
2861 // First drain any available SATB buffers. After this, we will not
2862 // look at SATB buffers before the next invocation of this method.
2863 // If enough completed SATB buffers are queued up, the regular clock
2864 // will abort this task so that it restarts.
2865 drain_satb_buffers();
2866 // ...then partially drain the local queue and the global stack
2867 drain_local_queue(true);
2868 drain_global_stack(true);
2869
2870 do {
2871 process_current_region(bitmap_closure);
2872 // At this point we have either completed iterating over the
2873 // region we were holding on to, or we have aborted.
2874
2875 // We then partially drain the local queue and the global stack.
2876 drain_local_queue(true);
2877 drain_global_stack(true);
2878
2879 claim_new_region();
2880
2881 assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2882 "at this point we should be out of regions");
2883 } while ( _curr_region != nullptr && !has_aborted());
2884
2885 // We cannot check whether the global stack is empty, since other
2886 // tasks might be pushing objects to it concurrently.
2887 assert(has_aborted() || _cm->out_of_regions(),
2888 "at this point we should be out of regions");
2889 // Try to reduce the number of available SATB buffers so that
2890 // remark has less work to do.
2891 drain_satb_buffers();
2892
2893 // Since we've done everything else, we can now totally drain the
2894 // local queue and global stack.
2895 drain_local_queue(false);
2896 drain_global_stack(false);
2897
2898 // Attempt at work stealing from other task's queues.
2899 if (do_stealing && !has_aborted()) {
2900 // We have not aborted. This means that we have finished all that
2901 // we could. Let's try to do some stealing...
2902 attempt_stealing();
2903 }
2904
2905 // We still haven't aborted. Now, let's try to get into the
2906 // termination protocol.
2907 if (do_termination && !has_aborted()) {
2908 attempt_termination(is_serial);
2909 }
2910
2911 // Mainly for debugging purposes to make sure that a pointer to the
2912 // closure which was statically allocated in this frame doesn't
2913 // escape it by accident.
2914 set_cm_oop_closure(nullptr);
2915 jlong end_cpu_time_ns = os::current_thread_cpu_time();
2916 double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2917 // Update the step history.
2918 _step_times_ms.add(elapsed_time_ms);
2919
2920 if (has_aborted()) {
2921 // The task was aborted for some reason.
2922 handle_abort(is_serial, elapsed_time_ms);
2923 }
2924 }
2925
2926 G1CMTask::G1CMTask(uint worker_id,
2927 G1ConcurrentMark* cm,
2928 G1CMTaskQueue* task_queue,
2929 G1RegionMarkStats* mark_stats) :
2930 _worker_id(worker_id),
2931 _g1h(G1CollectedHeap::heap()),
2932 _cm(cm),
2933 _mark_bitmap(nullptr),
2934 _task_queue(task_queue),
2935 _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
2936 _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
2937 _calls(0),
2938 _time_target_ms(0.0),
2939 _start_cpu_time_ns(0),
2940 _cm_oop_closure(nullptr),
2941 _curr_region(nullptr),
2942 _finger(nullptr),
2943 _region_limit(nullptr),
2944 _words_scanned(0),
2945 _words_scanned_limit(0),
2946 _real_words_scanned_limit(0),
2947 _refs_reached(0),
2948 _refs_reached_limit(0),
2949 _real_refs_reached_limit(0),
2950 _has_aborted(false),
2951 _has_timed_out(false),
2952 _draining_satb_buffers(false),
2953 _step_times_ms(),
2954 _elapsed_time_ms(0.0),
2955 _termination_time_ms(0.0),
2956 _marking_step_diff_ms()
2957 {
2958 guarantee(task_queue != nullptr, "invariant");
2959
2960 _marking_step_diff_ms.add(0.5);
2961 }
2962
2963 // These are formatting macros that are used below to ensure
2964 // consistent formatting. The *_H_* versions are used to format the
2965 // header for a particular value and they should be kept consistent
2966 // with the corresponding macro. Also note that most of the macros add
2967 // the necessary white space (as a prefix) which makes them a bit
2968 // easier to compose.
2969
2970 // All the output lines are prefixed with this string to be able to
2971 // identify them easily in a large log file.
2972 #define G1PPRL_LINE_PREFIX "###"
2973
2974 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
2975 #ifdef _LP64
2976 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
2977 #else // _LP64
2978 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
2979 #endif // _LP64
2980
2981 // For per-region info
2982 #define G1PPRL_TYPE_FORMAT " %-4s"
2983 #define G1PPRL_TYPE_H_FORMAT " %4s"
2984 #define G1PPRL_STATE_FORMAT " %-5s"
2985 #define G1PPRL_STATE_H_FORMAT " %5s"
2986 #define G1PPRL_BYTE_FORMAT " %9zu"
2987 #define G1PPRL_BYTE_H_FORMAT " %9s"
2988 #define G1PPRL_DOUBLE_FORMAT "%14.1f"
2989 #define G1PPRL_GCEFF_H_FORMAT " %14s"
2990 #define G1PPRL_GID_H_FORMAT " %9s"
2991 #define G1PPRL_GID_FORMAT " " UINT32_FORMAT_W(9)
2992 #define G1PPRL_LEN_FORMAT " " UINT32_FORMAT_W(14)
2993 #define G1PPRL_LEN_H_FORMAT " %14s"
2994 #define G1PPRL_GID_GCEFF_FORMAT " %14.1f"
2995 #define G1PPRL_GID_LIVENESS_FORMAT " %9.2f"
2996
2997 // For summary info
2998 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
2999 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": %zu"
3000 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
3001 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3002
3003 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
3004 _total_used_bytes(0),
3005 _total_capacity_bytes(0),
3006 _total_live_bytes(0),
3007 _total_remset_bytes(0),
3008 _total_code_roots_bytes(0)
3009 {
3010 if (!log_is_enabled(Trace, gc, liveness)) {
3011 return;
3012 }
3013
3014 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3015 MemRegion reserved = g1h->reserved();
3016 double now = os::elapsedTime();
3017
3018 // Print the header of the output.
3019 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3020 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3021 G1PPRL_SUM_ADDR_FORMAT("reserved")
3022 G1PPRL_SUM_BYTE_FORMAT("region-size"),
3023 p2i(reserved.start()), p2i(reserved.end()),
3024 G1HeapRegion::GrainBytes);
3025 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3026 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3027 G1PPRL_TYPE_H_FORMAT
3028 G1PPRL_ADDR_BASE_H_FORMAT
3029 G1PPRL_BYTE_H_FORMAT
3030 G1PPRL_BYTE_H_FORMAT
3031 G1PPRL_STATE_H_FORMAT
3032 G1PPRL_BYTE_H_FORMAT
3033 G1PPRL_GID_H_FORMAT,
3034 "type", "address-range",
3035 "used", "live",
3036 "state", "code-roots",
3037 "group-id");
3038 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3039 G1PPRL_TYPE_H_FORMAT
3040 G1PPRL_ADDR_BASE_H_FORMAT
3041 G1PPRL_BYTE_H_FORMAT
3042 G1PPRL_BYTE_H_FORMAT
3043 G1PPRL_STATE_H_FORMAT
3044 G1PPRL_BYTE_H_FORMAT
3045 G1PPRL_GID_H_FORMAT,
3046 "", "",
3047 "(bytes)", "(bytes)",
3048 "", "(bytes)", "");
3049 }
3050
3051 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3052 if (!log_is_enabled(Trace, gc, liveness)) {
3053 return false;
3054 }
3055
3056 const char* type = r->get_type_str();
3057 HeapWord* bottom = r->bottom();
3058 HeapWord* end = r->end();
3059 size_t capacity_bytes = r->capacity();
3060 size_t used_bytes = r->used();
3061 size_t live_bytes = r->live_bytes();
3062 size_t remset_bytes = r->rem_set()->mem_size();
3063 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3064 const char* remset_type = r->rem_set()->get_short_state_str();
3065 uint cset_group_id = r->rem_set()->has_cset_group()
3066 ? r->rem_set()->cset_group_id()
3067 : G1CSetCandidateGroup::NoRemSetId;
3068
3069 _total_used_bytes += used_bytes;
3070 _total_capacity_bytes += capacity_bytes;
3071 _total_live_bytes += live_bytes;
3072 _total_remset_bytes += remset_bytes;
3073 _total_code_roots_bytes += code_roots_bytes;
3074
3075 // Print a line for this particular region.
3076 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3077 G1PPRL_TYPE_FORMAT
3078 G1PPRL_ADDR_BASE_FORMAT
3079 G1PPRL_BYTE_FORMAT
3080 G1PPRL_BYTE_FORMAT
3081 G1PPRL_STATE_FORMAT
3082 G1PPRL_BYTE_FORMAT
3083 G1PPRL_GID_FORMAT,
3084 type, p2i(bottom), p2i(end),
3085 used_bytes, live_bytes,
3086 remset_type, code_roots_bytes,
3087 cset_group_id);
3088
3089 return false;
3090 }
3091
3092 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3093 if (!log_is_enabled(Trace, gc, liveness)) {
3094 return;
3095 }
3096
3097 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3098 _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3099 // add static memory usages to remembered set sizes
3100 _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3101
3102 log_cset_candidate_groups();
3103
3104 // Print the footer of the output.
3105 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3106 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3107 " SUMMARY"
3108 G1PPRL_SUM_MB_FORMAT("capacity")
3109 G1PPRL_SUM_MB_PERC_FORMAT("used")
3110 G1PPRL_SUM_MB_PERC_FORMAT("live")
3111 G1PPRL_SUM_MB_FORMAT("remset")
3112 G1PPRL_SUM_MB_FORMAT("code-roots"),
3113 bytes_to_mb(_total_capacity_bytes),
3114 bytes_to_mb(_total_used_bytes),
3115 percent_of(_total_used_bytes, _total_capacity_bytes),
3116 bytes_to_mb(_total_live_bytes),
3117 percent_of(_total_live_bytes, _total_capacity_bytes),
3118 bytes_to_mb(_total_remset_bytes),
3119 bytes_to_mb(_total_code_roots_bytes));
3120 }
3121
3122 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3123 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3124 G1PPRL_GID_FORMAT
3125 G1PPRL_LEN_FORMAT
3126 G1PPRL_GID_GCEFF_FORMAT
3127 G1PPRL_GID_LIVENESS_FORMAT
3128 G1PPRL_BYTE_FORMAT
3129 G1PPRL_TYPE_H_FORMAT,
3130 group->group_id(),
3131 group->length(),
3132 group->length() > 0 ? group->gc_efficiency() : 0.0,
3133 group->length() > 0 ? group->liveness_percent() : 0.0,
3134 group->card_set()->mem_size(),
3135 type);
3136 _total_remset_bytes += group->card_set()->mem_size();
3137 }
3138
3139 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3140 for (G1CSetCandidateGroup* group : gl) {
3141 log_cset_candidate_group_add_total(group, type);
3142 }
3143 }
3144
3145 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3146 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3147 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3148 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3149 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3150 G1PPRL_GID_H_FORMAT
3151 G1PPRL_LEN_H_FORMAT
3152 G1PPRL_GCEFF_H_FORMAT
3153 G1PPRL_BYTE_H_FORMAT
3154 G1PPRL_BYTE_H_FORMAT
3155 G1PPRL_TYPE_H_FORMAT,
3156 "groud-id", "num-regions",
3157 "gc-eff", "liveness",
3158 "remset", "type");
3159
3160 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3161 G1PPRL_GID_H_FORMAT
3162 G1PPRL_LEN_H_FORMAT
3163 G1PPRL_GCEFF_H_FORMAT
3164 G1PPRL_BYTE_H_FORMAT
3165 G1PPRL_BYTE_H_FORMAT
3166 G1PPRL_TYPE_H_FORMAT,
3167 "", "",
3168 "(bytes/ms)", "%",
3169 "(bytes)", "");
3170
3171 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3172
3173 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3174
3175 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3176 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3177 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3178 }