1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectorState.inline.hpp"
34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
38 #include "gc/g1/g1ConcurrentRefine.hpp"
39 #include "gc/g1/g1HeapRegion.inline.hpp"
40 #include "gc/g1/g1HeapRegionManager.hpp"
41 #include "gc/g1/g1HeapRegionPrinter.hpp"
42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
44 #include "gc/g1/g1HeapVerifier.hpp"
45 #include "gc/g1/g1OopClosures.inline.hpp"
46 #include "gc/g1/g1Policy.hpp"
47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
48 #include "gc/g1/g1ThreadLocalData.hpp"
49 #include "gc/g1/g1Trace.hpp"
50 #include "gc/shared/gcId.hpp"
51 #include "gc/shared/gcTimer.hpp"
52 #include "gc/shared/gcTraceTime.inline.hpp"
53 #include "gc/shared/gcVMOperations.hpp"
54 #include "gc/shared/partialArraySplitter.inline.hpp"
55 #include "gc/shared/partialArrayState.hpp"
56 #include "gc/shared/partialArrayTaskStats.hpp"
57 #include "gc/shared/referencePolicy.hpp"
58 #include "gc/shared/suspendibleThreadSet.hpp"
59 #include "gc/shared/taskqueue.inline.hpp"
60 #include "gc/shared/taskTerminator.hpp"
61 #include "gc/shared/weakProcessor.inline.hpp"
62 #include "gc/shared/workerPolicy.hpp"
63 #include "jvm.h"
64 #include "logging/log.hpp"
65 #include "memory/allocation.hpp"
66 #include "memory/iterator.hpp"
67 #include "memory/metaspaceUtils.hpp"
68 #include "memory/resourceArea.hpp"
69 #include "memory/universe.hpp"
70 #include "nmt/memTracker.hpp"
71 #include "oops/access.inline.hpp"
72 #include "oops/oop.inline.hpp"
73 #include "oops/oopCast.inline.hpp"
74 #include "runtime/globals_extension.hpp"
75 #include "runtime/handles.inline.hpp"
76 #include "runtime/java.hpp"
77 #include "runtime/orderAccess.hpp"
78 #include "runtime/os.hpp"
79 #include "runtime/prefetch.inline.hpp"
80 #include "runtime/threads.hpp"
81 #include "utilities/align.hpp"
82 #include "utilities/checkedCast.hpp"
83 #include "utilities/formatBuffer.hpp"
84 #include "utilities/growableArray.hpp"
85 #include "utilities/powerOfTwo.hpp"
86
87 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
88
89 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
90 assert(cm != nullptr, "must be");
91 }
92
93 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
94 assert(cm != nullptr, "must be");
95 assert(_cm == nullptr, "double initialize");
96 _cm = cm;
97 }
98
99 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
100 assert(addr < _cm->finger(), "invariant");
101 assert(addr >= _task->finger(), "invariant");
102
103 // We move that task's local finger along.
104 _task->move_finger_to(addr);
105
106 _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
107 // we only partially drain the local queue and global stack
108 _task->drain_local_queue(true);
109 _task->drain_global_stack(true);
110
111 // if the has_aborted flag has been raised, we need to bail out of
112 // the iteration
113 return !_task->has_aborted();
114 }
115
116 G1CMMarkStack::G1CMMarkStack() :
117 _chunk_allocator() {
118 set_empty();
119 }
120
121 size_t G1CMMarkStack::capacity_alignment() {
122 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
123 }
124
125 bool G1CMMarkStack::initialize() {
126 guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
127
128 size_t initial_capacity = MarkStackSize;
129 size_t max_capacity = MarkStackSizeMax;
130
131 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
132
133 size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
134 size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
135
136 initial_num_chunks = round_up_power_of_2(initial_num_chunks);
137 max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
138
139 size_t limit = (INT_MAX - 1);
140 max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
141 initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
142
143 FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
144 FLAG_SET_ERGO(MarkStackSize, initial_capacity);
145
146 log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
147
148 log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
149 initial_num_chunks, max_capacity);
150
151 return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
152 }
153
154 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
155 if (_size.load_relaxed() >= _max_capacity) {
156 return nullptr;
157 }
158
159 size_t cur_idx = _size.fetch_then_add(1u);
160
161 if (cur_idx >= _max_capacity) {
162 return nullptr;
163 }
164
165 size_t bucket = get_bucket(cur_idx);
166 if (_buckets[bucket].load_acquire() == nullptr) {
167 if (!_should_grow) {
168 // Prefer to restart the CM.
169 return nullptr;
170 }
171
172 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
173 if (_buckets[bucket].load_acquire() == nullptr) {
174 size_t desired_capacity = bucket_size(bucket) * 2;
175 if (!try_expand_to(desired_capacity)) {
176 return nullptr;
177 }
178 }
179 }
180
181 size_t bucket_idx = get_bucket_index(cur_idx);
182 TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
183 result->next = nullptr;
184 return result;
185 }
186
187 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
188 _min_capacity(0),
189 _max_capacity(0),
190 _capacity(0),
191 _num_buckets(0),
192 _should_grow(false),
193 _buckets(nullptr),
194 _size(0)
195 { }
196
197 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
198 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
199
200 _min_capacity = initial_capacity;
201 _max_capacity = max_capacity;
202 _num_buckets = get_bucket(_max_capacity) + 1;
203
204 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
205
206 for (size_t i = 0; i < _num_buckets; i++) {
207 _buckets[i].store_relaxed(nullptr);
208 }
209
210 size_t new_capacity = bucket_size(0);
211
212 if (!reserve(new_capacity)) {
213 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
214 return false;
215 }
216 return true;
217 }
218
219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
220 if (_capacity == _max_capacity) {
221 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
222 return false;
223 }
224
225 size_t old_capacity = _capacity;
226 desired_capacity = MIN2(desired_capacity, _max_capacity);
227
228 if (reserve(desired_capacity)) {
229 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
230 old_capacity, desired_capacity);
231 return true;
232 }
233 return false;
234 }
235
236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
237 size_t new_capacity = _capacity * 2;
238 return try_expand_to(new_capacity);
239 }
240
241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
242 if (_buckets == nullptr) {
243 return;
244 }
245
246 for (size_t i = 0; i < _num_buckets; i++) {
247 if (_buckets[i].load_relaxed() != nullptr) {
248 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
249 _buckets[i].store_relaxed(nullptr);
250 }
251 }
252
253 FREE_C_HEAP_ARRAY(_buckets);
254 }
255
256 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
257 assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
258
259 size_t highest_bucket = get_bucket(new_capacity - 1);
260 size_t i = get_bucket(_capacity);
261
262 // Allocate all buckets associated with indexes between the current capacity (_capacity)
263 // and the new capacity (new_capacity). This step ensures that there are no gaps in the
264 // array and that the capacity accurately reflects the reserved memory.
265 for (; i <= highest_bucket; i++) {
266 if (_buckets[i].load_acquire() != nullptr) {
267 continue; // Skip over already allocated buckets.
268 }
269
270 size_t bucket_capacity = bucket_size(i);
271
272 // Trim bucket size so that we do not exceed the _max_capacity.
273 bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
274 bucket_capacity :
275 _max_capacity - _capacity;
276
277
278 TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
279
280 if (bucket_base == nullptr) {
281 log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
282 bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
283 return false;
284 }
285 _capacity += bucket_capacity;
286 _buckets[i].release_store(bucket_base);
287 }
288 return true;
289 }
290
291 void G1CMMarkStack::expand() {
292 _chunk_allocator.try_expand();
293 }
294
295 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
296 elem->next = list->load_relaxed();
297 list->store_relaxed(elem);
298 }
299
300 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
301 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
302 add_chunk_to_list(&_chunk_list, elem);
303 _chunks_in_chunk_list++;
304 }
305
306 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
307 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
308 add_chunk_to_list(&_free_list, elem);
309 }
310
311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
312 TaskQueueEntryChunk* result = list->load_relaxed();
313 if (result != nullptr) {
314 list->store_relaxed(list->load_relaxed()->next);
315 }
316 return result;
317 }
318
319 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
320 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
321 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
322 if (result != nullptr) {
323 _chunks_in_chunk_list--;
324 }
325 return result;
326 }
327
328 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
329 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
330 return remove_chunk_from_list(&_free_list);
331 }
332
333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
334 // Get a new chunk.
335 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
336
337 if (new_chunk == nullptr) {
338 // Did not get a chunk from the free list. Allocate from backing memory.
339 new_chunk = _chunk_allocator.allocate_new_chunk();
340
341 if (new_chunk == nullptr) {
342 return false;
343 }
344 }
345
346 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
347
348 add_chunk_to_chunk_list(new_chunk);
349
350 return true;
351 }
352
353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
354 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
355
356 if (cur == nullptr) {
357 return false;
358 }
359
360 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
361
362 add_chunk_to_free_list(cur);
363 return true;
364 }
365
366 void G1CMMarkStack::set_empty() {
367 _chunks_in_chunk_list = 0;
368 _chunk_list.store_relaxed(nullptr);
369 _free_list.store_relaxed(nullptr);
370 _chunk_allocator.reset();
371 }
372
373 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
374 _root_regions(MemRegion::create_array(max_regions, mtGC)),
375 _max_regions(max_regions),
376 _num_regions(0),
377 _num_claimed_regions(0) { }
378
379 G1CMRootMemRegions::~G1CMRootMemRegions() {
380 MemRegion::destroy_array(_root_regions, _max_regions);
381 }
382
383 void G1CMRootMemRegions::reset() {
384 assert_at_safepoint();
385 assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
386
387 _num_regions.store_relaxed(0);
388 _num_claimed_regions.store_relaxed(0);
389 }
390
391 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
392 assert_at_safepoint();
393 uint idx = _num_regions.fetch_then_add(1u);
394 assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
395 assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
396 "end (" PTR_FORMAT ")", p2i(start), p2i(end));
397 _root_regions[idx].set_start(start);
398 _root_regions[idx].set_end(end);
399 }
400
401 const MemRegion* G1CMRootMemRegions::claim_next() {
402 uint local_num_regions = num_regions();
403 if (num_claimed_regions() >= local_num_regions) {
404 return nullptr;
405 }
406
407 uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
408 if (claimed_index < local_num_regions) {
409 return &_root_regions[claimed_index];
410 }
411 return nullptr;
412 }
413
414 bool G1CMRootMemRegions::work_completed() const {
415 return num_remaining_regions() == 0;
416 }
417
418 uint G1CMRootMemRegions::num_remaining_regions() const {
419 uint total = num_regions();
420 uint claimed = num_claimed_regions();
421 return (total > claimed) ? total - claimed : 0;
422 }
423
424 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
425 uint local_num_root_regions = num_regions();
426 for (uint i = 0; i < local_num_root_regions; i++) {
427 if (_root_regions[i].equals(mr)) {
428 return true;
429 }
430 }
431 return false;
432 }
433
434 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
435 G1RegionToSpaceMapper* bitmap_storage) :
436 _cm_thread(nullptr),
437 _g1h(g1h),
438
439 _mark_bitmap(),
440
441 _heap(_g1h->reserved()),
442
443 _root_regions(_g1h->max_num_regions()),
444 _root_region_scan_aborted(false),
445
446 _global_mark_stack(),
447
448 _finger(nullptr), // _finger set in set_non_marking_state
449
450 _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
451 _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
452 _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
453 _tasks(nullptr),
454 _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
455 _terminator(_max_num_tasks, _task_queues),
456 _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
457
458 _first_overflow_barrier_sync(),
459 _second_overflow_barrier_sync(),
460
461 _completed_mark_cycles(0),
462 _has_overflown(false),
463 _concurrent(false),
464 _has_aborted(false),
465 _restart_for_overflow(false),
466 _gc_timer_cm(new ConcurrentGCTimer()),
467 _gc_tracer_cm(new G1OldTracer()),
468
469 // _verbose_level set below
470
471 _remark_times(),
472 _remark_mark_times(),
473 _remark_weak_ref_times(),
474 _cleanup_times(),
475
476 _concurrent_workers(nullptr),
477 _num_concurrent_workers(0),
478 _max_concurrent_workers(0),
479
480 _is_region_mark_stats_cache_in_use(false),
481 _region_mark_stats(nullptr),
482 _top_at_mark_starts(nullptr),
483 _top_at_rebuild_starts(nullptr),
484 _needs_remembered_set_rebuild(false)
485 {
486 assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
487
488 _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
489 }
490
491 void G1ConcurrentMark::fully_initialize() {
492 assert_at_safepoint();
493
494 if (is_fully_initialized()) {
495 return;
496 }
497
498 // Create & start ConcurrentMark thread.
499 _cm_thread = new G1ConcurrentMarkThread(this);
500 if (_cm_thread->osthread() == nullptr) {
501 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
502 }
503
504 log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
505 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
506
507 _max_concurrent_workers = ConcGCThreads;
508
509 _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
510 _concurrent_workers->initialize_workers();
511 _num_concurrent_workers = _concurrent_workers->active_workers();
512
513 if (!_global_mark_stack.initialize()) {
514 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
515 }
516
517 _region_mark_stats = NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC);
518 _top_at_mark_starts = NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC);
519 _top_at_rebuild_starts = NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC);
520
521 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
522
523 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
524 _num_active_tasks = _max_num_tasks;
525
526 for (uint i = 0; i < _max_num_tasks; ++i) {
527 G1CMTaskQueue* task_queue = new G1CMTaskQueue();
528 _task_queues->register_queue(i, task_queue);
529
530 _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
531 }
532
533 uint max_num_regions = _g1h->max_num_regions();
534 ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
535 for (uint i = 0; i < max_num_regions; i++) {
536 ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>(_g1h->bottom_addr_for_region(i));
537 }
538 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
539
540 reset_at_marking_complete();
541 }
542
543 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
544 return _cm_thread->is_in_progress();
545 }
546
547 bool G1ConcurrentMark::is_in_marking() const {
548 return _cm_thread->is_in_marking();
549 }
550
551 bool G1ConcurrentMark::is_in_marking_or_rebuild() const {
552 return _cm_thread->is_in_marking_or_rebuild();
553 }
554
555 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
556 return cm_thread()->is_in_reset_for_next_cycle();
557 }
558
559 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
560 return _partial_array_state_manager;
561 }
562
563 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
564 assert(is_fully_initialized(), "must be");
565 return _cm_thread;
566 }
567
568 void G1ConcurrentMark::reset() {
569 assert_fully_initialized();
570
571 _has_aborted.store_relaxed(false);
572
573 _is_region_mark_stats_cache_in_use = true;
574 reset_marking_for_restart();
575
576 // Reset all tasks, since different phases will use different number of active
577 // threads. So, it's easiest to have all of them ready.
578 for (uint i = 0; i < _max_num_tasks; ++i) {
579 _tasks[i]->reset(mark_bitmap());
580 }
581
582 uint max_num_regions = _g1h->max_num_regions();
583 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
584 for (uint i = 0; i < max_num_regions; i++) {
585 // Do not update TAMS here. NoteStartOfMarkTask updates this in parallel in
586 // the pre-concurrent-start WorkerTask.
587 _top_at_rebuild_starts[i].store_relaxed(nullptr);
588 _region_mark_stats[i].clear();
589 }
590
591 _root_region_scan_aborted.store_relaxed(false);
592 _root_regions.reset();
593 }
594
595 void G1ConcurrentMark::assert_statistics_clear(G1HeapRegion* r) {
596 assert_fully_initialized();
597 #ifdef ASSERT
598 uint region_idx = r->hrm_index();
599 for (uint j = 0; j < _max_num_tasks; ++j) {
600 _tasks[j]->verify_no_mark_stats_for(r->hrm_index());
601 }
602
603 assert(_top_at_rebuild_starts[region_idx].load_relaxed() == nullptr, "must be");
604
605 G1RegionMarkStats* s = &_region_mark_stats[region_idx];
606 assert(s->incoming_refs() == 0, "must be");
607 assert(s->live_words() == 0, "must be");
608 #endif
609 }
610
611 void G1ConcurrentMark::note_start_of_mark_for_region(G1HeapRegion* r) {
612 assert_at_safepoint();
613 assert_fully_initialized();
614 if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
615 update_top_at_mark_start(r);
616 } else {
617 set_top_at_mark_start_to_bottom(r);
618 }
619 }
620
621 void G1ConcurrentMark::notify_new_region(G1HeapRegion* r, size_t marked_live_bytes_below_tams) {
622 assert_at_safepoint();
623 if (!is_fully_initialized()) {
624 return;
625 }
626 G1CollectorState* state = _g1h->collector_state();
627 if (state->is_in_concurrent_start_gc()) {
628 update_top_at_mark_start(r);
629 set_live_bytes(r->hrm_index(), marked_live_bytes_below_tams);
630 }
631 }
632
633 void G1ConcurrentMark::reset_region_marking_state(G1HeapRegion* r) {
634 assert_at_safepoint();
635 if (!is_fully_initialized()) {
636 return;
637 }
638 uint region_idx = r->hrm_index();
639 // Only need to clear the stats cache for the given region if we are using the cache.
640 if (_is_region_mark_stats_cache_in_use) {
641 for (uint j = 0; j < _max_num_tasks; ++j) {
642 _tasks[j]->clear_mark_stats_cache(region_idx);
643 }
644 } else {
645 for (uint j = 0; j < _max_num_tasks; ++j) {
646 _tasks[j]->verify_no_mark_stats_for(region_idx);
647 }
648 }
649 set_top_at_mark_start_to_bottom(r);
650 _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
651 _region_mark_stats[region_idx].clear();
652 }
653
654 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
655 assert_at_safepoint();
656 assert(r->is_starts_humongous(), "Got humongous continues region here");
657
658 // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
659 mark_bitmap()->clear(r->bottom());
660 }
661
662 void G1ConcurrentMark::reset_marking_for_restart() {
663 assert_fully_initialized();
664
665 _global_mark_stack.set_empty();
666
667 // Expand the marking stack, if we have to and if we can.
668 if (has_overflown()) {
669 _global_mark_stack.expand();
670
671 uint max_num_regions = _g1h->max_num_regions();
672 for (uint i = 0; i < max_num_regions; i++) {
673 _region_mark_stats[i].clear_during_overflow();
674 }
675 }
676
677 clear_has_overflown();
678 _finger.store_relaxed(_heap.start());
679
680 for (uint i = 0; i < _max_num_tasks; ++i) {
681 _tasks[i]->reset_for_restart();
682 }
683 }
684
685 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
686 assert(active_tasks <= _max_num_tasks, "we should not have more");
687
688 _num_active_tasks = active_tasks;
689 // Need to update the three data structures below according to the
690 // number of active threads for this phase.
691 _terminator.reset_for_reuse(active_tasks);
692 _first_overflow_barrier_sync.set_n_workers(active_tasks);
693 _second_overflow_barrier_sync.set_n_workers(active_tasks);
694 }
695
696 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
697 set_concurrency(active_tasks);
698
699 _concurrent.store_relaxed(concurrent);
700
701 if (!concurrent) {
702 // At this point we should be in a STW phase, and completed marking.
703 assert_at_safepoint_on_vm_thread();
704 assert(out_of_regions(),
705 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
706 p2i(finger()), p2i(_heap.end()));
707 }
708 }
709
710 #if TASKQUEUE_STATS
711 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
712
713 _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
714
715 auto get_pa_stats = [&](uint i) {
716 return _tasks[i]->partial_array_task_stats();
717 };
718
719 PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
720 "Concurrent Mark Partial Array");
721
722 for (uint i = 0; i < _max_num_tasks; ++i) {
723 get_pa_stats(i)->reset();
724 }
725 }
726 #endif
727
728 void G1ConcurrentMark::reset_at_marking_complete() {
729 TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
730 // We set the global marking state to some default values when we're
731 // not doing marking.
732 reset_marking_for_restart();
733 _num_active_tasks = 0;
734 }
735
736 G1ConcurrentMark::~G1ConcurrentMark() {
737 FREE_C_HEAP_ARRAY(_top_at_mark_starts);
738 FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
739 FREE_C_HEAP_ARRAY(_region_mark_stats);
740 // The G1ConcurrentMark instance is never freed.
741 ShouldNotReachHere();
742 }
743
744 class G1ClearBitMapTask : public WorkerTask {
745 public:
746 static size_t chunk_size() { return M; }
747
748 private:
749 // Heap region closure used for clearing the _mark_bitmap.
750 class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
751 G1ConcurrentMark* _cm;
752 G1CMBitMap* _bitmap;
753 bool _suspendible; // If suspendible, do yield checks.
754
755 bool suspendible() {
756 return _suspendible;
757 }
758
759 bool is_clear_concurrent_undo() {
760 return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
761 }
762
763 bool has_aborted() {
764 if (suspendible()) {
765 _cm->do_yield_check();
766 return _cm->has_aborted();
767 }
768 return false;
769 }
770
771 HeapWord* region_clear_limit(G1HeapRegion* r) {
772 // A garbage collection might have made the region unavailable after a yield during
773 // clearing. Just return bottom as the limit, causing the clearing for this region to end.
774 if (G1CollectedHeap::heap()->region_at_or_null(r->hrm_index()) == nullptr) {
775 return r->bottom();
776 }
777 // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
778 // live_words data are current wrt to the _mark_bitmap. We use this information
779 // to only clear ranges of the bitmap that require clearing.
780 if (is_clear_concurrent_undo()) {
781 // No need to clear bitmaps for empty regions (which includes regions we
782 // did not mark through).
783 if (!_cm->contains_live_object(r->hrm_index())) {
784 assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
785 return r->bottom();
786 }
787 }
788 return r->end();
789 }
790
791 public:
792 G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
793 G1HeapRegionClosure(),
794 _cm(cm),
795 _bitmap(cm->mark_bitmap()),
796 _suspendible(suspendible)
797 { }
798
799 virtual bool do_heap_region(G1HeapRegion* r) {
800 if (has_aborted()) {
801 return true;
802 }
803
804 HeapWord* cur = r->bottom();
805 HeapWord* end = region_clear_limit(r);
806
807 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
808
809 while (cur < end) {
810
811 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
812 _bitmap->clear_range(mr);
813
814 cur += chunk_size_in_words;
815
816 // Repeat the asserts from before the start of the closure. We will do them
817 // as asserts here to minimize their overhead on the product. However, we
818 // will have them as guarantees at the beginning / end of the bitmap
819 // clearing to get some checking in the product.
820 assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
821
822 // Abort iteration if necessary.
823 if (suspendible() && _cm->do_yield_check()) {
824 if (_cm->has_aborted()) {
825 return true;
826 }
827 // Re-read end. The region might have been uncommitted.
828 end = region_clear_limit(r);
829 }
830 }
831 assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
832
833 return false;
834 }
835 };
836
837 G1ClearBitmapHRClosure _cl;
838 G1HeapRegionClaimer _hr_claimer;
839 bool _suspendible; // If the task is suspendible, workers must join the STS.
840
841 public:
842 G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
843 WorkerTask("G1 Clear Bitmap"),
844 _cl(cm, suspendible),
845 _hr_claimer(n_workers),
846 _suspendible(suspendible)
847 { }
848
849 void work(uint worker_id) {
850 SuspendibleThreadSetJoiner sts_join(_suspendible);
851 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
852 }
853 };
854
855 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
856 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
857
858 size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
859 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
860
861 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
862
863 G1ClearBitMapTask cl(this, num_workers, may_yield);
864
865 log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
866 workers->run_task(&cl, num_workers);
867 }
868
869 void G1ConcurrentMark::cleanup_for_next_mark() {
870 // Make sure that the concurrent mark thread looks to still be in
871 // the current cycle.
872 guarantee(is_in_reset_for_next_cycle(), "invariant");
873
874 clear_bitmap(_concurrent_workers, true);
875
876 reset_partial_array_state_manager();
877
878 // Should not have changed state yet (even if a Full GC interrupted us).
879 guarantee(is_in_reset_for_next_cycle(), "invariant");
880 }
881
882 void G1ConcurrentMark::reset_partial_array_state_manager() {
883 for (uint i = 0; i < _max_num_tasks; ++i) {
884 _tasks[i]->unregister_partial_array_splitter();
885 }
886
887 partial_array_state_manager()->reset();
888
889 for (uint i = 0; i < _max_num_tasks; ++i) {
890 _tasks[i]->register_partial_array_splitter();
891 }
892 }
893
894 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
895 assert_at_safepoint_on_vm_thread();
896 // To avoid fragmentation the full collection requesting to clear the bitmap
897 // might use fewer workers than available. To ensure the bitmap is cleared
898 // as efficiently as possible the number of active workers are temporarily
899 // increased to include all currently created workers.
900 WithActiveWorkers update(workers, workers->created_workers());
901 clear_bitmap(workers, false);
902 }
903
904 class G1PreConcurrentStartTask : public G1BatchedTask {
905 // Reset marking state.
906 class ResetMarkingStateTask;
907 // For each region note start of marking.
908 class NoteStartOfMarkTask;
909
910 public:
911 G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
912 };
913
914 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
915 G1ConcurrentMark* _cm;
916 public:
917 ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
918
919 double worker_cost() const override { return 1.0; }
920 void do_work(uint worker_id) override;
921 };
922
923 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
924
925 class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
926 G1ConcurrentMark* _cm;
927
928 public:
929 NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
930
931 bool do_heap_region(G1HeapRegion* r) override {
932 _cm->note_start_of_mark_for_region(r);
933 return false;
934 }
935 } _region_cl;
936
937 G1HeapRegionClaimer _claimer;
938 public:
939 NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
940
941 double worker_cost() const override {
942 // The work done per region is very small, therefore we choose this magic number to cap the number
943 // of threads used when there are few regions.
944 const double regions_per_thread = 1000;
945 return _claimer.n_regions() / regions_per_thread;
946 }
947
948 void set_max_workers(uint max_workers) override {
949 _claimer.set_n_workers(max_workers);
950 }
951
952 void do_work(uint worker_id) override {
953 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
954 }
955 };
956
957 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
958 // Reset marking state.
959 _cm->reset();
960 }
961
962 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
963 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
964 add_serial_task(new ResetMarkingStateTask(cm));
965 add_parallel_task(new NoteStartOfMarkTask());
966 };
967
968 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
969 assert_at_safepoint_on_vm_thread();
970
971 G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
972
973 ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
974
975 G1PreConcurrentStartTask cl(cause, this);
976 G1CollectedHeap::heap()->run_batch_task(&cl);
977
978 _gc_tracer_cm->set_gc_cause(cause);
979 }
980
981 void G1ConcurrentMark::start_full_concurrent_cycle() {
982 // Start Concurrent Marking weak-reference discovery.
983 ReferenceProcessor* rp = _g1h->ref_processor_cm();
984 rp->start_discovery(false /* always_clear */);
985
986 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
987 // This is the start of the marking cycle, we're expected all
988 // threads to have SATB queues with active set to false.
989 satb_mq_set.set_active_all_threads(true, /* new active value */
990 false /* expected_active */);
991
992 // update_g1_committed() will be called at the end of an evac pause
993 // when marking is on. So, it's also called at the end of the
994 // concurrent start pause to update the heap end, if the heap expands
995 // during it. No need to call it here.
996
997 // Signal the thread to start work.
998 cm_thread()->start_full_cycle();
999 }
1000
1001 void G1ConcurrentMark::start_undo_concurrent_cycle() {
1002 assert_at_safepoint_on_vm_thread();
1003 // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
1004 assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
1005
1006 abort_root_region_scan_at_safepoint();
1007
1008 // Signal the thread to start work.
1009 cm_thread()->start_undo_cycle();
1010 }
1011
1012 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
1013 cm_thread()->set_idle();
1014 }
1015
1016 void G1ConcurrentMark::stop() {
1017 if (is_fully_initialized()) {
1018 cm_thread()->stop();
1019 }
1020 }
1021
1022 /*
1023 * Notice that in the next two methods, we actually leave the STS
1024 * during the barrier sync and join it immediately afterwards. If we
1025 * do not do this, the following deadlock can occur: one thread could
1026 * be in the barrier sync code, waiting for the other thread to also
1027 * sync up, whereas another one could be trying to yield, while also
1028 * waiting for the other threads to sync up too.
1029 *
1030 * Note, however, that this code is also used during remark and in
1031 * this case we should not attempt to leave / enter the STS, otherwise
1032 * we'll either hit an assert (debug / fastdebug) or deadlock
1033 * (product). So we should only leave / enter the STS if we are
1034 * operating concurrently.
1035 *
1036 * Because the thread that does the sync barrier has left the STS, it
1037 * is possible to be suspended for a Full GC or an evacuation pause
1038 * could occur. This is actually safe, since the entering the sync
1039 * barrier is one of the last things do_marking_step() does, and it
1040 * doesn't manipulate any data structures afterwards.
1041 */
1042
1043 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
1044 bool barrier_aborted;
1045 {
1046 SuspendibleThreadSetLeaver sts_leave(concurrent());
1047 barrier_aborted = !_first_overflow_barrier_sync.enter();
1048 }
1049
1050 // at this point everyone should have synced up and not be doing any
1051 // more work
1052
1053 if (barrier_aborted) {
1054 // If the barrier aborted we ignore the overflow condition and
1055 // just abort the whole marking phase as quickly as possible.
1056 return;
1057 }
1058 }
1059
1060 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1061 SuspendibleThreadSetLeaver sts_leave(concurrent());
1062 _second_overflow_barrier_sync.enter();
1063
1064 // at this point everything should be re-initialized and ready to go
1065 }
1066
1067 class G1CMConcurrentMarkingTask : public WorkerTask {
1068 G1ConcurrentMark* _cm;
1069
1070 public:
1071 void work(uint worker_id) {
1072 ResourceMark rm;
1073
1074 SuspendibleThreadSetJoiner sts_join;
1075
1076 assert(worker_id < _cm->active_tasks(), "invariant");
1077
1078 G1CMTask* task = _cm->task(worker_id);
1079 task->record_start_time();
1080 if (!_cm->has_aborted()) {
1081 do {
1082 task->do_marking_step(G1ConcMarkStepDurationMillis,
1083 true /* do_termination */,
1084 false /* is_serial*/);
1085
1086 _cm->do_yield_check();
1087 } while (!_cm->has_aborted() && task->has_aborted());
1088 }
1089 task->record_end_time();
1090 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1091 }
1092
1093 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1094 WorkerTask("Concurrent Mark"), _cm(cm) { }
1095
1096 ~G1CMConcurrentMarkingTask() { }
1097 };
1098
1099 uint G1ConcurrentMark::calc_active_marking_workers() {
1100 uint result = 0;
1101 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1102 result = _max_concurrent_workers;
1103 } else {
1104 result =
1105 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1106 1, /* Minimum workers */
1107 _num_concurrent_workers,
1108 Threads::number_of_non_daemon_threads());
1109 // Don't scale the result down by scale_concurrent_workers() because
1110 // that scaling has already gone into "_max_concurrent_workers".
1111 }
1112 assert(result > 0 && result <= _max_concurrent_workers,
1113 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1114 _max_concurrent_workers, result);
1115 return result;
1116 }
1117
1118 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1119 return _root_region_scan_aborted.load_relaxed();
1120 }
1121
1122 #ifndef PRODUCT
1123 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1124 assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1125 }
1126 #endif
1127
1128 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1129 #ifdef ASSERT
1130 HeapWord* last = region->last();
1131 G1HeapRegion* hr = _g1h->heap_region_containing(last);
1132 assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1133 "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1134 assert(top_at_mark_start(hr) == region->start(),
1135 "MemRegion start should be equal to TAMS");
1136 #endif
1137
1138 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1139
1140 const uintx interval = PrefetchScanIntervalInBytes;
1141 HeapWord* curr = region->start();
1142 const HeapWord* end = region->end();
1143 while (curr < end) {
1144 Prefetch::read(curr, interval);
1145 oop obj = cast_to_oop(curr);
1146 size_t size = obj->oop_iterate_size(&cl);
1147 assert(size == obj->size(), "sanity");
1148 curr += size;
1149 }
1150 }
1151
1152 class G1CMRootRegionScanTask : public WorkerTask {
1153 G1ConcurrentMark* _cm;
1154 bool _should_yield;
1155
1156 public:
1157 G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1158 WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1159
1160 void work(uint worker_id) {
1161 SuspendibleThreadSetJoiner sts_join(_should_yield);
1162
1163 while (true) {
1164 if (_cm->has_root_region_scan_aborted()) {
1165 return;
1166 }
1167 G1CMRootMemRegions* root_regions = _cm->root_regions();
1168 const MemRegion* region = root_regions->claim_next();
1169 if (region == nullptr) {
1170 return;
1171 }
1172 _cm->scan_root_region(region, worker_id);
1173 if (_should_yield) {
1174 SuspendibleThreadSet::yield();
1175 // If we yielded, a GC may have processed all root regions,
1176 // so this loop will naturally exit on the next claim_next() call.
1177 // Same if a Full GC signalled abort of the concurrent mark.
1178 }
1179 }
1180 }
1181 };
1182
1183 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1184 // We first check whether there is any work to do as we might have already aborted
1185 // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
1186 // We want to avoid spinning up the worker threads if that happened.
1187 // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1188 //
1189 // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1190 // a young gc doing the work.
1191 //
1192 // Concurrent gc threads enter an STS when starting the task, so they stop, then
1193 // continue after that safepoint.
1194 //
1195 // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1196 // consistent view of the value containing the number of remaining regions across the
1197 // usages below. The safepoint/gc may already be running and modifying it
1198 // while this code is still executing.
1199 uint num_remaining = root_regions()->num_remaining_regions();
1200 bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1201 if (do_scan) {
1202 // Assign one worker to each root-region but subject to the max constraint.
1203 // The constraint is also important to avoid accesses beyond the allocated per-worker
1204 // marking helper data structures. We might get passed different WorkerThreads with
1205 // different number of threads (potential worker ids) than helper data structures when
1206 // completing this work during GC.
1207 const uint num_workers = MIN2(num_remaining,
1208 _max_concurrent_workers);
1209
1210 G1CMRootRegionScanTask task(this, concurrent);
1211 log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1212 task.name(), num_workers, num_remaining);
1213 workers->run_task(&task, num_workers);
1214 }
1215
1216 // At the end of this method, we can re-read num_remaining() in the assert: either
1217 // we got non-zero above and we processed all root regions (and it must be zero
1218 // after the worker task synchronization) or it had already been zero. We also
1219 // can't have started another concurrent cycle that could have set it to something else
1220 // while still in the concurrent cycle (if called concurrently).
1221 assert_root_region_scan_completed_or_aborted();
1222
1223 return do_scan;
1224 }
1225
1226 void G1ConcurrentMark::scan_root_regions_concurrently() {
1227 assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1228 scan_root_regions(_concurrent_workers, true /* concurrent */);
1229 }
1230
1231 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1232 assert_at_safepoint_on_vm_thread();
1233 return scan_root_regions(_g1h->workers(), false /* concurrent */);
1234 }
1235
1236 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1237 root_regions()->add(top_at_mark_start(r), r->top());
1238 }
1239
1240 void G1ConcurrentMark::add_root_region_set_bottom(G1HeapRegion* r) {
1241 set_top_at_mark_start_to_bottom(r);
1242 root_regions()->add(r->bottom(), r->top());
1243 }
1244
1245 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1246 return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1247 }
1248
1249 void G1ConcurrentMark::abort_root_region_scan() {
1250 assert_not_at_safepoint();
1251
1252 _root_region_scan_aborted.store_relaxed(true);
1253 }
1254
1255 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1256 assert_at_safepoint_on_vm_thread();
1257
1258 _root_region_scan_aborted.store_relaxed(true);
1259 }
1260
1261 void G1ConcurrentMark::concurrent_cycle_start() {
1262 _gc_timer_cm->register_gc_start();
1263
1264 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1265
1266 _g1h->trace_heap_before_gc(_gc_tracer_cm);
1267 }
1268
1269 uint G1ConcurrentMark::completed_mark_cycles() const {
1270 return _completed_mark_cycles.load_relaxed();
1271 }
1272
1273 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1274 _g1h->trace_heap_after_gc(_gc_tracer_cm);
1275
1276 if (mark_cycle_completed) {
1277 _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1278 }
1279
1280 if (has_aborted()) {
1281 log_info(gc, marking)("Concurrent Mark Abort");
1282 _gc_tracer_cm->report_concurrent_mode_failure();
1283 }
1284
1285 _gc_timer_cm->register_gc_end();
1286
1287 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1288 }
1289
1290 void G1ConcurrentMark::mark_from_roots() {
1291 _restart_for_overflow.store_relaxed(false);
1292
1293 uint active_workers = calc_active_marking_workers();
1294
1295 // Setting active workers is not guaranteed since fewer
1296 // worker threads may currently exist and more may not be
1297 // available.
1298 active_workers = _concurrent_workers->set_active_workers(active_workers);
1299 log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1300
1301 _num_concurrent_workers = active_workers;
1302
1303 // Parallel task terminator is set in "set_concurrency_and_phase()"
1304 set_concurrency_and_phase(active_workers, true /* concurrent */);
1305
1306 G1CMConcurrentMarkingTask marking_task(this);
1307 _concurrent_workers->run_task(&marking_task);
1308 print_stats();
1309 }
1310
1311 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1312 static const char* location_strings[] = { "Remark Before",
1313 "Remark After",
1314 "Remark Overflow",
1315 "Cleanup Before",
1316 "Cleanup After" };
1317 return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1318 }
1319
1320 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1321 VerifyLocation location) {
1322 G1HeapVerifier* verifier = _g1h->verifier();
1323
1324 verifier->verify_region_sets_optional();
1325
1326 const char* caller = verify_location_string(location);
1327
1328 if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1329 GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1330
1331 size_t const BufLen = 512;
1332 char buffer[BufLen];
1333
1334 jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1335 verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1336
1337 // Only check bitmap in Remark, and not at After-Verification because the regions
1338 // already have their TAMS'es reset.
1339 if (location != VerifyLocation::RemarkAfter) {
1340 verifier->verify_bitmap_clear(true /* above_tams_only */);
1341 }
1342 }
1343 }
1344
1345 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1346 G1CollectedHeap* _g1h;
1347 public:
1348 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1349
1350 bool do_object_b(oop obj) {
1351 return !_g1h->is_obj_dead(obj);
1352 }
1353 };
1354
1355 void G1ConcurrentMark::remark() {
1356 assert_at_safepoint_on_vm_thread();
1357
1358 // If a full collection has happened, we should not continue. However we might
1359 // have ended up here as the Remark VM operation has been scheduled already.
1360 if (has_aborted()) {
1361 return;
1362 }
1363
1364 G1Policy* policy = _g1h->policy();
1365 policy->record_pause_start_time();
1366
1367 double start = os::elapsedTime();
1368
1369 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1370
1371 {
1372 GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1373 finalize_marking();
1374 }
1375
1376 double mark_work_end = os::elapsedTime();
1377
1378 bool const mark_finished = !has_overflown();
1379 if (mark_finished) {
1380 weak_refs_work();
1381
1382 // Unload Klasses, String, Code Cache, etc.
1383 if (ClassUnloadingWithConcurrentMark) {
1384 G1CMIsAliveClosure is_alive(this);
1385 _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1386 }
1387
1388 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1389 // We're done with marking.
1390 // This is the end of the marking cycle, we're expected all
1391 // threads to have SATB queues with active set to true.
1392 satb_mq_set.set_active_all_threads(false, /* new active value */
1393 true /* expected_active */);
1394
1395 {
1396 GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1397 flush_all_task_caches();
1398 }
1399
1400 // All marking completed. Check bitmap now as we will start to reset TAMSes
1401 // in parallel below so that we can not do this in the After-Remark verification.
1402 _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1403
1404 {
1405 GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1406
1407 G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1408 uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1409 _g1h->workers()->active_workers());
1410 log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1411 _g1h->workers()->run_task(&cl, num_workers);
1412
1413 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1414 _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1415
1416 _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1417
1418 if (_needs_remembered_set_rebuild) {
1419 GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1420 _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1421 }
1422 }
1423
1424 if (log_is_enabled(Trace, gc, liveness)) {
1425 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1426 _g1h->heap_region_iterate(&cl);
1427 }
1428
1429 // Potentially, some empty-regions have been reclaimed; make this a
1430 // "collection" so that pending allocation can retry before attempting a
1431 // GC pause.
1432 _g1h->increment_total_collections();
1433
1434 // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1435 // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1436 // triggered, it likely means there are very few regular GCs, making resizing
1437 // based on gc heuristics less effective.
1438 if (_g1h->last_gc_was_periodic()) {
1439 _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1440 }
1441
1442 compute_new_sizes();
1443
1444 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1445
1446 assert(!restart_for_overflow(), "sanity");
1447 // Completely reset the marking state (except bitmaps) since marking completed.
1448 reset_at_marking_complete();
1449
1450 G1CollectedHeap::finish_codecache_marking_cycle();
1451
1452 {
1453 GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1454 G1ObjectCountIsAliveClosure is_alive(_g1h);
1455 _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1456 }
1457
1458 // Successfully completed marking, advance state.
1459 cm_thread()->set_full_cycle_rebuild_and_scrub();
1460 } else {
1461 // We overflowed. Restart concurrent marking.
1462 _restart_for_overflow.store_relaxed(true);
1463
1464 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1465
1466 // Clear the marking state because we will be restarting
1467 // marking due to overflowing the global mark stack.
1468 reset_marking_for_restart();
1469 }
1470
1471 // Statistics
1472 double now = os::elapsedTime();
1473 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1474 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1475 _remark_times.add((now - start) * 1000.0);
1476
1477 _g1h->update_perf_counter_cpu_time();
1478
1479 policy->record_concurrent_mark_remark_end();
1480
1481 return;
1482 }
1483
1484 void G1ConcurrentMark::compute_new_sizes() {
1485 MetaspaceGC::compute_new_size();
1486
1487 // Cleanup will have freed any regions completely full of garbage.
1488 // Update the soft reference policy with the new heap occupancy.
1489 Universe::heap()->update_capacity_and_used_at_gc();
1490
1491 // We reclaimed old regions so we should calculate the sizes to make
1492 // sure we update the old gen/space data.
1493 _g1h->monitoring_support()->update_sizes();
1494 }
1495
1496 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1497 G1CollectedHeap* _g1h;
1498
1499 public:
1500 G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1501
1502 bool do_heap_region(G1HeapRegion* r) override {
1503 // Update the remset tracking state from updating to complete
1504 // if remembered sets have been rebuilt.
1505 _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1506 return false;
1507 }
1508 };
1509
1510 void G1ConcurrentMark::cleanup() {
1511 assert_at_safepoint_on_vm_thread();
1512
1513 // If a full collection has happened, we shouldn't do this.
1514 if (has_aborted()) {
1515 return;
1516 }
1517
1518 G1Policy* policy = _g1h->policy();
1519 policy->record_pause_start_time();
1520
1521 double start = os::elapsedTime();
1522
1523 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1524
1525 if (needs_remembered_set_rebuild()) {
1526 // Update the remset tracking information as well as marking all regions
1527 // as fully parsable.
1528 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1529 G1UpdateRegionsAfterRebuild cl(_g1h);
1530 _g1h->heap_region_iterate(&cl);
1531 } else {
1532 log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1533 }
1534
1535 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1536
1537 // Local statistics
1538 _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1539
1540 {
1541 GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1542 policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1543 }
1544
1545 // Advance state.
1546 cm_thread()->set_full_cycle_reset_for_next_cycle();
1547 return;
1548 }
1549
1550 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1551 // Uses the G1CMTask associated with a worker thread (for serial reference
1552 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1553 // trace referent objects.
1554 //
1555 // Using the G1CMTask and embedded local queues avoids having the worker
1556 // threads operating on the global mark stack. This reduces the risk
1557 // of overflowing the stack - which we would rather avoid at this late
1558 // state. Also using the tasks' local queues removes the potential
1559 // of the workers interfering with each other that could occur if
1560 // operating on the global stack.
1561
1562 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1563 G1ConcurrentMark* _cm;
1564 G1CMTask* _task;
1565 uint _ref_counter_limit;
1566 uint _ref_counter;
1567 bool _is_serial;
1568 public:
1569 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1570 _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1571 _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1572 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1573 }
1574
1575 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1576 virtual void do_oop( oop* p) { do_oop_work(p); }
1577
1578 template <class T> void do_oop_work(T* p) {
1579 if (_cm->has_overflown()) {
1580 return;
1581 }
1582 if (!_task->deal_with_reference(p)) {
1583 // We did not add anything to the mark bitmap (or mark stack), so there is
1584 // no point trying to drain it.
1585 return;
1586 }
1587 _ref_counter--;
1588
1589 if (_ref_counter == 0) {
1590 // We have dealt with _ref_counter_limit references, pushing them
1591 // and objects reachable from them on to the local stack (and
1592 // possibly the global stack). Call G1CMTask::do_marking_step() to
1593 // process these entries.
1594 //
1595 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1596 // there's nothing more to do (i.e. we're done with the entries that
1597 // were pushed as a result of the G1CMTask::deal_with_reference() calls
1598 // above) or we overflow.
1599 //
1600 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1601 // flag while there may still be some work to do. (See the comment at
1602 // the beginning of G1CMTask::do_marking_step() for those conditions -
1603 // one of which is reaching the specified time target.) It is only
1604 // when G1CMTask::do_marking_step() returns without setting the
1605 // has_aborted() flag that the marking step has completed.
1606 do {
1607 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1608 _task->do_marking_step(mark_step_duration_ms,
1609 false /* do_termination */,
1610 _is_serial);
1611 } while (_task->has_aborted() && !_cm->has_overflown());
1612 _ref_counter = _ref_counter_limit;
1613 }
1614 }
1615 };
1616
1617 // 'Drain' oop closure used by both serial and parallel reference processing.
1618 // Uses the G1CMTask associated with a given worker thread (for serial
1619 // reference processing the G1CMtask for worker 0 is used). Calls the
1620 // do_marking_step routine, with an unbelievably large timeout value,
1621 // to drain the marking data structures of the remaining entries
1622 // added by the 'keep alive' oop closure above.
1623
1624 class G1CMDrainMarkingStackClosure : public VoidClosure {
1625 G1ConcurrentMark* _cm;
1626 G1CMTask* _task;
1627 bool _is_serial;
1628 public:
1629 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1630 _cm(cm), _task(task), _is_serial(is_serial) {
1631 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1632 }
1633
1634 void do_void() {
1635 do {
1636 // We call G1CMTask::do_marking_step() to completely drain the local
1637 // and global marking stacks of entries pushed by the 'keep alive'
1638 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1639 //
1640 // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1641 // if there's nothing more to do (i.e. we've completely drained the
1642 // entries that were pushed as a result of applying the 'keep alive'
1643 // closure to the entries on the discovered ref lists) or we overflow
1644 // the global marking stack.
1645 //
1646 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1647 // flag while there may still be some work to do. (See the comment at
1648 // the beginning of G1CMTask::do_marking_step() for those conditions -
1649 // one of which is reaching the specified time target.) It is only
1650 // when G1CMTask::do_marking_step() returns without setting the
1651 // has_aborted() flag that the marking step has completed.
1652
1653 _task->do_marking_step(1000000000.0 /* something very large */,
1654 true /* do_termination */,
1655 _is_serial);
1656 } while (_task->has_aborted() && !_cm->has_overflown());
1657 }
1658 };
1659
1660 class G1CMRefProcProxyTask : public RefProcProxyTask {
1661 G1CollectedHeap& _g1h;
1662 G1ConcurrentMark& _cm;
1663
1664 public:
1665 G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1666 : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1667 _g1h(g1h),
1668 _cm(cm) {}
1669
1670 void work(uint worker_id) override {
1671 assert(worker_id < _max_workers, "sanity");
1672 G1CMIsAliveClosure is_alive(&_cm);
1673 uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1674 G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1675 BarrierEnqueueDiscoveredFieldClosure enqueue;
1676 G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1677 _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1678 }
1679
1680 void prepare_run_task_hook() override {
1681 // We need to reset the concurrency level before each
1682 // proxy task execution, so that the termination protocol
1683 // and overflow handling in G1CMTask::do_marking_step() knows
1684 // how many workers to wait for.
1685 _cm.set_concurrency(_queue_count);
1686 }
1687 };
1688
1689 void G1ConcurrentMark::weak_refs_work() {
1690 ResourceMark rm;
1691
1692 {
1693 GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1694
1695 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1696
1697 // See the comment in G1CollectedHeap::ref_processing_init()
1698 // about how reference processing currently works in G1.
1699
1700 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1701
1702 // Prefer to grow the stack until the max capacity.
1703 _global_mark_stack.set_should_grow();
1704
1705 // Parallel processing task executor.
1706 G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1707 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1708
1709 // Process the weak references.
1710 const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1711 _gc_tracer_cm->report_gc_reference_stats(stats);
1712 pt.print_all_references();
1713
1714 // The do_oop work routines of the keep_alive and drain_marking_stack
1715 // oop closures will set the has_overflown flag if we overflow the
1716 // global marking stack.
1717
1718 assert(has_overflown() || _global_mark_stack.is_empty(),
1719 "Mark stack should be empty (unless it has overflown)");
1720 }
1721
1722 if (has_overflown()) {
1723 // We can not trust g1_is_alive and the contents of the heap if the marking stack
1724 // overflowed while processing references. Exit the VM.
1725 fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1726 "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1727 "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1728 _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1729 return;
1730 }
1731
1732 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1733
1734 {
1735 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1736 G1CMIsAliveClosure is_alive(this);
1737 WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1738 }
1739 }
1740
1741 class G1PrecleanYieldClosure : public YieldClosure {
1742 G1ConcurrentMark* _cm;
1743
1744 public:
1745 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1746
1747 virtual bool should_return() {
1748 return _cm->has_aborted();
1749 }
1750
1751 virtual bool should_return_fine_grain() {
1752 _cm->do_yield_check();
1753 return _cm->has_aborted();
1754 }
1755 };
1756
1757 void G1ConcurrentMark::preclean() {
1758 assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1759
1760 SuspendibleThreadSetJoiner joiner;
1761
1762 BarrierEnqueueDiscoveredFieldClosure enqueue;
1763
1764 set_concurrency_and_phase(1, true);
1765
1766 G1PrecleanYieldClosure yield_cl(this);
1767
1768 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1769 // Precleaning is single threaded. Temporarily disable MT discovery.
1770 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1771 rp->preclean_discovered_references(rp->is_alive_non_header(),
1772 &enqueue,
1773 &yield_cl,
1774 _gc_timer_cm);
1775 }
1776
1777 // Closure for marking entries in SATB buffers.
1778 class G1CMSATBBufferClosure : public SATBBufferClosure {
1779 private:
1780 G1CMTask* _task;
1781 G1CollectedHeap* _g1h;
1782
1783 // This is very similar to G1CMTask::deal_with_reference, but with
1784 // more relaxed requirements for the argument, so this must be more
1785 // circumspect about treating the argument as an object.
1786 void do_entry(void* entry) const {
1787 _task->increment_refs_reached();
1788 oop const obj = cast_to_oop(entry);
1789 _task->make_reference_grey(obj);
1790 }
1791
1792 public:
1793 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1794 : _task(task), _g1h(g1h) { }
1795
1796 virtual void do_buffer(void** buffer, size_t size) {
1797 for (size_t i = 0; i < size; ++i) {
1798 do_entry(buffer[i]);
1799 }
1800 }
1801 };
1802
1803 class G1RemarkThreadsClosure : public ThreadClosure {
1804 G1SATBMarkQueueSet& _qset;
1805
1806 public:
1807 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1808 _qset(G1BarrierSet::satb_mark_queue_set()) {}
1809
1810 void do_thread(Thread* thread) {
1811 // Transfer any partial buffer to the qset for completed buffer processing.
1812 _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1813 }
1814 };
1815
1816 class G1CMRemarkTask : public WorkerTask {
1817 // For Threads::possibly_parallel_threads_do
1818 ThreadsClaimTokenScope _threads_claim_token_scope;
1819 G1ConcurrentMark* _cm;
1820 public:
1821 void work(uint worker_id) {
1822 G1CMTask* task = _cm->task(worker_id);
1823 task->record_start_time();
1824 {
1825 ResourceMark rm;
1826
1827 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1828 Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1829 }
1830
1831 do {
1832 task->do_marking_step(1000000000.0 /* something very large */,
1833 true /* do_termination */,
1834 false /* is_serial */);
1835 } while (task->has_aborted() && !_cm->has_overflown());
1836 // If we overflow, then we do not want to restart. We instead
1837 // want to abort remark and do concurrent marking again.
1838 task->record_end_time();
1839 }
1840
1841 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1842 WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1843 _cm->terminator()->reset_for_reuse(active_workers);
1844 }
1845 };
1846
1847 void G1ConcurrentMark::finalize_marking() {
1848 ResourceMark rm;
1849
1850 _g1h->ensure_parsability(false);
1851
1852 // this is remark, so we'll use up all active threads
1853 uint active_workers = _g1h->workers()->active_workers();
1854 set_concurrency_and_phase(active_workers, false /* concurrent */);
1855 // Leave _parallel_marking_threads at it's
1856 // value originally calculated in the G1ConcurrentMark
1857 // constructor and pass values of the active workers
1858 // through the task.
1859
1860 {
1861 G1CMRemarkTask remarkTask(this, active_workers);
1862 // We will start all available threads, even if we decide that the
1863 // active_workers will be fewer. The extra ones will just bail out
1864 // immediately.
1865 _g1h->workers()->run_task(&remarkTask);
1866 }
1867
1868 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1869 guarantee(has_overflown() ||
1870 satb_mq_set.completed_buffers_num() == 0,
1871 "Invariant: has_overflown = %s, num buffers = %zu",
1872 BOOL_TO_STR(has_overflown()),
1873 satb_mq_set.completed_buffers_num());
1874
1875 print_stats();
1876 }
1877
1878 void G1ConcurrentMark::flush_all_task_caches(bool ends_use_of_mark_cache) {
1879 size_t hits = 0;
1880 size_t misses = 0;
1881 for (uint i = 0; i < _max_num_tasks; i++) {
1882 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1883 hits += stats.first;
1884 misses += stats.second;
1885 }
1886 size_t sum = hits + misses;
1887 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1888 hits, misses, percent_of(hits, sum));
1889 if (ends_use_of_mark_cache) {
1890 _is_region_mark_stats_cache_in_use = false;
1891 }
1892 }
1893
1894 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1895 assert_at_safepoint();
1896 _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1897 }
1898
1899 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1900 // "Checkpoint" the finger.
1901 HeapWord* local_finger = finger();
1902
1903 while (local_finger < _heap.end()) {
1904 assert(_g1h->is_in_reserved(local_finger), "invariant");
1905
1906 G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1907 // Make sure that the reads below do not float before loading curr_region.
1908 OrderAccess::loadload();
1909 // Above heap_region_containing may return null as we always scan claim
1910 // until the end of the heap. In this case, just jump to the next region.
1911 HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1912
1913 // Is the gap between reading the finger and doing the CAS too long?
1914 HeapWord* res = _finger.compare_exchange(local_finger, end);
1915 if (res == local_finger && curr_region != nullptr) {
1916 // We succeeded.
1917 HeapWord* bottom = curr_region->bottom();
1918 HeapWord* limit = top_at_mark_start(curr_region);
1919
1920 log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1921 // Notice that _finger == end cannot be guaranteed here since,
1922 // someone else might have moved the finger even further.
1923 assert(finger() >= end, "The finger should have moved forward");
1924
1925 if (limit > bottom) {
1926 return curr_region;
1927 } else {
1928 assert(limit == bottom,
1929 "The scan limit for region %u (%s) should be bottom but is " PTR_FORMAT,
1930 curr_region->hrm_index(), curr_region->get_short_type_str(), p2i(limit));
1931 // We return null and the caller should try calling
1932 // claim_region() again.
1933 return nullptr;
1934 }
1935 } else {
1936 // Read the finger again.
1937 HeapWord* next_finger = finger();
1938 assert(next_finger > local_finger,
1939 "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT,
1940 p2i(local_finger), p2i(next_finger));
1941 local_finger = next_finger;
1942 }
1943 }
1944
1945 return nullptr;
1946 }
1947
1948 #ifndef PRODUCT
1949 class VerifyNoCSetOops {
1950 G1CollectedHeap* _g1h;
1951 const char* _phase;
1952 int _info;
1953
1954 public:
1955 VerifyNoCSetOops(const char* phase, int info = -1) :
1956 _g1h(G1CollectedHeap::heap()),
1957 _phase(phase),
1958 _info(info)
1959 { }
1960
1961 void operator()(G1TaskQueueEntry task_entry) const {
1962 if (task_entry.is_partial_array_state()) {
1963 oop obj = task_entry.to_partial_array_state()->source();
1964 guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1965 return;
1966 }
1967 guarantee(oopDesc::is_oop(task_entry.to_oop()),
1968 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1969 p2i(task_entry.to_oop()), _phase, _info);
1970 G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1971 guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1972 "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1973 p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1974 }
1975 };
1976
1977 void G1ConcurrentMark::verify_no_collection_set_oops() {
1978 assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1979 "should be at a safepoint or initializing");
1980 if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1981 return;
1982 }
1983
1984 // Verify entries on the global mark stack
1985 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1986
1987 // Verify entries on the task queues
1988 for (uint i = 0; i < _max_num_tasks; ++i) {
1989 G1CMTaskQueue* queue = _task_queues->queue(i);
1990 queue->iterate(VerifyNoCSetOops("Queue", i));
1991 }
1992
1993 // Verify the global finger
1994 HeapWord* global_finger = finger();
1995 if (global_finger != nullptr && global_finger < _heap.end()) {
1996 // Since we always iterate over all regions, we might get a null G1HeapRegion
1997 // here.
1998 G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1999 guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
2000 "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2001 p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2002 }
2003
2004 // Verify the task fingers
2005 assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
2006 for (uint i = 0; i < _num_concurrent_workers; ++i) {
2007 G1CMTask* task = _tasks[i];
2008 HeapWord* task_finger = task->finger();
2009 if (task_finger != nullptr && task_finger < _heap.end()) {
2010 // See above note on the global finger verification.
2011 G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
2012 guarantee(r == nullptr || task_finger == r->bottom() ||
2013 !r->in_collection_set() || !r->has_index_in_opt_cset(),
2014 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2015 p2i(task_finger), HR_FORMAT_PARAMS(r));
2016 }
2017 }
2018 }
2019 #endif // PRODUCT
2020
2021 void G1ConcurrentMark::rebuild_and_scrub() {
2022 if (!needs_remembered_set_rebuild()) {
2023 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
2024 }
2025
2026 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
2027 }
2028
2029 void G1ConcurrentMark::print_stats() {
2030 if (!log_is_enabled(Debug, gc, stats)) {
2031 return;
2032 }
2033 log_debug(gc, stats)("---------------------------------------------------------------------");
2034 for (size_t i = 0; i < _num_active_tasks; ++i) {
2035 _tasks[i]->print_stats();
2036 log_debug(gc, stats)("---------------------------------------------------------------------");
2037 }
2038 }
2039
2040 bool G1ConcurrentMark::shutdown_cleanup_needed() const {
2041 // Cleanup (aborting threads, setting abort flags) is needed throughout the whole cycle before
2042 // stopping the CM thread.
2043 return is_fully_initialized() && is_in_concurrent_cycle();
2044 }
2045
2046 void G1ConcurrentMark::shutdown_concurrent_cycle() {
2047 assert_at_safepoint_on_vm_thread();
2048
2049 abort_root_region_scan_at_safepoint();
2050 abort_marking_threads();
2051
2052 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2053 satb_mq_set.abandon_partial_marking();
2054 // This can be called either during or outside marking, we'll read
2055 // the expected_active value from the SATB queue set.
2056 satb_mq_set.set_active_all_threads(false, /* new active value */
2057 satb_mq_set.is_active() /* expected_active */);
2058 }
2059
2060 bool G1ConcurrentMark::concurrent_cycle_abort() {
2061 assert_at_safepoint_on_vm_thread();
2062 assert(_g1h->collector_state()->is_in_full_gc(), "must be");
2063
2064 // If we start the compaction before the CM threads finish
2065 // scanning the root regions we might trip them over as we'll
2066 // be moving objects / updating references. Since the root region
2067 // scan synchronized with the safepoint, just tell it to abort.
2068 // It will notice when the threads start up again later.
2069 abort_root_region_scan_at_safepoint();
2070
2071 // We haven't started a concurrent cycle no need to do anything; we might have
2072 // aborted the marking because of shutting down though. In this case the marking
2073 // might have already completed the abort (leading to in_progress() below to
2074 // return false), however this still left marking state particularly in the
2075 // shared marking bitmap that must be cleaned up.
2076 // If there are multiple full gcs during shutdown we do this work repeatedly for
2077 // nothing, but this situation should be extremely rare (a full gc after shutdown
2078 // has been signalled is already rare), and this work should be negligible compared
2079 // to actual full gc work.
2080
2081 if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
2082 return false;
2083 }
2084
2085 flush_all_task_caches();
2086 reset_marking_for_restart();
2087
2088 abort_marking_threads();
2089
2090 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2091 satb_mq_set.abandon_partial_marking();
2092 // This can be called either during or outside marking, we'll read
2093 // the expected_active value from the SATB queue set.
2094 satb_mq_set.set_active_all_threads(false, /* new active value */
2095 satb_mq_set.is_active() /* expected_active */);
2096 return true;
2097 }
2098
2099 void G1ConcurrentMark::abort_marking_threads() {
2100 assert_root_region_scan_completed_or_aborted();
2101 _has_aborted.store_relaxed(true);
2102 _first_overflow_barrier_sync.abort();
2103 _second_overflow_barrier_sync.abort();
2104 }
2105
2106 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2107 class CountCpuTimeThreadClosure : public ThreadClosure {
2108 public:
2109 jlong _total_cpu_time;
2110
2111 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2112
2113 void do_thread(Thread* t) {
2114 _total_cpu_time += os::thread_cpu_time(t);
2115 }
2116 } cl;
2117
2118 threads_do(&cl);
2119
2120 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2121 }
2122
2123 static void print_ms_time_info(const char* prefix, const char* name,
2124 NumberSeq& ns) {
2125 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2126 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2127 if (ns.num() > 0) {
2128 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2129 prefix, ns.sd(), ns.maximum());
2130 }
2131 }
2132
2133 void G1ConcurrentMark::print_summary_info() {
2134 Log(gc, marking) log;
2135 if (!log.is_trace()) {
2136 return;
2137 }
2138
2139 log.trace(" Concurrent marking:");
2140 if (!is_fully_initialized()) {
2141 log.trace(" has not been initialized yet");
2142 return;
2143 }
2144 print_ms_time_info(" ", "remarks", _remark_times);
2145 {
2146 print_ms_time_info(" ", "final marks", _remark_mark_times);
2147 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2148
2149 }
2150 print_ms_time_info(" ", "cleanups", _cleanup_times);
2151 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2152 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2153 log.trace(" Total stop_world time = %8.2f s.",
2154 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2155 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2156 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2157 }
2158
2159 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2160 if (is_fully_initialized()) { // they are initialized late
2161 tc->do_thread(_cm_thread);
2162 _concurrent_workers->threads_do(tc);
2163 }
2164 }
2165
2166 void G1ConcurrentMark::print_on(outputStream* st) const {
2167 st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2168 _mark_bitmap.print_on(st, " Bits: ");
2169 }
2170
2171 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2172 ReferenceProcessor* result = g1h->ref_processor_cm();
2173 assert(result != nullptr, "CM reference processor should not be null");
2174 return result;
2175 }
2176
2177 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2178 G1CMTask* task)
2179 : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2180 _g1h(g1h), _task(task)
2181 { }
2182
2183 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2184 assert(hr != nullptr,
2185 "claim_region() should have filtered out null regions");
2186 _curr_region = hr;
2187 _finger = hr->bottom();
2188 update_region_limit();
2189 }
2190
2191 void G1CMTask::update_region_limit() {
2192 G1HeapRegion* hr = _curr_region;
2193 HeapWord* bottom = hr->bottom();
2194 HeapWord* limit = _cm->top_at_mark_start(hr);
2195
2196 if (limit == bottom) {
2197 // The region was collected underneath our feet.
2198 // We set the finger to bottom to ensure that the bitmap
2199 // iteration that will follow this will not do anything.
2200 // (this is not a condition that holds when we set the region up,
2201 // as the region is not supposed to be empty in the first place)
2202 _finger = bottom;
2203 } else if (limit >= _region_limit) {
2204 assert(limit >= _finger, "peace of mind");
2205 } else {
2206 assert(limit < _region_limit, "only way to get here");
2207 // This can happen under some pretty unusual circumstances. An
2208 // evacuation pause empties the region underneath our feet (TAMS
2209 // at bottom). We then do some allocation in the region (TAMS
2210 // stays at bottom), followed by the region being used as a GC
2211 // alloc region (TAMS will move to top() and the objects
2212 // originally below it will be greyed). All objects now marked in
2213 // the region are explicitly greyed, if below the global finger,
2214 // and we do not need in fact to scan anything else. So, we simply
2215 // set _finger to be limit to ensure that the bitmap iteration
2216 // doesn't do anything.
2217 _finger = limit;
2218 }
2219
2220 _region_limit = limit;
2221 }
2222
2223 void G1CMTask::giveup_current_region() {
2224 assert(_curr_region != nullptr, "invariant");
2225 clear_region_fields();
2226 }
2227
2228 void G1CMTask::clear_region_fields() {
2229 // Values for these three fields that indicate that we're not
2230 // holding on to a region.
2231 _curr_region = nullptr;
2232 _finger = nullptr;
2233 _region_limit = nullptr;
2234 }
2235
2236 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2237 if (cm_oop_closure == nullptr) {
2238 assert(_cm_oop_closure != nullptr, "invariant");
2239 } else {
2240 assert(_cm_oop_closure == nullptr, "invariant");
2241 }
2242 _cm_oop_closure = cm_oop_closure;
2243 }
2244
2245 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2246 guarantee(mark_bitmap != nullptr, "invariant");
2247 _mark_bitmap = mark_bitmap;
2248 clear_region_fields();
2249
2250 _calls = 0;
2251 _elapsed_time_ms = 0.0;
2252 _termination_time_ms = 0.0;
2253
2254 _mark_stats_cache.reset();
2255 }
2256
2257 void G1CMTask::reset_for_restart() {
2258 clear_region_fields();
2259 _task_queue->set_empty();
2260 TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2261 TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2262 }
2263
2264 void G1CMTask::register_partial_array_splitter() {
2265
2266 ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2267 _cm->max_num_tasks());
2268 }
2269
2270 void G1CMTask::unregister_partial_array_splitter() {
2271 _partial_array_splitter.~PartialArraySplitter();
2272 }
2273
2274 bool G1CMTask::should_exit_termination() {
2275 if (!regular_clock_call()) {
2276 return true;
2277 }
2278
2279 // This is called when we are in the termination protocol. We should
2280 // quit if, for some reason, this task wants to abort or the global
2281 // stack is not empty (this means that we can get work from it).
2282 return !_cm->mark_stack_empty() || has_aborted();
2283 }
2284
2285 void G1CMTask::reached_limit() {
2286 assert(_words_scanned >= _words_scanned_limit ||
2287 _refs_reached >= _refs_reached_limit ,
2288 "shouldn't have been called otherwise");
2289 abort_marking_if_regular_check_fail();
2290 }
2291
2292 bool G1CMTask::regular_clock_call() {
2293 if (has_aborted()) {
2294 return false;
2295 }
2296
2297 // First, we need to recalculate the words scanned and refs reached
2298 // limits for the next clock call.
2299 recalculate_limits();
2300
2301 // During the regular clock call we do the following
2302
2303 // (1) If an overflow has been flagged, then we abort.
2304 if (_cm->has_overflown()) {
2305 return false;
2306 }
2307
2308 // If we are not concurrent (i.e. we're doing remark) we don't need
2309 // to check anything else. The other steps are only needed during
2310 // the concurrent marking phase.
2311 if (!_cm->concurrent()) {
2312 return true;
2313 }
2314
2315 // (2) If marking has been aborted for Full GC, then we also abort.
2316 if (_cm->has_aborted()) {
2317 return false;
2318 }
2319
2320 // (4) We check whether we should yield. If we have to, then we abort.
2321 if (SuspendibleThreadSet::should_yield()) {
2322 // We should yield. To do this we abort the task. The caller is
2323 // responsible for yielding.
2324 return false;
2325 }
2326
2327 // (5) We check whether we've reached our time quota. If we have,
2328 // then we abort.
2329 double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2330 if (elapsed_time_ms > _time_target_ms) {
2331 _has_timed_out = true;
2332 return false;
2333 }
2334
2335 // (6) Finally, we check whether there are enough completed STAB
2336 // buffers available for processing. If there are, we abort.
2337 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2338 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2339 // we do need to process SATB buffers, we'll abort and restart
2340 // the marking task to do so
2341 return false;
2342 }
2343 return true;
2344 }
2345
2346 void G1CMTask::recalculate_limits() {
2347 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2348 _words_scanned_limit = _real_words_scanned_limit;
2349
2350 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2351 _refs_reached_limit = _real_refs_reached_limit;
2352 }
2353
2354 void G1CMTask::decrease_limits() {
2355 // This is called when we believe that we're going to do an infrequent
2356 // operation which will increase the per byte scanned cost (i.e. move
2357 // entries to/from the global stack). It basically tries to decrease the
2358 // scanning limit so that the clock is called earlier.
2359
2360 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2361 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2362 }
2363
2364 void G1CMTask::move_entries_to_global_stack() {
2365 // Local array where we'll store the entries that will be popped
2366 // from the local queue.
2367 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2368
2369 size_t n = 0;
2370 G1TaskQueueEntry task_entry;
2371 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2372 buffer[n] = task_entry;
2373 ++n;
2374 }
2375 if (n < G1CMMarkStack::EntriesPerChunk) {
2376 buffer[n] = G1TaskQueueEntry();
2377 }
2378
2379 if (n > 0) {
2380 if (!_cm->mark_stack_push(buffer)) {
2381 set_has_aborted();
2382 }
2383 }
2384
2385 // This operation was quite expensive, so decrease the limits.
2386 decrease_limits();
2387 }
2388
2389 bool G1CMTask::get_entries_from_global_stack() {
2390 // Local array where we'll store the entries that will be popped
2391 // from the global stack.
2392 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2393
2394 if (!_cm->mark_stack_pop(buffer)) {
2395 return false;
2396 }
2397
2398 // We did actually pop at least one entry.
2399 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2400 G1TaskQueueEntry task_entry = buffer[i];
2401 if (task_entry.is_null()) {
2402 break;
2403 }
2404 assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2405 bool success = _task_queue->push(task_entry);
2406 // We only call this when the local queue is empty or under a
2407 // given target limit. So, we do not expect this push to fail.
2408 assert(success, "invariant");
2409 }
2410
2411 // This operation was quite expensive, so decrease the limits
2412 decrease_limits();
2413 return true;
2414 }
2415
2416 void G1CMTask::drain_local_queue(bool partially) {
2417 if (has_aborted()) {
2418 return;
2419 }
2420
2421 // Decide what the target size is, depending whether we're going to
2422 // drain it partially (so that other tasks can steal if they run out
2423 // of things to do) or totally (at the very end).
2424 uint target_size;
2425 if (partially) {
2426 target_size = GCDrainStackTargetSize;
2427 } else {
2428 target_size = 0;
2429 }
2430
2431 if (_task_queue->size() > target_size) {
2432 G1TaskQueueEntry entry;
2433 bool ret = _task_queue->pop_local(entry);
2434 while (ret) {
2435 process_entry(entry, false /* stolen */);
2436 if (_task_queue->size() <= target_size || has_aborted()) {
2437 ret = false;
2438 } else {
2439 ret = _task_queue->pop_local(entry);
2440 }
2441 }
2442 }
2443 }
2444
2445 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2446 assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2447
2448 // Mark klass metadata
2449 process_klass(obj->klass());
2450
2451 size_t array_length = obj->length();
2452 size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2453
2454 process_array_chunk(obj, 0, initial_chunk_size);
2455
2456 // Include object header size
2457 if (obj->is_refArray()) {
2458 return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2459 } else {
2460 FlatArrayKlass* fak = FlatArrayKlass::cast(obj->klass());
2461 return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
2462 }
2463 }
2464
2465 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2466 PartialArrayState* state = task.to_partial_array_state();
2467 // Access state before release by claim().
2468 objArrayOop obj = oop_cast<objArrayOop>(state->source());
2469
2470 PartialArraySplitter::Claim claim =
2471 _partial_array_splitter.claim(state, _task_queue, stolen);
2472
2473 process_array_chunk(obj, claim._start, claim._end);
2474
2475 if (obj->is_refArray()) {
2476 return heap_word_size((claim._end - claim._start) * heapOopSize);
2477 } else {
2478 assert(obj->is_flatArray(), "Must be!");
2479 size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
2480 size_t nof_elements = claim._end - claim._start;
2481 return heap_word_size(nof_elements * element_byte_size);
2482 }
2483 }
2484
2485 void G1CMTask::drain_global_stack(bool partially) {
2486 if (has_aborted()) {
2487 return;
2488 }
2489
2490 // We have a policy to drain the local queue before we attempt to
2491 // drain the global stack.
2492 assert(partially || _task_queue->size() == 0, "invariant");
2493
2494 // Decide what the target size is, depending whether we're going to
2495 // drain it partially (so that other tasks can steal if they run out
2496 // of things to do) or totally (at the very end).
2497 // Notice that when draining the global mark stack partially, due to the racyness
2498 // of the mark stack size update we might in fact drop below the target. But,
2499 // this is not a problem.
2500 // In case of total draining, we simply process until the global mark stack is
2501 // totally empty, disregarding the size counter.
2502 if (partially) {
2503 size_t const target_size = _cm->partial_mark_stack_size_target();
2504 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2505 if (get_entries_from_global_stack()) {
2506 drain_local_queue(partially);
2507 }
2508 }
2509 } else {
2510 while (!has_aborted() && get_entries_from_global_stack()) {
2511 drain_local_queue(partially);
2512 }
2513 }
2514 }
2515
2516 // SATB Queue has several assumptions on whether to call the par or
2517 // non-par versions of the methods. this is why some of the code is
2518 // replicated. We should really get rid of the single-threaded version
2519 // of the code to simplify things.
2520 void G1CMTask::drain_satb_buffers() {
2521 if (has_aborted()) {
2522 return;
2523 }
2524
2525 // We set this so that the regular clock knows that we're in the
2526 // middle of draining buffers and doesn't set the abort flag when it
2527 // notices that SATB buffers are available for draining. It'd be
2528 // very counter productive if it did that. :-)
2529 _draining_satb_buffers = true;
2530
2531 G1CMSATBBufferClosure satb_cl(this, _g1h);
2532 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2533
2534 // This keeps claiming and applying the closure to completed buffers
2535 // until we run out of buffers or we need to abort.
2536 while (!has_aborted() &&
2537 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2538 abort_marking_if_regular_check_fail();
2539 }
2540
2541 // Can't assert qset is empty here, even if not aborted. If concurrent,
2542 // some other thread might be adding to the queue. If not concurrent,
2543 // some other thread might have won the race for the last buffer, but
2544 // has not yet decremented the count.
2545
2546 _draining_satb_buffers = false;
2547
2548 // again, this was a potentially expensive operation, decrease the
2549 // limits to get the regular clock call early
2550 decrease_limits();
2551 }
2552
2553 #ifndef PRODUCT
2554 void G1CMTask::verify_no_mark_stats_for(uint region_idx) {
2555 _mark_stats_cache.verify_no_mark_stats_for(region_idx);
2556 }
2557 #endif
2558
2559 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2560 _mark_stats_cache.reset(region_idx);
2561 }
2562
2563 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2564 return _mark_stats_cache.evict_all();
2565 }
2566
2567 void G1CMTask::print_stats() {
2568 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2569 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2570 _elapsed_time_ms, _termination_time_ms);
2571 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2572 _step_times_ms.num(),
2573 _step_times_ms.avg(),
2574 _step_times_ms.sd(),
2575 _step_times_ms.maximum(),
2576 _step_times_ms.sum());
2577 size_t const hits = _mark_stats_cache.hits();
2578 size_t const misses = _mark_stats_cache.misses();
2579 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2580 hits, misses, percent_of(hits, hits + misses));
2581 }
2582
2583 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2584 return _task_queues->steal(worker_id, task_entry);
2585 }
2586
2587 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2588 if (has_aborted() || _curr_region == nullptr) {
2589 return;
2590 }
2591
2592 // This means that we're already holding on to a region.
2593 assert(_finger != nullptr, "if region is not null, then the finger "
2594 "should not be null either");
2595
2596 // We might have restarted this task after an evacuation pause
2597 // which might have evacuated the region we're holding on to
2598 // underneath our feet. Let's read its limit again to make sure
2599 // that we do not iterate over a region of the heap that
2600 // contains garbage (update_region_limit() will also move
2601 // _finger to the start of the region if it is found empty).
2602 update_region_limit();
2603 // We will start from _finger not from the start of the region,
2604 // as we might be restarting this task after aborting half-way
2605 // through scanning this region. In this case, _finger points to
2606 // the address where we last found a marked object. If this is a
2607 // fresh region, _finger points to start().
2608 MemRegion mr = MemRegion(_finger, _region_limit);
2609
2610 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2611 "humongous regions should go around loop once only");
2612
2613 // Some special cases:
2614 // If the memory region is empty, we can just give up the region.
2615 // If the current region is humongous then we only need to check
2616 // the bitmap for the bit associated with the start of the object,
2617 // scan the object if it's live, and give up the region.
2618 // Otherwise, let's iterate over the bitmap of the part of the region
2619 // that is left.
2620 // If the iteration is successful, give up the region.
2621 if (mr.is_empty()) {
2622 giveup_current_region();
2623 abort_marking_if_regular_check_fail();
2624 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2625 if (_mark_bitmap->is_marked(mr.start())) {
2626 // The object is marked - apply the closure
2627 bitmap_closure.do_addr(mr.start());
2628 }
2629 // Even if this task aborted while scanning the humongous object
2630 // we can (and should) give up the current region.
2631 giveup_current_region();
2632 abort_marking_if_regular_check_fail();
2633 } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2634 giveup_current_region();
2635 abort_marking_if_regular_check_fail();
2636 } else {
2637 assert(has_aborted(), "currently the only way to do so");
2638 // The only way to abort the bitmap iteration is to return
2639 // false from the do_bit() method. However, inside the
2640 // do_bit() method we move the _finger to point to the
2641 // object currently being looked at. So, if we bail out, we
2642 // have definitely set _finger to something non-null.
2643 assert(_finger != nullptr, "invariant");
2644
2645 // Region iteration was actually aborted. So now _finger
2646 // points to the address of the object we last scanned. If we
2647 // leave it there, when we restart this task, we will rescan
2648 // the object. It is easy to avoid this. We move the finger by
2649 // enough to point to the next possible object header.
2650 assert(_finger < _region_limit, "invariant");
2651 HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2652 if (new_finger >= _region_limit) {
2653 giveup_current_region();
2654 } else {
2655 move_finger_to(new_finger);
2656 }
2657 }
2658 }
2659
2660 void G1CMTask::claim_new_region() {
2661 // Read the note on the claim_region() method on why it might
2662 // return null with potentially more regions available for
2663 // claiming and why we have to check out_of_regions() to determine
2664 // whether we're done or not.
2665 while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2666 // We are going to try to claim a new region. We should have
2667 // given up on the previous one.
2668 // Separated the asserts so that we know which one fires.
2669 assert(_curr_region == nullptr, "invariant");
2670 assert(_finger == nullptr, "invariant");
2671 assert(_region_limit == nullptr, "invariant");
2672 G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2673 if (claimed_region != nullptr) {
2674 // Yes, we managed to claim one
2675 setup_for_region(claimed_region);
2676 assert(_curr_region == claimed_region, "invariant");
2677 }
2678 // It is important to call the regular clock here. It might take
2679 // a while to claim a region if, for example, we hit a large
2680 // block of empty regions. So we need to call the regular clock
2681 // method once round the loop to make sure it's called
2682 // frequently enough.
2683 abort_marking_if_regular_check_fail();
2684 }
2685 }
2686
2687 void G1CMTask::attempt_stealing() {
2688 // We cannot check whether the global stack is empty, since other
2689 // tasks might be pushing objects to it concurrently.
2690 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2691 "only way to reach here");
2692 while (!has_aborted()) {
2693 G1TaskQueueEntry entry;
2694 if (_cm->try_stealing(_worker_id, entry)) {
2695 process_entry(entry, true /* stolen */);
2696
2697 // And since we're towards the end, let's totally drain the
2698 // local queue and global stack.
2699 drain_local_queue(false);
2700 drain_global_stack(false);
2701 } else {
2702 break;
2703 }
2704 }
2705 }
2706
2707 void G1CMTask::attempt_termination(bool is_serial) {
2708 // We cannot check whether the global stack is empty, since other
2709 // tasks might be concurrently pushing objects on it.
2710 // Separated the asserts so that we know which one fires.
2711 assert(_cm->out_of_regions(), "only way to reach here");
2712 assert(_task_queue->size() == 0, "only way to reach here");
2713 double termination_start_time_ms = os::elapsedTime() * 1000.0;
2714
2715 // The G1CMTask class also extends the TerminatorTerminator class,
2716 // hence its should_exit_termination() method will also decide
2717 // whether to exit the termination protocol or not.
2718 bool finished = (is_serial ||
2719 _cm->terminator()->offer_termination(this));
2720 _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2721
2722 if (finished) {
2723 // We're all done.
2724
2725 // We can now guarantee that the global stack is empty, since
2726 // all other tasks have finished. We separated the guarantees so
2727 // that, if a condition is false, we can immediately find out
2728 // which one.
2729 guarantee(_cm->out_of_regions(), "only way to reach here");
2730 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2731 guarantee(_task_queue->size() == 0, "only way to reach here");
2732 guarantee(!_cm->has_overflown(), "only way to reach here");
2733 guarantee(!has_aborted(), "should never happen if termination has completed");
2734 } else {
2735 // Apparently there's more work to do. Let's abort this task. We
2736 // will restart it and hopefully we can find more things to do.
2737 set_has_aborted();
2738 }
2739 }
2740
2741 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2742 if (_has_timed_out) {
2743 double diff_ms = elapsed_time_ms - _time_target_ms;
2744 // Keep statistics of how well we did with respect to hitting
2745 // our target only if we actually timed out (if we aborted for
2746 // other reasons, then the results might get skewed).
2747 _marking_step_diff_ms.add(diff_ms);
2748 }
2749
2750 if (!_cm->has_overflown()) {
2751 return;
2752 }
2753
2754 // This is the interesting one. We aborted because a global
2755 // overflow was raised. This means we have to restart the
2756 // marking phase and start iterating over regions. However, in
2757 // order to do this we have to make sure that all tasks stop
2758 // what they are doing and re-initialize in a safe manner. We
2759 // will achieve this with the use of two barrier sync points.
2760 if (!is_serial) {
2761 // We only need to enter the sync barrier if being called
2762 // from a parallel context
2763 _cm->enter_first_sync_barrier(_worker_id);
2764
2765 // When we exit this sync barrier we know that all tasks have
2766 // stopped doing marking work. So, it's now safe to
2767 // re-initialize our data structures.
2768 }
2769
2770 clear_region_fields();
2771 flush_mark_stats_cache();
2772
2773 if (!is_serial) {
2774 // If we're executing the concurrent phase of marking, reset the marking
2775 // state; otherwise the marking state is reset after reference processing,
2776 // during the remark pause.
2777 // If we reset here as a result of an overflow during the remark we will
2778 // see assertion failures from any subsequent set_concurrency_and_phase()
2779 // calls.
2780 if (_cm->concurrent() && _worker_id == 0) {
2781 // Worker 0 is responsible for clearing the global data structures because
2782 // of an overflow. During STW we should not clear the overflow flag (in
2783 // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2784 // method to abort the pause and restart concurrent marking.
2785 _cm->reset_marking_for_restart();
2786
2787 log_info(gc, marking)("Concurrent Mark reset for overflow");
2788 }
2789
2790 // ...and enter the second barrier.
2791 _cm->enter_second_sync_barrier(_worker_id);
2792 }
2793 }
2794
2795 /*****************************************************************************
2796
2797 The do_marking_step(time_target_ms, ...) method is the building
2798 block of the parallel marking framework. It can be called in parallel
2799 with other invocations of do_marking_step() on different tasks
2800 (but only one per task, obviously) and concurrently with the
2801 mutator threads, or during remark, hence it eliminates the need
2802 for two versions of the code. When called during remark, it will
2803 pick up from where the task left off during the concurrent marking
2804 phase. Interestingly, tasks are also claimable during evacuation
2805 pauses too, since do_marking_step() ensures that it aborts before
2806 it needs to yield.
2807
2808 The data structures that it uses to do marking work are the
2809 following:
2810
2811 (1) Marking Bitmap. If there are grey objects that appear only
2812 on the bitmap (this happens either when dealing with an overflow
2813 or when the concurrent start pause has simply marked the roots
2814 and didn't push them on the stack), then tasks claim heap
2815 regions whose bitmap they then scan to find grey objects. A
2816 global finger indicates where the end of the last claimed region
2817 is. A local finger indicates how far into the region a task has
2818 scanned. The two fingers are used to determine how to grey an
2819 object (i.e. whether simply marking it is OK, as it will be
2820 visited by a task in the future, or whether it needs to be also
2821 pushed on a stack).
2822
2823 (2) Local Queue. The local queue of the task which is accessed
2824 reasonably efficiently by the task. Other tasks can steal from
2825 it when they run out of work. Throughout the marking phase, a
2826 task attempts to keep its local queue short but not totally
2827 empty, so that entries are available for stealing by other
2828 tasks. Only when there is no more work, a task will totally
2829 drain its local queue.
2830
2831 (3) Global Mark Stack. This handles local queue overflow. During
2832 marking only sets of entries are moved between it and the local
2833 queues, as access to it requires a mutex and more fine-grain
2834 interaction with it which might cause contention. If it
2835 overflows, then the marking phase should restart and iterate
2836 over the bitmap to identify grey objects. Throughout the marking
2837 phase, tasks attempt to keep the global mark stack at a small
2838 length but not totally empty, so that entries are available for
2839 popping by other tasks. Only when there is no more work, tasks
2840 will totally drain the global mark stack.
2841
2842 (4) SATB Buffer Queue. This is where completed SATB buffers are
2843 made available. Buffers are regularly removed from this queue
2844 and scanned for roots, so that the queue doesn't get too
2845 long. During remark, all completed buffers are processed, as
2846 well as the filled in parts of any uncompleted buffers.
2847
2848 The do_marking_step() method tries to abort when the time target
2849 has been reached. There are a few other cases when the
2850 do_marking_step() method also aborts:
2851
2852 (1) When the marking phase has been aborted (after a Full GC).
2853
2854 (2) When a global overflow (on the global stack) has been
2855 triggered. Before the task aborts, it will actually sync up with
2856 the other tasks to ensure that all the marking data structures
2857 (local queues, stacks, fingers etc.) are re-initialized so that
2858 when do_marking_step() completes, the marking phase can
2859 immediately restart.
2860
2861 (3) When enough completed SATB buffers are available. The
2862 do_marking_step() method only tries to drain SATB buffers right
2863 at the beginning. So, if enough buffers are available, the
2864 marking step aborts and the SATB buffers are processed at
2865 the beginning of the next invocation.
2866
2867 (4) To yield. when we have to yield then we abort and yield
2868 right at the end of do_marking_step(). This saves us from a lot
2869 of hassle as, by yielding we might allow a Full GC. If this
2870 happens then objects will be compacted underneath our feet, the
2871 heap might shrink, etc. We save checking for this by just
2872 aborting and doing the yield right at the end.
2873
2874 From the above it follows that the do_marking_step() method should
2875 be called in a loop (or, otherwise, regularly) until it completes.
2876
2877 If a marking step completes without its has_aborted() flag being
2878 true, it means it has completed the current marking phase (and
2879 also all other marking tasks have done so and have all synced up).
2880
2881 A method called regular_clock_call() is invoked "regularly" (in
2882 sub ms intervals) throughout marking. It is this clock method that
2883 checks all the abort conditions which were mentioned above and
2884 decides when the task should abort. A work-based scheme is used to
2885 trigger this clock method: when the number of object words the
2886 marking phase has scanned or the number of references the marking
2887 phase has visited reach a given limit. Additional invocations to
2888 the method clock have been planted in a few other strategic places
2889 too. The initial reason for the clock method was to avoid calling
2890 cpu time gathering too regularly, as it is quite expensive. So,
2891 once it was in place, it was natural to piggy-back all the other
2892 conditions on it too and not constantly check them throughout the code.
2893
2894 If do_termination is true then do_marking_step will enter its
2895 termination protocol.
2896
2897 The value of is_serial must be true when do_marking_step is being
2898 called serially (i.e. by the VMThread) and do_marking_step should
2899 skip any synchronization in the termination and overflow code.
2900 Examples include the serial remark code and the serial reference
2901 processing closures.
2902
2903 The value of is_serial must be false when do_marking_step is
2904 being called by any of the worker threads.
2905 Examples include the concurrent marking code (CMMarkingTask),
2906 the MT remark code, and the MT reference processing closures.
2907
2908 *****************************************************************************/
2909
2910 void G1CMTask::do_marking_step(double time_target_ms,
2911 bool do_termination,
2912 bool is_serial) {
2913 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2914
2915 _start_cpu_time_ns = os::current_thread_cpu_time();
2916
2917 // If do_stealing is true then do_marking_step will attempt to
2918 // steal work from the other G1CMTasks. It only makes sense to
2919 // enable stealing when the termination protocol is enabled
2920 // and do_marking_step() is not being called serially.
2921 bool do_stealing = do_termination && !is_serial;
2922
2923 G1Predictions const& predictor = _g1h->policy()->predictor();
2924 double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2925 _time_target_ms = time_target_ms - diff_prediction_ms;
2926
2927 // set up the variables that are used in the work-based scheme to
2928 // call the regular clock method
2929 _words_scanned = 0;
2930 _refs_reached = 0;
2931 recalculate_limits();
2932
2933 // clear all flags
2934 clear_has_aborted();
2935 _has_timed_out = false;
2936 _draining_satb_buffers = false;
2937
2938 ++_calls;
2939
2940 // Set up the bitmap and oop closures. Anything that uses them is
2941 // eventually called from this method, so it is OK to allocate these
2942 // statically.
2943 G1CMBitMapClosure bitmap_closure(this, _cm);
2944 G1CMOopClosure cm_oop_closure(_g1h, this);
2945 set_cm_oop_closure(&cm_oop_closure);
2946
2947 if (_cm->has_overflown()) {
2948 // This can happen if the mark stack overflows during a GC pause
2949 // and this task, after a yield point, restarts. We have to abort
2950 // as we need to get into the overflow protocol which happens
2951 // right at the end of this task.
2952 set_has_aborted();
2953 }
2954
2955 // First drain any available SATB buffers. After this, we will not
2956 // look at SATB buffers before the next invocation of this method.
2957 // If enough completed SATB buffers are queued up, the regular clock
2958 // will abort this task so that it restarts.
2959 drain_satb_buffers();
2960 // ...then partially drain the local queue and the global stack
2961 drain_local_queue(true);
2962 drain_global_stack(true);
2963
2964 do {
2965 process_current_region(bitmap_closure);
2966 // At this point we have either completed iterating over the
2967 // region we were holding on to, or we have aborted.
2968
2969 // We then partially drain the local queue and the global stack.
2970 drain_local_queue(true);
2971 drain_global_stack(true);
2972
2973 claim_new_region();
2974
2975 assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2976 "at this point we should be out of regions");
2977 } while ( _curr_region != nullptr && !has_aborted());
2978
2979 // We cannot check whether the global stack is empty, since other
2980 // tasks might be pushing objects to it concurrently.
2981 assert(has_aborted() || _cm->out_of_regions(),
2982 "at this point we should be out of regions");
2983 // Try to reduce the number of available SATB buffers so that
2984 // remark has less work to do.
2985 drain_satb_buffers();
2986
2987 // Since we've done everything else, we can now totally drain the
2988 // local queue and global stack.
2989 drain_local_queue(false);
2990 drain_global_stack(false);
2991
2992 // Attempt at work stealing from other task's queues.
2993 if (do_stealing && !has_aborted()) {
2994 // We have not aborted. This means that we have finished all that
2995 // we could. Let's try to do some stealing...
2996 attempt_stealing();
2997 }
2998
2999 // We still haven't aborted. Now, let's try to get into the
3000 // termination protocol.
3001 if (do_termination && !has_aborted()) {
3002 attempt_termination(is_serial);
3003 }
3004
3005 // Mainly for debugging purposes to make sure that a pointer to the
3006 // closure which was statically allocated in this frame doesn't
3007 // escape it by accident.
3008 set_cm_oop_closure(nullptr);
3009 jlong end_cpu_time_ns = os::current_thread_cpu_time();
3010 double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
3011 // Update the step history.
3012 _step_times_ms.add(elapsed_time_ms);
3013
3014 if (has_aborted()) {
3015 // The task was aborted for some reason.
3016 handle_abort(is_serial, elapsed_time_ms);
3017 }
3018 }
3019
3020 G1CMTask::G1CMTask(uint worker_id,
3021 G1ConcurrentMark* cm,
3022 G1CMTaskQueue* task_queue,
3023 G1RegionMarkStats* mark_stats) :
3024 _worker_id(worker_id),
3025 _g1h(G1CollectedHeap::heap()),
3026 _cm(cm),
3027 _mark_bitmap(nullptr),
3028 _task_queue(task_queue),
3029 _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
3030 _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
3031 _calls(0),
3032 _time_target_ms(0.0),
3033 _start_cpu_time_ns(0),
3034 _cm_oop_closure(nullptr),
3035 _curr_region(nullptr),
3036 _finger(nullptr),
3037 _region_limit(nullptr),
3038 _words_scanned(0),
3039 _words_scanned_limit(0),
3040 _real_words_scanned_limit(0),
3041 _refs_reached(0),
3042 _refs_reached_limit(0),
3043 _real_refs_reached_limit(0),
3044 _has_aborted(false),
3045 _has_timed_out(false),
3046 _draining_satb_buffers(false),
3047 _step_times_ms(),
3048 _elapsed_time_ms(0.0),
3049 _termination_time_ms(0.0),
3050 _marking_step_diff_ms()
3051 {
3052 guarantee(task_queue != nullptr, "invariant");
3053
3054 _marking_step_diff_ms.add(0.5);
3055 }
3056
3057 // These are formatting macros that are used below to ensure
3058 // consistent formatting. The *_H_* versions are used to format the
3059 // header for a particular value and they should be kept consistent
3060 // with the corresponding macro. Also note that most of the macros add
3061 // the necessary white space (as a prefix) which makes them a bit
3062 // easier to compose.
3063
3064 // All the output lines are prefixed with this string to be able to
3065 // identify them easily in a large log file.
3066 #define G1PPRL_LINE_PREFIX "###"
3067
3068 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
3069 #ifdef _LP64
3070 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
3071 #else // _LP64
3072 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
3073 #endif // _LP64
3074
3075 // For per-region info
3076 #define G1PPRL_TYPE_FORMAT " %-4s"
3077 #define G1PPRL_TYPE_H_FORMAT " %4s"
3078 #define G1PPRL_STATE_FORMAT " %-5s"
3079 #define G1PPRL_STATE_H_FORMAT " %5s"
3080 #define G1PPRL_BYTE_FORMAT " %9zu"
3081 #define G1PPRL_BYTE_H_FORMAT " %9s"
3082 #define G1PPRL_DOUBLE_FORMAT "%14.1f"
3083 #define G1PPRL_GCEFF_H_FORMAT " %14s"
3084 #define G1PPRL_GID_H_FORMAT " %9s"
3085 #define G1PPRL_GID_FORMAT " " UINT32_FORMAT_W(9)
3086 #define G1PPRL_LEN_FORMAT " " UINT32_FORMAT_W(14)
3087 #define G1PPRL_LEN_H_FORMAT " %14s"
3088 #define G1PPRL_GID_GCEFF_FORMAT " %14.1f"
3089 #define G1PPRL_GID_LIVENESS_FORMAT " %9.2f"
3090
3091 // For summary info
3092 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
3093 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": %zu"
3094 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
3095 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3096
3097 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
3098 _total_used_bytes(0),
3099 _total_capacity_bytes(0),
3100 _total_live_bytes(0),
3101 _total_remset_bytes(0),
3102 _total_code_roots_bytes(0)
3103 {
3104 if (!log_is_enabled(Trace, gc, liveness)) {
3105 return;
3106 }
3107
3108 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3109 MemRegion reserved = g1h->reserved();
3110 double now = os::elapsedTime();
3111
3112 // Print the header of the output.
3113 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3114 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3115 G1PPRL_SUM_ADDR_FORMAT("reserved")
3116 G1PPRL_SUM_BYTE_FORMAT("region-size"),
3117 p2i(reserved.start()), p2i(reserved.end()),
3118 G1HeapRegion::GrainBytes);
3119 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3120 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3121 G1PPRL_TYPE_H_FORMAT
3122 G1PPRL_ADDR_BASE_H_FORMAT
3123 G1PPRL_BYTE_H_FORMAT
3124 G1PPRL_BYTE_H_FORMAT
3125 G1PPRL_STATE_H_FORMAT
3126 G1PPRL_BYTE_H_FORMAT
3127 G1PPRL_GID_H_FORMAT,
3128 "type", "address-range",
3129 "used", "live",
3130 "state", "code-roots",
3131 "group-id");
3132 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3133 G1PPRL_TYPE_H_FORMAT
3134 G1PPRL_ADDR_BASE_H_FORMAT
3135 G1PPRL_BYTE_H_FORMAT
3136 G1PPRL_BYTE_H_FORMAT
3137 G1PPRL_STATE_H_FORMAT
3138 G1PPRL_BYTE_H_FORMAT
3139 G1PPRL_GID_H_FORMAT,
3140 "", "",
3141 "(bytes)", "(bytes)",
3142 "", "(bytes)", "");
3143 }
3144
3145 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3146 if (!log_is_enabled(Trace, gc, liveness)) {
3147 return false;
3148 }
3149
3150 const char* type = r->get_type_str();
3151 HeapWord* bottom = r->bottom();
3152 HeapWord* end = r->end();
3153 size_t capacity_bytes = r->capacity();
3154 size_t used_bytes = r->used();
3155 size_t live_bytes = r->live_bytes();
3156 size_t remset_bytes = r->rem_set()->mem_size();
3157 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3158 const char* remset_type = r->rem_set()->get_short_state_str();
3159 uint cset_group_id = r->rem_set()->has_cset_group()
3160 ? r->rem_set()->cset_group_id()
3161 : G1CSetCandidateGroup::NoRemSetId;
3162
3163 _total_used_bytes += used_bytes;
3164 _total_capacity_bytes += capacity_bytes;
3165 _total_live_bytes += live_bytes;
3166 _total_remset_bytes += remset_bytes;
3167 _total_code_roots_bytes += code_roots_bytes;
3168
3169 // Print a line for this particular region.
3170 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3171 G1PPRL_TYPE_FORMAT
3172 G1PPRL_ADDR_BASE_FORMAT
3173 G1PPRL_BYTE_FORMAT
3174 G1PPRL_BYTE_FORMAT
3175 G1PPRL_STATE_FORMAT
3176 G1PPRL_BYTE_FORMAT
3177 G1PPRL_GID_FORMAT,
3178 type, p2i(bottom), p2i(end),
3179 used_bytes, live_bytes,
3180 remset_type, code_roots_bytes,
3181 cset_group_id);
3182
3183 return false;
3184 }
3185
3186 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3187 if (!log_is_enabled(Trace, gc, liveness)) {
3188 return;
3189 }
3190
3191 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3192 _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3193 // add static memory usages to remembered set sizes
3194 _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3195
3196 log_cset_candidate_groups();
3197
3198 // Print the footer of the output.
3199 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3200 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3201 " SUMMARY"
3202 G1PPRL_SUM_MB_FORMAT("capacity")
3203 G1PPRL_SUM_MB_PERC_FORMAT("used")
3204 G1PPRL_SUM_MB_PERC_FORMAT("live")
3205 G1PPRL_SUM_MB_FORMAT("remset")
3206 G1PPRL_SUM_MB_FORMAT("code-roots"),
3207 bytes_to_mb(_total_capacity_bytes),
3208 bytes_to_mb(_total_used_bytes),
3209 percent_of(_total_used_bytes, _total_capacity_bytes),
3210 bytes_to_mb(_total_live_bytes),
3211 percent_of(_total_live_bytes, _total_capacity_bytes),
3212 bytes_to_mb(_total_remset_bytes),
3213 bytes_to_mb(_total_code_roots_bytes));
3214 }
3215
3216 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3217 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3218 G1PPRL_GID_FORMAT
3219 G1PPRL_LEN_FORMAT
3220 G1PPRL_GID_GCEFF_FORMAT
3221 G1PPRL_GID_LIVENESS_FORMAT
3222 G1PPRL_BYTE_FORMAT
3223 G1PPRL_TYPE_H_FORMAT,
3224 group->group_id(),
3225 group->length(),
3226 group->length() > 0 ? group->gc_efficiency() : 0.0,
3227 group->length() > 0 ? group->liveness_percent() : 0.0,
3228 group->card_set()->mem_size(),
3229 type);
3230 _total_remset_bytes += group->card_set()->mem_size();
3231 }
3232
3233 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3234 for (G1CSetCandidateGroup* group : gl) {
3235 log_cset_candidate_group_add_total(group, type);
3236 }
3237 }
3238
3239 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3240 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3241 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3242 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3243 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3244 G1PPRL_GID_H_FORMAT
3245 G1PPRL_LEN_H_FORMAT
3246 G1PPRL_GCEFF_H_FORMAT
3247 G1PPRL_BYTE_H_FORMAT
3248 G1PPRL_BYTE_H_FORMAT
3249 G1PPRL_TYPE_H_FORMAT,
3250 "groud-id", "num-regions",
3251 "gc-eff", "liveness",
3252 "remset", "type");
3253
3254 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3255 G1PPRL_GID_H_FORMAT
3256 G1PPRL_LEN_H_FORMAT
3257 G1PPRL_GCEFF_H_FORMAT
3258 G1PPRL_BYTE_H_FORMAT
3259 G1PPRL_BYTE_H_FORMAT
3260 G1PPRL_TYPE_H_FORMAT,
3261 "", "",
3262 "(bytes/ms)", "%",
3263 "(bytes)", "");
3264
3265 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3266
3267 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3268
3269 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3270 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3271 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3272 }