1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectorState.inline.hpp"
34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
38 #include "gc/g1/g1ConcurrentRefine.hpp"
39 #include "gc/g1/g1HeapRegion.inline.hpp"
40 #include "gc/g1/g1HeapRegionManager.hpp"
41 #include "gc/g1/g1HeapRegionPrinter.hpp"
42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
44 #include "gc/g1/g1HeapVerifier.hpp"
45 #include "gc/g1/g1OopClosures.inline.hpp"
46 #include "gc/g1/g1Policy.hpp"
47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
48 #include "gc/g1/g1ThreadLocalData.hpp"
49 #include "gc/g1/g1Trace.hpp"
50 #include "gc/shared/gcId.hpp"
51 #include "gc/shared/gcTimer.hpp"
52 #include "gc/shared/gcTraceTime.inline.hpp"
53 #include "gc/shared/gcVMOperations.hpp"
54 #include "gc/shared/partialArraySplitter.inline.hpp"
55 #include "gc/shared/partialArrayState.hpp"
56 #include "gc/shared/partialArrayTaskStats.hpp"
57 #include "gc/shared/referencePolicy.hpp"
58 #include "gc/shared/suspendibleThreadSet.hpp"
59 #include "gc/shared/taskqueue.inline.hpp"
60 #include "gc/shared/taskTerminator.hpp"
61 #include "gc/shared/weakProcessor.inline.hpp"
62 #include "gc/shared/workerPolicy.hpp"
63 #include "jvm.h"
64 #include "logging/log.hpp"
65 #include "memory/allocation.hpp"
66 #include "memory/iterator.hpp"
67 #include "memory/metaspaceUtils.hpp"
68 #include "memory/resourceArea.hpp"
69 #include "memory/universe.hpp"
70 #include "nmt/memTracker.hpp"
71 #include "oops/access.inline.hpp"
72 #include "oops/oop.inline.hpp"
73 #include "runtime/globals_extension.hpp"
74 #include "runtime/handles.inline.hpp"
75 #include "runtime/java.hpp"
76 #include "runtime/orderAccess.hpp"
77 #include "runtime/os.hpp"
78 #include "runtime/prefetch.inline.hpp"
79 #include "runtime/threads.hpp"
80 #include "utilities/align.hpp"
81 #include "utilities/checkedCast.hpp"
82 #include "utilities/formatBuffer.hpp"
83 #include "utilities/growableArray.hpp"
84 #include "utilities/powerOfTwo.hpp"
85
86 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
87
88 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
89 assert(cm != nullptr, "must be");
90 }
91
92 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
93 assert(cm != nullptr, "must be");
94 assert(_cm == nullptr, "double initialize");
95 _cm = cm;
96 }
97
98 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
99 assert(addr < _cm->finger(), "invariant");
100 assert(addr >= _task->finger(), "invariant");
101
102 // We move that task's local finger along.
103 _task->move_finger_to(addr);
104
105 _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
106 // we only partially drain the local queue and global stack
107 _task->drain_local_queue(true);
108 _task->drain_global_stack(true);
109
110 // if the has_aborted flag has been raised, we need to bail out of
111 // the iteration
112 return !_task->has_aborted();
113 }
114
115 G1CMMarkStack::G1CMMarkStack() :
116 _chunk_allocator() {
117 set_empty();
118 }
119
120 size_t G1CMMarkStack::capacity_alignment() {
121 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
122 }
123
124 bool G1CMMarkStack::initialize() {
125 guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
126
127 size_t initial_capacity = MarkStackSize;
128 size_t max_capacity = MarkStackSizeMax;
129
130 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
131
132 size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
133 size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
134
135 initial_num_chunks = round_up_power_of_2(initial_num_chunks);
136 max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
137
138 size_t limit = (INT_MAX - 1);
139 max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
140 initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
141
142 FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
143 FLAG_SET_ERGO(MarkStackSize, initial_capacity);
144
145 log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
146
147 log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
148 initial_num_chunks, max_capacity);
149
150 return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
151 }
152
153 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
154 if (_size.load_relaxed() >= _max_capacity) {
155 return nullptr;
156 }
157
158 size_t cur_idx = _size.fetch_then_add(1u);
159
160 if (cur_idx >= _max_capacity) {
161 return nullptr;
162 }
163
164 size_t bucket = get_bucket(cur_idx);
165 if (_buckets[bucket].load_acquire() == nullptr) {
166 if (!_should_grow) {
167 // Prefer to restart the CM.
168 return nullptr;
169 }
170
171 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
172 if (_buckets[bucket].load_acquire() == nullptr) {
173 size_t desired_capacity = bucket_size(bucket) * 2;
174 if (!try_expand_to(desired_capacity)) {
175 return nullptr;
176 }
177 }
178 }
179
180 size_t bucket_idx = get_bucket_index(cur_idx);
181 TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
182 result->next = nullptr;
183 return result;
184 }
185
186 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
187 _min_capacity(0),
188 _max_capacity(0),
189 _capacity(0),
190 _num_buckets(0),
191 _should_grow(false),
192 _buckets(nullptr),
193 _size(0)
194 { }
195
196 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
197 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
198
199 _min_capacity = initial_capacity;
200 _max_capacity = max_capacity;
201 _num_buckets = get_bucket(_max_capacity) + 1;
202
203 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
204
205 for (size_t i = 0; i < _num_buckets; i++) {
206 _buckets[i].store_relaxed(nullptr);
207 }
208
209 size_t new_capacity = bucket_size(0);
210
211 if (!reserve(new_capacity)) {
212 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
213 return false;
214 }
215 return true;
216 }
217
218 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
219 if (_capacity == _max_capacity) {
220 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
221 return false;
222 }
223
224 size_t old_capacity = _capacity;
225 desired_capacity = MIN2(desired_capacity, _max_capacity);
226
227 if (reserve(desired_capacity)) {
228 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
229 old_capacity, desired_capacity);
230 return true;
231 }
232 return false;
233 }
234
235 bool G1CMMarkStack::ChunkAllocator::try_expand() {
236 size_t new_capacity = _capacity * 2;
237 return try_expand_to(new_capacity);
238 }
239
240 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
241 if (_buckets == nullptr) {
242 return;
243 }
244
245 for (size_t i = 0; i < _num_buckets; i++) {
246 if (_buckets[i].load_relaxed() != nullptr) {
247 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
248 _buckets[i].store_relaxed(nullptr);
249 }
250 }
251
252 FREE_C_HEAP_ARRAY(_buckets);
253 }
254
255 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
256 assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
257
258 size_t highest_bucket = get_bucket(new_capacity - 1);
259 size_t i = get_bucket(_capacity);
260
261 // Allocate all buckets associated with indexes between the current capacity (_capacity)
262 // and the new capacity (new_capacity). This step ensures that there are no gaps in the
263 // array and that the capacity accurately reflects the reserved memory.
264 for (; i <= highest_bucket; i++) {
265 if (_buckets[i].load_acquire() != nullptr) {
266 continue; // Skip over already allocated buckets.
267 }
268
269 size_t bucket_capacity = bucket_size(i);
270
271 // Trim bucket size so that we do not exceed the _max_capacity.
272 bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
273 bucket_capacity :
274 _max_capacity - _capacity;
275
276
277 TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
278
279 if (bucket_base == nullptr) {
280 log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
281 bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
282 return false;
283 }
284 _capacity += bucket_capacity;
285 _buckets[i].release_store(bucket_base);
286 }
287 return true;
288 }
289
290 void G1CMMarkStack::expand() {
291 _chunk_allocator.try_expand();
292 }
293
294 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
295 elem->next = list->load_relaxed();
296 list->store_relaxed(elem);
297 }
298
299 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
300 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
301 add_chunk_to_list(&_chunk_list, elem);
302 _chunks_in_chunk_list++;
303 }
304
305 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
306 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
307 add_chunk_to_list(&_free_list, elem);
308 }
309
310 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
311 TaskQueueEntryChunk* result = list->load_relaxed();
312 if (result != nullptr) {
313 list->store_relaxed(list->load_relaxed()->next);
314 }
315 return result;
316 }
317
318 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
319 MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
320 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
321 if (result != nullptr) {
322 _chunks_in_chunk_list--;
323 }
324 return result;
325 }
326
327 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
328 MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
329 return remove_chunk_from_list(&_free_list);
330 }
331
332 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
333 // Get a new chunk.
334 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
335
336 if (new_chunk == nullptr) {
337 // Did not get a chunk from the free list. Allocate from backing memory.
338 new_chunk = _chunk_allocator.allocate_new_chunk();
339
340 if (new_chunk == nullptr) {
341 return false;
342 }
343 }
344
345 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
346
347 add_chunk_to_chunk_list(new_chunk);
348
349 return true;
350 }
351
352 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
353 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
354
355 if (cur == nullptr) {
356 return false;
357 }
358
359 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
360
361 add_chunk_to_free_list(cur);
362 return true;
363 }
364
365 void G1CMMarkStack::set_empty() {
366 _chunks_in_chunk_list = 0;
367 _chunk_list.store_relaxed(nullptr);
368 _free_list.store_relaxed(nullptr);
369 _chunk_allocator.reset();
370 }
371
372 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
373 _root_regions(MemRegion::create_array(max_regions, mtGC)),
374 _max_regions(max_regions),
375 _num_regions(0),
376 _num_claimed_regions(0) { }
377
378 G1CMRootMemRegions::~G1CMRootMemRegions() {
379 MemRegion::destroy_array(_root_regions, _max_regions);
380 }
381
382 void G1CMRootMemRegions::reset() {
383 assert_at_safepoint();
384 assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
385
386 _num_regions.store_relaxed(0);
387 _num_claimed_regions.store_relaxed(0);
388 }
389
390 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
391 assert_at_safepoint();
392 uint idx = _num_regions.fetch_then_add(1u);
393 assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
394 assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
395 "end (" PTR_FORMAT ")", p2i(start), p2i(end));
396 _root_regions[idx].set_start(start);
397 _root_regions[idx].set_end(end);
398 }
399
400 const MemRegion* G1CMRootMemRegions::claim_next() {
401 uint local_num_regions = num_regions();
402 if (num_claimed_regions() >= local_num_regions) {
403 return nullptr;
404 }
405
406 uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
407 if (claimed_index < local_num_regions) {
408 return &_root_regions[claimed_index];
409 }
410 return nullptr;
411 }
412
413 bool G1CMRootMemRegions::work_completed() const {
414 return num_remaining_regions() == 0;
415 }
416
417 uint G1CMRootMemRegions::num_remaining_regions() const {
418 uint total = num_regions();
419 uint claimed = num_claimed_regions();
420 return (total > claimed) ? total - claimed : 0;
421 }
422
423 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
424 uint local_num_root_regions = num_regions();
425 for (uint i = 0; i < local_num_root_regions; i++) {
426 if (_root_regions[i].equals(mr)) {
427 return true;
428 }
429 }
430 return false;
431 }
432
433 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
434 G1RegionToSpaceMapper* bitmap_storage) :
435 _cm_thread(nullptr),
436 _g1h(g1h),
437
438 _mark_bitmap(),
439
440 _heap(_g1h->reserved()),
441
442 _root_regions(_g1h->max_num_regions()),
443 _root_region_scan_aborted(false),
444
445 _global_mark_stack(),
446
447 _finger(nullptr), // _finger set in set_non_marking_state
448
449 _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
450 _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
451 _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
452 _tasks(nullptr),
453 _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
454 _terminator(_max_num_tasks, _task_queues),
455 _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
456
457 _first_overflow_barrier_sync(),
458 _second_overflow_barrier_sync(),
459
460 _completed_mark_cycles(0),
461 _has_overflown(false),
462 _concurrent(false),
463 _has_aborted(false),
464 _restart_for_overflow(false),
465 _gc_timer_cm(new ConcurrentGCTimer()),
466 _gc_tracer_cm(new G1OldTracer()),
467
468 // _verbose_level set below
469
470 _remark_times(),
471 _remark_mark_times(),
472 _remark_weak_ref_times(),
473 _cleanup_times(),
474
475 _concurrent_workers(nullptr),
476 _num_concurrent_workers(0),
477 _max_concurrent_workers(0),
478
479 _is_region_mark_stats_cache_in_use(false),
480 _region_mark_stats(nullptr),
481 _top_at_mark_starts(nullptr),
482 _top_at_rebuild_starts(nullptr),
483 _needs_remembered_set_rebuild(false)
484 {
485 assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
486
487 _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
488 }
489
490 void G1ConcurrentMark::fully_initialize() {
491 assert_at_safepoint();
492
493 if (is_fully_initialized()) {
494 return;
495 }
496
497 // Create & start ConcurrentMark thread.
498 _cm_thread = new G1ConcurrentMarkThread(this);
499 if (_cm_thread->osthread() == nullptr) {
500 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
501 }
502
503 log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
504 log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
505
506 _max_concurrent_workers = ConcGCThreads;
507
508 _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
509 _concurrent_workers->initialize_workers();
510 _num_concurrent_workers = _concurrent_workers->active_workers();
511
512 if (!_global_mark_stack.initialize()) {
513 vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
514 }
515
516 _region_mark_stats = NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC);
517 _top_at_mark_starts = NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC);
518 _top_at_rebuild_starts = NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC);
519
520 _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
521
522 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
523 _num_active_tasks = _max_num_tasks;
524
525 for (uint i = 0; i < _max_num_tasks; ++i) {
526 G1CMTaskQueue* task_queue = new G1CMTaskQueue();
527 _task_queues->register_queue(i, task_queue);
528
529 _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
530 }
531
532 uint max_num_regions = _g1h->max_num_regions();
533 ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
534 for (uint i = 0; i < max_num_regions; i++) {
535 ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>(_g1h->bottom_addr_for_region(i));
536 }
537 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
538
539 reset_at_marking_complete();
540 }
541
542 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
543 return _cm_thread->is_in_progress();
544 }
545
546 bool G1ConcurrentMark::is_in_marking() const {
547 return _cm_thread->is_in_marking();
548 }
549
550 bool G1ConcurrentMark::is_in_marking_or_rebuild() const {
551 return _cm_thread->is_in_marking_or_rebuild();
552 }
553
554 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
555 return cm_thread()->is_in_reset_for_next_cycle();
556 }
557
558 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
559 return _partial_array_state_manager;
560 }
561
562 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
563 assert(is_fully_initialized(), "must be");
564 return _cm_thread;
565 }
566
567 void G1ConcurrentMark::reset() {
568 assert_fully_initialized();
569
570 _has_aborted.store_relaxed(false);
571
572 _is_region_mark_stats_cache_in_use = true;
573 reset_marking_for_restart();
574
575 // Reset all tasks, since different phases will use different number of active
576 // threads. So, it's easiest to have all of them ready.
577 for (uint i = 0; i < _max_num_tasks; ++i) {
578 _tasks[i]->reset(mark_bitmap());
579 }
580
581 uint max_num_regions = _g1h->max_num_regions();
582 ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
583 for (uint i = 0; i < max_num_regions; i++) {
584 // Do not update TAMS here. NoteStartOfMarkTask updates this in parallel in
585 // the pre-concurrent-start WorkerTask.
586 _top_at_rebuild_starts[i].store_relaxed(nullptr);
587 _region_mark_stats[i].clear();
588 }
589
590 _root_region_scan_aborted.store_relaxed(false);
591 _root_regions.reset();
592 }
593
594 void G1ConcurrentMark::assert_statistics_clear(G1HeapRegion* r) {
595 assert_fully_initialized();
596 #ifdef ASSERT
597 uint region_idx = r->hrm_index();
598 for (uint j = 0; j < _max_num_tasks; ++j) {
599 _tasks[j]->verify_no_mark_stats_for(r->hrm_index());
600 }
601
602 assert(_top_at_rebuild_starts[region_idx].load_relaxed() == nullptr, "must be");
603
604 G1RegionMarkStats* s = &_region_mark_stats[region_idx];
605 assert(s->incoming_refs() == 0, "must be");
606 assert(s->live_words() == 0, "must be");
607 #endif
608 }
609
610 void G1ConcurrentMark::note_start_of_mark_for_region(G1HeapRegion* r) {
611 assert_at_safepoint();
612 assert_fully_initialized();
613 if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
614 update_top_at_mark_start(r);
615 } else {
616 set_top_at_mark_start_to_bottom(r);
617 }
618 }
619
620 void G1ConcurrentMark::notify_new_region(G1HeapRegion* r, size_t marked_live_bytes_below_tams) {
621 assert_at_safepoint();
622 if (!is_fully_initialized()) {
623 return;
624 }
625 G1CollectorState* state = _g1h->collector_state();
626 if (state->is_in_concurrent_start_gc()) {
627 update_top_at_mark_start(r);
628 set_live_bytes(r->hrm_index(), marked_live_bytes_below_tams);
629 }
630 }
631
632 void G1ConcurrentMark::reset_region_marking_state(G1HeapRegion* r) {
633 assert_at_safepoint();
634 if (!is_fully_initialized()) {
635 return;
636 }
637 uint region_idx = r->hrm_index();
638 // Only need to clear the stats cache for the given region if we are using the cache.
639 if (_is_region_mark_stats_cache_in_use) {
640 for (uint j = 0; j < _max_num_tasks; ++j) {
641 _tasks[j]->clear_mark_stats_cache(region_idx);
642 }
643 } else {
644 for (uint j = 0; j < _max_num_tasks; ++j) {
645 _tasks[j]->verify_no_mark_stats_for(region_idx);
646 }
647 }
648 set_top_at_mark_start_to_bottom(r);
649 _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
650 _region_mark_stats[region_idx].clear();
651 }
652
653 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
654 assert_at_safepoint();
655 assert(r->is_starts_humongous(), "Got humongous continues region here");
656
657 // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
658 mark_bitmap()->clear(r->bottom());
659 }
660
661 void G1ConcurrentMark::reset_marking_for_restart() {
662 assert_fully_initialized();
663
664 _global_mark_stack.set_empty();
665
666 // Expand the marking stack, if we have to and if we can.
667 if (has_overflown()) {
668 _global_mark_stack.expand();
669
670 uint max_num_regions = _g1h->max_num_regions();
671 for (uint i = 0; i < max_num_regions; i++) {
672 _region_mark_stats[i].clear_during_overflow();
673 }
674 }
675
676 clear_has_overflown();
677 _finger.store_relaxed(_heap.start());
678
679 for (uint i = 0; i < _max_num_tasks; ++i) {
680 _tasks[i]->reset_for_restart();
681 }
682 }
683
684 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
685 assert(active_tasks <= _max_num_tasks, "we should not have more");
686
687 _num_active_tasks = active_tasks;
688 // Need to update the three data structures below according to the
689 // number of active threads for this phase.
690 _terminator.reset_for_reuse(active_tasks);
691 _first_overflow_barrier_sync.set_n_workers(active_tasks);
692 _second_overflow_barrier_sync.set_n_workers(active_tasks);
693 }
694
695 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
696 set_concurrency(active_tasks);
697
698 _concurrent.store_relaxed(concurrent);
699
700 if (!concurrent) {
701 // At this point we should be in a STW phase, and completed marking.
702 assert_at_safepoint_on_vm_thread();
703 assert(out_of_regions(),
704 "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
705 p2i(finger()), p2i(_heap.end()));
706 }
707 }
708
709 #if TASKQUEUE_STATS
710 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
711
712 _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
713
714 auto get_pa_stats = [&](uint i) {
715 return _tasks[i]->partial_array_task_stats();
716 };
717
718 PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
719 "Concurrent Mark Partial Array");
720
721 for (uint i = 0; i < _max_num_tasks; ++i) {
722 get_pa_stats(i)->reset();
723 }
724 }
725 #endif
726
727 void G1ConcurrentMark::reset_at_marking_complete() {
728 TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
729 // We set the global marking state to some default values when we're
730 // not doing marking.
731 reset_marking_for_restart();
732 _num_active_tasks = 0;
733 }
734
735 G1ConcurrentMark::~G1ConcurrentMark() {
736 FREE_C_HEAP_ARRAY(_top_at_mark_starts);
737 FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
738 FREE_C_HEAP_ARRAY(_region_mark_stats);
739 // The G1ConcurrentMark instance is never freed.
740 ShouldNotReachHere();
741 }
742
743 class G1ClearBitMapTask : public WorkerTask {
744 public:
745 static size_t chunk_size() { return M; }
746
747 private:
748 // Heap region closure used for clearing the _mark_bitmap.
749 class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
750 G1ConcurrentMark* _cm;
751 G1CMBitMap* _bitmap;
752 bool _suspendible; // If suspendible, do yield checks.
753
754 bool suspendible() {
755 return _suspendible;
756 }
757
758 bool is_clear_concurrent_undo() {
759 return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
760 }
761
762 bool has_aborted() {
763 if (suspendible()) {
764 _cm->do_yield_check();
765 return _cm->has_aborted();
766 }
767 return false;
768 }
769
770 HeapWord* region_clear_limit(G1HeapRegion* r) {
771 // A garbage collection might have made the region unavailable after a yield during
772 // clearing. Just return bottom as the limit, causing the clearing for this region to end.
773 if (G1CollectedHeap::heap()->region_at_or_null(r->hrm_index()) == nullptr) {
774 return r->bottom();
775 }
776 // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
777 // live_words data are current wrt to the _mark_bitmap. We use this information
778 // to only clear ranges of the bitmap that require clearing.
779 if (is_clear_concurrent_undo()) {
780 // No need to clear bitmaps for empty regions (which includes regions we
781 // did not mark through).
782 if (!_cm->contains_live_object(r->hrm_index())) {
783 assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
784 return r->bottom();
785 }
786 }
787 return r->end();
788 }
789
790 public:
791 G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
792 G1HeapRegionClosure(),
793 _cm(cm),
794 _bitmap(cm->mark_bitmap()),
795 _suspendible(suspendible)
796 { }
797
798 virtual bool do_heap_region(G1HeapRegion* r) {
799 if (has_aborted()) {
800 return true;
801 }
802
803 HeapWord* cur = r->bottom();
804 HeapWord* end = region_clear_limit(r);
805
806 size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
807
808 while (cur < end) {
809
810 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
811 _bitmap->clear_range(mr);
812
813 cur += chunk_size_in_words;
814
815 // Repeat the asserts from before the start of the closure. We will do them
816 // as asserts here to minimize their overhead on the product. However, we
817 // will have them as guarantees at the beginning / end of the bitmap
818 // clearing to get some checking in the product.
819 assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
820
821 // Abort iteration if necessary.
822 if (suspendible() && _cm->do_yield_check()) {
823 if (_cm->has_aborted()) {
824 return true;
825 }
826 // Re-read end. The region might have been uncommitted.
827 end = region_clear_limit(r);
828 }
829 }
830 assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
831
832 return false;
833 }
834 };
835
836 G1ClearBitmapHRClosure _cl;
837 G1HeapRegionClaimer _hr_claimer;
838 bool _suspendible; // If the task is suspendible, workers must join the STS.
839
840 public:
841 G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
842 WorkerTask("G1 Clear Bitmap"),
843 _cl(cm, suspendible),
844 _hr_claimer(n_workers),
845 _suspendible(suspendible)
846 { }
847
848 void work(uint worker_id) {
849 SuspendibleThreadSetJoiner sts_join(_suspendible);
850 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
851 }
852 };
853
854 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
855 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
856
857 size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
858 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
859
860 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
861
862 G1ClearBitMapTask cl(this, num_workers, may_yield);
863
864 log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
865 workers->run_task(&cl, num_workers);
866 }
867
868 void G1ConcurrentMark::cleanup_for_next_mark() {
869 // Make sure that the concurrent mark thread looks to still be in
870 // the current cycle.
871 guarantee(is_in_reset_for_next_cycle(), "invariant");
872
873 clear_bitmap(_concurrent_workers, true);
874
875 reset_partial_array_state_manager();
876
877 // Should not have changed state yet (even if a Full GC interrupted us).
878 guarantee(is_in_reset_for_next_cycle(), "invariant");
879 }
880
881 void G1ConcurrentMark::reset_partial_array_state_manager() {
882 for (uint i = 0; i < _max_num_tasks; ++i) {
883 _tasks[i]->unregister_partial_array_splitter();
884 }
885
886 partial_array_state_manager()->reset();
887
888 for (uint i = 0; i < _max_num_tasks; ++i) {
889 _tasks[i]->register_partial_array_splitter();
890 }
891 }
892
893 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
894 assert_at_safepoint_on_vm_thread();
895 // To avoid fragmentation the full collection requesting to clear the bitmap
896 // might use fewer workers than available. To ensure the bitmap is cleared
897 // as efficiently as possible the number of active workers are temporarily
898 // increased to include all currently created workers.
899 WithActiveWorkers update(workers, workers->created_workers());
900 clear_bitmap(workers, false);
901 }
902
903 class G1PreConcurrentStartTask : public G1BatchedTask {
904 // Reset marking state.
905 class ResetMarkingStateTask;
906 // For each region note start of marking.
907 class NoteStartOfMarkTask;
908
909 public:
910 G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
911 };
912
913 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
914 G1ConcurrentMark* _cm;
915 public:
916 ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
917
918 double worker_cost() const override { return 1.0; }
919 void do_work(uint worker_id) override;
920 };
921
922 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
923
924 class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
925 G1ConcurrentMark* _cm;
926
927 public:
928 NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
929
930 bool do_heap_region(G1HeapRegion* r) override {
931 _cm->note_start_of_mark_for_region(r);
932 return false;
933 }
934 } _region_cl;
935
936 G1HeapRegionClaimer _claimer;
937 public:
938 NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
939
940 double worker_cost() const override {
941 // The work done per region is very small, therefore we choose this magic number to cap the number
942 // of threads used when there are few regions.
943 const double regions_per_thread = 1000;
944 return _claimer.n_regions() / regions_per_thread;
945 }
946
947 void set_max_workers(uint max_workers) override {
948 _claimer.set_n_workers(max_workers);
949 }
950
951 void do_work(uint worker_id) override {
952 G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
953 }
954 };
955
956 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
957 // Reset marking state.
958 _cm->reset();
959 }
960
961 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
962 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
963 add_serial_task(new ResetMarkingStateTask(cm));
964 add_parallel_task(new NoteStartOfMarkTask());
965 };
966
967 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
968 assert_at_safepoint_on_vm_thread();
969
970 G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
971
972 ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
973
974 G1PreConcurrentStartTask cl(cause, this);
975 G1CollectedHeap::heap()->run_batch_task(&cl);
976
977 _gc_tracer_cm->set_gc_cause(cause);
978 }
979
980 void G1ConcurrentMark::start_full_concurrent_cycle() {
981 // Start Concurrent Marking weak-reference discovery.
982 ReferenceProcessor* rp = _g1h->ref_processor_cm();
983 rp->start_discovery(false /* always_clear */);
984
985 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
986 // This is the start of the marking cycle, we're expected all
987 // threads to have SATB queues with active set to false.
988 satb_mq_set.set_active_all_threads(true, /* new active value */
989 false /* expected_active */);
990
991 // update_g1_committed() will be called at the end of an evac pause
992 // when marking is on. So, it's also called at the end of the
993 // concurrent start pause to update the heap end, if the heap expands
994 // during it. No need to call it here.
995
996 // Signal the thread to start work.
997 cm_thread()->start_full_cycle();
998 }
999
1000 void G1ConcurrentMark::start_undo_concurrent_cycle() {
1001 assert_at_safepoint_on_vm_thread();
1002 // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
1003 assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
1004
1005 abort_root_region_scan_at_safepoint();
1006
1007 // Signal the thread to start work.
1008 cm_thread()->start_undo_cycle();
1009 }
1010
1011 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
1012 cm_thread()->set_idle();
1013 }
1014
1015 void G1ConcurrentMark::stop() {
1016 if (is_fully_initialized()) {
1017 cm_thread()->stop();
1018 }
1019 }
1020
1021 /*
1022 * Notice that in the next two methods, we actually leave the STS
1023 * during the barrier sync and join it immediately afterwards. If we
1024 * do not do this, the following deadlock can occur: one thread could
1025 * be in the barrier sync code, waiting for the other thread to also
1026 * sync up, whereas another one could be trying to yield, while also
1027 * waiting for the other threads to sync up too.
1028 *
1029 * Note, however, that this code is also used during remark and in
1030 * this case we should not attempt to leave / enter the STS, otherwise
1031 * we'll either hit an assert (debug / fastdebug) or deadlock
1032 * (product). So we should only leave / enter the STS if we are
1033 * operating concurrently.
1034 *
1035 * Because the thread that does the sync barrier has left the STS, it
1036 * is possible to be suspended for a Full GC or an evacuation pause
1037 * could occur. This is actually safe, since the entering the sync
1038 * barrier is one of the last things do_marking_step() does, and it
1039 * doesn't manipulate any data structures afterwards.
1040 */
1041
1042 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
1043 bool barrier_aborted;
1044 {
1045 SuspendibleThreadSetLeaver sts_leave(concurrent());
1046 barrier_aborted = !_first_overflow_barrier_sync.enter();
1047 }
1048
1049 // at this point everyone should have synced up and not be doing any
1050 // more work
1051
1052 if (barrier_aborted) {
1053 // If the barrier aborted we ignore the overflow condition and
1054 // just abort the whole marking phase as quickly as possible.
1055 return;
1056 }
1057 }
1058
1059 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1060 SuspendibleThreadSetLeaver sts_leave(concurrent());
1061 _second_overflow_barrier_sync.enter();
1062
1063 // at this point everything should be re-initialized and ready to go
1064 }
1065
1066 class G1CMConcurrentMarkingTask : public WorkerTask {
1067 G1ConcurrentMark* _cm;
1068
1069 public:
1070 void work(uint worker_id) {
1071 ResourceMark rm;
1072
1073 SuspendibleThreadSetJoiner sts_join;
1074
1075 assert(worker_id < _cm->active_tasks(), "invariant");
1076
1077 G1CMTask* task = _cm->task(worker_id);
1078 task->record_start_time();
1079 if (!_cm->has_aborted()) {
1080 do {
1081 task->do_marking_step(G1ConcMarkStepDurationMillis,
1082 true /* do_termination */,
1083 false /* is_serial*/);
1084
1085 _cm->do_yield_check();
1086 } while (!_cm->has_aborted() && task->has_aborted());
1087 }
1088 task->record_end_time();
1089 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1090 }
1091
1092 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1093 WorkerTask("Concurrent Mark"), _cm(cm) { }
1094
1095 ~G1CMConcurrentMarkingTask() { }
1096 };
1097
1098 uint G1ConcurrentMark::calc_active_marking_workers() {
1099 uint result = 0;
1100 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1101 result = _max_concurrent_workers;
1102 } else {
1103 result =
1104 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1105 1, /* Minimum workers */
1106 _num_concurrent_workers,
1107 Threads::number_of_non_daemon_threads());
1108 // Don't scale the result down by scale_concurrent_workers() because
1109 // that scaling has already gone into "_max_concurrent_workers".
1110 }
1111 assert(result > 0 && result <= _max_concurrent_workers,
1112 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1113 _max_concurrent_workers, result);
1114 return result;
1115 }
1116
1117 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1118 return _root_region_scan_aborted.load_relaxed();
1119 }
1120
1121 #ifndef PRODUCT
1122 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1123 assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1124 }
1125 #endif
1126
1127 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1128 #ifdef ASSERT
1129 HeapWord* last = region->last();
1130 G1HeapRegion* hr = _g1h->heap_region_containing(last);
1131 assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1132 "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1133 assert(top_at_mark_start(hr) == region->start(),
1134 "MemRegion start should be equal to TAMS");
1135 #endif
1136
1137 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1138
1139 const uintx interval = PrefetchScanIntervalInBytes;
1140 HeapWord* curr = region->start();
1141 const HeapWord* end = region->end();
1142 while (curr < end) {
1143 Prefetch::read(curr, interval);
1144 oop obj = cast_to_oop(curr);
1145 size_t size = obj->oop_iterate_size(&cl);
1146 assert(size == obj->size(), "sanity");
1147 curr += size;
1148 }
1149 }
1150
1151 class G1CMRootRegionScanTask : public WorkerTask {
1152 G1ConcurrentMark* _cm;
1153 bool _should_yield;
1154
1155 public:
1156 G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1157 WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1158
1159 void work(uint worker_id) {
1160 SuspendibleThreadSetJoiner sts_join(_should_yield);
1161
1162 while (true) {
1163 if (_cm->has_root_region_scan_aborted()) {
1164 return;
1165 }
1166 G1CMRootMemRegions* root_regions = _cm->root_regions();
1167 const MemRegion* region = root_regions->claim_next();
1168 if (region == nullptr) {
1169 return;
1170 }
1171 _cm->scan_root_region(region, worker_id);
1172 if (_should_yield) {
1173 SuspendibleThreadSet::yield();
1174 // If we yielded, a GC may have processed all root regions,
1175 // so this loop will naturally exit on the next claim_next() call.
1176 // Same if a Full GC signalled abort of the concurrent mark.
1177 }
1178 }
1179 }
1180 };
1181
1182 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1183 // We first check whether there is any work to do as we might have already aborted
1184 // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
1185 // We want to avoid spinning up the worker threads if that happened.
1186 // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1187 //
1188 // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1189 // a young gc doing the work.
1190 //
1191 // Concurrent gc threads enter an STS when starting the task, so they stop, then
1192 // continue after that safepoint.
1193 //
1194 // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1195 // consistent view of the value containing the number of remaining regions across the
1196 // usages below. The safepoint/gc may already be running and modifying it
1197 // while this code is still executing.
1198 uint num_remaining = root_regions()->num_remaining_regions();
1199 bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1200 if (do_scan) {
1201 // Assign one worker to each root-region but subject to the max constraint.
1202 // The constraint is also important to avoid accesses beyond the allocated per-worker
1203 // marking helper data structures. We might get passed different WorkerThreads with
1204 // different number of threads (potential worker ids) than helper data structures when
1205 // completing this work during GC.
1206 const uint num_workers = MIN2(num_remaining,
1207 _max_concurrent_workers);
1208
1209 G1CMRootRegionScanTask task(this, concurrent);
1210 log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1211 task.name(), num_workers, num_remaining);
1212 workers->run_task(&task, num_workers);
1213 }
1214
1215 // At the end of this method, we can re-read num_remaining() in the assert: either
1216 // we got non-zero above and we processed all root regions (and it must be zero
1217 // after the worker task synchronization) or it had already been zero. We also
1218 // can't have started another concurrent cycle that could have set it to something else
1219 // while still in the concurrent cycle (if called concurrently).
1220 assert_root_region_scan_completed_or_aborted();
1221
1222 return do_scan;
1223 }
1224
1225 void G1ConcurrentMark::scan_root_regions_concurrently() {
1226 assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1227 scan_root_regions(_concurrent_workers, true /* concurrent */);
1228 }
1229
1230 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1231 assert_at_safepoint_on_vm_thread();
1232 return scan_root_regions(_g1h->workers(), false /* concurrent */);
1233 }
1234
1235 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1236 root_regions()->add(top_at_mark_start(r), r->top());
1237 }
1238
1239 void G1ConcurrentMark::add_root_region_set_bottom(G1HeapRegion* r) {
1240 set_top_at_mark_start_to_bottom(r);
1241 root_regions()->add(r->bottom(), r->top());
1242 }
1243
1244 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1245 return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1246 }
1247
1248 void G1ConcurrentMark::abort_root_region_scan() {
1249 assert_not_at_safepoint();
1250
1251 _root_region_scan_aborted.store_relaxed(true);
1252 }
1253
1254 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1255 assert_at_safepoint_on_vm_thread();
1256
1257 _root_region_scan_aborted.store_relaxed(true);
1258 }
1259
1260 void G1ConcurrentMark::concurrent_cycle_start() {
1261 _gc_timer_cm->register_gc_start();
1262
1263 _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1264
1265 _g1h->trace_heap_before_gc(_gc_tracer_cm);
1266 }
1267
1268 uint G1ConcurrentMark::completed_mark_cycles() const {
1269 return _completed_mark_cycles.load_relaxed();
1270 }
1271
1272 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1273 _g1h->trace_heap_after_gc(_gc_tracer_cm);
1274
1275 if (mark_cycle_completed) {
1276 _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1277 }
1278
1279 if (has_aborted()) {
1280 log_info(gc, marking)("Concurrent Mark Abort");
1281 _gc_tracer_cm->report_concurrent_mode_failure();
1282 }
1283
1284 _gc_timer_cm->register_gc_end();
1285
1286 _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1287 }
1288
1289 void G1ConcurrentMark::mark_from_roots() {
1290 _restart_for_overflow.store_relaxed(false);
1291
1292 uint active_workers = calc_active_marking_workers();
1293
1294 // Setting active workers is not guaranteed since fewer
1295 // worker threads may currently exist and more may not be
1296 // available.
1297 active_workers = _concurrent_workers->set_active_workers(active_workers);
1298 log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1299
1300 _num_concurrent_workers = active_workers;
1301
1302 // Parallel task terminator is set in "set_concurrency_and_phase()"
1303 set_concurrency_and_phase(active_workers, true /* concurrent */);
1304
1305 G1CMConcurrentMarkingTask marking_task(this);
1306 _concurrent_workers->run_task(&marking_task);
1307 print_stats();
1308 }
1309
1310 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1311 static const char* location_strings[] = { "Remark Before",
1312 "Remark After",
1313 "Remark Overflow",
1314 "Cleanup Before",
1315 "Cleanup After" };
1316 return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1317 }
1318
1319 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1320 VerifyLocation location) {
1321 G1HeapVerifier* verifier = _g1h->verifier();
1322
1323 verifier->verify_region_sets_optional();
1324
1325 const char* caller = verify_location_string(location);
1326
1327 if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1328 GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1329
1330 size_t const BufLen = 512;
1331 char buffer[BufLen];
1332
1333 jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1334 verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1335
1336 // Only check bitmap in Remark, and not at After-Verification because the regions
1337 // already have their TAMS'es reset.
1338 if (location != VerifyLocation::RemarkAfter) {
1339 verifier->verify_bitmap_clear(true /* above_tams_only */);
1340 }
1341 }
1342 }
1343
1344 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1345 G1CollectedHeap* _g1h;
1346 public:
1347 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1348
1349 bool do_object_b(oop obj) {
1350 return !_g1h->is_obj_dead(obj);
1351 }
1352 };
1353
1354 void G1ConcurrentMark::remark() {
1355 assert_at_safepoint_on_vm_thread();
1356
1357 // If a full collection has happened, we should not continue. However we might
1358 // have ended up here as the Remark VM operation has been scheduled already.
1359 if (has_aborted()) {
1360 return;
1361 }
1362
1363 G1Policy* policy = _g1h->policy();
1364 policy->record_pause_start_time();
1365
1366 double start = os::elapsedTime();
1367
1368 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1369
1370 {
1371 GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1372 finalize_marking();
1373 }
1374
1375 double mark_work_end = os::elapsedTime();
1376
1377 bool const mark_finished = !has_overflown();
1378 if (mark_finished) {
1379 weak_refs_work();
1380
1381 // Unload Klasses, String, Code Cache, etc.
1382 if (ClassUnloadingWithConcurrentMark) {
1383 G1CMIsAliveClosure is_alive(this);
1384 _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1385 }
1386
1387 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1388 // We're done with marking.
1389 // This is the end of the marking cycle, we're expected all
1390 // threads to have SATB queues with active set to true.
1391 satb_mq_set.set_active_all_threads(false, /* new active value */
1392 true /* expected_active */);
1393
1394 {
1395 GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1396 flush_all_task_caches();
1397 }
1398
1399 // All marking completed. Check bitmap now as we will start to reset TAMSes
1400 // in parallel below so that we can not do this in the After-Remark verification.
1401 _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1402
1403 {
1404 GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1405
1406 G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1407 uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1408 _g1h->workers()->active_workers());
1409 log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1410 _g1h->workers()->run_task(&cl, num_workers);
1411
1412 log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1413 _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1414
1415 _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1416
1417 if (_needs_remembered_set_rebuild) {
1418 GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1419 _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1420 }
1421 }
1422
1423 if (log_is_enabled(Trace, gc, liveness)) {
1424 G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1425 _g1h->heap_region_iterate(&cl);
1426 }
1427
1428 // Potentially, some empty-regions have been reclaimed; make this a
1429 // "collection" so that pending allocation can retry before attempting a
1430 // GC pause.
1431 _g1h->increment_total_collections();
1432
1433 // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1434 // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1435 // triggered, it likely means there are very few regular GCs, making resizing
1436 // based on gc heuristics less effective.
1437 if (_g1h->last_gc_was_periodic()) {
1438 _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1439 }
1440
1441 compute_new_sizes();
1442
1443 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1444
1445 assert(!restart_for_overflow(), "sanity");
1446 // Completely reset the marking state (except bitmaps) since marking completed.
1447 reset_at_marking_complete();
1448
1449 G1CollectedHeap::finish_codecache_marking_cycle();
1450
1451 {
1452 GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1453 G1ObjectCountIsAliveClosure is_alive(_g1h);
1454 _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1455 }
1456
1457 // Successfully completed marking, advance state.
1458 cm_thread()->set_full_cycle_rebuild_and_scrub();
1459 } else {
1460 // We overflowed. Restart concurrent marking.
1461 _restart_for_overflow.store_relaxed(true);
1462
1463 verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1464
1465 // Clear the marking state because we will be restarting
1466 // marking due to overflowing the global mark stack.
1467 reset_marking_for_restart();
1468 }
1469
1470 // Statistics
1471 double now = os::elapsedTime();
1472 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1473 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1474 _remark_times.add((now - start) * 1000.0);
1475
1476 _g1h->update_perf_counter_cpu_time();
1477
1478 policy->record_concurrent_mark_remark_end();
1479
1480 return;
1481 }
1482
1483 void G1ConcurrentMark::compute_new_sizes() {
1484 MetaspaceGC::compute_new_size();
1485
1486 // Cleanup will have freed any regions completely full of garbage.
1487 // Update the soft reference policy with the new heap occupancy.
1488 Universe::heap()->update_capacity_and_used_at_gc();
1489
1490 // We reclaimed old regions so we should calculate the sizes to make
1491 // sure we update the old gen/space data.
1492 _g1h->monitoring_support()->update_sizes();
1493 }
1494
1495 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1496 G1CollectedHeap* _g1h;
1497
1498 public:
1499 G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1500
1501 bool do_heap_region(G1HeapRegion* r) override {
1502 // Update the remset tracking state from updating to complete
1503 // if remembered sets have been rebuilt.
1504 _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1505 return false;
1506 }
1507 };
1508
1509 void G1ConcurrentMark::cleanup() {
1510 assert_at_safepoint_on_vm_thread();
1511
1512 // If a full collection has happened, we shouldn't do this.
1513 if (has_aborted()) {
1514 return;
1515 }
1516
1517 G1Policy* policy = _g1h->policy();
1518 policy->record_pause_start_time();
1519
1520 double start = os::elapsedTime();
1521
1522 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1523
1524 if (needs_remembered_set_rebuild()) {
1525 // Update the remset tracking information as well as marking all regions
1526 // as fully parsable.
1527 GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1528 G1UpdateRegionsAfterRebuild cl(_g1h);
1529 _g1h->heap_region_iterate(&cl);
1530 } else {
1531 log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1532 }
1533
1534 verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1535
1536 // Local statistics
1537 _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1538
1539 {
1540 GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1541 policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1542 }
1543
1544 // Advance state.
1545 cm_thread()->set_full_cycle_reset_for_next_cycle();
1546 return;
1547 }
1548
1549 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1550 // Uses the G1CMTask associated with a worker thread (for serial reference
1551 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1552 // trace referent objects.
1553 //
1554 // Using the G1CMTask and embedded local queues avoids having the worker
1555 // threads operating on the global mark stack. This reduces the risk
1556 // of overflowing the stack - which we would rather avoid at this late
1557 // state. Also using the tasks' local queues removes the potential
1558 // of the workers interfering with each other that could occur if
1559 // operating on the global stack.
1560
1561 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1562 G1ConcurrentMark* _cm;
1563 G1CMTask* _task;
1564 uint _ref_counter_limit;
1565 uint _ref_counter;
1566 bool _is_serial;
1567 public:
1568 G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1569 _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1570 _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1571 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1572 }
1573
1574 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1575 virtual void do_oop( oop* p) { do_oop_work(p); }
1576
1577 template <class T> void do_oop_work(T* p) {
1578 if (_cm->has_overflown()) {
1579 return;
1580 }
1581 if (!_task->deal_with_reference(p)) {
1582 // We did not add anything to the mark bitmap (or mark stack), so there is
1583 // no point trying to drain it.
1584 return;
1585 }
1586 _ref_counter--;
1587
1588 if (_ref_counter == 0) {
1589 // We have dealt with _ref_counter_limit references, pushing them
1590 // and objects reachable from them on to the local stack (and
1591 // possibly the global stack). Call G1CMTask::do_marking_step() to
1592 // process these entries.
1593 //
1594 // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1595 // there's nothing more to do (i.e. we're done with the entries that
1596 // were pushed as a result of the G1CMTask::deal_with_reference() calls
1597 // above) or we overflow.
1598 //
1599 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1600 // flag while there may still be some work to do. (See the comment at
1601 // the beginning of G1CMTask::do_marking_step() for those conditions -
1602 // one of which is reaching the specified time target.) It is only
1603 // when G1CMTask::do_marking_step() returns without setting the
1604 // has_aborted() flag that the marking step has completed.
1605 do {
1606 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1607 _task->do_marking_step(mark_step_duration_ms,
1608 false /* do_termination */,
1609 _is_serial);
1610 } while (_task->has_aborted() && !_cm->has_overflown());
1611 _ref_counter = _ref_counter_limit;
1612 }
1613 }
1614 };
1615
1616 // 'Drain' oop closure used by both serial and parallel reference processing.
1617 // Uses the G1CMTask associated with a given worker thread (for serial
1618 // reference processing the G1CMtask for worker 0 is used). Calls the
1619 // do_marking_step routine, with an unbelievably large timeout value,
1620 // to drain the marking data structures of the remaining entries
1621 // added by the 'keep alive' oop closure above.
1622
1623 class G1CMDrainMarkingStackClosure : public VoidClosure {
1624 G1ConcurrentMark* _cm;
1625 G1CMTask* _task;
1626 bool _is_serial;
1627 public:
1628 G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1629 _cm(cm), _task(task), _is_serial(is_serial) {
1630 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1631 }
1632
1633 void do_void() {
1634 do {
1635 // We call G1CMTask::do_marking_step() to completely drain the local
1636 // and global marking stacks of entries pushed by the 'keep alive'
1637 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1638 //
1639 // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1640 // if there's nothing more to do (i.e. we've completely drained the
1641 // entries that were pushed as a result of applying the 'keep alive'
1642 // closure to the entries on the discovered ref lists) or we overflow
1643 // the global marking stack.
1644 //
1645 // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1646 // flag while there may still be some work to do. (See the comment at
1647 // the beginning of G1CMTask::do_marking_step() for those conditions -
1648 // one of which is reaching the specified time target.) It is only
1649 // when G1CMTask::do_marking_step() returns without setting the
1650 // has_aborted() flag that the marking step has completed.
1651
1652 _task->do_marking_step(1000000000.0 /* something very large */,
1653 true /* do_termination */,
1654 _is_serial);
1655 } while (_task->has_aborted() && !_cm->has_overflown());
1656 }
1657 };
1658
1659 class G1CMRefProcProxyTask : public RefProcProxyTask {
1660 G1CollectedHeap& _g1h;
1661 G1ConcurrentMark& _cm;
1662
1663 public:
1664 G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1665 : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1666 _g1h(g1h),
1667 _cm(cm) {}
1668
1669 void work(uint worker_id) override {
1670 assert(worker_id < _max_workers, "sanity");
1671 G1CMIsAliveClosure is_alive(&_cm);
1672 uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1673 G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1674 BarrierEnqueueDiscoveredFieldClosure enqueue;
1675 G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1676 _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1677 }
1678
1679 void prepare_run_task_hook() override {
1680 // We need to reset the concurrency level before each
1681 // proxy task execution, so that the termination protocol
1682 // and overflow handling in G1CMTask::do_marking_step() knows
1683 // how many workers to wait for.
1684 _cm.set_concurrency(_queue_count);
1685 }
1686 };
1687
1688 void G1ConcurrentMark::weak_refs_work() {
1689 ResourceMark rm;
1690
1691 {
1692 GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1693
1694 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1695
1696 // See the comment in G1CollectedHeap::ref_processing_init()
1697 // about how reference processing currently works in G1.
1698
1699 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1700
1701 // Prefer to grow the stack until the max capacity.
1702 _global_mark_stack.set_should_grow();
1703
1704 // Parallel processing task executor.
1705 G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1706 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1707
1708 // Process the weak references.
1709 const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1710 _gc_tracer_cm->report_gc_reference_stats(stats);
1711 pt.print_all_references();
1712
1713 // The do_oop work routines of the keep_alive and drain_marking_stack
1714 // oop closures will set the has_overflown flag if we overflow the
1715 // global marking stack.
1716
1717 assert(has_overflown() || _global_mark_stack.is_empty(),
1718 "Mark stack should be empty (unless it has overflown)");
1719 }
1720
1721 if (has_overflown()) {
1722 // We can not trust g1_is_alive and the contents of the heap if the marking stack
1723 // overflowed while processing references. Exit the VM.
1724 fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1725 "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1726 "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1727 _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1728 return;
1729 }
1730
1731 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1732
1733 {
1734 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1735 G1CMIsAliveClosure is_alive(this);
1736 WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1737 }
1738 }
1739
1740 class G1PrecleanYieldClosure : public YieldClosure {
1741 G1ConcurrentMark* _cm;
1742
1743 public:
1744 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1745
1746 virtual bool should_return() {
1747 return _cm->has_aborted();
1748 }
1749
1750 virtual bool should_return_fine_grain() {
1751 _cm->do_yield_check();
1752 return _cm->has_aborted();
1753 }
1754 };
1755
1756 void G1ConcurrentMark::preclean() {
1757 assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1758
1759 SuspendibleThreadSetJoiner joiner;
1760
1761 BarrierEnqueueDiscoveredFieldClosure enqueue;
1762
1763 set_concurrency_and_phase(1, true);
1764
1765 G1PrecleanYieldClosure yield_cl(this);
1766
1767 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1768 // Precleaning is single threaded. Temporarily disable MT discovery.
1769 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1770 rp->preclean_discovered_references(rp->is_alive_non_header(),
1771 &enqueue,
1772 &yield_cl,
1773 _gc_timer_cm);
1774 }
1775
1776 // Closure for marking entries in SATB buffers.
1777 class G1CMSATBBufferClosure : public SATBBufferClosure {
1778 private:
1779 G1CMTask* _task;
1780 G1CollectedHeap* _g1h;
1781
1782 // This is very similar to G1CMTask::deal_with_reference, but with
1783 // more relaxed requirements for the argument, so this must be more
1784 // circumspect about treating the argument as an object.
1785 void do_entry(void* entry) const {
1786 _task->increment_refs_reached();
1787 oop const obj = cast_to_oop(entry);
1788 _task->make_reference_grey(obj);
1789 }
1790
1791 public:
1792 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1793 : _task(task), _g1h(g1h) { }
1794
1795 virtual void do_buffer(void** buffer, size_t size) {
1796 for (size_t i = 0; i < size; ++i) {
1797 do_entry(buffer[i]);
1798 }
1799 }
1800 };
1801
1802 class G1RemarkThreadsClosure : public ThreadClosure {
1803 G1SATBMarkQueueSet& _qset;
1804
1805 public:
1806 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1807 _qset(G1BarrierSet::satb_mark_queue_set()) {}
1808
1809 void do_thread(Thread* thread) {
1810 // Transfer any partial buffer to the qset for completed buffer processing.
1811 _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1812 }
1813 };
1814
1815 class G1CMRemarkTask : public WorkerTask {
1816 // For Threads::possibly_parallel_threads_do
1817 ThreadsClaimTokenScope _threads_claim_token_scope;
1818 G1ConcurrentMark* _cm;
1819 public:
1820 void work(uint worker_id) {
1821 G1CMTask* task = _cm->task(worker_id);
1822 task->record_start_time();
1823 {
1824 ResourceMark rm;
1825
1826 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1827 Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1828 }
1829
1830 do {
1831 task->do_marking_step(1000000000.0 /* something very large */,
1832 true /* do_termination */,
1833 false /* is_serial */);
1834 } while (task->has_aborted() && !_cm->has_overflown());
1835 // If we overflow, then we do not want to restart. We instead
1836 // want to abort remark and do concurrent marking again.
1837 task->record_end_time();
1838 }
1839
1840 G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1841 WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1842 _cm->terminator()->reset_for_reuse(active_workers);
1843 }
1844 };
1845
1846 void G1ConcurrentMark::finalize_marking() {
1847 ResourceMark rm;
1848
1849 _g1h->ensure_parsability(false);
1850
1851 // this is remark, so we'll use up all active threads
1852 uint active_workers = _g1h->workers()->active_workers();
1853 set_concurrency_and_phase(active_workers, false /* concurrent */);
1854 // Leave _parallel_marking_threads at it's
1855 // value originally calculated in the G1ConcurrentMark
1856 // constructor and pass values of the active workers
1857 // through the task.
1858
1859 {
1860 G1CMRemarkTask remarkTask(this, active_workers);
1861 // We will start all available threads, even if we decide that the
1862 // active_workers will be fewer. The extra ones will just bail out
1863 // immediately.
1864 _g1h->workers()->run_task(&remarkTask);
1865 }
1866
1867 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1868 guarantee(has_overflown() ||
1869 satb_mq_set.completed_buffers_num() == 0,
1870 "Invariant: has_overflown = %s, num buffers = %zu",
1871 BOOL_TO_STR(has_overflown()),
1872 satb_mq_set.completed_buffers_num());
1873
1874 print_stats();
1875 }
1876
1877 void G1ConcurrentMark::flush_all_task_caches(bool ends_use_of_mark_cache) {
1878 size_t hits = 0;
1879 size_t misses = 0;
1880 for (uint i = 0; i < _max_num_tasks; i++) {
1881 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1882 hits += stats.first;
1883 misses += stats.second;
1884 }
1885 size_t sum = hits + misses;
1886 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1887 hits, misses, percent_of(hits, sum));
1888 if (ends_use_of_mark_cache) {
1889 _is_region_mark_stats_cache_in_use = false;
1890 }
1891 }
1892
1893 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1894 assert_at_safepoint();
1895 _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1896 }
1897
1898 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1899 // "Checkpoint" the finger.
1900 HeapWord* local_finger = finger();
1901
1902 while (local_finger < _heap.end()) {
1903 assert(_g1h->is_in_reserved(local_finger), "invariant");
1904
1905 G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1906 // Make sure that the reads below do not float before loading curr_region.
1907 OrderAccess::loadload();
1908 // Above heap_region_containing may return null as we always scan claim
1909 // until the end of the heap. In this case, just jump to the next region.
1910 HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1911
1912 // Is the gap between reading the finger and doing the CAS too long?
1913 HeapWord* res = _finger.compare_exchange(local_finger, end);
1914 if (res == local_finger && curr_region != nullptr) {
1915 // We succeeded.
1916 HeapWord* bottom = curr_region->bottom();
1917 HeapWord* limit = top_at_mark_start(curr_region);
1918
1919 log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1920 // Notice that _finger == end cannot be guaranteed here since,
1921 // someone else might have moved the finger even further.
1922 assert(finger() >= end, "The finger should have moved forward");
1923
1924 if (limit > bottom) {
1925 return curr_region;
1926 } else {
1927 assert(limit == bottom,
1928 "The scan limit for region %u (%s) should be bottom but is " PTR_FORMAT,
1929 curr_region->hrm_index(), curr_region->get_short_type_str(), p2i(limit));
1930 // We return null and the caller should try calling
1931 // claim_region() again.
1932 return nullptr;
1933 }
1934 } else {
1935 // Read the finger again.
1936 HeapWord* next_finger = finger();
1937 assert(next_finger > local_finger,
1938 "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT,
1939 p2i(local_finger), p2i(next_finger));
1940 local_finger = next_finger;
1941 }
1942 }
1943
1944 return nullptr;
1945 }
1946
1947 #ifndef PRODUCT
1948 class VerifyNoCSetOops {
1949 G1CollectedHeap* _g1h;
1950 const char* _phase;
1951 int _info;
1952
1953 public:
1954 VerifyNoCSetOops(const char* phase, int info = -1) :
1955 _g1h(G1CollectedHeap::heap()),
1956 _phase(phase),
1957 _info(info)
1958 { }
1959
1960 void operator()(G1TaskQueueEntry task_entry) const {
1961 if (task_entry.is_partial_array_state()) {
1962 oop obj = task_entry.to_partial_array_state()->source();
1963 guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1964 return;
1965 }
1966 guarantee(oopDesc::is_oop(task_entry.to_oop()),
1967 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1968 p2i(task_entry.to_oop()), _phase, _info);
1969 G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1970 guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1971 "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1972 p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1973 }
1974 };
1975
1976 void G1ConcurrentMark::verify_no_collection_set_oops() {
1977 assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1978 "should be at a safepoint or initializing");
1979 if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1980 return;
1981 }
1982
1983 // Verify entries on the global mark stack
1984 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1985
1986 // Verify entries on the task queues
1987 for (uint i = 0; i < _max_num_tasks; ++i) {
1988 G1CMTaskQueue* queue = _task_queues->queue(i);
1989 queue->iterate(VerifyNoCSetOops("Queue", i));
1990 }
1991
1992 // Verify the global finger
1993 HeapWord* global_finger = finger();
1994 if (global_finger != nullptr && global_finger < _heap.end()) {
1995 // Since we always iterate over all regions, we might get a null G1HeapRegion
1996 // here.
1997 G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1998 guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1999 "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2000 p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2001 }
2002
2003 // Verify the task fingers
2004 assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
2005 for (uint i = 0; i < _num_concurrent_workers; ++i) {
2006 G1CMTask* task = _tasks[i];
2007 HeapWord* task_finger = task->finger();
2008 if (task_finger != nullptr && task_finger < _heap.end()) {
2009 // See above note on the global finger verification.
2010 G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
2011 guarantee(r == nullptr || task_finger == r->bottom() ||
2012 !r->in_collection_set() || !r->has_index_in_opt_cset(),
2013 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2014 p2i(task_finger), HR_FORMAT_PARAMS(r));
2015 }
2016 }
2017 }
2018 #endif // PRODUCT
2019
2020 void G1ConcurrentMark::rebuild_and_scrub() {
2021 if (!needs_remembered_set_rebuild()) {
2022 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
2023 }
2024
2025 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
2026 }
2027
2028 void G1ConcurrentMark::print_stats() {
2029 if (!log_is_enabled(Debug, gc, stats)) {
2030 return;
2031 }
2032 log_debug(gc, stats)("---------------------------------------------------------------------");
2033 for (size_t i = 0; i < _num_active_tasks; ++i) {
2034 _tasks[i]->print_stats();
2035 log_debug(gc, stats)("---------------------------------------------------------------------");
2036 }
2037 }
2038
2039 bool G1ConcurrentMark::shutdown_cleanup_needed() const {
2040 // Cleanup (aborting threads, setting abort flags) is needed throughout the whole cycle before
2041 // stopping the CM thread.
2042 return is_fully_initialized() && is_in_concurrent_cycle();
2043 }
2044
2045 void G1ConcurrentMark::shutdown_concurrent_cycle() {
2046 assert_at_safepoint_on_vm_thread();
2047
2048 abort_root_region_scan_at_safepoint();
2049 abort_marking_threads();
2050
2051 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2052 satb_mq_set.abandon_partial_marking();
2053 // This can be called either during or outside marking, we'll read
2054 // the expected_active value from the SATB queue set.
2055 satb_mq_set.set_active_all_threads(false, /* new active value */
2056 satb_mq_set.is_active() /* expected_active */);
2057 }
2058
2059 bool G1ConcurrentMark::concurrent_cycle_abort() {
2060 assert_at_safepoint_on_vm_thread();
2061 assert(_g1h->collector_state()->is_in_full_gc(), "must be");
2062
2063 // If we start the compaction before the CM threads finish
2064 // scanning the root regions we might trip them over as we'll
2065 // be moving objects / updating references. Since the root region
2066 // scan synchronized with the safepoint, just tell it to abort.
2067 // It will notice when the threads start up again later.
2068 abort_root_region_scan_at_safepoint();
2069
2070 // We haven't started a concurrent cycle no need to do anything; we might have
2071 // aborted the marking because of shutting down though. In this case the marking
2072 // might have already completed the abort (leading to in_progress() below to
2073 // return false), however this still left marking state particularly in the
2074 // shared marking bitmap that must be cleaned up.
2075 // If there are multiple full gcs during shutdown we do this work repeatedly for
2076 // nothing, but this situation should be extremely rare (a full gc after shutdown
2077 // has been signalled is already rare), and this work should be negligible compared
2078 // to actual full gc work.
2079
2080 if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
2081 return false;
2082 }
2083
2084 flush_all_task_caches();
2085 reset_marking_for_restart();
2086
2087 abort_marking_threads();
2088
2089 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2090 satb_mq_set.abandon_partial_marking();
2091 // This can be called either during or outside marking, we'll read
2092 // the expected_active value from the SATB queue set.
2093 satb_mq_set.set_active_all_threads(false, /* new active value */
2094 satb_mq_set.is_active() /* expected_active */);
2095 return true;
2096 }
2097
2098 void G1ConcurrentMark::abort_marking_threads() {
2099 assert_root_region_scan_completed_or_aborted();
2100 _has_aborted.store_relaxed(true);
2101 _first_overflow_barrier_sync.abort();
2102 _second_overflow_barrier_sync.abort();
2103 }
2104
2105 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2106 class CountCpuTimeThreadClosure : public ThreadClosure {
2107 public:
2108 jlong _total_cpu_time;
2109
2110 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2111
2112 void do_thread(Thread* t) {
2113 _total_cpu_time += os::thread_cpu_time(t);
2114 }
2115 } cl;
2116
2117 threads_do(&cl);
2118
2119 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2120 }
2121
2122 static void print_ms_time_info(const char* prefix, const char* name,
2123 NumberSeq& ns) {
2124 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2125 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2126 if (ns.num() > 0) {
2127 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2128 prefix, ns.sd(), ns.maximum());
2129 }
2130 }
2131
2132 void G1ConcurrentMark::print_summary_info() {
2133 Log(gc, marking) log;
2134 if (!log.is_trace()) {
2135 return;
2136 }
2137
2138 log.trace(" Concurrent marking:");
2139 if (!is_fully_initialized()) {
2140 log.trace(" has not been initialized yet");
2141 return;
2142 }
2143 print_ms_time_info(" ", "remarks", _remark_times);
2144 {
2145 print_ms_time_info(" ", "final marks", _remark_mark_times);
2146 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2147
2148 }
2149 print_ms_time_info(" ", "cleanups", _cleanup_times);
2150 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2151 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2152 log.trace(" Total stop_world time = %8.2f s.",
2153 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2154 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2155 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2156 }
2157
2158 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2159 if (is_fully_initialized()) { // they are initialized late
2160 tc->do_thread(_cm_thread);
2161 _concurrent_workers->threads_do(tc);
2162 }
2163 }
2164
2165 void G1ConcurrentMark::print_on(outputStream* st) const {
2166 st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2167 _mark_bitmap.print_on(st, " Bits: ");
2168 }
2169
2170 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2171 ReferenceProcessor* result = g1h->ref_processor_cm();
2172 assert(result != nullptr, "CM reference processor should not be null");
2173 return result;
2174 }
2175
2176 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2177 G1CMTask* task)
2178 : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2179 _g1h(g1h), _task(task)
2180 { }
2181
2182 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2183 assert(hr != nullptr,
2184 "claim_region() should have filtered out null regions");
2185 _curr_region = hr;
2186 _finger = hr->bottom();
2187 update_region_limit();
2188 }
2189
2190 void G1CMTask::update_region_limit() {
2191 G1HeapRegion* hr = _curr_region;
2192 HeapWord* bottom = hr->bottom();
2193 HeapWord* limit = _cm->top_at_mark_start(hr);
2194
2195 if (limit == bottom) {
2196 // The region was collected underneath our feet.
2197 // We set the finger to bottom to ensure that the bitmap
2198 // iteration that will follow this will not do anything.
2199 // (this is not a condition that holds when we set the region up,
2200 // as the region is not supposed to be empty in the first place)
2201 _finger = bottom;
2202 } else if (limit >= _region_limit) {
2203 assert(limit >= _finger, "peace of mind");
2204 } else {
2205 assert(limit < _region_limit, "only way to get here");
2206 // This can happen under some pretty unusual circumstances. An
2207 // evacuation pause empties the region underneath our feet (TAMS
2208 // at bottom). We then do some allocation in the region (TAMS
2209 // stays at bottom), followed by the region being used as a GC
2210 // alloc region (TAMS will move to top() and the objects
2211 // originally below it will be greyed). All objects now marked in
2212 // the region are explicitly greyed, if below the global finger,
2213 // and we do not need in fact to scan anything else. So, we simply
2214 // set _finger to be limit to ensure that the bitmap iteration
2215 // doesn't do anything.
2216 _finger = limit;
2217 }
2218
2219 _region_limit = limit;
2220 }
2221
2222 void G1CMTask::giveup_current_region() {
2223 assert(_curr_region != nullptr, "invariant");
2224 clear_region_fields();
2225 }
2226
2227 void G1CMTask::clear_region_fields() {
2228 // Values for these three fields that indicate that we're not
2229 // holding on to a region.
2230 _curr_region = nullptr;
2231 _finger = nullptr;
2232 _region_limit = nullptr;
2233 }
2234
2235 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2236 if (cm_oop_closure == nullptr) {
2237 assert(_cm_oop_closure != nullptr, "invariant");
2238 } else {
2239 assert(_cm_oop_closure == nullptr, "invariant");
2240 }
2241 _cm_oop_closure = cm_oop_closure;
2242 }
2243
2244 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2245 guarantee(mark_bitmap != nullptr, "invariant");
2246 _mark_bitmap = mark_bitmap;
2247 clear_region_fields();
2248
2249 _calls = 0;
2250 _elapsed_time_ms = 0.0;
2251 _termination_time_ms = 0.0;
2252
2253 _mark_stats_cache.reset();
2254 }
2255
2256 void G1CMTask::reset_for_restart() {
2257 clear_region_fields();
2258 _task_queue->set_empty();
2259 TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2260 TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2261 }
2262
2263 void G1CMTask::register_partial_array_splitter() {
2264
2265 ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2266 _cm->max_num_tasks());
2267 }
2268
2269 void G1CMTask::unregister_partial_array_splitter() {
2270 _partial_array_splitter.~PartialArraySplitter();
2271 }
2272
2273 bool G1CMTask::should_exit_termination() {
2274 if (!regular_clock_call()) {
2275 return true;
2276 }
2277
2278 // This is called when we are in the termination protocol. We should
2279 // quit if, for some reason, this task wants to abort or the global
2280 // stack is not empty (this means that we can get work from it).
2281 return !_cm->mark_stack_empty() || has_aborted();
2282 }
2283
2284 void G1CMTask::reached_limit() {
2285 assert(_words_scanned >= _words_scanned_limit ||
2286 _refs_reached >= _refs_reached_limit ,
2287 "shouldn't have been called otherwise");
2288 abort_marking_if_regular_check_fail();
2289 }
2290
2291 bool G1CMTask::regular_clock_call() {
2292 if (has_aborted()) {
2293 return false;
2294 }
2295
2296 // First, we need to recalculate the words scanned and refs reached
2297 // limits for the next clock call.
2298 recalculate_limits();
2299
2300 // During the regular clock call we do the following
2301
2302 // (1) If an overflow has been flagged, then we abort.
2303 if (_cm->has_overflown()) {
2304 return false;
2305 }
2306
2307 // If we are not concurrent (i.e. we're doing remark) we don't need
2308 // to check anything else. The other steps are only needed during
2309 // the concurrent marking phase.
2310 if (!_cm->concurrent()) {
2311 return true;
2312 }
2313
2314 // (2) If marking has been aborted for Full GC, then we also abort.
2315 if (_cm->has_aborted()) {
2316 return false;
2317 }
2318
2319 // (4) We check whether we should yield. If we have to, then we abort.
2320 if (SuspendibleThreadSet::should_yield()) {
2321 // We should yield. To do this we abort the task. The caller is
2322 // responsible for yielding.
2323 return false;
2324 }
2325
2326 // (5) We check whether we've reached our time quota. If we have,
2327 // then we abort.
2328 double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2329 if (elapsed_time_ms > _time_target_ms) {
2330 _has_timed_out = true;
2331 return false;
2332 }
2333
2334 // (6) Finally, we check whether there are enough completed STAB
2335 // buffers available for processing. If there are, we abort.
2336 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2337 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2338 // we do need to process SATB buffers, we'll abort and restart
2339 // the marking task to do so
2340 return false;
2341 }
2342 return true;
2343 }
2344
2345 void G1CMTask::recalculate_limits() {
2346 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2347 _words_scanned_limit = _real_words_scanned_limit;
2348
2349 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2350 _refs_reached_limit = _real_refs_reached_limit;
2351 }
2352
2353 void G1CMTask::decrease_limits() {
2354 // This is called when we believe that we're going to do an infrequent
2355 // operation which will increase the per byte scanned cost (i.e. move
2356 // entries to/from the global stack). It basically tries to decrease the
2357 // scanning limit so that the clock is called earlier.
2358
2359 _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2360 _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2361 }
2362
2363 void G1CMTask::move_entries_to_global_stack() {
2364 // Local array where we'll store the entries that will be popped
2365 // from the local queue.
2366 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2367
2368 size_t n = 0;
2369 G1TaskQueueEntry task_entry;
2370 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2371 buffer[n] = task_entry;
2372 ++n;
2373 }
2374 if (n < G1CMMarkStack::EntriesPerChunk) {
2375 buffer[n] = G1TaskQueueEntry();
2376 }
2377
2378 if (n > 0) {
2379 if (!_cm->mark_stack_push(buffer)) {
2380 set_has_aborted();
2381 }
2382 }
2383
2384 // This operation was quite expensive, so decrease the limits.
2385 decrease_limits();
2386 }
2387
2388 bool G1CMTask::get_entries_from_global_stack() {
2389 // Local array where we'll store the entries that will be popped
2390 // from the global stack.
2391 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2392
2393 if (!_cm->mark_stack_pop(buffer)) {
2394 return false;
2395 }
2396
2397 // We did actually pop at least one entry.
2398 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2399 G1TaskQueueEntry task_entry = buffer[i];
2400 if (task_entry.is_null()) {
2401 break;
2402 }
2403 assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2404 bool success = _task_queue->push(task_entry);
2405 // We only call this when the local queue is empty or under a
2406 // given target limit. So, we do not expect this push to fail.
2407 assert(success, "invariant");
2408 }
2409
2410 // This operation was quite expensive, so decrease the limits
2411 decrease_limits();
2412 return true;
2413 }
2414
2415 void G1CMTask::drain_local_queue(bool partially) {
2416 if (has_aborted()) {
2417 return;
2418 }
2419
2420 // Decide what the target size is, depending whether we're going to
2421 // drain it partially (so that other tasks can steal if they run out
2422 // of things to do) or totally (at the very end).
2423 uint target_size;
2424 if (partially) {
2425 target_size = GCDrainStackTargetSize;
2426 } else {
2427 target_size = 0;
2428 }
2429
2430 if (_task_queue->size() > target_size) {
2431 G1TaskQueueEntry entry;
2432 bool ret = _task_queue->pop_local(entry);
2433 while (ret) {
2434 process_entry(entry, false /* stolen */);
2435 if (_task_queue->size() <= target_size || has_aborted()) {
2436 ret = false;
2437 } else {
2438 ret = _task_queue->pop_local(entry);
2439 }
2440 }
2441 }
2442 }
2443
2444 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2445 assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2446
2447 // Mark objArray klass metadata
2448 process_klass(obj->klass());
2449
2450 size_t array_length = obj->length();
2451 size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2452
2453 process_array_chunk(obj, 0, initial_chunk_size);
2454
2455 // Include object header size
2456 return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2457 }
2458
2459 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2460 PartialArrayState* state = task.to_partial_array_state();
2461 // Access state before release by claim().
2462 objArrayOop obj = objArrayOop(state->source());
2463
2464 PartialArraySplitter::Claim claim =
2465 _partial_array_splitter.claim(state, _task_queue, stolen);
2466
2467 process_array_chunk(obj, claim._start, claim._end);
2468 return heap_word_size((claim._end - claim._start) * heapOopSize);
2469 }
2470
2471 void G1CMTask::drain_global_stack(bool partially) {
2472 if (has_aborted()) {
2473 return;
2474 }
2475
2476 // We have a policy to drain the local queue before we attempt to
2477 // drain the global stack.
2478 assert(partially || _task_queue->size() == 0, "invariant");
2479
2480 // Decide what the target size is, depending whether we're going to
2481 // drain it partially (so that other tasks can steal if they run out
2482 // of things to do) or totally (at the very end).
2483 // Notice that when draining the global mark stack partially, due to the racyness
2484 // of the mark stack size update we might in fact drop below the target. But,
2485 // this is not a problem.
2486 // In case of total draining, we simply process until the global mark stack is
2487 // totally empty, disregarding the size counter.
2488 if (partially) {
2489 size_t const target_size = _cm->partial_mark_stack_size_target();
2490 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2491 if (get_entries_from_global_stack()) {
2492 drain_local_queue(partially);
2493 }
2494 }
2495 } else {
2496 while (!has_aborted() && get_entries_from_global_stack()) {
2497 drain_local_queue(partially);
2498 }
2499 }
2500 }
2501
2502 // SATB Queue has several assumptions on whether to call the par or
2503 // non-par versions of the methods. this is why some of the code is
2504 // replicated. We should really get rid of the single-threaded version
2505 // of the code to simplify things.
2506 void G1CMTask::drain_satb_buffers() {
2507 if (has_aborted()) {
2508 return;
2509 }
2510
2511 // We set this so that the regular clock knows that we're in the
2512 // middle of draining buffers and doesn't set the abort flag when it
2513 // notices that SATB buffers are available for draining. It'd be
2514 // very counter productive if it did that. :-)
2515 _draining_satb_buffers = true;
2516
2517 G1CMSATBBufferClosure satb_cl(this, _g1h);
2518 SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2519
2520 // This keeps claiming and applying the closure to completed buffers
2521 // until we run out of buffers or we need to abort.
2522 while (!has_aborted() &&
2523 satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2524 abort_marking_if_regular_check_fail();
2525 }
2526
2527 // Can't assert qset is empty here, even if not aborted. If concurrent,
2528 // some other thread might be adding to the queue. If not concurrent,
2529 // some other thread might have won the race for the last buffer, but
2530 // has not yet decremented the count.
2531
2532 _draining_satb_buffers = false;
2533
2534 // again, this was a potentially expensive operation, decrease the
2535 // limits to get the regular clock call early
2536 decrease_limits();
2537 }
2538
2539 #ifndef PRODUCT
2540 void G1CMTask::verify_no_mark_stats_for(uint region_idx) {
2541 _mark_stats_cache.verify_no_mark_stats_for(region_idx);
2542 }
2543 #endif
2544
2545 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2546 _mark_stats_cache.reset(region_idx);
2547 }
2548
2549 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2550 return _mark_stats_cache.evict_all();
2551 }
2552
2553 void G1CMTask::print_stats() {
2554 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2555 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2556 _elapsed_time_ms, _termination_time_ms);
2557 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2558 _step_times_ms.num(),
2559 _step_times_ms.avg(),
2560 _step_times_ms.sd(),
2561 _step_times_ms.maximum(),
2562 _step_times_ms.sum());
2563 size_t const hits = _mark_stats_cache.hits();
2564 size_t const misses = _mark_stats_cache.misses();
2565 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2566 hits, misses, percent_of(hits, hits + misses));
2567 }
2568
2569 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2570 return _task_queues->steal(worker_id, task_entry);
2571 }
2572
2573 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2574 if (has_aborted() || _curr_region == nullptr) {
2575 return;
2576 }
2577
2578 // This means that we're already holding on to a region.
2579 assert(_finger != nullptr, "if region is not null, then the finger "
2580 "should not be null either");
2581
2582 // We might have restarted this task after an evacuation pause
2583 // which might have evacuated the region we're holding on to
2584 // underneath our feet. Let's read its limit again to make sure
2585 // that we do not iterate over a region of the heap that
2586 // contains garbage (update_region_limit() will also move
2587 // _finger to the start of the region if it is found empty).
2588 update_region_limit();
2589 // We will start from _finger not from the start of the region,
2590 // as we might be restarting this task after aborting half-way
2591 // through scanning this region. In this case, _finger points to
2592 // the address where we last found a marked object. If this is a
2593 // fresh region, _finger points to start().
2594 MemRegion mr = MemRegion(_finger, _region_limit);
2595
2596 assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2597 "humongous regions should go around loop once only");
2598
2599 // Some special cases:
2600 // If the memory region is empty, we can just give up the region.
2601 // If the current region is humongous then we only need to check
2602 // the bitmap for the bit associated with the start of the object,
2603 // scan the object if it's live, and give up the region.
2604 // Otherwise, let's iterate over the bitmap of the part of the region
2605 // that is left.
2606 // If the iteration is successful, give up the region.
2607 if (mr.is_empty()) {
2608 giveup_current_region();
2609 abort_marking_if_regular_check_fail();
2610 } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2611 if (_mark_bitmap->is_marked(mr.start())) {
2612 // The object is marked - apply the closure
2613 bitmap_closure.do_addr(mr.start());
2614 }
2615 // Even if this task aborted while scanning the humongous object
2616 // we can (and should) give up the current region.
2617 giveup_current_region();
2618 abort_marking_if_regular_check_fail();
2619 } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2620 giveup_current_region();
2621 abort_marking_if_regular_check_fail();
2622 } else {
2623 assert(has_aborted(), "currently the only way to do so");
2624 // The only way to abort the bitmap iteration is to return
2625 // false from the do_bit() method. However, inside the
2626 // do_bit() method we move the _finger to point to the
2627 // object currently being looked at. So, if we bail out, we
2628 // have definitely set _finger to something non-null.
2629 assert(_finger != nullptr, "invariant");
2630
2631 // Region iteration was actually aborted. So now _finger
2632 // points to the address of the object we last scanned. If we
2633 // leave it there, when we restart this task, we will rescan
2634 // the object. It is easy to avoid this. We move the finger by
2635 // enough to point to the next possible object header.
2636 assert(_finger < _region_limit, "invariant");
2637 HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2638 if (new_finger >= _region_limit) {
2639 giveup_current_region();
2640 } else {
2641 move_finger_to(new_finger);
2642 }
2643 }
2644 }
2645
2646 void G1CMTask::claim_new_region() {
2647 // Read the note on the claim_region() method on why it might
2648 // return null with potentially more regions available for
2649 // claiming and why we have to check out_of_regions() to determine
2650 // whether we're done or not.
2651 while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2652 // We are going to try to claim a new region. We should have
2653 // given up on the previous one.
2654 // Separated the asserts so that we know which one fires.
2655 assert(_curr_region == nullptr, "invariant");
2656 assert(_finger == nullptr, "invariant");
2657 assert(_region_limit == nullptr, "invariant");
2658 G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2659 if (claimed_region != nullptr) {
2660 // Yes, we managed to claim one
2661 setup_for_region(claimed_region);
2662 assert(_curr_region == claimed_region, "invariant");
2663 }
2664 // It is important to call the regular clock here. It might take
2665 // a while to claim a region if, for example, we hit a large
2666 // block of empty regions. So we need to call the regular clock
2667 // method once round the loop to make sure it's called
2668 // frequently enough.
2669 abort_marking_if_regular_check_fail();
2670 }
2671 }
2672
2673 void G1CMTask::attempt_stealing() {
2674 // We cannot check whether the global stack is empty, since other
2675 // tasks might be pushing objects to it concurrently.
2676 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2677 "only way to reach here");
2678 while (!has_aborted()) {
2679 G1TaskQueueEntry entry;
2680 if (_cm->try_stealing(_worker_id, entry)) {
2681 process_entry(entry, true /* stolen */);
2682
2683 // And since we're towards the end, let's totally drain the
2684 // local queue and global stack.
2685 drain_local_queue(false);
2686 drain_global_stack(false);
2687 } else {
2688 break;
2689 }
2690 }
2691 }
2692
2693 void G1CMTask::attempt_termination(bool is_serial) {
2694 // We cannot check whether the global stack is empty, since other
2695 // tasks might be concurrently pushing objects on it.
2696 // Separated the asserts so that we know which one fires.
2697 assert(_cm->out_of_regions(), "only way to reach here");
2698 assert(_task_queue->size() == 0, "only way to reach here");
2699 double termination_start_time_ms = os::elapsedTime() * 1000.0;
2700
2701 // The G1CMTask class also extends the TerminatorTerminator class,
2702 // hence its should_exit_termination() method will also decide
2703 // whether to exit the termination protocol or not.
2704 bool finished = (is_serial ||
2705 _cm->terminator()->offer_termination(this));
2706 _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2707
2708 if (finished) {
2709 // We're all done.
2710
2711 // We can now guarantee that the global stack is empty, since
2712 // all other tasks have finished. We separated the guarantees so
2713 // that, if a condition is false, we can immediately find out
2714 // which one.
2715 guarantee(_cm->out_of_regions(), "only way to reach here");
2716 guarantee(_cm->mark_stack_empty(), "only way to reach here");
2717 guarantee(_task_queue->size() == 0, "only way to reach here");
2718 guarantee(!_cm->has_overflown(), "only way to reach here");
2719 guarantee(!has_aborted(), "should never happen if termination has completed");
2720 } else {
2721 // Apparently there's more work to do. Let's abort this task. We
2722 // will restart it and hopefully we can find more things to do.
2723 set_has_aborted();
2724 }
2725 }
2726
2727 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2728 if (_has_timed_out) {
2729 double diff_ms = elapsed_time_ms - _time_target_ms;
2730 // Keep statistics of how well we did with respect to hitting
2731 // our target only if we actually timed out (if we aborted for
2732 // other reasons, then the results might get skewed).
2733 _marking_step_diff_ms.add(diff_ms);
2734 }
2735
2736 if (!_cm->has_overflown()) {
2737 return;
2738 }
2739
2740 // This is the interesting one. We aborted because a global
2741 // overflow was raised. This means we have to restart the
2742 // marking phase and start iterating over regions. However, in
2743 // order to do this we have to make sure that all tasks stop
2744 // what they are doing and re-initialize in a safe manner. We
2745 // will achieve this with the use of two barrier sync points.
2746 if (!is_serial) {
2747 // We only need to enter the sync barrier if being called
2748 // from a parallel context
2749 _cm->enter_first_sync_barrier(_worker_id);
2750
2751 // When we exit this sync barrier we know that all tasks have
2752 // stopped doing marking work. So, it's now safe to
2753 // re-initialize our data structures.
2754 }
2755
2756 clear_region_fields();
2757 flush_mark_stats_cache();
2758
2759 if (!is_serial) {
2760 // If we're executing the concurrent phase of marking, reset the marking
2761 // state; otherwise the marking state is reset after reference processing,
2762 // during the remark pause.
2763 // If we reset here as a result of an overflow during the remark we will
2764 // see assertion failures from any subsequent set_concurrency_and_phase()
2765 // calls.
2766 if (_cm->concurrent() && _worker_id == 0) {
2767 // Worker 0 is responsible for clearing the global data structures because
2768 // of an overflow. During STW we should not clear the overflow flag (in
2769 // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2770 // method to abort the pause and restart concurrent marking.
2771 _cm->reset_marking_for_restart();
2772
2773 log_info(gc, marking)("Concurrent Mark reset for overflow");
2774 }
2775
2776 // ...and enter the second barrier.
2777 _cm->enter_second_sync_barrier(_worker_id);
2778 }
2779 }
2780
2781 /*****************************************************************************
2782
2783 The do_marking_step(time_target_ms, ...) method is the building
2784 block of the parallel marking framework. It can be called in parallel
2785 with other invocations of do_marking_step() on different tasks
2786 (but only one per task, obviously) and concurrently with the
2787 mutator threads, or during remark, hence it eliminates the need
2788 for two versions of the code. When called during remark, it will
2789 pick up from where the task left off during the concurrent marking
2790 phase. Interestingly, tasks are also claimable during evacuation
2791 pauses too, since do_marking_step() ensures that it aborts before
2792 it needs to yield.
2793
2794 The data structures that it uses to do marking work are the
2795 following:
2796
2797 (1) Marking Bitmap. If there are grey objects that appear only
2798 on the bitmap (this happens either when dealing with an overflow
2799 or when the concurrent start pause has simply marked the roots
2800 and didn't push them on the stack), then tasks claim heap
2801 regions whose bitmap they then scan to find grey objects. A
2802 global finger indicates where the end of the last claimed region
2803 is. A local finger indicates how far into the region a task has
2804 scanned. The two fingers are used to determine how to grey an
2805 object (i.e. whether simply marking it is OK, as it will be
2806 visited by a task in the future, or whether it needs to be also
2807 pushed on a stack).
2808
2809 (2) Local Queue. The local queue of the task which is accessed
2810 reasonably efficiently by the task. Other tasks can steal from
2811 it when they run out of work. Throughout the marking phase, a
2812 task attempts to keep its local queue short but not totally
2813 empty, so that entries are available for stealing by other
2814 tasks. Only when there is no more work, a task will totally
2815 drain its local queue.
2816
2817 (3) Global Mark Stack. This handles local queue overflow. During
2818 marking only sets of entries are moved between it and the local
2819 queues, as access to it requires a mutex and more fine-grain
2820 interaction with it which might cause contention. If it
2821 overflows, then the marking phase should restart and iterate
2822 over the bitmap to identify grey objects. Throughout the marking
2823 phase, tasks attempt to keep the global mark stack at a small
2824 length but not totally empty, so that entries are available for
2825 popping by other tasks. Only when there is no more work, tasks
2826 will totally drain the global mark stack.
2827
2828 (4) SATB Buffer Queue. This is where completed SATB buffers are
2829 made available. Buffers are regularly removed from this queue
2830 and scanned for roots, so that the queue doesn't get too
2831 long. During remark, all completed buffers are processed, as
2832 well as the filled in parts of any uncompleted buffers.
2833
2834 The do_marking_step() method tries to abort when the time target
2835 has been reached. There are a few other cases when the
2836 do_marking_step() method also aborts:
2837
2838 (1) When the marking phase has been aborted (after a Full GC).
2839
2840 (2) When a global overflow (on the global stack) has been
2841 triggered. Before the task aborts, it will actually sync up with
2842 the other tasks to ensure that all the marking data structures
2843 (local queues, stacks, fingers etc.) are re-initialized so that
2844 when do_marking_step() completes, the marking phase can
2845 immediately restart.
2846
2847 (3) When enough completed SATB buffers are available. The
2848 do_marking_step() method only tries to drain SATB buffers right
2849 at the beginning. So, if enough buffers are available, the
2850 marking step aborts and the SATB buffers are processed at
2851 the beginning of the next invocation.
2852
2853 (4) To yield. when we have to yield then we abort and yield
2854 right at the end of do_marking_step(). This saves us from a lot
2855 of hassle as, by yielding we might allow a Full GC. If this
2856 happens then objects will be compacted underneath our feet, the
2857 heap might shrink, etc. We save checking for this by just
2858 aborting and doing the yield right at the end.
2859
2860 From the above it follows that the do_marking_step() method should
2861 be called in a loop (or, otherwise, regularly) until it completes.
2862
2863 If a marking step completes without its has_aborted() flag being
2864 true, it means it has completed the current marking phase (and
2865 also all other marking tasks have done so and have all synced up).
2866
2867 A method called regular_clock_call() is invoked "regularly" (in
2868 sub ms intervals) throughout marking. It is this clock method that
2869 checks all the abort conditions which were mentioned above and
2870 decides when the task should abort. A work-based scheme is used to
2871 trigger this clock method: when the number of object words the
2872 marking phase has scanned or the number of references the marking
2873 phase has visited reach a given limit. Additional invocations to
2874 the method clock have been planted in a few other strategic places
2875 too. The initial reason for the clock method was to avoid calling
2876 cpu time gathering too regularly, as it is quite expensive. So,
2877 once it was in place, it was natural to piggy-back all the other
2878 conditions on it too and not constantly check them throughout the code.
2879
2880 If do_termination is true then do_marking_step will enter its
2881 termination protocol.
2882
2883 The value of is_serial must be true when do_marking_step is being
2884 called serially (i.e. by the VMThread) and do_marking_step should
2885 skip any synchronization in the termination and overflow code.
2886 Examples include the serial remark code and the serial reference
2887 processing closures.
2888
2889 The value of is_serial must be false when do_marking_step is
2890 being called by any of the worker threads.
2891 Examples include the concurrent marking code (CMMarkingTask),
2892 the MT remark code, and the MT reference processing closures.
2893
2894 *****************************************************************************/
2895
2896 void G1CMTask::do_marking_step(double time_target_ms,
2897 bool do_termination,
2898 bool is_serial) {
2899 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2900
2901 _start_cpu_time_ns = os::current_thread_cpu_time();
2902
2903 // If do_stealing is true then do_marking_step will attempt to
2904 // steal work from the other G1CMTasks. It only makes sense to
2905 // enable stealing when the termination protocol is enabled
2906 // and do_marking_step() is not being called serially.
2907 bool do_stealing = do_termination && !is_serial;
2908
2909 G1Predictions const& predictor = _g1h->policy()->predictor();
2910 double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2911 _time_target_ms = time_target_ms - diff_prediction_ms;
2912
2913 // set up the variables that are used in the work-based scheme to
2914 // call the regular clock method
2915 _words_scanned = 0;
2916 _refs_reached = 0;
2917 recalculate_limits();
2918
2919 // clear all flags
2920 clear_has_aborted();
2921 _has_timed_out = false;
2922 _draining_satb_buffers = false;
2923
2924 ++_calls;
2925
2926 // Set up the bitmap and oop closures. Anything that uses them is
2927 // eventually called from this method, so it is OK to allocate these
2928 // statically.
2929 G1CMBitMapClosure bitmap_closure(this, _cm);
2930 G1CMOopClosure cm_oop_closure(_g1h, this);
2931 set_cm_oop_closure(&cm_oop_closure);
2932
2933 if (_cm->has_overflown()) {
2934 // This can happen if the mark stack overflows during a GC pause
2935 // and this task, after a yield point, restarts. We have to abort
2936 // as we need to get into the overflow protocol which happens
2937 // right at the end of this task.
2938 set_has_aborted();
2939 }
2940
2941 // First drain any available SATB buffers. After this, we will not
2942 // look at SATB buffers before the next invocation of this method.
2943 // If enough completed SATB buffers are queued up, the regular clock
2944 // will abort this task so that it restarts.
2945 drain_satb_buffers();
2946 // ...then partially drain the local queue and the global stack
2947 drain_local_queue(true);
2948 drain_global_stack(true);
2949
2950 do {
2951 process_current_region(bitmap_closure);
2952 // At this point we have either completed iterating over the
2953 // region we were holding on to, or we have aborted.
2954
2955 // We then partially drain the local queue and the global stack.
2956 drain_local_queue(true);
2957 drain_global_stack(true);
2958
2959 claim_new_region();
2960
2961 assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2962 "at this point we should be out of regions");
2963 } while ( _curr_region != nullptr && !has_aborted());
2964
2965 // We cannot check whether the global stack is empty, since other
2966 // tasks might be pushing objects to it concurrently.
2967 assert(has_aborted() || _cm->out_of_regions(),
2968 "at this point we should be out of regions");
2969 // Try to reduce the number of available SATB buffers so that
2970 // remark has less work to do.
2971 drain_satb_buffers();
2972
2973 // Since we've done everything else, we can now totally drain the
2974 // local queue and global stack.
2975 drain_local_queue(false);
2976 drain_global_stack(false);
2977
2978 // Attempt at work stealing from other task's queues.
2979 if (do_stealing && !has_aborted()) {
2980 // We have not aborted. This means that we have finished all that
2981 // we could. Let's try to do some stealing...
2982 attempt_stealing();
2983 }
2984
2985 // We still haven't aborted. Now, let's try to get into the
2986 // termination protocol.
2987 if (do_termination && !has_aborted()) {
2988 attempt_termination(is_serial);
2989 }
2990
2991 // Mainly for debugging purposes to make sure that a pointer to the
2992 // closure which was statically allocated in this frame doesn't
2993 // escape it by accident.
2994 set_cm_oop_closure(nullptr);
2995 jlong end_cpu_time_ns = os::current_thread_cpu_time();
2996 double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2997 // Update the step history.
2998 _step_times_ms.add(elapsed_time_ms);
2999
3000 if (has_aborted()) {
3001 // The task was aborted for some reason.
3002 handle_abort(is_serial, elapsed_time_ms);
3003 }
3004 }
3005
3006 G1CMTask::G1CMTask(uint worker_id,
3007 G1ConcurrentMark* cm,
3008 G1CMTaskQueue* task_queue,
3009 G1RegionMarkStats* mark_stats) :
3010 _worker_id(worker_id),
3011 _g1h(G1CollectedHeap::heap()),
3012 _cm(cm),
3013 _mark_bitmap(nullptr),
3014 _task_queue(task_queue),
3015 _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
3016 _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
3017 _calls(0),
3018 _time_target_ms(0.0),
3019 _start_cpu_time_ns(0),
3020 _cm_oop_closure(nullptr),
3021 _curr_region(nullptr),
3022 _finger(nullptr),
3023 _region_limit(nullptr),
3024 _words_scanned(0),
3025 _words_scanned_limit(0),
3026 _real_words_scanned_limit(0),
3027 _refs_reached(0),
3028 _refs_reached_limit(0),
3029 _real_refs_reached_limit(0),
3030 _has_aborted(false),
3031 _has_timed_out(false),
3032 _draining_satb_buffers(false),
3033 _step_times_ms(),
3034 _elapsed_time_ms(0.0),
3035 _termination_time_ms(0.0),
3036 _marking_step_diff_ms()
3037 {
3038 guarantee(task_queue != nullptr, "invariant");
3039
3040 _marking_step_diff_ms.add(0.5);
3041 }
3042
3043 // These are formatting macros that are used below to ensure
3044 // consistent formatting. The *_H_* versions are used to format the
3045 // header for a particular value and they should be kept consistent
3046 // with the corresponding macro. Also note that most of the macros add
3047 // the necessary white space (as a prefix) which makes them a bit
3048 // easier to compose.
3049
3050 // All the output lines are prefixed with this string to be able to
3051 // identify them easily in a large log file.
3052 #define G1PPRL_LINE_PREFIX "###"
3053
3054 #define G1PPRL_ADDR_BASE_FORMAT " " PTR_FORMAT "-" PTR_FORMAT
3055 #ifdef _LP64
3056 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
3057 #else // _LP64
3058 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
3059 #endif // _LP64
3060
3061 // For per-region info
3062 #define G1PPRL_TYPE_FORMAT " %-4s"
3063 #define G1PPRL_TYPE_H_FORMAT " %4s"
3064 #define G1PPRL_STATE_FORMAT " %-5s"
3065 #define G1PPRL_STATE_H_FORMAT " %5s"
3066 #define G1PPRL_BYTE_FORMAT " %9zu"
3067 #define G1PPRL_BYTE_H_FORMAT " %9s"
3068 #define G1PPRL_DOUBLE_FORMAT "%14.1f"
3069 #define G1PPRL_GCEFF_H_FORMAT " %14s"
3070 #define G1PPRL_GID_H_FORMAT " %9s"
3071 #define G1PPRL_GID_FORMAT " " UINT32_FORMAT_W(9)
3072 #define G1PPRL_LEN_FORMAT " " UINT32_FORMAT_W(14)
3073 #define G1PPRL_LEN_H_FORMAT " %14s"
3074 #define G1PPRL_GID_GCEFF_FORMAT " %14.1f"
3075 #define G1PPRL_GID_LIVENESS_FORMAT " %9.2f"
3076
3077 // For summary info
3078 #define G1PPRL_SUM_ADDR_FORMAT(tag) " " tag ":" G1PPRL_ADDR_BASE_FORMAT
3079 #define G1PPRL_SUM_BYTE_FORMAT(tag) " " tag ": %zu"
3080 #define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
3081 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3082
3083 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
3084 _total_used_bytes(0),
3085 _total_capacity_bytes(0),
3086 _total_live_bytes(0),
3087 _total_remset_bytes(0),
3088 _total_code_roots_bytes(0)
3089 {
3090 if (!log_is_enabled(Trace, gc, liveness)) {
3091 return;
3092 }
3093
3094 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3095 MemRegion reserved = g1h->reserved();
3096 double now = os::elapsedTime();
3097
3098 // Print the header of the output.
3099 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3100 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3101 G1PPRL_SUM_ADDR_FORMAT("reserved")
3102 G1PPRL_SUM_BYTE_FORMAT("region-size"),
3103 p2i(reserved.start()), p2i(reserved.end()),
3104 G1HeapRegion::GrainBytes);
3105 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3106 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3107 G1PPRL_TYPE_H_FORMAT
3108 G1PPRL_ADDR_BASE_H_FORMAT
3109 G1PPRL_BYTE_H_FORMAT
3110 G1PPRL_BYTE_H_FORMAT
3111 G1PPRL_STATE_H_FORMAT
3112 G1PPRL_BYTE_H_FORMAT
3113 G1PPRL_GID_H_FORMAT,
3114 "type", "address-range",
3115 "used", "live",
3116 "state", "code-roots",
3117 "group-id");
3118 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3119 G1PPRL_TYPE_H_FORMAT
3120 G1PPRL_ADDR_BASE_H_FORMAT
3121 G1PPRL_BYTE_H_FORMAT
3122 G1PPRL_BYTE_H_FORMAT
3123 G1PPRL_STATE_H_FORMAT
3124 G1PPRL_BYTE_H_FORMAT
3125 G1PPRL_GID_H_FORMAT,
3126 "", "",
3127 "(bytes)", "(bytes)",
3128 "", "(bytes)", "");
3129 }
3130
3131 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3132 if (!log_is_enabled(Trace, gc, liveness)) {
3133 return false;
3134 }
3135
3136 const char* type = r->get_type_str();
3137 HeapWord* bottom = r->bottom();
3138 HeapWord* end = r->end();
3139 size_t capacity_bytes = r->capacity();
3140 size_t used_bytes = r->used();
3141 size_t live_bytes = r->live_bytes();
3142 size_t remset_bytes = r->rem_set()->mem_size();
3143 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3144 const char* remset_type = r->rem_set()->get_short_state_str();
3145 uint cset_group_id = r->rem_set()->has_cset_group()
3146 ? r->rem_set()->cset_group_id()
3147 : G1CSetCandidateGroup::NoRemSetId;
3148
3149 _total_used_bytes += used_bytes;
3150 _total_capacity_bytes += capacity_bytes;
3151 _total_live_bytes += live_bytes;
3152 _total_remset_bytes += remset_bytes;
3153 _total_code_roots_bytes += code_roots_bytes;
3154
3155 // Print a line for this particular region.
3156 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3157 G1PPRL_TYPE_FORMAT
3158 G1PPRL_ADDR_BASE_FORMAT
3159 G1PPRL_BYTE_FORMAT
3160 G1PPRL_BYTE_FORMAT
3161 G1PPRL_STATE_FORMAT
3162 G1PPRL_BYTE_FORMAT
3163 G1PPRL_GID_FORMAT,
3164 type, p2i(bottom), p2i(end),
3165 used_bytes, live_bytes,
3166 remset_type, code_roots_bytes,
3167 cset_group_id);
3168
3169 return false;
3170 }
3171
3172 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3173 if (!log_is_enabled(Trace, gc, liveness)) {
3174 return;
3175 }
3176
3177 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3178 _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3179 // add static memory usages to remembered set sizes
3180 _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3181
3182 log_cset_candidate_groups();
3183
3184 // Print the footer of the output.
3185 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3186 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3187 " SUMMARY"
3188 G1PPRL_SUM_MB_FORMAT("capacity")
3189 G1PPRL_SUM_MB_PERC_FORMAT("used")
3190 G1PPRL_SUM_MB_PERC_FORMAT("live")
3191 G1PPRL_SUM_MB_FORMAT("remset")
3192 G1PPRL_SUM_MB_FORMAT("code-roots"),
3193 bytes_to_mb(_total_capacity_bytes),
3194 bytes_to_mb(_total_used_bytes),
3195 percent_of(_total_used_bytes, _total_capacity_bytes),
3196 bytes_to_mb(_total_live_bytes),
3197 percent_of(_total_live_bytes, _total_capacity_bytes),
3198 bytes_to_mb(_total_remset_bytes),
3199 bytes_to_mb(_total_code_roots_bytes));
3200 }
3201
3202 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3203 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3204 G1PPRL_GID_FORMAT
3205 G1PPRL_LEN_FORMAT
3206 G1PPRL_GID_GCEFF_FORMAT
3207 G1PPRL_GID_LIVENESS_FORMAT
3208 G1PPRL_BYTE_FORMAT
3209 G1PPRL_TYPE_H_FORMAT,
3210 group->group_id(),
3211 group->length(),
3212 group->length() > 0 ? group->gc_efficiency() : 0.0,
3213 group->length() > 0 ? group->liveness_percent() : 0.0,
3214 group->card_set()->mem_size(),
3215 type);
3216 _total_remset_bytes += group->card_set()->mem_size();
3217 }
3218
3219 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3220 for (G1CSetCandidateGroup* group : gl) {
3221 log_cset_candidate_group_add_total(group, type);
3222 }
3223 }
3224
3225 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3226 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3227 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3228 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3229 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3230 G1PPRL_GID_H_FORMAT
3231 G1PPRL_LEN_H_FORMAT
3232 G1PPRL_GCEFF_H_FORMAT
3233 G1PPRL_BYTE_H_FORMAT
3234 G1PPRL_BYTE_H_FORMAT
3235 G1PPRL_TYPE_H_FORMAT,
3236 "groud-id", "num-regions",
3237 "gc-eff", "liveness",
3238 "remset", "type");
3239
3240 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3241 G1PPRL_GID_H_FORMAT
3242 G1PPRL_LEN_H_FORMAT
3243 G1PPRL_GCEFF_H_FORMAT
3244 G1PPRL_BYTE_H_FORMAT
3245 G1PPRL_BYTE_H_FORMAT
3246 G1PPRL_TYPE_H_FORMAT,
3247 "", "",
3248 "(bytes/ms)", "%",
3249 "(bytes)", "");
3250
3251 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3252
3253 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3254
3255 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3256 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3257 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3258 }