1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "cppstdlib/new.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1BatchedTask.hpp"
  30 #include "gc/g1/g1CardSetMemory.hpp"
  31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  32 #include "gc/g1/g1CollectedHeap.inline.hpp"
  33 #include "gc/g1/g1CollectorState.inline.hpp"
  34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  38 #include "gc/g1/g1ConcurrentRefine.hpp"
  39 #include "gc/g1/g1HeapRegion.inline.hpp"
  40 #include "gc/g1/g1HeapRegionManager.hpp"
  41 #include "gc/g1/g1HeapRegionPrinter.hpp"
  42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  44 #include "gc/g1/g1HeapVerifier.hpp"
  45 #include "gc/g1/g1OopClosures.inline.hpp"
  46 #include "gc/g1/g1Policy.hpp"
  47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  48 #include "gc/g1/g1ThreadLocalData.hpp"
  49 #include "gc/g1/g1Trace.hpp"
  50 #include "gc/shared/gcId.hpp"
  51 #include "gc/shared/gcTimer.hpp"
  52 #include "gc/shared/gcTraceTime.inline.hpp"
  53 #include "gc/shared/gcVMOperations.hpp"
  54 #include "gc/shared/partialArraySplitter.inline.hpp"
  55 #include "gc/shared/partialArrayState.hpp"
  56 #include "gc/shared/partialArrayTaskStats.hpp"
  57 #include "gc/shared/referencePolicy.hpp"
  58 #include "gc/shared/suspendibleThreadSet.hpp"
  59 #include "gc/shared/taskqueue.inline.hpp"
  60 #include "gc/shared/taskTerminator.hpp"
  61 #include "gc/shared/weakProcessor.inline.hpp"
  62 #include "gc/shared/workerPolicy.hpp"
  63 #include "jvm.h"
  64 #include "logging/log.hpp"
  65 #include "memory/allocation.hpp"
  66 #include "memory/iterator.hpp"
  67 #include "memory/metaspaceUtils.hpp"
  68 #include "memory/resourceArea.hpp"
  69 #include "memory/universe.hpp"
  70 #include "nmt/memTracker.hpp"
  71 #include "oops/access.inline.hpp"
  72 #include "oops/oop.inline.hpp"
  73 #include "runtime/globals_extension.hpp"
  74 #include "runtime/handles.inline.hpp"
  75 #include "runtime/java.hpp"
  76 #include "runtime/orderAccess.hpp"
  77 #include "runtime/os.hpp"
  78 #include "runtime/prefetch.inline.hpp"
  79 #include "runtime/threads.hpp"
  80 #include "utilities/align.hpp"
  81 #include "utilities/checkedCast.hpp"
  82 #include "utilities/formatBuffer.hpp"
  83 #include "utilities/growableArray.hpp"
  84 #include "utilities/powerOfTwo.hpp"
  85 
  86 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
  87 
  88 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  89   assert(cm != nullptr, "must be");
  90 }
  91 
  92 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  93   assert(cm != nullptr, "must be");
  94   assert(_cm == nullptr, "double initialize");
  95   _cm = cm;
  96 }
  97 
  98 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  99   assert(addr < _cm->finger(), "invariant");
 100   assert(addr >= _task->finger(), "invariant");
 101 
 102   // We move that task's local finger along.
 103   _task->move_finger_to(addr);
 104 
 105   _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
 106   // we only partially drain the local queue and global stack
 107   _task->drain_local_queue(true);
 108   _task->drain_global_stack(true);
 109 
 110   // if the has_aborted flag has been raised, we need to bail out of
 111   // the iteration
 112   return !_task->has_aborted();
 113 }
 114 
 115 G1CMMarkStack::G1CMMarkStack() :
 116   _chunk_allocator() {
 117   set_empty();
 118 }
 119 
 120 size_t G1CMMarkStack::capacity_alignment() {
 121   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 122 }
 123 
 124 bool G1CMMarkStack::initialize() {
 125   guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
 126 
 127   size_t initial_capacity = MarkStackSize;
 128   size_t max_capacity = MarkStackSizeMax;
 129 
 130   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 131 
 132   size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 133   size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 134 
 135   initial_num_chunks = round_up_power_of_2(initial_num_chunks);
 136   max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
 137 
 138   size_t limit = (INT_MAX - 1);
 139   max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 140   initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 141 
 142   FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
 143   FLAG_SET_ERGO(MarkStackSize, initial_capacity);
 144 
 145   log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
 146 
 147   log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
 148                 initial_num_chunks, max_capacity);
 149 
 150   return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
 151 }
 152 
 153 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
 154   if (_size.load_relaxed() >= _max_capacity) {
 155     return nullptr;
 156   }
 157 
 158   size_t cur_idx = _size.fetch_then_add(1u);
 159 
 160   if (cur_idx >= _max_capacity) {
 161     return nullptr;
 162   }
 163 
 164   size_t bucket = get_bucket(cur_idx);
 165   if (_buckets[bucket].load_acquire() == nullptr) {
 166     if (!_should_grow) {
 167       // Prefer to restart the CM.
 168       return nullptr;
 169     }
 170 
 171     MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 172     if (_buckets[bucket].load_acquire() == nullptr) {
 173       size_t desired_capacity = bucket_size(bucket) * 2;
 174       if (!try_expand_to(desired_capacity)) {
 175         return nullptr;
 176       }
 177     }
 178   }
 179 
 180   size_t bucket_idx = get_bucket_index(cur_idx);
 181   TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
 182   result->next = nullptr;
 183   return result;
 184 }
 185 
 186 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
 187   _min_capacity(0),
 188   _max_capacity(0),
 189   _capacity(0),
 190   _num_buckets(0),
 191   _should_grow(false),
 192   _buckets(nullptr),
 193   _size(0)
 194 { }
 195 
 196 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
 197   guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
 198 
 199   _min_capacity = initial_capacity;
 200   _max_capacity = max_capacity;
 201   _num_buckets  = get_bucket(_max_capacity) + 1;
 202 
 203   _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
 204 
 205   for (size_t i = 0; i < _num_buckets; i++) {
 206     _buckets[i].store_relaxed(nullptr);
 207   }
 208 
 209   size_t new_capacity = bucket_size(0);
 210 
 211   if (!reserve(new_capacity)) {
 212     log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 213     return false;
 214   }
 215   return true;
 216 }
 217 
 218 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
 219   if (_capacity == _max_capacity) {
 220     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
 221     return false;
 222   }
 223 
 224   size_t old_capacity = _capacity;
 225   desired_capacity = MIN2(desired_capacity, _max_capacity);
 226 
 227   if (reserve(desired_capacity)) {
 228     log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
 229                   old_capacity, desired_capacity);
 230     return true;
 231   }
 232   return false;
 233 }
 234 
 235 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 236   size_t new_capacity = _capacity * 2;
 237   return try_expand_to(new_capacity);
 238 }
 239 
 240 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 241   if (_buckets == nullptr) {
 242     return;
 243   }
 244 
 245   for (size_t i = 0; i < _num_buckets; i++) {
 246     if (_buckets[i].load_relaxed() != nullptr) {
 247       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 248       _buckets[i].store_relaxed(nullptr);
 249     }
 250   }
 251 
 252   FREE_C_HEAP_ARRAY(_buckets);
 253 }
 254 
 255 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
 256   assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
 257 
 258   size_t highest_bucket = get_bucket(new_capacity - 1);
 259   size_t i = get_bucket(_capacity);
 260 
 261   // Allocate all buckets associated with indexes between the current capacity (_capacity)
 262   // and the new capacity (new_capacity). This step ensures that there are no gaps in the
 263   // array and that the capacity accurately reflects the reserved memory.
 264   for (; i <= highest_bucket; i++) {
 265     if (_buckets[i].load_acquire() != nullptr) {
 266       continue; // Skip over already allocated buckets.
 267     }
 268 
 269     size_t bucket_capacity = bucket_size(i);
 270 
 271     // Trim bucket size so that we do not exceed the _max_capacity.
 272     bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
 273                       bucket_capacity :
 274                       _max_capacity - _capacity;
 275 
 276 
 277     TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
 278 
 279     if (bucket_base == nullptr) {
 280       log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
 281                       bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
 282       return false;
 283     }
 284     _capacity += bucket_capacity;
 285     _buckets[i].release_store(bucket_base);
 286   }
 287   return true;
 288 }
 289 
 290 void G1CMMarkStack::expand() {
 291   _chunk_allocator.try_expand();
 292 }
 293 
 294 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
 295   elem->next = list->load_relaxed();
 296   list->store_relaxed(elem);
 297 }
 298 
 299 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 300   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 301   add_chunk_to_list(&_chunk_list, elem);
 302   _chunks_in_chunk_list++;
 303 }
 304 
 305 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 306   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 307   add_chunk_to_list(&_free_list, elem);
 308 }
 309 
 310 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
 311   TaskQueueEntryChunk* result = list->load_relaxed();
 312   if (result != nullptr) {
 313     list->store_relaxed(list->load_relaxed()->next);
 314   }
 315   return result;
 316 }
 317 
 318 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 319   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 320   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 321   if (result != nullptr) {
 322     _chunks_in_chunk_list--;
 323   }
 324   return result;
 325 }
 326 
 327 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 328   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 329   return remove_chunk_from_list(&_free_list);
 330 }
 331 
 332 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 333   // Get a new chunk.
 334   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 335 
 336   if (new_chunk == nullptr) {
 337     // Did not get a chunk from the free list. Allocate from backing memory.
 338     new_chunk = _chunk_allocator.allocate_new_chunk();
 339 
 340     if (new_chunk == nullptr) {
 341       return false;
 342     }
 343   }
 344 
 345   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 346 
 347   add_chunk_to_chunk_list(new_chunk);
 348 
 349   return true;
 350 }
 351 
 352 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 353   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 354 
 355   if (cur == nullptr) {
 356     return false;
 357   }
 358 
 359   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 360 
 361   add_chunk_to_free_list(cur);
 362   return true;
 363 }
 364 
 365 void G1CMMarkStack::set_empty() {
 366   _chunks_in_chunk_list = 0;
 367   _chunk_list.store_relaxed(nullptr);
 368   _free_list.store_relaxed(nullptr);
 369   _chunk_allocator.reset();
 370 }
 371 
 372 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
 373     _root_regions(MemRegion::create_array(max_regions, mtGC)),
 374     _max_regions(max_regions),
 375     _num_regions(0),
 376     _num_claimed_regions(0) { }
 377 
 378 G1CMRootMemRegions::~G1CMRootMemRegions() {
 379   MemRegion::destroy_array(_root_regions, _max_regions);
 380 }
 381 
 382 void G1CMRootMemRegions::reset() {
 383   assert_at_safepoint();
 384   assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
 385 
 386   _num_regions.store_relaxed(0);
 387   _num_claimed_regions.store_relaxed(0);
 388 }
 389 
 390 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
 391   assert_at_safepoint();
 392   uint idx = _num_regions.fetch_then_add(1u);
 393   assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
 394   assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
 395          "end (" PTR_FORMAT ")", p2i(start), p2i(end));
 396   _root_regions[idx].set_start(start);
 397   _root_regions[idx].set_end(end);
 398 }
 399 
 400 const MemRegion* G1CMRootMemRegions::claim_next() {
 401   uint local_num_regions = num_regions();
 402   if (num_claimed_regions() >= local_num_regions) {
 403     return nullptr;
 404   }
 405 
 406   uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
 407   if (claimed_index < local_num_regions) {
 408     return &_root_regions[claimed_index];
 409   }
 410   return nullptr;
 411 }
 412 
 413 bool G1CMRootMemRegions::work_completed() const {
 414   return num_remaining_regions() == 0;
 415 }
 416 
 417 uint G1CMRootMemRegions::num_remaining_regions() const {
 418   uint total = num_regions();
 419   uint claimed = num_claimed_regions();
 420   return (total > claimed) ? total - claimed : 0;
 421 }
 422 
 423 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 424   uint local_num_root_regions = num_regions();
 425   for (uint i = 0; i < local_num_root_regions; i++) {
 426     if (_root_regions[i].equals(mr)) {
 427       return true;
 428     }
 429   }
 430   return false;
 431 }
 432 
 433 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 434                                    G1RegionToSpaceMapper* bitmap_storage) :
 435   _cm_thread(nullptr),
 436   _g1h(g1h),
 437 
 438   _mark_bitmap(),
 439 
 440   _heap(_g1h->reserved()),
 441 
 442   _root_regions(_g1h->max_num_regions()),
 443   _root_region_scan_aborted(false),
 444 
 445   _global_mark_stack(),
 446 
 447   _finger(nullptr), // _finger set in set_non_marking_state
 448 
 449   _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
 450   _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
 451   _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
 452   _tasks(nullptr),
 453   _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
 454   _terminator(_max_num_tasks, _task_queues),
 455   _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
 456 
 457   _first_overflow_barrier_sync(),
 458   _second_overflow_barrier_sync(),
 459 
 460   _completed_mark_cycles(0),
 461   _has_overflown(false),
 462   _concurrent(false),
 463   _has_aborted(false),
 464   _restart_for_overflow(false),
 465   _gc_timer_cm(new ConcurrentGCTimer()),
 466   _gc_tracer_cm(new G1OldTracer()),
 467 
 468   // _verbose_level set below
 469 
 470   _remark_times(),
 471   _remark_mark_times(),
 472   _remark_weak_ref_times(),
 473   _cleanup_times(),
 474 
 475   _concurrent_workers(nullptr),
 476   _num_concurrent_workers(0),
 477   _max_concurrent_workers(0),
 478 
 479   _is_region_mark_stats_cache_in_use(false),
 480   _region_mark_stats(nullptr),
 481   _top_at_mark_starts(nullptr),
 482   _top_at_rebuild_starts(nullptr),
 483   _needs_remembered_set_rebuild(false)
 484 {
 485   assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
 486 
 487   _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
 488 }
 489 
 490 void G1ConcurrentMark::fully_initialize() {
 491   assert_at_safepoint();
 492 
 493   if (is_fully_initialized()) {
 494     return;
 495   }
 496 
 497   // Create & start ConcurrentMark thread.
 498   _cm_thread = new G1ConcurrentMarkThread(this);
 499   if (_cm_thread->osthread() == nullptr) {
 500     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 501   }
 502 
 503   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 504   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 505 
 506   _max_concurrent_workers = ConcGCThreads;
 507 
 508   _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
 509   _concurrent_workers->initialize_workers();
 510   _num_concurrent_workers = _concurrent_workers->active_workers();
 511 
 512   if (!_global_mark_stack.initialize()) {
 513     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 514   }
 515 
 516   _region_mark_stats = NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC);
 517   _top_at_mark_starts = NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC);
 518   _top_at_rebuild_starts = NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC);
 519 
 520   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 521 
 522   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 523   _num_active_tasks = _max_num_tasks;
 524 
 525   for (uint i = 0; i < _max_num_tasks; ++i) {
 526     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 527     _task_queues->register_queue(i, task_queue);
 528 
 529     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
 530   }
 531 
 532   uint max_num_regions = _g1h->max_num_regions();
 533   ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
 534   for (uint i = 0; i < max_num_regions; i++) {
 535     ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>(_g1h->bottom_addr_for_region(i));
 536   }
 537   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 538 
 539   reset_at_marking_complete();
 540 }
 541 
 542 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
 543   return _cm_thread->is_in_progress();
 544 }
 545 
 546 bool G1ConcurrentMark::is_in_marking() const {
 547   return _cm_thread->is_in_marking();
 548 }
 549 
 550 bool G1ConcurrentMark::is_in_marking_or_rebuild() const {
 551   return _cm_thread->is_in_marking_or_rebuild();
 552 }
 553 
 554 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
 555   return cm_thread()->is_in_reset_for_next_cycle();
 556 }
 557 
 558 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
 559   return _partial_array_state_manager;
 560 }
 561 
 562 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
 563   assert(is_fully_initialized(), "must be");
 564   return _cm_thread;
 565 }
 566 
 567 void G1ConcurrentMark::reset() {
 568   assert_fully_initialized();
 569 
 570   _has_aborted.store_relaxed(false);
 571 
 572   _is_region_mark_stats_cache_in_use = true;
 573   reset_marking_for_restart();
 574 
 575   // Reset all tasks, since different phases will use different number of active
 576   // threads. So, it's easiest to have all of them ready.
 577   for (uint i = 0; i < _max_num_tasks; ++i) {
 578     _tasks[i]->reset(mark_bitmap());
 579   }
 580 
 581   uint max_num_regions = _g1h->max_num_regions();
 582   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 583   for (uint i = 0; i < max_num_regions; i++) {
 584     // Do not update TAMS here. NoteStartOfMarkTask updates this in parallel in
 585     // the pre-concurrent-start WorkerTask.
 586     _top_at_rebuild_starts[i].store_relaxed(nullptr);
 587     _region_mark_stats[i].clear();
 588   }
 589 
 590   _root_region_scan_aborted.store_relaxed(false);
 591   _root_regions.reset();
 592 }
 593 
 594 void G1ConcurrentMark::assert_statistics_clear(G1HeapRegion* r) {
 595   assert_fully_initialized();
 596 #ifdef ASSERT
 597   uint region_idx = r->hrm_index();
 598   for (uint j = 0; j < _max_num_tasks; ++j) {
 599     _tasks[j]->verify_no_mark_stats_for(r->hrm_index());
 600   }
 601 
 602   assert(_top_at_rebuild_starts[region_idx].load_relaxed() == nullptr, "must be");
 603 
 604   G1RegionMarkStats* s = &_region_mark_stats[region_idx];
 605   assert(s->incoming_refs() == 0, "must be");
 606   assert(s->live_words() == 0, "must be");
 607 #endif
 608 }
 609 
 610 void G1ConcurrentMark::note_start_of_mark_for_region(G1HeapRegion* r) {
 611   assert_at_safepoint();
 612   assert_fully_initialized();
 613   if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
 614     update_top_at_mark_start(r);
 615   } else {
 616     set_top_at_mark_start_to_bottom(r);
 617   }
 618 }
 619 
 620 void G1ConcurrentMark::notify_new_region(G1HeapRegion* r, size_t marked_live_bytes_below_tams) {
 621   assert_at_safepoint();
 622   if (!is_fully_initialized()) {
 623     return;
 624   }
 625   G1CollectorState* state = _g1h->collector_state();
 626   if (state->is_in_concurrent_start_gc()) {
 627     update_top_at_mark_start(r);
 628     set_live_bytes(r->hrm_index(), marked_live_bytes_below_tams);
 629   }
 630 }
 631 
 632 void G1ConcurrentMark::reset_region_marking_state(G1HeapRegion* r) {
 633   assert_at_safepoint();
 634   if (!is_fully_initialized()) {
 635     return;
 636   }
 637   uint region_idx = r->hrm_index();
 638   // Only need to clear the stats cache for the given region if we are using the cache.
 639   if (_is_region_mark_stats_cache_in_use) {
 640     for (uint j = 0; j < _max_num_tasks; ++j) {
 641       _tasks[j]->clear_mark_stats_cache(region_idx);
 642     }
 643   } else {
 644     for (uint j = 0; j < _max_num_tasks; ++j) {
 645       _tasks[j]->verify_no_mark_stats_for(region_idx);
 646     }
 647   }
 648   set_top_at_mark_start_to_bottom(r);
 649   _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
 650   _region_mark_stats[region_idx].clear();
 651 }
 652 
 653 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
 654   assert_at_safepoint();
 655   assert(r->is_starts_humongous(), "Got humongous continues region here");
 656 
 657   // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
 658   mark_bitmap()->clear(r->bottom());
 659 }
 660 
 661 void G1ConcurrentMark::reset_marking_for_restart() {
 662   assert_fully_initialized();
 663 
 664   _global_mark_stack.set_empty();
 665 
 666   // Expand the marking stack, if we have to and if we can.
 667   if (has_overflown()) {
 668     _global_mark_stack.expand();
 669 
 670     uint max_num_regions = _g1h->max_num_regions();
 671     for (uint i = 0; i < max_num_regions; i++) {
 672       _region_mark_stats[i].clear_during_overflow();
 673     }
 674   }
 675 
 676   clear_has_overflown();
 677   _finger.store_relaxed(_heap.start());
 678 
 679   for (uint i = 0; i < _max_num_tasks; ++i) {
 680     _tasks[i]->reset_for_restart();
 681   }
 682 }
 683 
 684 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 685   assert(active_tasks <= _max_num_tasks, "we should not have more");
 686 
 687   _num_active_tasks = active_tasks;
 688   // Need to update the three data structures below according to the
 689   // number of active threads for this phase.
 690   _terminator.reset_for_reuse(active_tasks);
 691   _first_overflow_barrier_sync.set_n_workers(active_tasks);
 692   _second_overflow_barrier_sync.set_n_workers(active_tasks);
 693 }
 694 
 695 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 696   set_concurrency(active_tasks);
 697 
 698   _concurrent.store_relaxed(concurrent);
 699 
 700   if (!concurrent) {
 701     // At this point we should be in a STW phase, and completed marking.
 702     assert_at_safepoint_on_vm_thread();
 703     assert(out_of_regions(),
 704            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 705            p2i(finger()), p2i(_heap.end()));
 706   }
 707 }
 708 
 709 #if TASKQUEUE_STATS
 710 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
 711 
 712   _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
 713 
 714   auto get_pa_stats = [&](uint i) {
 715     return _tasks[i]->partial_array_task_stats();
 716   };
 717 
 718   PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
 719                                  "Concurrent Mark Partial Array");
 720 
 721   for (uint i = 0; i < _max_num_tasks; ++i) {
 722     get_pa_stats(i)->reset();
 723   }
 724 }
 725 #endif
 726 
 727 void G1ConcurrentMark::reset_at_marking_complete() {
 728   TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
 729   // We set the global marking state to some default values when we're
 730   // not doing marking.
 731   reset_marking_for_restart();
 732   _num_active_tasks = 0;
 733 }
 734 
 735 G1ConcurrentMark::~G1ConcurrentMark() {
 736   FREE_C_HEAP_ARRAY(_top_at_mark_starts);
 737   FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
 738   FREE_C_HEAP_ARRAY(_region_mark_stats);
 739   // The G1ConcurrentMark instance is never freed.
 740   ShouldNotReachHere();
 741 }
 742 
 743 class G1ClearBitMapTask : public WorkerTask {
 744 public:
 745   static size_t chunk_size() { return M; }
 746 
 747 private:
 748   // Heap region closure used for clearing the _mark_bitmap.
 749   class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
 750     G1ConcurrentMark* _cm;
 751     G1CMBitMap* _bitmap;
 752     bool _suspendible; // If suspendible, do yield checks.
 753 
 754     bool suspendible() {
 755       return _suspendible;
 756     }
 757 
 758     bool is_clear_concurrent_undo() {
 759       return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
 760     }
 761 
 762     bool has_aborted() {
 763       if (suspendible()) {
 764         _cm->do_yield_check();
 765         return _cm->has_aborted();
 766       }
 767       return false;
 768     }
 769 
 770     HeapWord* region_clear_limit(G1HeapRegion* r) {
 771       // A garbage collection might have made the region unavailable after a yield during
 772       // clearing. Just return bottom as the limit, causing the clearing for this region to end.
 773       if (G1CollectedHeap::heap()->region_at_or_null(r->hrm_index()) == nullptr) {
 774         return r->bottom();
 775       }
 776       // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
 777       // live_words data are current wrt to the _mark_bitmap. We use this information
 778       // to only clear ranges of the bitmap that require clearing.
 779       if (is_clear_concurrent_undo()) {
 780         // No need to clear bitmaps for empty regions (which includes regions we
 781         // did not mark through).
 782         if (!_cm->contains_live_object(r->hrm_index())) {
 783           assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
 784           return r->bottom();
 785         }
 786       }
 787       return r->end();
 788     }
 789 
 790   public:
 791     G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
 792       G1HeapRegionClosure(),
 793       _cm(cm),
 794       _bitmap(cm->mark_bitmap()),
 795       _suspendible(suspendible)
 796     { }
 797 
 798     virtual bool do_heap_region(G1HeapRegion* r) {
 799       if (has_aborted()) {
 800         return true;
 801       }
 802 
 803       HeapWord* cur = r->bottom();
 804       HeapWord* end = region_clear_limit(r);
 805 
 806       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 807 
 808       while (cur < end) {
 809 
 810         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 811         _bitmap->clear_range(mr);
 812 
 813         cur += chunk_size_in_words;
 814 
 815         // Repeat the asserts from before the start of the closure. We will do them
 816         // as asserts here to minimize their overhead on the product. However, we
 817         // will have them as guarantees at the beginning / end of the bitmap
 818         // clearing to get some checking in the product.
 819         assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
 820 
 821         // Abort iteration if necessary.
 822         if (suspendible() && _cm->do_yield_check()) {
 823           if (_cm->has_aborted()) {
 824             return true;
 825           }
 826           // Re-read end. The region might have been uncommitted.
 827           end = region_clear_limit(r);
 828         }
 829       }
 830       assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 831 
 832       return false;
 833     }
 834   };
 835 
 836   G1ClearBitmapHRClosure _cl;
 837   G1HeapRegionClaimer _hr_claimer;
 838   bool _suspendible; // If the task is suspendible, workers must join the STS.
 839 
 840 public:
 841   G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 842     WorkerTask("G1 Clear Bitmap"),
 843     _cl(cm, suspendible),
 844     _hr_claimer(n_workers),
 845     _suspendible(suspendible)
 846   { }
 847 
 848   void work(uint worker_id) {
 849     SuspendibleThreadSetJoiner sts_join(_suspendible);
 850     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 851   }
 852 };
 853 
 854 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
 855   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 856 
 857   size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
 858   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 859 
 860   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 861 
 862   G1ClearBitMapTask cl(this, num_workers, may_yield);
 863 
 864   log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
 865   workers->run_task(&cl, num_workers);
 866 }
 867 
 868 void G1ConcurrentMark::cleanup_for_next_mark() {
 869   // Make sure that the concurrent mark thread looks to still be in
 870   // the current cycle.
 871   guarantee(is_in_reset_for_next_cycle(), "invariant");
 872 
 873   clear_bitmap(_concurrent_workers, true);
 874 
 875   reset_partial_array_state_manager();
 876 
 877   // Should not have changed state yet (even if a Full GC interrupted us).
 878   guarantee(is_in_reset_for_next_cycle(), "invariant");
 879 }
 880 
 881 void G1ConcurrentMark::reset_partial_array_state_manager() {
 882   for (uint i = 0; i < _max_num_tasks; ++i) {
 883     _tasks[i]->unregister_partial_array_splitter();
 884   }
 885 
 886   partial_array_state_manager()->reset();
 887 
 888   for (uint i = 0; i < _max_num_tasks; ++i) {
 889     _tasks[i]->register_partial_array_splitter();
 890   }
 891 }
 892 
 893 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
 894   assert_at_safepoint_on_vm_thread();
 895   // To avoid fragmentation the full collection requesting to clear the bitmap
 896   // might use fewer workers than available. To ensure the bitmap is cleared
 897   // as efficiently as possible the number of active workers are temporarily
 898   // increased to include all currently created workers.
 899   WithActiveWorkers update(workers, workers->created_workers());
 900   clear_bitmap(workers, false);
 901 }
 902 
 903 class G1PreConcurrentStartTask : public G1BatchedTask {
 904   // Reset marking state.
 905   class ResetMarkingStateTask;
 906   // For each region note start of marking.
 907   class NoteStartOfMarkTask;
 908 
 909 public:
 910   G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
 911 };
 912 
 913 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
 914   G1ConcurrentMark* _cm;
 915 public:
 916   ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
 917 
 918   double worker_cost() const override { return 1.0; }
 919   void do_work(uint worker_id) override;
 920 };
 921 
 922 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
 923 
 924   class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
 925     G1ConcurrentMark* _cm;
 926 
 927   public:
 928     NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
 929 
 930     bool do_heap_region(G1HeapRegion* r) override {
 931       _cm->note_start_of_mark_for_region(r);
 932       return false;
 933     }
 934   } _region_cl;
 935 
 936   G1HeapRegionClaimer _claimer;
 937 public:
 938   NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
 939 
 940   double worker_cost() const override {
 941     // The work done per region is very small, therefore we choose this magic number to cap the number
 942     // of threads used when there are few regions.
 943     const double regions_per_thread = 1000;
 944     return _claimer.n_regions() / regions_per_thread;
 945   }
 946 
 947   void set_max_workers(uint max_workers) override {
 948     _claimer.set_n_workers(max_workers);
 949   }
 950 
 951   void do_work(uint worker_id) override {
 952     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
 953   }
 954 };
 955 
 956 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
 957   // Reset marking state.
 958   _cm->reset();
 959 }
 960 
 961 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
 962   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 963   add_serial_task(new ResetMarkingStateTask(cm));
 964   add_parallel_task(new NoteStartOfMarkTask());
 965 };
 966 
 967 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
 968   assert_at_safepoint_on_vm_thread();
 969 
 970   G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
 971 
 972   ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
 973 
 974   G1PreConcurrentStartTask cl(cause, this);
 975   G1CollectedHeap::heap()->run_batch_task(&cl);
 976 
 977   _gc_tracer_cm->set_gc_cause(cause);
 978 }
 979 
 980 void G1ConcurrentMark::start_full_concurrent_cycle() {
 981   // Start Concurrent Marking weak-reference discovery.
 982   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 983   rp->start_discovery(false /* always_clear */);
 984 
 985   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
 986   // This is the start of  the marking cycle, we're expected all
 987   // threads to have SATB queues with active set to false.
 988   satb_mq_set.set_active_all_threads(true, /* new active value */
 989                                      false /* expected_active */);
 990 
 991   // update_g1_committed() will be called at the end of an evac pause
 992   // when marking is on. So, it's also called at the end of the
 993   // concurrent start pause to update the heap end, if the heap expands
 994   // during it. No need to call it here.
 995 
 996   // Signal the thread to start work.
 997   cm_thread()->start_full_cycle();
 998 }
 999 
1000 void G1ConcurrentMark::start_undo_concurrent_cycle() {
1001   assert_at_safepoint_on_vm_thread();
1002   // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
1003   assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
1004 
1005   abort_root_region_scan_at_safepoint();
1006 
1007   // Signal the thread to start work.
1008   cm_thread()->start_undo_cycle();
1009 }
1010 
1011 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
1012   cm_thread()->set_idle();
1013 }
1014 
1015 void G1ConcurrentMark::stop() {
1016   if (is_fully_initialized()) {
1017     cm_thread()->stop();
1018   }
1019 }
1020 
1021 /*
1022  * Notice that in the next two methods, we actually leave the STS
1023  * during the barrier sync and join it immediately afterwards. If we
1024  * do not do this, the following deadlock can occur: one thread could
1025  * be in the barrier sync code, waiting for the other thread to also
1026  * sync up, whereas another one could be trying to yield, while also
1027  * waiting for the other threads to sync up too.
1028  *
1029  * Note, however, that this code is also used during remark and in
1030  * this case we should not attempt to leave / enter the STS, otherwise
1031  * we'll either hit an assert (debug / fastdebug) or deadlock
1032  * (product). So we should only leave / enter the STS if we are
1033  * operating concurrently.
1034  *
1035  * Because the thread that does the sync barrier has left the STS, it
1036  * is possible to be suspended for a Full GC or an evacuation pause
1037  * could occur. This is actually safe, since the entering the sync
1038  * barrier is one of the last things do_marking_step() does, and it
1039  * doesn't manipulate any data structures afterwards.
1040  */
1041 
1042 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
1043   bool barrier_aborted;
1044   {
1045     SuspendibleThreadSetLeaver sts_leave(concurrent());
1046     barrier_aborted = !_first_overflow_barrier_sync.enter();
1047   }
1048 
1049   // at this point everyone should have synced up and not be doing any
1050   // more work
1051 
1052   if (barrier_aborted) {
1053     // If the barrier aborted we ignore the overflow condition and
1054     // just abort the whole marking phase as quickly as possible.
1055     return;
1056   }
1057 }
1058 
1059 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1060   SuspendibleThreadSetLeaver sts_leave(concurrent());
1061   _second_overflow_barrier_sync.enter();
1062 
1063   // at this point everything should be re-initialized and ready to go
1064 }
1065 
1066 class G1CMConcurrentMarkingTask : public WorkerTask {
1067   G1ConcurrentMark*     _cm;
1068 
1069 public:
1070   void work(uint worker_id) {
1071     ResourceMark rm;
1072 
1073     SuspendibleThreadSetJoiner sts_join;
1074 
1075     assert(worker_id < _cm->active_tasks(), "invariant");
1076 
1077     G1CMTask* task = _cm->task(worker_id);
1078     task->record_start_time();
1079     if (!_cm->has_aborted()) {
1080       do {
1081         task->do_marking_step(G1ConcMarkStepDurationMillis,
1082                               true  /* do_termination */,
1083                               false /* is_serial*/);
1084 
1085         _cm->do_yield_check();
1086       } while (!_cm->has_aborted() && task->has_aborted());
1087     }
1088     task->record_end_time();
1089     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1090   }
1091 
1092   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1093       WorkerTask("Concurrent Mark"), _cm(cm) { }
1094 
1095   ~G1CMConcurrentMarkingTask() { }
1096 };
1097 
1098 uint G1ConcurrentMark::calc_active_marking_workers() {
1099   uint result = 0;
1100   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1101     result = _max_concurrent_workers;
1102   } else {
1103     result =
1104       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1105                                                 1, /* Minimum workers */
1106                                                 _num_concurrent_workers,
1107                                                 Threads::number_of_non_daemon_threads());
1108     // Don't scale the result down by scale_concurrent_workers() because
1109     // that scaling has already gone into "_max_concurrent_workers".
1110   }
1111   assert(result > 0 && result <= _max_concurrent_workers,
1112          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1113          _max_concurrent_workers, result);
1114   return result;
1115 }
1116 
1117 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1118   return _root_region_scan_aborted.load_relaxed();
1119 }
1120 
1121 #ifndef PRODUCT
1122 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1123   assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1124 }
1125 #endif
1126 
1127 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1128 #ifdef ASSERT
1129   HeapWord* last = region->last();
1130   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1131   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1132          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1133   assert(top_at_mark_start(hr) == region->start(),
1134          "MemRegion start should be equal to TAMS");
1135 #endif
1136 
1137   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1138 
1139   const uintx interval = PrefetchScanIntervalInBytes;
1140   HeapWord* curr = region->start();
1141   const HeapWord* end = region->end();
1142   while (curr < end) {
1143     Prefetch::read(curr, interval);
1144     oop obj = cast_to_oop(curr);
1145     size_t size = obj->oop_iterate_size(&cl);
1146     assert(size == obj->size(), "sanity");
1147     curr += size;
1148   }
1149 }
1150 
1151 class G1CMRootRegionScanTask : public WorkerTask {
1152   G1ConcurrentMark* _cm;
1153   bool _should_yield;
1154 
1155 public:
1156   G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1157     WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1158 
1159   void work(uint worker_id) {
1160     SuspendibleThreadSetJoiner sts_join(_should_yield);
1161 
1162     while (true) {
1163       if (_cm->has_root_region_scan_aborted()) {
1164         return;
1165       }
1166       G1CMRootMemRegions* root_regions = _cm->root_regions();
1167       const MemRegion* region = root_regions->claim_next();
1168       if (region == nullptr) {
1169         return;
1170       }
1171       _cm->scan_root_region(region, worker_id);
1172       if (_should_yield) {
1173         SuspendibleThreadSet::yield();
1174         // If we yielded, a GC may have processed all root regions,
1175         // so this loop will naturally exit on the next claim_next() call.
1176         // Same if a Full GC signalled abort of the concurrent mark.
1177       }
1178     }
1179   }
1180 };
1181 
1182 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1183   // We first check whether there is any work to do as we might have already aborted
1184   // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
1185   // We want to avoid spinning up the worker threads if that happened.
1186   // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1187   //
1188   // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1189   // a young gc doing the work.
1190   //
1191   // Concurrent gc threads enter an STS when starting the task, so they stop, then
1192   // continue after that safepoint.
1193   //
1194   // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1195   // consistent view of the value containing the number of remaining regions across the
1196   // usages below. The safepoint/gc may already be running and modifying it
1197   // while this code is still executing.
1198   uint num_remaining = root_regions()->num_remaining_regions();
1199   bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1200   if (do_scan) {
1201     // Assign one worker to each root-region but subject to the max constraint.
1202     // The constraint is also important to avoid accesses beyond the allocated per-worker
1203     // marking helper data structures. We might get passed different WorkerThreads with
1204     // different number of threads (potential worker ids) than helper data structures when
1205     // completing this work during GC.
1206     const uint num_workers = MIN2(num_remaining,
1207                                   _max_concurrent_workers);
1208 
1209     G1CMRootRegionScanTask task(this, concurrent);
1210     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1211                         task.name(), num_workers, num_remaining);
1212     workers->run_task(&task, num_workers);
1213   }
1214 
1215   // At the end of this method, we can re-read num_remaining() in the assert: either
1216   // we got non-zero above and we processed all root regions (and it must be zero
1217   // after the worker task synchronization) or it had already been zero. We also
1218   // can't have started another concurrent cycle that could have set it to something else
1219   // while still in the concurrent cycle (if called concurrently).
1220   assert_root_region_scan_completed_or_aborted();
1221 
1222   return do_scan;
1223 }
1224 
1225 void G1ConcurrentMark::scan_root_regions_concurrently() {
1226   assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1227   scan_root_regions(_concurrent_workers, true /* concurrent */);
1228 }
1229 
1230 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1231   assert_at_safepoint_on_vm_thread();
1232   return scan_root_regions(_g1h->workers(), false /* concurrent */);
1233 }
1234 
1235 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1236   root_regions()->add(top_at_mark_start(r), r->top());
1237 }
1238 
1239 void G1ConcurrentMark::add_root_region_set_bottom(G1HeapRegion* r) {
1240   set_top_at_mark_start_to_bottom(r);
1241   root_regions()->add(r->bottom(), r->top());
1242 }
1243 
1244 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1245   return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1246 }
1247 
1248 void G1ConcurrentMark::abort_root_region_scan() {
1249   assert_not_at_safepoint();
1250 
1251   _root_region_scan_aborted.store_relaxed(true);
1252 }
1253 
1254 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1255   assert_at_safepoint_on_vm_thread();
1256 
1257   _root_region_scan_aborted.store_relaxed(true);
1258 }
1259 
1260 void G1ConcurrentMark::concurrent_cycle_start() {
1261   _gc_timer_cm->register_gc_start();
1262 
1263   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1264 
1265   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1266 }
1267 
1268 uint G1ConcurrentMark::completed_mark_cycles() const {
1269   return _completed_mark_cycles.load_relaxed();
1270 }
1271 
1272 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1273   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1274 
1275   if (mark_cycle_completed) {
1276     _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1277   }
1278 
1279   if (has_aborted()) {
1280     log_info(gc, marking)("Concurrent Mark Abort");
1281     _gc_tracer_cm->report_concurrent_mode_failure();
1282   }
1283 
1284   _gc_timer_cm->register_gc_end();
1285 
1286   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1287 }
1288 
1289 void G1ConcurrentMark::mark_from_roots() {
1290   _restart_for_overflow.store_relaxed(false);
1291 
1292   uint active_workers = calc_active_marking_workers();
1293 
1294   // Setting active workers is not guaranteed since fewer
1295   // worker threads may currently exist and more may not be
1296   // available.
1297   active_workers = _concurrent_workers->set_active_workers(active_workers);
1298   log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1299 
1300   _num_concurrent_workers = active_workers;
1301 
1302   // Parallel task terminator is set in "set_concurrency_and_phase()"
1303   set_concurrency_and_phase(active_workers, true /* concurrent */);
1304 
1305   G1CMConcurrentMarkingTask marking_task(this);
1306   _concurrent_workers->run_task(&marking_task);
1307   print_stats();
1308 }
1309 
1310 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1311   static const char* location_strings[] = { "Remark Before",
1312                                             "Remark After",
1313                                             "Remark Overflow",
1314                                             "Cleanup Before",
1315                                             "Cleanup After" };
1316   return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1317 }
1318 
1319 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1320                                            VerifyLocation location) {
1321   G1HeapVerifier* verifier = _g1h->verifier();
1322 
1323   verifier->verify_region_sets_optional();
1324 
1325   const char* caller = verify_location_string(location);
1326 
1327   if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1328     GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1329 
1330     size_t const BufLen = 512;
1331     char buffer[BufLen];
1332 
1333     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1334     verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1335 
1336     // Only check bitmap in Remark, and not at After-Verification because the regions
1337     // already have their TAMS'es reset.
1338     if (location != VerifyLocation::RemarkAfter) {
1339       verifier->verify_bitmap_clear(true /* above_tams_only */);
1340     }
1341   }
1342 }
1343 
1344 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1345   G1CollectedHeap* _g1h;
1346 public:
1347   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1348 
1349   bool do_object_b(oop obj) {
1350     return !_g1h->is_obj_dead(obj);
1351   }
1352 };
1353 
1354 void G1ConcurrentMark::remark() {
1355   assert_at_safepoint_on_vm_thread();
1356 
1357   // If a full collection has happened, we should not continue. However we might
1358   // have ended up here as the Remark VM operation has been scheduled already.
1359   if (has_aborted()) {
1360     return;
1361   }
1362 
1363   G1Policy* policy = _g1h->policy();
1364   policy->record_pause_start_time();
1365 
1366   double start = os::elapsedTime();
1367 
1368   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1369 
1370   {
1371     GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1372     finalize_marking();
1373   }
1374 
1375   double mark_work_end = os::elapsedTime();
1376 
1377   bool const mark_finished = !has_overflown();
1378   if (mark_finished) {
1379     weak_refs_work();
1380 
1381     // Unload Klasses, String, Code Cache, etc.
1382     if (ClassUnloadingWithConcurrentMark) {
1383       G1CMIsAliveClosure is_alive(this);
1384       _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1385     }
1386 
1387     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1388     // We're done with marking.
1389     // This is the end of the marking cycle, we're expected all
1390     // threads to have SATB queues with active set to true.
1391     satb_mq_set.set_active_all_threads(false, /* new active value */
1392                                        true /* expected_active */);
1393 
1394     {
1395       GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1396       flush_all_task_caches();
1397     }
1398 
1399     // All marking completed. Check bitmap now as we will start to reset TAMSes
1400     // in parallel below so that we can not do this in the After-Remark verification.
1401     _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1402 
1403     {
1404       GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1405 
1406       G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1407       uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1408                                     _g1h->workers()->active_workers());
1409       log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1410       _g1h->workers()->run_task(&cl, num_workers);
1411 
1412       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1413                                       _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1414 
1415       _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1416 
1417       if (_needs_remembered_set_rebuild) {
1418         GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1419         _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1420       }
1421     }
1422 
1423     if (log_is_enabled(Trace, gc, liveness)) {
1424       G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1425       _g1h->heap_region_iterate(&cl);
1426     }
1427 
1428     // Potentially, some empty-regions have been reclaimed; make this a
1429     // "collection" so that pending allocation can retry before attempting a
1430     // GC pause.
1431     _g1h->increment_total_collections();
1432 
1433     // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1434     // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1435     // triggered, it likely means there are very few regular GCs, making resizing
1436     // based on gc heuristics less effective.
1437     if (_g1h->last_gc_was_periodic()) {
1438       _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1439     }
1440 
1441     compute_new_sizes();
1442 
1443     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1444 
1445     assert(!restart_for_overflow(), "sanity");
1446     // Completely reset the marking state (except bitmaps) since marking completed.
1447     reset_at_marking_complete();
1448 
1449     G1CollectedHeap::finish_codecache_marking_cycle();
1450 
1451     {
1452       GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1453       G1ObjectCountIsAliveClosure is_alive(_g1h);
1454       _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1455     }
1456 
1457     // Successfully completed marking, advance state.
1458     cm_thread()->set_full_cycle_rebuild_and_scrub();
1459   } else {
1460     // We overflowed.  Restart concurrent marking.
1461     _restart_for_overflow.store_relaxed(true);
1462 
1463     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1464 
1465     // Clear the marking state because we will be restarting
1466     // marking due to overflowing the global mark stack.
1467     reset_marking_for_restart();
1468   }
1469 
1470   // Statistics
1471   double now = os::elapsedTime();
1472   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1473   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1474   _remark_times.add((now - start) * 1000.0);
1475 
1476   _g1h->update_perf_counter_cpu_time();
1477 
1478   policy->record_concurrent_mark_remark_end();
1479 
1480   return;
1481 }
1482 
1483 void G1ConcurrentMark::compute_new_sizes() {
1484   MetaspaceGC::compute_new_size();
1485 
1486   // Cleanup will have freed any regions completely full of garbage.
1487   // Update the soft reference policy with the new heap occupancy.
1488   Universe::heap()->update_capacity_and_used_at_gc();
1489 
1490   // We reclaimed old regions so we should calculate the sizes to make
1491   // sure we update the old gen/space data.
1492   _g1h->monitoring_support()->update_sizes();
1493 }
1494 
1495 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1496   G1CollectedHeap* _g1h;
1497 
1498 public:
1499   G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1500 
1501   bool do_heap_region(G1HeapRegion* r) override {
1502     // Update the remset tracking state from updating to complete
1503     // if remembered sets have been rebuilt.
1504     _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1505     return false;
1506   }
1507 };
1508 
1509 void G1ConcurrentMark::cleanup() {
1510   assert_at_safepoint_on_vm_thread();
1511 
1512   // If a full collection has happened, we shouldn't do this.
1513   if (has_aborted()) {
1514     return;
1515   }
1516 
1517   G1Policy* policy = _g1h->policy();
1518   policy->record_pause_start_time();
1519 
1520   double start = os::elapsedTime();
1521 
1522   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1523 
1524   if (needs_remembered_set_rebuild()) {
1525     // Update the remset tracking information as well as marking all regions
1526     // as fully parsable.
1527     GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1528     G1UpdateRegionsAfterRebuild cl(_g1h);
1529     _g1h->heap_region_iterate(&cl);
1530   } else {
1531     log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1532   }
1533 
1534   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1535 
1536   // Local statistics
1537   _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1538 
1539   {
1540     GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1541     policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1542   }
1543 
1544   // Advance state.
1545   cm_thread()->set_full_cycle_reset_for_next_cycle();
1546   return;
1547 }
1548 
1549 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1550 // Uses the G1CMTask associated with a worker thread (for serial reference
1551 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1552 // trace referent objects.
1553 //
1554 // Using the G1CMTask and embedded local queues avoids having the worker
1555 // threads operating on the global mark stack. This reduces the risk
1556 // of overflowing the stack - which we would rather avoid at this late
1557 // state. Also using the tasks' local queues removes the potential
1558 // of the workers interfering with each other that could occur if
1559 // operating on the global stack.
1560 
1561 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1562   G1ConcurrentMark* _cm;
1563   G1CMTask*         _task;
1564   uint              _ref_counter_limit;
1565   uint              _ref_counter;
1566   bool              _is_serial;
1567 public:
1568   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1569     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1570     _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1571     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1572   }
1573 
1574   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1575   virtual void do_oop(      oop* p) { do_oop_work(p); }
1576 
1577   template <class T> void do_oop_work(T* p) {
1578     if (_cm->has_overflown()) {
1579       return;
1580     }
1581     if (!_task->deal_with_reference(p)) {
1582       // We did not add anything to the mark bitmap (or mark stack), so there is
1583       // no point trying to drain it.
1584       return;
1585     }
1586     _ref_counter--;
1587 
1588     if (_ref_counter == 0) {
1589       // We have dealt with _ref_counter_limit references, pushing them
1590       // and objects reachable from them on to the local stack (and
1591       // possibly the global stack). Call G1CMTask::do_marking_step() to
1592       // process these entries.
1593       //
1594       // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1595       // there's nothing more to do (i.e. we're done with the entries that
1596       // were pushed as a result of the G1CMTask::deal_with_reference() calls
1597       // above) or we overflow.
1598       //
1599       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1600       // flag while there may still be some work to do. (See the comment at
1601       // the beginning of G1CMTask::do_marking_step() for those conditions -
1602       // one of which is reaching the specified time target.) It is only
1603       // when G1CMTask::do_marking_step() returns without setting the
1604       // has_aborted() flag that the marking step has completed.
1605       do {
1606         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1607         _task->do_marking_step(mark_step_duration_ms,
1608                                false      /* do_termination */,
1609                                _is_serial);
1610       } while (_task->has_aborted() && !_cm->has_overflown());
1611       _ref_counter = _ref_counter_limit;
1612     }
1613   }
1614 };
1615 
1616 // 'Drain' oop closure used by both serial and parallel reference processing.
1617 // Uses the G1CMTask associated with a given worker thread (for serial
1618 // reference processing the G1CMtask for worker 0 is used). Calls the
1619 // do_marking_step routine, with an unbelievably large timeout value,
1620 // to drain the marking data structures of the remaining entries
1621 // added by the 'keep alive' oop closure above.
1622 
1623 class G1CMDrainMarkingStackClosure : public VoidClosure {
1624   G1ConcurrentMark* _cm;
1625   G1CMTask*         _task;
1626   bool              _is_serial;
1627  public:
1628   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1629     _cm(cm), _task(task), _is_serial(is_serial) {
1630     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1631   }
1632 
1633   void do_void() {
1634     do {
1635       // We call G1CMTask::do_marking_step() to completely drain the local
1636       // and global marking stacks of entries pushed by the 'keep alive'
1637       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1638       //
1639       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1640       // if there's nothing more to do (i.e. we've completely drained the
1641       // entries that were pushed as a result of applying the 'keep alive'
1642       // closure to the entries on the discovered ref lists) or we overflow
1643       // the global marking stack.
1644       //
1645       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1646       // flag while there may still be some work to do. (See the comment at
1647       // the beginning of G1CMTask::do_marking_step() for those conditions -
1648       // one of which is reaching the specified time target.) It is only
1649       // when G1CMTask::do_marking_step() returns without setting the
1650       // has_aborted() flag that the marking step has completed.
1651 
1652       _task->do_marking_step(1000000000.0 /* something very large */,
1653                              true         /* do_termination */,
1654                              _is_serial);
1655     } while (_task->has_aborted() && !_cm->has_overflown());
1656   }
1657 };
1658 
1659 class G1CMRefProcProxyTask : public RefProcProxyTask {
1660   G1CollectedHeap& _g1h;
1661   G1ConcurrentMark& _cm;
1662 
1663 public:
1664   G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1665     : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1666       _g1h(g1h),
1667       _cm(cm) {}
1668 
1669   void work(uint worker_id) override {
1670     assert(worker_id < _max_workers, "sanity");
1671     G1CMIsAliveClosure is_alive(&_cm);
1672     uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1673     G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1674     BarrierEnqueueDiscoveredFieldClosure enqueue;
1675     G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1676     _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1677   }
1678 
1679   void prepare_run_task_hook() override {
1680     // We need to reset the concurrency level before each
1681     // proxy task execution, so that the termination protocol
1682     // and overflow handling in G1CMTask::do_marking_step() knows
1683     // how many workers to wait for.
1684     _cm.set_concurrency(_queue_count);
1685   }
1686 };
1687 
1688 void G1ConcurrentMark::weak_refs_work() {
1689   ResourceMark rm;
1690 
1691   {
1692     GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1693 
1694     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1695 
1696     // See the comment in G1CollectedHeap::ref_processing_init()
1697     // about how reference processing currently works in G1.
1698 
1699     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1700 
1701     // Prefer to grow the stack until the max capacity.
1702     _global_mark_stack.set_should_grow();
1703 
1704     // Parallel processing task executor.
1705     G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1706     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1707 
1708     // Process the weak references.
1709     const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1710     _gc_tracer_cm->report_gc_reference_stats(stats);
1711     pt.print_all_references();
1712 
1713     // The do_oop work routines of the keep_alive and drain_marking_stack
1714     // oop closures will set the has_overflown flag if we overflow the
1715     // global marking stack.
1716 
1717     assert(has_overflown() || _global_mark_stack.is_empty(),
1718            "Mark stack should be empty (unless it has overflown)");
1719   }
1720 
1721   if (has_overflown()) {
1722     // We can not trust g1_is_alive and the contents of the heap if the marking stack
1723     // overflowed while processing references. Exit the VM.
1724     fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1725           "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1726           "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1727           _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1728     return;
1729   }
1730 
1731   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1732 
1733   {
1734     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1735     G1CMIsAliveClosure is_alive(this);
1736     WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1737   }
1738 }
1739 
1740 class G1PrecleanYieldClosure : public YieldClosure {
1741   G1ConcurrentMark* _cm;
1742 
1743 public:
1744   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1745 
1746   virtual bool should_return() {
1747     return _cm->has_aborted();
1748   }
1749 
1750   virtual bool should_return_fine_grain() {
1751     _cm->do_yield_check();
1752     return _cm->has_aborted();
1753   }
1754 };
1755 
1756 void G1ConcurrentMark::preclean() {
1757   assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1758 
1759   SuspendibleThreadSetJoiner joiner;
1760 
1761   BarrierEnqueueDiscoveredFieldClosure enqueue;
1762 
1763   set_concurrency_and_phase(1, true);
1764 
1765   G1PrecleanYieldClosure yield_cl(this);
1766 
1767   ReferenceProcessor* rp = _g1h->ref_processor_cm();
1768   // Precleaning is single threaded. Temporarily disable MT discovery.
1769   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1770   rp->preclean_discovered_references(rp->is_alive_non_header(),
1771                                      &enqueue,
1772                                      &yield_cl,
1773                                      _gc_timer_cm);
1774 }
1775 
1776 // Closure for marking entries in SATB buffers.
1777 class G1CMSATBBufferClosure : public SATBBufferClosure {
1778 private:
1779   G1CMTask* _task;
1780   G1CollectedHeap* _g1h;
1781 
1782   // This is very similar to G1CMTask::deal_with_reference, but with
1783   // more relaxed requirements for the argument, so this must be more
1784   // circumspect about treating the argument as an object.
1785   void do_entry(void* entry) const {
1786     _task->increment_refs_reached();
1787     oop const obj = cast_to_oop(entry);
1788     _task->make_reference_grey(obj);
1789   }
1790 
1791 public:
1792   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1793     : _task(task), _g1h(g1h) { }
1794 
1795   virtual void do_buffer(void** buffer, size_t size) {
1796     for (size_t i = 0; i < size; ++i) {
1797       do_entry(buffer[i]);
1798     }
1799   }
1800 };
1801 
1802 class G1RemarkThreadsClosure : public ThreadClosure {
1803   G1SATBMarkQueueSet& _qset;
1804 
1805  public:
1806   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1807     _qset(G1BarrierSet::satb_mark_queue_set()) {}
1808 
1809   void do_thread(Thread* thread) {
1810     // Transfer any partial buffer to the qset for completed buffer processing.
1811     _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1812   }
1813 };
1814 
1815 class G1CMRemarkTask : public WorkerTask {
1816   // For Threads::possibly_parallel_threads_do
1817   ThreadsClaimTokenScope _threads_claim_token_scope;
1818   G1ConcurrentMark* _cm;
1819 public:
1820   void work(uint worker_id) {
1821     G1CMTask* task = _cm->task(worker_id);
1822     task->record_start_time();
1823     {
1824       ResourceMark rm;
1825 
1826       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1827       Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1828     }
1829 
1830     do {
1831       task->do_marking_step(1000000000.0 /* something very large */,
1832                             true         /* do_termination       */,
1833                             false        /* is_serial            */);
1834     } while (task->has_aborted() && !_cm->has_overflown());
1835     // If we overflow, then we do not want to restart. We instead
1836     // want to abort remark and do concurrent marking again.
1837     task->record_end_time();
1838   }
1839 
1840   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1841     WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1842     _cm->terminator()->reset_for_reuse(active_workers);
1843   }
1844 };
1845 
1846 void G1ConcurrentMark::finalize_marking() {
1847   ResourceMark rm;
1848 
1849   _g1h->ensure_parsability(false);
1850 
1851   // this is remark, so we'll use up all active threads
1852   uint active_workers = _g1h->workers()->active_workers();
1853   set_concurrency_and_phase(active_workers, false /* concurrent */);
1854   // Leave _parallel_marking_threads at it's
1855   // value originally calculated in the G1ConcurrentMark
1856   // constructor and pass values of the active workers
1857   // through the task.
1858 
1859   {
1860     G1CMRemarkTask remarkTask(this, active_workers);
1861     // We will start all available threads, even if we decide that the
1862     // active_workers will be fewer. The extra ones will just bail out
1863     // immediately.
1864     _g1h->workers()->run_task(&remarkTask);
1865   }
1866 
1867   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1868   guarantee(has_overflown() ||
1869             satb_mq_set.completed_buffers_num() == 0,
1870             "Invariant: has_overflown = %s, num buffers = %zu",
1871             BOOL_TO_STR(has_overflown()),
1872             satb_mq_set.completed_buffers_num());
1873 
1874   print_stats();
1875 }
1876 
1877 void G1ConcurrentMark::flush_all_task_caches(bool ends_use_of_mark_cache) {
1878   size_t hits = 0;
1879   size_t misses = 0;
1880   for (uint i = 0; i < _max_num_tasks; i++) {
1881     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1882     hits += stats.first;
1883     misses += stats.second;
1884   }
1885   size_t sum = hits + misses;
1886   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1887                        hits, misses, percent_of(hits, sum));
1888   if (ends_use_of_mark_cache) {
1889     _is_region_mark_stats_cache_in_use = false;
1890   }
1891 }
1892 
1893 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1894   assert_at_safepoint();
1895   _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1896 }
1897 
1898 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1899   // "Checkpoint" the finger.
1900   HeapWord* local_finger = finger();
1901 
1902   while (local_finger < _heap.end()) {
1903     assert(_g1h->is_in_reserved(local_finger), "invariant");
1904 
1905     G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1906     // Make sure that the reads below do not float before loading curr_region.
1907     OrderAccess::loadload();
1908     // Above heap_region_containing may return null as we always scan claim
1909     // until the end of the heap. In this case, just jump to the next region.
1910     HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1911 
1912     // Is the gap between reading the finger and doing the CAS too long?
1913     HeapWord* res = _finger.compare_exchange(local_finger, end);
1914     if (res == local_finger && curr_region != nullptr) {
1915       // We succeeded.
1916       HeapWord* bottom = curr_region->bottom();
1917       HeapWord* limit = top_at_mark_start(curr_region);
1918 
1919       log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1920       // Notice that _finger == end cannot be guaranteed here since,
1921       // someone else might have moved the finger even further.
1922       assert(finger() >= end, "The finger should have moved forward");
1923 
1924       if (limit > bottom) {
1925         return curr_region;
1926       } else {
1927         assert(limit == bottom,
1928                "The scan limit for region %u (%s) should be bottom but is " PTR_FORMAT,
1929                curr_region->hrm_index(), curr_region->get_short_type_str(), p2i(limit));
1930         // We return null and the caller should try calling
1931         // claim_region() again.
1932         return nullptr;
1933       }
1934     } else {
1935       // Read the finger again.
1936       HeapWord* next_finger = finger();
1937       assert(next_finger > local_finger,
1938              "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT,
1939              p2i(local_finger), p2i(next_finger));
1940       local_finger = next_finger;
1941     }
1942   }
1943 
1944   return nullptr;
1945 }
1946 
1947 #ifndef PRODUCT
1948 class VerifyNoCSetOops {
1949   G1CollectedHeap* _g1h;
1950   const char* _phase;
1951   int _info;
1952 
1953 public:
1954   VerifyNoCSetOops(const char* phase, int info = -1) :
1955     _g1h(G1CollectedHeap::heap()),
1956     _phase(phase),
1957     _info(info)
1958   { }
1959 
1960   void operator()(G1TaskQueueEntry task_entry) const {
1961     if (task_entry.is_partial_array_state()) {
1962       oop obj = task_entry.to_partial_array_state()->source();
1963       guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1964       return;
1965     }
1966     guarantee(oopDesc::is_oop(task_entry.to_oop()),
1967               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1968               p2i(task_entry.to_oop()), _phase, _info);
1969     G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1970     guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1971               "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1972               p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1973   }
1974 };
1975 
1976 void G1ConcurrentMark::verify_no_collection_set_oops() {
1977   assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1978          "should be at a safepoint or initializing");
1979   if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1980     return;
1981   }
1982 
1983   // Verify entries on the global mark stack
1984   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1985 
1986   // Verify entries on the task queues
1987   for (uint i = 0; i < _max_num_tasks; ++i) {
1988     G1CMTaskQueue* queue = _task_queues->queue(i);
1989     queue->iterate(VerifyNoCSetOops("Queue", i));
1990   }
1991 
1992   // Verify the global finger
1993   HeapWord* global_finger = finger();
1994   if (global_finger != nullptr && global_finger < _heap.end()) {
1995     // Since we always iterate over all regions, we might get a null G1HeapRegion
1996     // here.
1997     G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1998     guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1999               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2000               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2001   }
2002 
2003   // Verify the task fingers
2004   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
2005   for (uint i = 0; i < _num_concurrent_workers; ++i) {
2006     G1CMTask* task = _tasks[i];
2007     HeapWord* task_finger = task->finger();
2008     if (task_finger != nullptr && task_finger < _heap.end()) {
2009       // See above note on the global finger verification.
2010       G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
2011       guarantee(r == nullptr || task_finger == r->bottom() ||
2012                 !r->in_collection_set() || !r->has_index_in_opt_cset(),
2013                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2014                 p2i(task_finger), HR_FORMAT_PARAMS(r));
2015     }
2016   }
2017 }
2018 #endif // PRODUCT
2019 
2020 void G1ConcurrentMark::rebuild_and_scrub() {
2021   if (!needs_remembered_set_rebuild()) {
2022     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
2023   }
2024 
2025   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
2026 }
2027 
2028 void G1ConcurrentMark::print_stats() {
2029   if (!log_is_enabled(Debug, gc, stats)) {
2030     return;
2031   }
2032   log_debug(gc, stats)("---------------------------------------------------------------------");
2033   for (size_t i = 0; i < _num_active_tasks; ++i) {
2034     _tasks[i]->print_stats();
2035     log_debug(gc, stats)("---------------------------------------------------------------------");
2036   }
2037 }
2038 
2039 bool G1ConcurrentMark::shutdown_cleanup_needed() const {
2040   // Cleanup (aborting threads, setting abort flags) is needed throughout the whole cycle before
2041   // stopping the CM thread.
2042   return is_fully_initialized() && is_in_concurrent_cycle();
2043 }
2044 
2045 void G1ConcurrentMark::shutdown_concurrent_cycle() {
2046   assert_at_safepoint_on_vm_thread();
2047 
2048   abort_root_region_scan_at_safepoint();
2049   abort_marking_threads();
2050 
2051   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2052   satb_mq_set.abandon_partial_marking();
2053   // This can be called either during or outside marking, we'll read
2054   // the expected_active value from the SATB queue set.
2055   satb_mq_set.set_active_all_threads(false, /* new active value */
2056                                      satb_mq_set.is_active() /* expected_active */);
2057 }
2058 
2059 bool G1ConcurrentMark::concurrent_cycle_abort() {
2060   assert_at_safepoint_on_vm_thread();
2061   assert(_g1h->collector_state()->is_in_full_gc(), "must be");
2062 
2063   // If we start the compaction before the CM threads finish
2064   // scanning the root regions we might trip them over as we'll
2065   // be moving objects / updating references. Since the root region
2066   // scan synchronized with the safepoint, just tell it to abort.
2067   // It will notice when the threads start up again later.
2068   abort_root_region_scan_at_safepoint();
2069 
2070   // We haven't started a concurrent cycle no need to do anything; we might have
2071   // aborted the marking because of shutting down though. In this case the marking
2072   // might have already completed the abort (leading to in_progress() below to
2073   // return false), however this still left marking state particularly in the
2074   // shared marking bitmap that must be cleaned up.
2075   // If there are multiple full gcs during shutdown we do this work repeatedly for
2076   // nothing, but this situation should be extremely rare (a full gc after shutdown
2077   // has been signalled is already rare), and this work should be negligible compared
2078   // to actual full gc work.
2079 
2080   if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
2081     return false;
2082   }
2083 
2084   flush_all_task_caches();
2085   reset_marking_for_restart();
2086 
2087   abort_marking_threads();
2088 
2089   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2090   satb_mq_set.abandon_partial_marking();
2091   // This can be called either during or outside marking, we'll read
2092   // the expected_active value from the SATB queue set.
2093   satb_mq_set.set_active_all_threads(false, /* new active value */
2094                                      satb_mq_set.is_active() /* expected_active */);
2095   return true;
2096 }
2097 
2098 void G1ConcurrentMark::abort_marking_threads() {
2099   assert_root_region_scan_completed_or_aborted();
2100   _has_aborted.store_relaxed(true);
2101   _first_overflow_barrier_sync.abort();
2102   _second_overflow_barrier_sync.abort();
2103 }
2104 
2105 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2106   class CountCpuTimeThreadClosure : public ThreadClosure {
2107   public:
2108     jlong _total_cpu_time;
2109 
2110     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2111 
2112     void do_thread(Thread* t) {
2113       _total_cpu_time += os::thread_cpu_time(t);
2114     }
2115   } cl;
2116 
2117   threads_do(&cl);
2118 
2119   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2120 }
2121 
2122 static void print_ms_time_info(const char* prefix, const char* name,
2123                                NumberSeq& ns) {
2124   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2125                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2126   if (ns.num() > 0) {
2127     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2128                            prefix, ns.sd(), ns.maximum());
2129   }
2130 }
2131 
2132 void G1ConcurrentMark::print_summary_info() {
2133   Log(gc, marking) log;
2134   if (!log.is_trace()) {
2135     return;
2136   }
2137 
2138   log.trace(" Concurrent marking:");
2139   if (!is_fully_initialized()) {
2140     log.trace("    has not been initialized yet");
2141     return;
2142   }
2143   print_ms_time_info("  ", "remarks", _remark_times);
2144   {
2145     print_ms_time_info("     ", "final marks", _remark_mark_times);
2146     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2147 
2148   }
2149   print_ms_time_info("  ", "cleanups", _cleanup_times);
2150   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2151             _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2152   log.trace("  Total stop_world time = %8.2f s.",
2153             (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2154   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2155             cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2156 }
2157 
2158 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2159   if (is_fully_initialized()) { // they are initialized late
2160     tc->do_thread(_cm_thread);
2161     _concurrent_workers->threads_do(tc);
2162   }
2163 }
2164 
2165 void G1ConcurrentMark::print_on(outputStream* st) const {
2166   st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2167   _mark_bitmap.print_on(st, " Bits: ");
2168 }
2169 
2170 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2171   ReferenceProcessor* result = g1h->ref_processor_cm();
2172   assert(result != nullptr, "CM reference processor should not be null");
2173   return result;
2174 }
2175 
2176 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2177                                G1CMTask* task)
2178   : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2179     _g1h(g1h), _task(task)
2180 { }
2181 
2182 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2183   assert(hr != nullptr,
2184         "claim_region() should have filtered out null regions");
2185   _curr_region  = hr;
2186   _finger       = hr->bottom();
2187   update_region_limit();
2188 }
2189 
2190 void G1CMTask::update_region_limit() {
2191   G1HeapRegion* hr = _curr_region;
2192   HeapWord* bottom = hr->bottom();
2193   HeapWord* limit = _cm->top_at_mark_start(hr);
2194 
2195   if (limit == bottom) {
2196     // The region was collected underneath our feet.
2197     // We set the finger to bottom to ensure that the bitmap
2198     // iteration that will follow this will not do anything.
2199     // (this is not a condition that holds when we set the region up,
2200     // as the region is not supposed to be empty in the first place)
2201     _finger = bottom;
2202   } else if (limit >= _region_limit) {
2203     assert(limit >= _finger, "peace of mind");
2204   } else {
2205     assert(limit < _region_limit, "only way to get here");
2206     // This can happen under some pretty unusual circumstances.  An
2207     // evacuation pause empties the region underneath our feet (TAMS
2208     // at bottom). We then do some allocation in the region (TAMS
2209     // stays at bottom), followed by the region being used as a GC
2210     // alloc region (TAMS will move to top() and the objects
2211     // originally below it will be greyed). All objects now marked in
2212     // the region are explicitly greyed, if below the global finger,
2213     // and we do not need in fact to scan anything else. So, we simply
2214     // set _finger to be limit to ensure that the bitmap iteration
2215     // doesn't do anything.
2216     _finger = limit;
2217   }
2218 
2219   _region_limit = limit;
2220 }
2221 
2222 void G1CMTask::giveup_current_region() {
2223   assert(_curr_region != nullptr, "invariant");
2224   clear_region_fields();
2225 }
2226 
2227 void G1CMTask::clear_region_fields() {
2228   // Values for these three fields that indicate that we're not
2229   // holding on to a region.
2230   _curr_region   = nullptr;
2231   _finger        = nullptr;
2232   _region_limit  = nullptr;
2233 }
2234 
2235 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2236   if (cm_oop_closure == nullptr) {
2237     assert(_cm_oop_closure != nullptr, "invariant");
2238   } else {
2239     assert(_cm_oop_closure == nullptr, "invariant");
2240   }
2241   _cm_oop_closure = cm_oop_closure;
2242 }
2243 
2244 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2245   guarantee(mark_bitmap != nullptr, "invariant");
2246   _mark_bitmap              = mark_bitmap;
2247   clear_region_fields();
2248 
2249   _calls                         = 0;
2250   _elapsed_time_ms               = 0.0;
2251   _termination_time_ms           = 0.0;
2252 
2253   _mark_stats_cache.reset();
2254 }
2255 
2256 void G1CMTask::reset_for_restart() {
2257   clear_region_fields();
2258   _task_queue->set_empty();
2259   TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2260   TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2261 }
2262 
2263 void G1CMTask::register_partial_array_splitter() {
2264 
2265   ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2266                                                         _cm->max_num_tasks());
2267 }
2268 
2269 void G1CMTask::unregister_partial_array_splitter() {
2270   _partial_array_splitter.~PartialArraySplitter();
2271 }
2272 
2273 bool G1CMTask::should_exit_termination() {
2274   if (!regular_clock_call()) {
2275     return true;
2276   }
2277 
2278   // This is called when we are in the termination protocol. We should
2279   // quit if, for some reason, this task wants to abort or the global
2280   // stack is not empty (this means that we can get work from it).
2281   return !_cm->mark_stack_empty() || has_aborted();
2282 }
2283 
2284 void G1CMTask::reached_limit() {
2285   assert(_words_scanned >= _words_scanned_limit ||
2286          _refs_reached >= _refs_reached_limit ,
2287          "shouldn't have been called otherwise");
2288   abort_marking_if_regular_check_fail();
2289 }
2290 
2291 bool G1CMTask::regular_clock_call() {
2292   if (has_aborted()) {
2293     return false;
2294   }
2295 
2296   // First, we need to recalculate the words scanned and refs reached
2297   // limits for the next clock call.
2298   recalculate_limits();
2299 
2300   // During the regular clock call we do the following
2301 
2302   // (1) If an overflow has been flagged, then we abort.
2303   if (_cm->has_overflown()) {
2304     return false;
2305   }
2306 
2307   // If we are not concurrent (i.e. we're doing remark) we don't need
2308   // to check anything else. The other steps are only needed during
2309   // the concurrent marking phase.
2310   if (!_cm->concurrent()) {
2311     return true;
2312   }
2313 
2314   // (2) If marking has been aborted for Full GC, then we also abort.
2315   if (_cm->has_aborted()) {
2316     return false;
2317   }
2318 
2319   // (4) We check whether we should yield. If we have to, then we abort.
2320   if (SuspendibleThreadSet::should_yield()) {
2321     // We should yield. To do this we abort the task. The caller is
2322     // responsible for yielding.
2323     return false;
2324   }
2325 
2326   // (5) We check whether we've reached our time quota. If we have,
2327   // then we abort.
2328   double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2329   if (elapsed_time_ms > _time_target_ms) {
2330     _has_timed_out = true;
2331     return false;
2332   }
2333 
2334   // (6) Finally, we check whether there are enough completed STAB
2335   // buffers available for processing. If there are, we abort.
2336   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2337   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2338     // we do need to process SATB buffers, we'll abort and restart
2339     // the marking task to do so
2340     return false;
2341   }
2342   return true;
2343 }
2344 
2345 void G1CMTask::recalculate_limits() {
2346   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2347   _words_scanned_limit      = _real_words_scanned_limit;
2348 
2349   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2350   _refs_reached_limit       = _real_refs_reached_limit;
2351 }
2352 
2353 void G1CMTask::decrease_limits() {
2354   // This is called when we believe that we're going to do an infrequent
2355   // operation which will increase the per byte scanned cost (i.e. move
2356   // entries to/from the global stack). It basically tries to decrease the
2357   // scanning limit so that the clock is called earlier.
2358 
2359   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2360   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2361 }
2362 
2363 void G1CMTask::move_entries_to_global_stack() {
2364   // Local array where we'll store the entries that will be popped
2365   // from the local queue.
2366   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2367 
2368   size_t n = 0;
2369   G1TaskQueueEntry task_entry;
2370   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2371     buffer[n] = task_entry;
2372     ++n;
2373   }
2374   if (n < G1CMMarkStack::EntriesPerChunk) {
2375     buffer[n] = G1TaskQueueEntry();
2376   }
2377 
2378   if (n > 0) {
2379     if (!_cm->mark_stack_push(buffer)) {
2380       set_has_aborted();
2381     }
2382   }
2383 
2384   // This operation was quite expensive, so decrease the limits.
2385   decrease_limits();
2386 }
2387 
2388 bool G1CMTask::get_entries_from_global_stack() {
2389   // Local array where we'll store the entries that will be popped
2390   // from the global stack.
2391   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2392 
2393   if (!_cm->mark_stack_pop(buffer)) {
2394     return false;
2395   }
2396 
2397   // We did actually pop at least one entry.
2398   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2399     G1TaskQueueEntry task_entry = buffer[i];
2400     if (task_entry.is_null()) {
2401       break;
2402     }
2403     assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2404     bool success = _task_queue->push(task_entry);
2405     // We only call this when the local queue is empty or under a
2406     // given target limit. So, we do not expect this push to fail.
2407     assert(success, "invariant");
2408   }
2409 
2410   // This operation was quite expensive, so decrease the limits
2411   decrease_limits();
2412   return true;
2413 }
2414 
2415 void G1CMTask::drain_local_queue(bool partially) {
2416   if (has_aborted()) {
2417     return;
2418   }
2419 
2420   // Decide what the target size is, depending whether we're going to
2421   // drain it partially (so that other tasks can steal if they run out
2422   // of things to do) or totally (at the very end).
2423   uint target_size;
2424   if (partially) {
2425     target_size = GCDrainStackTargetSize;
2426   } else {
2427     target_size = 0;
2428   }
2429 
2430   if (_task_queue->size() > target_size) {
2431     G1TaskQueueEntry entry;
2432     bool ret = _task_queue->pop_local(entry);
2433     while (ret) {
2434       process_entry(entry, false /* stolen */);
2435       if (_task_queue->size() <= target_size || has_aborted()) {
2436         ret = false;
2437       } else {
2438         ret = _task_queue->pop_local(entry);
2439       }
2440     }
2441   }
2442 }
2443 
2444 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2445   assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2446 
2447   // Mark objArray klass metadata
2448   process_klass(obj->klass());
2449 
2450   size_t array_length = obj->length();
2451   size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2452 
2453   process_array_chunk(obj, 0, initial_chunk_size);
2454 
2455   // Include object header size
2456   return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2457 }
2458 
2459 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2460   PartialArrayState* state = task.to_partial_array_state();
2461   // Access state before release by claim().
2462   objArrayOop obj = objArrayOop(state->source());
2463 
2464   PartialArraySplitter::Claim claim =
2465     _partial_array_splitter.claim(state, _task_queue, stolen);
2466 
2467   process_array_chunk(obj, claim._start, claim._end);
2468   return heap_word_size((claim._end - claim._start) * heapOopSize);
2469 }
2470 
2471 void G1CMTask::drain_global_stack(bool partially) {
2472   if (has_aborted()) {
2473     return;
2474   }
2475 
2476   // We have a policy to drain the local queue before we attempt to
2477   // drain the global stack.
2478   assert(partially || _task_queue->size() == 0, "invariant");
2479 
2480   // Decide what the target size is, depending whether we're going to
2481   // drain it partially (so that other tasks can steal if they run out
2482   // of things to do) or totally (at the very end).
2483   // Notice that when draining the global mark stack partially, due to the racyness
2484   // of the mark stack size update we might in fact drop below the target. But,
2485   // this is not a problem.
2486   // In case of total draining, we simply process until the global mark stack is
2487   // totally empty, disregarding the size counter.
2488   if (partially) {
2489     size_t const target_size = _cm->partial_mark_stack_size_target();
2490     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2491       if (get_entries_from_global_stack()) {
2492         drain_local_queue(partially);
2493       }
2494     }
2495   } else {
2496     while (!has_aborted() && get_entries_from_global_stack()) {
2497       drain_local_queue(partially);
2498     }
2499   }
2500 }
2501 
2502 // SATB Queue has several assumptions on whether to call the par or
2503 // non-par versions of the methods. this is why some of the code is
2504 // replicated. We should really get rid of the single-threaded version
2505 // of the code to simplify things.
2506 void G1CMTask::drain_satb_buffers() {
2507   if (has_aborted()) {
2508     return;
2509   }
2510 
2511   // We set this so that the regular clock knows that we're in the
2512   // middle of draining buffers and doesn't set the abort flag when it
2513   // notices that SATB buffers are available for draining. It'd be
2514   // very counter productive if it did that. :-)
2515   _draining_satb_buffers = true;
2516 
2517   G1CMSATBBufferClosure satb_cl(this, _g1h);
2518   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2519 
2520   // This keeps claiming and applying the closure to completed buffers
2521   // until we run out of buffers or we need to abort.
2522   while (!has_aborted() &&
2523          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2524     abort_marking_if_regular_check_fail();
2525   }
2526 
2527   // Can't assert qset is empty here, even if not aborted.  If concurrent,
2528   // some other thread might be adding to the queue.  If not concurrent,
2529   // some other thread might have won the race for the last buffer, but
2530   // has not yet decremented the count.
2531 
2532   _draining_satb_buffers = false;
2533 
2534   // again, this was a potentially expensive operation, decrease the
2535   // limits to get the regular clock call early
2536   decrease_limits();
2537 }
2538 
2539 #ifndef PRODUCT
2540 void G1CMTask::verify_no_mark_stats_for(uint region_idx) {
2541   _mark_stats_cache.verify_no_mark_stats_for(region_idx);
2542 }
2543 #endif
2544 
2545 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2546   _mark_stats_cache.reset(region_idx);
2547 }
2548 
2549 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2550   return _mark_stats_cache.evict_all();
2551 }
2552 
2553 void G1CMTask::print_stats() {
2554   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2555   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2556                        _elapsed_time_ms, _termination_time_ms);
2557   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2558                        _step_times_ms.num(),
2559                        _step_times_ms.avg(),
2560                        _step_times_ms.sd(),
2561                        _step_times_ms.maximum(),
2562                        _step_times_ms.sum());
2563   size_t const hits = _mark_stats_cache.hits();
2564   size_t const misses = _mark_stats_cache.misses();
2565   log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2566                        hits, misses, percent_of(hits, hits + misses));
2567 }
2568 
2569 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2570   return _task_queues->steal(worker_id, task_entry);
2571 }
2572 
2573 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2574   if (has_aborted() || _curr_region == nullptr) {
2575     return;
2576   }
2577 
2578   // This means that we're already holding on to a region.
2579   assert(_finger != nullptr, "if region is not null, then the finger "
2580          "should not be null either");
2581 
2582   // We might have restarted this task after an evacuation pause
2583   // which might have evacuated the region we're holding on to
2584   // underneath our feet. Let's read its limit again to make sure
2585   // that we do not iterate over a region of the heap that
2586   // contains garbage (update_region_limit() will also move
2587   // _finger to the start of the region if it is found empty).
2588   update_region_limit();
2589   // We will start from _finger not from the start of the region,
2590   // as we might be restarting this task after aborting half-way
2591   // through scanning this region. In this case, _finger points to
2592   // the address where we last found a marked object. If this is a
2593   // fresh region, _finger points to start().
2594   MemRegion mr = MemRegion(_finger, _region_limit);
2595 
2596   assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2597          "humongous regions should go around loop once only");
2598 
2599   // Some special cases:
2600   // If the memory region is empty, we can just give up the region.
2601   // If the current region is humongous then we only need to check
2602   // the bitmap for the bit associated with the start of the object,
2603   // scan the object if it's live, and give up the region.
2604   // Otherwise, let's iterate over the bitmap of the part of the region
2605   // that is left.
2606   // If the iteration is successful, give up the region.
2607   if (mr.is_empty()) {
2608     giveup_current_region();
2609     abort_marking_if_regular_check_fail();
2610   } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2611     if (_mark_bitmap->is_marked(mr.start())) {
2612       // The object is marked - apply the closure
2613       bitmap_closure.do_addr(mr.start());
2614     }
2615     // Even if this task aborted while scanning the humongous object
2616     // we can (and should) give up the current region.
2617     giveup_current_region();
2618     abort_marking_if_regular_check_fail();
2619   } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2620     giveup_current_region();
2621     abort_marking_if_regular_check_fail();
2622   } else {
2623     assert(has_aborted(), "currently the only way to do so");
2624     // The only way to abort the bitmap iteration is to return
2625     // false from the do_bit() method. However, inside the
2626     // do_bit() method we move the _finger to point to the
2627     // object currently being looked at. So, if we bail out, we
2628     // have definitely set _finger to something non-null.
2629     assert(_finger != nullptr, "invariant");
2630 
2631     // Region iteration was actually aborted. So now _finger
2632     // points to the address of the object we last scanned. If we
2633     // leave it there, when we restart this task, we will rescan
2634     // the object. It is easy to avoid this. We move the finger by
2635     // enough to point to the next possible object header.
2636     assert(_finger < _region_limit, "invariant");
2637     HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2638     if (new_finger >= _region_limit) {
2639       giveup_current_region();
2640     } else {
2641       move_finger_to(new_finger);
2642     }
2643   }
2644 }
2645 
2646 void G1CMTask::claim_new_region() {
2647   // Read the note on the claim_region() method on why it might
2648   // return null with potentially more regions available for
2649   // claiming and why we have to check out_of_regions() to determine
2650   // whether we're done or not.
2651   while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2652     // We are going to try to claim a new region. We should have
2653     // given up on the previous one.
2654     // Separated the asserts so that we know which one fires.
2655     assert(_curr_region  == nullptr, "invariant");
2656     assert(_finger       == nullptr, "invariant");
2657     assert(_region_limit == nullptr, "invariant");
2658     G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2659     if (claimed_region != nullptr) {
2660       // Yes, we managed to claim one
2661       setup_for_region(claimed_region);
2662       assert(_curr_region == claimed_region, "invariant");
2663     }
2664     // It is important to call the regular clock here. It might take
2665     // a while to claim a region if, for example, we hit a large
2666     // block of empty regions. So we need to call the regular clock
2667     // method once round the loop to make sure it's called
2668     // frequently enough.
2669     abort_marking_if_regular_check_fail();
2670   }
2671 }
2672 
2673 void G1CMTask::attempt_stealing() {
2674   // We cannot check whether the global stack is empty, since other
2675   // tasks might be pushing objects to it concurrently.
2676   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2677          "only way to reach here");
2678   while (!has_aborted()) {
2679     G1TaskQueueEntry entry;
2680     if (_cm->try_stealing(_worker_id, entry)) {
2681       process_entry(entry, true /* stolen */);
2682 
2683       // And since we're towards the end, let's totally drain the
2684       // local queue and global stack.
2685       drain_local_queue(false);
2686       drain_global_stack(false);
2687     } else {
2688       break;
2689     }
2690   }
2691 }
2692 
2693 void G1CMTask::attempt_termination(bool is_serial) {
2694   // We cannot check whether the global stack is empty, since other
2695   // tasks might be concurrently pushing objects on it.
2696   // Separated the asserts so that we know which one fires.
2697   assert(_cm->out_of_regions(), "only way to reach here");
2698   assert(_task_queue->size() == 0, "only way to reach here");
2699   double termination_start_time_ms = os::elapsedTime() * 1000.0;
2700 
2701   // The G1CMTask class also extends the TerminatorTerminator class,
2702   // hence its should_exit_termination() method will also decide
2703   // whether to exit the termination protocol or not.
2704   bool finished = (is_serial ||
2705                    _cm->terminator()->offer_termination(this));
2706   _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2707 
2708   if (finished) {
2709     // We're all done.
2710 
2711     // We can now guarantee that the global stack is empty, since
2712     // all other tasks have finished. We separated the guarantees so
2713     // that, if a condition is false, we can immediately find out
2714     // which one.
2715     guarantee(_cm->out_of_regions(), "only way to reach here");
2716     guarantee(_cm->mark_stack_empty(), "only way to reach here");
2717     guarantee(_task_queue->size() == 0, "only way to reach here");
2718     guarantee(!_cm->has_overflown(), "only way to reach here");
2719     guarantee(!has_aborted(), "should never happen if termination has completed");
2720   } else {
2721     // Apparently there's more work to do. Let's abort this task. We
2722     // will restart it and hopefully we can find more things to do.
2723     set_has_aborted();
2724   }
2725 }
2726 
2727 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2728   if (_has_timed_out) {
2729     double diff_ms = elapsed_time_ms - _time_target_ms;
2730     // Keep statistics of how well we did with respect to hitting
2731     // our target only if we actually timed out (if we aborted for
2732     // other reasons, then the results might get skewed).
2733     _marking_step_diff_ms.add(diff_ms);
2734   }
2735 
2736   if (!_cm->has_overflown()) {
2737     return;
2738   }
2739 
2740   // This is the interesting one. We aborted because a global
2741   // overflow was raised. This means we have to restart the
2742   // marking phase and start iterating over regions. However, in
2743   // order to do this we have to make sure that all tasks stop
2744   // what they are doing and re-initialize in a safe manner. We
2745   // will achieve this with the use of two barrier sync points.
2746   if (!is_serial) {
2747     // We only need to enter the sync barrier if being called
2748     // from a parallel context
2749     _cm->enter_first_sync_barrier(_worker_id);
2750 
2751     // When we exit this sync barrier we know that all tasks have
2752     // stopped doing marking work. So, it's now safe to
2753     // re-initialize our data structures.
2754   }
2755 
2756   clear_region_fields();
2757   flush_mark_stats_cache();
2758 
2759   if (!is_serial) {
2760     // If we're executing the concurrent phase of marking, reset the marking
2761     // state; otherwise the marking state is reset after reference processing,
2762     // during the remark pause.
2763     // If we reset here as a result of an overflow during the remark we will
2764     // see assertion failures from any subsequent set_concurrency_and_phase()
2765     // calls.
2766     if (_cm->concurrent() && _worker_id == 0) {
2767       // Worker 0 is responsible for clearing the global data structures because
2768       // of an overflow. During STW we should not clear the overflow flag (in
2769       // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2770       // method to abort the pause and restart concurrent marking.
2771       _cm->reset_marking_for_restart();
2772 
2773       log_info(gc, marking)("Concurrent Mark reset for overflow");
2774     }
2775 
2776     // ...and enter the second barrier.
2777     _cm->enter_second_sync_barrier(_worker_id);
2778   }
2779 }
2780 
2781 /*****************************************************************************
2782 
2783     The do_marking_step(time_target_ms, ...) method is the building
2784     block of the parallel marking framework. It can be called in parallel
2785     with other invocations of do_marking_step() on different tasks
2786     (but only one per task, obviously) and concurrently with the
2787     mutator threads, or during remark, hence it eliminates the need
2788     for two versions of the code. When called during remark, it will
2789     pick up from where the task left off during the concurrent marking
2790     phase. Interestingly, tasks are also claimable during evacuation
2791     pauses too, since do_marking_step() ensures that it aborts before
2792     it needs to yield.
2793 
2794     The data structures that it uses to do marking work are the
2795     following:
2796 
2797       (1) Marking Bitmap. If there are grey objects that appear only
2798       on the bitmap (this happens either when dealing with an overflow
2799       or when the concurrent start pause has simply marked the roots
2800       and didn't push them on the stack), then tasks claim heap
2801       regions whose bitmap they then scan to find grey objects. A
2802       global finger indicates where the end of the last claimed region
2803       is. A local finger indicates how far into the region a task has
2804       scanned. The two fingers are used to determine how to grey an
2805       object (i.e. whether simply marking it is OK, as it will be
2806       visited by a task in the future, or whether it needs to be also
2807       pushed on a stack).
2808 
2809       (2) Local Queue. The local queue of the task which is accessed
2810       reasonably efficiently by the task. Other tasks can steal from
2811       it when they run out of work. Throughout the marking phase, a
2812       task attempts to keep its local queue short but not totally
2813       empty, so that entries are available for stealing by other
2814       tasks. Only when there is no more work, a task will totally
2815       drain its local queue.
2816 
2817       (3) Global Mark Stack. This handles local queue overflow. During
2818       marking only sets of entries are moved between it and the local
2819       queues, as access to it requires a mutex and more fine-grain
2820       interaction with it which might cause contention. If it
2821       overflows, then the marking phase should restart and iterate
2822       over the bitmap to identify grey objects. Throughout the marking
2823       phase, tasks attempt to keep the global mark stack at a small
2824       length but not totally empty, so that entries are available for
2825       popping by other tasks. Only when there is no more work, tasks
2826       will totally drain the global mark stack.
2827 
2828       (4) SATB Buffer Queue. This is where completed SATB buffers are
2829       made available. Buffers are regularly removed from this queue
2830       and scanned for roots, so that the queue doesn't get too
2831       long. During remark, all completed buffers are processed, as
2832       well as the filled in parts of any uncompleted buffers.
2833 
2834     The do_marking_step() method tries to abort when the time target
2835     has been reached. There are a few other cases when the
2836     do_marking_step() method also aborts:
2837 
2838       (1) When the marking phase has been aborted (after a Full GC).
2839 
2840       (2) When a global overflow (on the global stack) has been
2841       triggered. Before the task aborts, it will actually sync up with
2842       the other tasks to ensure that all the marking data structures
2843       (local queues, stacks, fingers etc.)  are re-initialized so that
2844       when do_marking_step() completes, the marking phase can
2845       immediately restart.
2846 
2847       (3) When enough completed SATB buffers are available. The
2848       do_marking_step() method only tries to drain SATB buffers right
2849       at the beginning. So, if enough buffers are available, the
2850       marking step aborts and the SATB buffers are processed at
2851       the beginning of the next invocation.
2852 
2853       (4) To yield. when we have to yield then we abort and yield
2854       right at the end of do_marking_step(). This saves us from a lot
2855       of hassle as, by yielding we might allow a Full GC. If this
2856       happens then objects will be compacted underneath our feet, the
2857       heap might shrink, etc. We save checking for this by just
2858       aborting and doing the yield right at the end.
2859 
2860     From the above it follows that the do_marking_step() method should
2861     be called in a loop (or, otherwise, regularly) until it completes.
2862 
2863     If a marking step completes without its has_aborted() flag being
2864     true, it means it has completed the current marking phase (and
2865     also all other marking tasks have done so and have all synced up).
2866 
2867     A method called regular_clock_call() is invoked "regularly" (in
2868     sub ms intervals) throughout marking. It is this clock method that
2869     checks all the abort conditions which were mentioned above and
2870     decides when the task should abort. A work-based scheme is used to
2871     trigger this clock method: when the number of object words the
2872     marking phase has scanned or the number of references the marking
2873     phase has visited reach a given limit. Additional invocations to
2874     the method clock have been planted in a few other strategic places
2875     too. The initial reason for the clock method was to avoid calling
2876     cpu time gathering too regularly, as it is quite expensive. So,
2877     once it was in place, it was natural to piggy-back all the other
2878     conditions on it too and not constantly check them throughout the code.
2879 
2880     If do_termination is true then do_marking_step will enter its
2881     termination protocol.
2882 
2883     The value of is_serial must be true when do_marking_step is being
2884     called serially (i.e. by the VMThread) and do_marking_step should
2885     skip any synchronization in the termination and overflow code.
2886     Examples include the serial remark code and the serial reference
2887     processing closures.
2888 
2889     The value of is_serial must be false when do_marking_step is
2890     being called by any of the worker threads.
2891     Examples include the concurrent marking code (CMMarkingTask),
2892     the MT remark code, and the MT reference processing closures.
2893 
2894  *****************************************************************************/
2895 
2896 void G1CMTask::do_marking_step(double time_target_ms,
2897                                bool do_termination,
2898                                bool is_serial) {
2899   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2900 
2901   _start_cpu_time_ns = os::current_thread_cpu_time();
2902 
2903   // If do_stealing is true then do_marking_step will attempt to
2904   // steal work from the other G1CMTasks. It only makes sense to
2905   // enable stealing when the termination protocol is enabled
2906   // and do_marking_step() is not being called serially.
2907   bool do_stealing = do_termination && !is_serial;
2908 
2909   G1Predictions const& predictor = _g1h->policy()->predictor();
2910   double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2911   _time_target_ms = time_target_ms - diff_prediction_ms;
2912 
2913   // set up the variables that are used in the work-based scheme to
2914   // call the regular clock method
2915   _words_scanned = 0;
2916   _refs_reached  = 0;
2917   recalculate_limits();
2918 
2919   // clear all flags
2920   clear_has_aborted();
2921   _has_timed_out = false;
2922   _draining_satb_buffers = false;
2923 
2924   ++_calls;
2925 
2926   // Set up the bitmap and oop closures. Anything that uses them is
2927   // eventually called from this method, so it is OK to allocate these
2928   // statically.
2929   G1CMBitMapClosure bitmap_closure(this, _cm);
2930   G1CMOopClosure cm_oop_closure(_g1h, this);
2931   set_cm_oop_closure(&cm_oop_closure);
2932 
2933   if (_cm->has_overflown()) {
2934     // This can happen if the mark stack overflows during a GC pause
2935     // and this task, after a yield point, restarts. We have to abort
2936     // as we need to get into the overflow protocol which happens
2937     // right at the end of this task.
2938     set_has_aborted();
2939   }
2940 
2941   // First drain any available SATB buffers. After this, we will not
2942   // look at SATB buffers before the next invocation of this method.
2943   // If enough completed SATB buffers are queued up, the regular clock
2944   // will abort this task so that it restarts.
2945   drain_satb_buffers();
2946   // ...then partially drain the local queue and the global stack
2947   drain_local_queue(true);
2948   drain_global_stack(true);
2949 
2950   do {
2951     process_current_region(bitmap_closure);
2952     // At this point we have either completed iterating over the
2953     // region we were holding on to, or we have aborted.
2954 
2955     // We then partially drain the local queue and the global stack.
2956     drain_local_queue(true);
2957     drain_global_stack(true);
2958 
2959     claim_new_region();
2960 
2961     assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2962            "at this point we should be out of regions");
2963   } while ( _curr_region != nullptr && !has_aborted());
2964 
2965   // We cannot check whether the global stack is empty, since other
2966   // tasks might be pushing objects to it concurrently.
2967   assert(has_aborted() || _cm->out_of_regions(),
2968          "at this point we should be out of regions");
2969   // Try to reduce the number of available SATB buffers so that
2970   // remark has less work to do.
2971   drain_satb_buffers();
2972 
2973   // Since we've done everything else, we can now totally drain the
2974   // local queue and global stack.
2975   drain_local_queue(false);
2976   drain_global_stack(false);
2977 
2978   // Attempt at work stealing from other task's queues.
2979   if (do_stealing && !has_aborted()) {
2980     // We have not aborted. This means that we have finished all that
2981     // we could. Let's try to do some stealing...
2982     attempt_stealing();
2983   }
2984 
2985   // We still haven't aborted. Now, let's try to get into the
2986   // termination protocol.
2987   if (do_termination && !has_aborted()) {
2988     attempt_termination(is_serial);
2989   }
2990 
2991   // Mainly for debugging purposes to make sure that a pointer to the
2992   // closure which was statically allocated in this frame doesn't
2993   // escape it by accident.
2994   set_cm_oop_closure(nullptr);
2995   jlong end_cpu_time_ns = os::current_thread_cpu_time();
2996   double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2997   // Update the step history.
2998   _step_times_ms.add(elapsed_time_ms);
2999 
3000   if (has_aborted()) {
3001     // The task was aborted for some reason.
3002     handle_abort(is_serial, elapsed_time_ms);
3003   }
3004 }
3005 
3006 G1CMTask::G1CMTask(uint worker_id,
3007                    G1ConcurrentMark* cm,
3008                    G1CMTaskQueue* task_queue,
3009                    G1RegionMarkStats* mark_stats) :
3010   _worker_id(worker_id),
3011   _g1h(G1CollectedHeap::heap()),
3012   _cm(cm),
3013   _mark_bitmap(nullptr),
3014   _task_queue(task_queue),
3015   _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
3016   _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
3017   _calls(0),
3018   _time_target_ms(0.0),
3019   _start_cpu_time_ns(0),
3020   _cm_oop_closure(nullptr),
3021   _curr_region(nullptr),
3022   _finger(nullptr),
3023   _region_limit(nullptr),
3024   _words_scanned(0),
3025   _words_scanned_limit(0),
3026   _real_words_scanned_limit(0),
3027   _refs_reached(0),
3028   _refs_reached_limit(0),
3029   _real_refs_reached_limit(0),
3030   _has_aborted(false),
3031   _has_timed_out(false),
3032   _draining_satb_buffers(false),
3033   _step_times_ms(),
3034   _elapsed_time_ms(0.0),
3035   _termination_time_ms(0.0),
3036   _marking_step_diff_ms()
3037 {
3038   guarantee(task_queue != nullptr, "invariant");
3039 
3040   _marking_step_diff_ms.add(0.5);
3041 }
3042 
3043 // These are formatting macros that are used below to ensure
3044 // consistent formatting. The *_H_* versions are used to format the
3045 // header for a particular value and they should be kept consistent
3046 // with the corresponding macro. Also note that most of the macros add
3047 // the necessary white space (as a prefix) which makes them a bit
3048 // easier to compose.
3049 
3050 // All the output lines are prefixed with this string to be able to
3051 // identify them easily in a large log file.
3052 #define G1PPRL_LINE_PREFIX            "###"
3053 
3054 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3055 #ifdef _LP64
3056 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3057 #else // _LP64
3058 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3059 #endif // _LP64
3060 
3061 // For per-region info
3062 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3063 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3064 #define G1PPRL_STATE_FORMAT           "   %-5s"
3065 #define G1PPRL_STATE_H_FORMAT         "   %5s"
3066 #define G1PPRL_BYTE_FORMAT            "  %9zu"
3067 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3068 #define G1PPRL_DOUBLE_FORMAT          "%14.1f"
3069 #define G1PPRL_GCEFF_H_FORMAT         "  %14s"
3070 #define G1PPRL_GID_H_FORMAT           "  %9s"
3071 #define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
3072 #define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
3073 #define G1PPRL_LEN_H_FORMAT           "  %14s"
3074 #define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
3075 #define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"
3076 
3077 // For summary info
3078 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3079 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
3080 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3081 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3082 
3083 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
3084   _total_used_bytes(0),
3085   _total_capacity_bytes(0),
3086   _total_live_bytes(0),
3087   _total_remset_bytes(0),
3088   _total_code_roots_bytes(0)
3089 {
3090   if (!log_is_enabled(Trace, gc, liveness)) {
3091     return;
3092   }
3093 
3094   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3095   MemRegion reserved = g1h->reserved();
3096   double now = os::elapsedTime();
3097 
3098   // Print the header of the output.
3099   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3100   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3101                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3102                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3103                           p2i(reserved.start()), p2i(reserved.end()),
3104                           G1HeapRegion::GrainBytes);
3105   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3106   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3107                           G1PPRL_TYPE_H_FORMAT
3108                           G1PPRL_ADDR_BASE_H_FORMAT
3109                           G1PPRL_BYTE_H_FORMAT
3110                           G1PPRL_BYTE_H_FORMAT
3111                           G1PPRL_STATE_H_FORMAT
3112                           G1PPRL_BYTE_H_FORMAT
3113                           G1PPRL_GID_H_FORMAT,
3114                           "type", "address-range",
3115                           "used", "live",
3116                           "state", "code-roots",
3117                           "group-id");
3118   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3119                           G1PPRL_TYPE_H_FORMAT
3120                           G1PPRL_ADDR_BASE_H_FORMAT
3121                           G1PPRL_BYTE_H_FORMAT
3122                           G1PPRL_BYTE_H_FORMAT
3123                           G1PPRL_STATE_H_FORMAT
3124                           G1PPRL_BYTE_H_FORMAT
3125                           G1PPRL_GID_H_FORMAT,
3126                           "", "",
3127                           "(bytes)", "(bytes)",
3128                           "", "(bytes)", "");
3129 }
3130 
3131 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3132   if (!log_is_enabled(Trace, gc, liveness)) {
3133     return false;
3134   }
3135 
3136   const char* type       = r->get_type_str();
3137   HeapWord* bottom       = r->bottom();
3138   HeapWord* end          = r->end();
3139   size_t capacity_bytes  = r->capacity();
3140   size_t used_bytes      = r->used();
3141   size_t live_bytes      = r->live_bytes();
3142   size_t remset_bytes    = r->rem_set()->mem_size();
3143   size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3144   const char* remset_type = r->rem_set()->get_short_state_str();
3145   uint cset_group_id     = r->rem_set()->has_cset_group()
3146                          ? r->rem_set()->cset_group_id()
3147                          : G1CSetCandidateGroup::NoRemSetId;
3148 
3149   _total_used_bytes      += used_bytes;
3150   _total_capacity_bytes  += capacity_bytes;
3151   _total_live_bytes      += live_bytes;
3152   _total_remset_bytes    += remset_bytes;
3153   _total_code_roots_bytes += code_roots_bytes;
3154 
3155   // Print a line for this particular region.
3156   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3157                         G1PPRL_TYPE_FORMAT
3158                         G1PPRL_ADDR_BASE_FORMAT
3159                         G1PPRL_BYTE_FORMAT
3160                         G1PPRL_BYTE_FORMAT
3161                         G1PPRL_STATE_FORMAT
3162                         G1PPRL_BYTE_FORMAT
3163                         G1PPRL_GID_FORMAT,
3164                         type, p2i(bottom), p2i(end),
3165                         used_bytes, live_bytes,
3166                         remset_type, code_roots_bytes,
3167                         cset_group_id);
3168 
3169   return false;
3170 }
3171 
3172 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3173   if (!log_is_enabled(Trace, gc, liveness)) {
3174     return;
3175   }
3176 
3177   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3178   _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3179   // add static memory usages to remembered set sizes
3180   _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3181 
3182   log_cset_candidate_groups();
3183 
3184   // Print the footer of the output.
3185   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3186   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3187                          " SUMMARY"
3188                          G1PPRL_SUM_MB_FORMAT("capacity")
3189                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3190                          G1PPRL_SUM_MB_PERC_FORMAT("live")
3191                          G1PPRL_SUM_MB_FORMAT("remset")
3192                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3193                          bytes_to_mb(_total_capacity_bytes),
3194                          bytes_to_mb(_total_used_bytes),
3195                          percent_of(_total_used_bytes, _total_capacity_bytes),
3196                          bytes_to_mb(_total_live_bytes),
3197                          percent_of(_total_live_bytes, _total_capacity_bytes),
3198                          bytes_to_mb(_total_remset_bytes),
3199                          bytes_to_mb(_total_code_roots_bytes));
3200 }
3201 
3202 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3203   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3204                           G1PPRL_GID_FORMAT
3205                           G1PPRL_LEN_FORMAT
3206                           G1PPRL_GID_GCEFF_FORMAT
3207                           G1PPRL_GID_LIVENESS_FORMAT
3208                           G1PPRL_BYTE_FORMAT
3209                           G1PPRL_TYPE_H_FORMAT,
3210                           group->group_id(),
3211                           group->length(),
3212                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3213                           group->length() > 0 ? group->liveness_percent() : 0.0,
3214                           group->card_set()->mem_size(),
3215                           type);
3216   _total_remset_bytes += group->card_set()->mem_size();
3217 }
3218 
3219 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3220   for (G1CSetCandidateGroup* group : gl) {
3221     log_cset_candidate_group_add_total(group, type);
3222   }
3223 }
3224 
3225 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3226   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3227   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3228   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3229   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3230                           G1PPRL_GID_H_FORMAT
3231                           G1PPRL_LEN_H_FORMAT
3232                           G1PPRL_GCEFF_H_FORMAT
3233                           G1PPRL_BYTE_H_FORMAT
3234                           G1PPRL_BYTE_H_FORMAT
3235                           G1PPRL_TYPE_H_FORMAT,
3236                           "groud-id", "num-regions",
3237                           "gc-eff", "liveness",
3238                           "remset", "type");
3239 
3240   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3241                           G1PPRL_GID_H_FORMAT
3242                           G1PPRL_LEN_H_FORMAT
3243                           G1PPRL_GCEFF_H_FORMAT
3244                           G1PPRL_BYTE_H_FORMAT
3245                           G1PPRL_BYTE_H_FORMAT
3246                           G1PPRL_TYPE_H_FORMAT,
3247                           "", "",
3248                           "(bytes/ms)", "%",
3249                           "(bytes)", "");
3250 
3251   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3252 
3253   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3254 
3255   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3256   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3257   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3258 }