1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "gc/g1/g1BarrierSet.hpp"
  28 #include "gc/g1/g1BatchedTask.hpp"
  29 #include "gc/g1/g1CardSetMemory.hpp"
  30 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectionSetChooser.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  38 #include "gc/g1/g1ConcurrentRefine.hpp"
  39 #include "gc/g1/g1HeapRegion.inline.hpp"
  40 #include "gc/g1/g1HeapRegionManager.hpp"
  41 #include "gc/g1/g1HeapRegionPrinter.hpp"
  42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  44 #include "gc/g1/g1HeapVerifier.hpp"
  45 #include "gc/g1/g1OopClosures.inline.hpp"
  46 #include "gc/g1/g1Policy.hpp"
  47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  48 #include "gc/g1/g1ThreadLocalData.hpp"
  49 #include "gc/g1/g1Trace.hpp"
  50 #include "gc/shared/gcId.hpp"
  51 #include "gc/shared/gcTimer.hpp"
  52 #include "gc/shared/gcTraceTime.inline.hpp"
  53 #include "gc/shared/gcVMOperations.hpp"
  54 #include "gc/shared/partialArraySplitter.inline.hpp"
  55 #include "gc/shared/partialArrayState.hpp"
  56 #include "gc/shared/partialArrayTaskStats.hpp"
  57 #include "gc/shared/referencePolicy.hpp"
  58 #include "gc/shared/suspendibleThreadSet.hpp"
  59 #include "gc/shared/taskqueue.inline.hpp"
  60 #include "gc/shared/taskTerminator.hpp"
  61 #include "gc/shared/weakProcessor.inline.hpp"
  62 #include "gc/shared/workerPolicy.hpp"
  63 #include "jvm.h"
  64 #include "logging/log.hpp"
  65 #include "memory/allocation.hpp"
  66 #include "memory/iterator.hpp"
  67 #include "memory/metaspaceUtils.hpp"
  68 #include "memory/resourceArea.hpp"
  69 #include "memory/universe.hpp"
  70 #include "nmt/memTracker.hpp"
  71 #include "oops/access.inline.hpp"
  72 #include "oops/oop.inline.hpp"
  73 #include "runtime/globals_extension.hpp"
  74 #include "runtime/handles.inline.hpp"
  75 #include "runtime/java.hpp"
  76 #include "runtime/orderAccess.hpp"
  77 #include "runtime/os.hpp"
  78 #include "runtime/prefetch.inline.hpp"
  79 #include "runtime/threads.hpp"
  80 #include "utilities/align.hpp"
  81 #include "utilities/checkedCast.hpp"
  82 #include "utilities/formatBuffer.hpp"
  83 #include "utilities/growableArray.hpp"
  84 #include "utilities/powerOfTwo.hpp"
  85 
  86 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
  87 
  88 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  89   assert(cm != nullptr, "must be");
  90 }
  91 
  92 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  93   assert(cm != nullptr, "must be");
  94   assert(_cm == nullptr, "double initialize");
  95   _cm = cm;
  96 }
  97 
  98 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  99   assert(addr < _cm->finger(), "invariant");
 100   assert(addr >= _task->finger(), "invariant");
 101 
 102   // We move that task's local finger along.
 103   _task->move_finger_to(addr);
 104 
 105   _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
 106   // we only partially drain the local queue and global stack
 107   _task->drain_local_queue(true);
 108   _task->drain_global_stack(true);
 109 
 110   // if the has_aborted flag has been raised, we need to bail out of
 111   // the iteration
 112   return !_task->has_aborted();
 113 }
 114 
 115 G1CMMarkStack::G1CMMarkStack() :
 116   _chunk_allocator() {
 117   set_empty();
 118 }
 119 
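// Alignment of mark stack capacities, in number of G1TaskQueueEntry elements:
// the least common multiple of the OS allocation granularity and the chunk
// size, converted from bytes to entries.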
 120 size_t G1CMMarkStack::capacity_alignment() {
 121   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 122 }
 123 
 124 bool G1CMMarkStack::initialize() {
 125   guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
 126 
 127   size_t initial_capacity = MarkStackSize;
 128   size_t max_capacity = MarkStackSizeMax;
 129 
 130   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 131 
 132   size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 133   size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 134 
 135   initial_num_chunks = round_up_power_of_2(initial_num_chunks);
 136   max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
 137 
 138   size_t limit = (INT_MAX - 1);
 139   max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 140   initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 141 
 142   FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
 143   FLAG_SET_ERGO(MarkStackSize, initial_capacity);
 144 
 145   log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
 146 
  log_debug(gc)("Initialize mark stack with %zu chunks, maximum capacity %zu entries",
                initial_num_chunks, max_capacity);
 149 
 150   return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
 151 }
 152 
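// Lock-free bump allocation of a chunk from the pre-reserved buckets: claim
// the next chunk index and, if the corresponding bucket has not been mapped
// yet, optionally grow the backing storage under G1MarkStackChunkList_lock.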
 153 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
 154   if (_size.load_relaxed() >= _max_capacity) {
 155     return nullptr;
 156   }
 157 
 158   size_t cur_idx = _size.fetch_then_add(1u);
 159 
 160   if (cur_idx >= _max_capacity) {
 161     return nullptr;
 162   }
 163 
 164   size_t bucket = get_bucket(cur_idx);
 165   if (_buckets[bucket].load_acquire() == nullptr) {
 166     if (!_should_grow) {
 167       // Prefer to restart the CM.
 168       return nullptr;
 169     }
 170 
 171     MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 172     if (_buckets[bucket].load_acquire() == nullptr) {
 173       size_t desired_capacity = bucket_size(bucket) * 2;
 174       if (!try_expand_to(desired_capacity)) {
 175         return nullptr;
 176       }
 177     }
 178   }
 179 
 180   size_t bucket_idx = get_bucket_index(cur_idx);
 181   TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
 182   result->next = nullptr;
 183   return result;
 184 }
 185 
 186 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
 187   _min_capacity(0),
 188   _max_capacity(0),
 189   _capacity(0),
 190   _num_buckets(0),
 191   _should_grow(false),
 192   _buckets(nullptr),
 193   _size(0)
 194 { }
 195 
 196 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
 197   guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
 198 
 199   _min_capacity = initial_capacity;
 200   _max_capacity = max_capacity;
 201   _num_buckets  = get_bucket(_max_capacity) + 1;
 202 
 203   _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
 204 
 205   for (size_t i = 0; i < _num_buckets; i++) {
 206     _buckets[i].store_relaxed(nullptr);
 207   }
 208 
 209   size_t new_capacity = bucket_size(0);
 210 
 211   if (!reserve(new_capacity)) {
 212     log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 213     return false;
 214   }
 215   return true;
 216 }
 217 
 218 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
 219   if (_capacity == _max_capacity) {
 220     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
 221     return false;
 222   }
 223 
 224   size_t old_capacity = _capacity;
 225   desired_capacity = MIN2(desired_capacity, _max_capacity);
 226 
 227   if (reserve(desired_capacity)) {
 228     log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
 229                   old_capacity, desired_capacity);
 230     return true;
 231   }
 232   return false;
 233 }
 234 
 235 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 236   size_t new_capacity = _capacity * 2;
 237   return try_expand_to(new_capacity);
 238 }
 239 
 240 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 241   if (_buckets == nullptr) {
 242     return;
 243   }
 244 
 245   for (size_t i = 0; i < _num_buckets; i++) {
 246     if (_buckets[i].load_relaxed() != nullptr) {
 247       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 248       _buckets[i].store_relaxed(nullptr);
 249     }
 250   }
 251 
 252   FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
 253 }
 254 
 255 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
 256   assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
 257 
 258   size_t highest_bucket = get_bucket(new_capacity - 1);
 259   size_t i = get_bucket(_capacity);
 260 
 261   // Allocate all buckets associated with indexes between the current capacity (_capacity)
 262   // and the new capacity (new_capacity). This step ensures that there are no gaps in the
 263   // array and that the capacity accurately reflects the reserved memory.
 264   for (; i <= highest_bucket; i++) {
 265     if (_buckets[i].load_acquire() != nullptr) {
 266       continue; // Skip over already allocated buckets.
 267     }
 268 
 269     size_t bucket_capacity = bucket_size(i);
 270 
 271     // Trim bucket size so that we do not exceed the _max_capacity.
 272     bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
 273                       bucket_capacity :
 274                       _max_capacity - _capacity;
 275 
 276 
 277     TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
 278 
 279     if (bucket_base == nullptr) {
 280       log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
 281                       bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
 282       return false;
 283     }
 284     _capacity += bucket_capacity;
 285     _buckets[i].release_store(bucket_base);
 286   }
 287   return true;
 288 }
 289 
 290 void G1CMMarkStack::expand() {
 291   _chunk_allocator.try_expand();
 292 }
 293 
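// The raw list helpers below are not thread-safe on their own; callers
// serialize access using G1MarkStackChunkList_lock / G1MarkStackFreeList_lock.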
 294 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 295   elem->next = *list;
 296   *list = elem;
 297 }
 298 
 299 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 300   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 301   add_chunk_to_list(&_chunk_list, elem);
 302   _chunks_in_chunk_list++;
 303 }
 304 
 305 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 306   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 307   add_chunk_to_list(&_free_list, elem);
 308 }
 309 
 310 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 311   TaskQueueEntryChunk* result = *list;
 312   if (result != nullptr) {
 313     *list = (*list)->next;
 314   }
 315   return result;
 316 }
 317 
 318 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 319   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 320   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 321   if (result != nullptr) {
 322     _chunks_in_chunk_list--;
 323   }
 324   return result;
 325 }
 326 
 327 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 328   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 329   return remove_chunk_from_list(&_free_list);
 330 }
 331 
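// Copy a full buffer of EntriesPerChunk entries into a chunk and publish it on
// the chunk list. Returns false if no chunk could be obtained from either the
// free list or the backing memory.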
 332 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 333   // Get a new chunk.
 334   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 335 
 336   if (new_chunk == nullptr) {
 337     // Did not get a chunk from the free list. Allocate from backing memory.
 338     new_chunk = _chunk_allocator.allocate_new_chunk();
 339 
 340     if (new_chunk == nullptr) {
 341       return false;
 342     }
 343   }
 344 
 345   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 346 
 347   add_chunk_to_chunk_list(new_chunk);
 348 
 349   return true;
 350 }
 351 
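// Copy one chunk of EntriesPerChunk entries from the chunk list into ptr_arr
// and return the emptied chunk to the free list. Returns false if the chunk
// list is empty.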
 352 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 353   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 354 
 355   if (cur == nullptr) {
 356     return false;
 357   }
 358 
 359   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 360 
 361   add_chunk_to_free_list(cur);
 362   return true;
 363 }
 364 
 365 void G1CMMarkStack::set_empty() {
 366   _chunks_in_chunk_list = 0;
 367   _chunk_list = nullptr;
 368   _free_list = nullptr;
 369   _chunk_allocator.reset();
 370 }
 371 
 372 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
 373     _root_regions(MemRegion::create_array(max_regions, mtGC)),
 374     _max_regions(max_regions),
 375     _num_root_regions(0),
 376     _claimed_root_regions(0),
 377     _scan_in_progress(false),
 378     _should_abort(false) { }
 379 
 380 G1CMRootMemRegions::~G1CMRootMemRegions() {
 381   MemRegion::destroy_array(_root_regions, _max_regions);
 382 }
 383 
 384 void G1CMRootMemRegions::reset() {
 385   _num_root_regions = 0;
 386 }
 387 
 388 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
 389   assert_at_safepoint();
 390   size_t idx = AtomicAccess::fetch_then_add(&_num_root_regions, 1u);
 391   assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %zu", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less than or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
 394   _root_regions[idx].set_start(start);
 395   _root_regions[idx].set_end(end);
 396 }
 397 
 398 void G1CMRootMemRegions::prepare_for_scan() {
 399   assert(!scan_in_progress(), "pre-condition");
 400 
 401   _scan_in_progress = _num_root_regions > 0;
 402 
 403   _claimed_root_regions = 0;
 404   _should_abort = false;
 405 }
 406 
 407 const MemRegion* G1CMRootMemRegions::claim_next() {
 408   if (_should_abort) {
 409     // If someone has set the should_abort flag, we return null to
 410     // force the caller to bail out of their loop.
 411     return nullptr;
 412   }
 413 
 414   if (_claimed_root_regions >= _num_root_regions) {
 415     return nullptr;
 416   }
 417 
 418   size_t claimed_index = AtomicAccess::fetch_then_add(&_claimed_root_regions, 1u);
 419   if (claimed_index < _num_root_regions) {
 420     return &_root_regions[claimed_index];
 421   }
 422   return nullptr;
 423 }
 424 
 425 uint G1CMRootMemRegions::num_root_regions() const {
 426   return (uint)_num_root_regions;
 427 }
 428 
 429 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 430   for (uint i = 0; i < _num_root_regions; i++) {
 431     if (_root_regions[i].equals(mr)) {
 432       return true;
 433     }
 434   }
 435   return false;
 436 }
 437 
 438 void G1CMRootMemRegions::notify_scan_done() {
 439   MutexLocker x(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 440   _scan_in_progress = false;
 441   G1RootRegionScan_lock->notify_all();
 442 }
 443 
 444 void G1CMRootMemRegions::cancel_scan() {
 445   notify_scan_done();
 446 }
 447 
 448 void G1CMRootMemRegions::scan_finished() {
 449   assert(scan_in_progress(), "pre-condition");
 450 
 451   if (!_should_abort) {
 452     assert(_claimed_root_regions >= num_root_regions(),
 453            "we should have claimed all root regions, claimed %zu, length = %u",
 454            _claimed_root_regions, num_root_regions());
 455   }
 456 
 457   notify_scan_done();
 458 }
 459 
 460 bool G1CMRootMemRegions::wait_until_scan_finished() {
 461   if (!scan_in_progress()) {
 462     return false;
 463   }
 464 
 465   {
 466     MonitorLocker ml(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 467     while (scan_in_progress()) {
 468       ml.wait();
 469     }
 470   }
 471   return true;
 472 }
 473 
 474 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 475                                    G1RegionToSpaceMapper* bitmap_storage) :
 476   // _cm_thread set inside the constructor
 477   _g1h(g1h),
 478 
 479   _mark_bitmap(),
 480 
 481   _heap(_g1h->reserved()),
 482 
 483   _root_regions(_g1h->max_num_regions()),
 484 
 485   _global_mark_stack(),
 486 
 487   // _finger set in set_non_marking_state
 488 
 489   _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
 490   _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
 491   // _num_active_tasks set in set_non_marking_state()
 492   // _tasks set inside the constructor
 493 
 494   _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
 495   _terminator(_max_num_tasks, _task_queues),
 496   _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
 497 
 498   _first_overflow_barrier_sync(),
 499   _second_overflow_barrier_sync(),
 500 
 501   _completed_mark_cycles(0),
 502   _has_overflown(false),
 503   _concurrent(false),
 504   _has_aborted(false),
 505   _restart_for_overflow(false),
 506   _gc_timer_cm(new ConcurrentGCTimer()),
 507   _gc_tracer_cm(new G1OldTracer()),
 508 
 509   // _verbose_level set below
 510 
 511   _remark_times(),
 512   _remark_mark_times(),
 513   _remark_weak_ref_times(),
 514   _cleanup_times(),
 515 
 516   _concurrent_workers(nullptr),
 517   _num_concurrent_workers(0),
 518   _max_concurrent_workers(0),
 519 
 520   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
 521   _top_at_mark_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_num_regions(), mtGC)),
 522   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(HeapWord*, _g1h->max_num_regions(), mtGC)),
 523   _needs_remembered_set_rebuild(false)
 524 {
 525   assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
 526 
 527   _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
 528 
 529   // Create & start ConcurrentMark thread.
 530   _cm_thread = new G1ConcurrentMarkThread(this);
 531   if (_cm_thread->osthread() == nullptr) {
 532     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 533   }
 534 
 535   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 536   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 537 
 538   _max_concurrent_workers = ConcGCThreads;
 539 
 540   _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
 541   _concurrent_workers->initialize_workers();
 542   _num_concurrent_workers = _concurrent_workers->active_workers();
 543 
 544   if (!_global_mark_stack.initialize()) {
 545     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 546   }
 547 
 548   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 549 
 550   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 551   _num_active_tasks = _max_num_tasks;
 552 
 553   for (uint i = 0; i < _max_num_tasks; ++i) {
 554     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 555     _task_queues->register_queue(i, task_queue);
 556 
 557     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
 558   }
 559 
 560   reset_at_marking_complete();
 561 }
 562 
 563 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
 564   return _partial_array_state_manager;
 565 }
 566 
 567 void G1ConcurrentMark::reset() {
 568   _has_aborted = false;
 569 
 570   reset_marking_for_restart();
 571 
  // Reset all tasks, since different phases will use a different number of active
  // threads. So, it's easiest to have all of them ready.
 574   for (uint i = 0; i < _max_num_tasks; ++i) {
 575     _tasks[i]->reset(mark_bitmap());
 576   }
 577 
 578   uint max_num_regions = _g1h->max_num_regions();
 579   for (uint i = 0; i < max_num_regions; i++) {
 580     _top_at_rebuild_starts[i] = nullptr;
 581     _region_mark_stats[i].clear();
 582   }
 583 
 584   _root_regions.reset();
 585 }
 586 
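// Clear all liveness statistics gathered so far for the given region,
// including the per-task mark stats cache entries referring to it.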
 587 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
 588   uint region_idx = r->hrm_index();
 589   for (uint j = 0; j < _max_num_tasks; ++j) {
 590     _tasks[j]->clear_mark_stats_cache(region_idx);
 591   }
 592   _top_at_rebuild_starts[region_idx] = nullptr;
 593   _region_mark_stats[region_idx].clear();
 594 }
 595 
 596 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
 597   assert_at_safepoint();
  assert(r->is_starts_humongous(), "Expected a starts humongous region here");
 599 
 600   // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
 601   mark_bitmap()->clear(r->bottom());
 602 
 603   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
 604     return;
 605   }
 606 
 607   // Clear any statistics about the region gathered so far.
 608   _g1h->humongous_obj_regions_iterate(r,
 609                                       [&] (G1HeapRegion* r) {
 610                                         clear_statistics(r);
 611                                       });
 612 }
 613 
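// Reset the global marking state so that marking can either be restarted
// (after a mark stack overflow) or left idle: empty the global mark stack and
// all task queues, and move the global finger back to the bottom of the heap.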
 614 void G1ConcurrentMark::reset_marking_for_restart() {
 615   _global_mark_stack.set_empty();
 616 
 617   // Expand the marking stack, if we have to and if we can.
 618   if (has_overflown()) {
 619     _global_mark_stack.expand();
 620 
 621     uint max_num_regions = _g1h->max_num_regions();
 622     for (uint i = 0; i < max_num_regions; i++) {
 623       _region_mark_stats[i].clear_during_overflow();
 624     }
 625   }
 626 
 627   clear_has_overflown();
 628   _finger = _heap.start();
 629 
 630   for (uint i = 0; i < _max_num_tasks; ++i) {
 631     G1CMTaskQueue* queue = _task_queues->queue(i);
 632     queue->set_empty();
 633   }
 634 }
 635 
 636 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 637   assert(active_tasks <= _max_num_tasks, "we should not have more");
 638 
 639   _num_active_tasks = active_tasks;
 640   // Need to update the three data structures below according to the
 641   // number of active threads for this phase.
 642   _terminator.reset_for_reuse(active_tasks);
 643   _first_overflow_barrier_sync.set_n_workers(active_tasks);
 644   _second_overflow_barrier_sync.set_n_workers(active_tasks);
 645 }
 646 
 647 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 648   set_concurrency(active_tasks);
 649 
 650   _concurrent = concurrent;
 651 
 652   if (!concurrent) {
 653     // At this point we should be in a STW phase, and completed marking.
 654     assert_at_safepoint_on_vm_thread();
 655     assert(out_of_regions(),
 656            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 657            p2i(_finger), p2i(_heap.end()));
 658   }
 659 }
 660 
 661 #if TASKQUEUE_STATS
 662 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
 663 
 664   _task_queues->print_and_reset_taskqueue_stats("G1ConcurrentMark Oop Queue");
 665 
 666   auto get_pa_stats = [&](uint i) {
 667     return _tasks[i]->partial_array_task_stats();
 668   };
 669 
 670   PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
 671                                  "G1ConcurrentMark Partial Array Task Stats");
 672 
 673   for (uint i = 0; i < _max_num_tasks; ++i) {
 674     get_pa_stats(i)->reset();
 675   }
 676 }
 677 #endif
 678 
 679 void G1ConcurrentMark::reset_at_marking_complete() {
 680   TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
 681   // We set the global marking state to some default values when we're
 682   // not doing marking.
 683   reset_marking_for_restart();
 684   _num_active_tasks = 0;
 685 }
 686 
 687 G1ConcurrentMark::~G1ConcurrentMark() {
 688   FREE_C_HEAP_ARRAY(HeapWord*, _top_at_mark_starts);
 689   FREE_C_HEAP_ARRAY(HeapWord*, _top_at_rebuild_starts);
 690   FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
 691   // The G1ConcurrentMark instance is never freed.
 692   ShouldNotReachHere();
 693 }
 694 
 695 class G1ClearBitMapTask : public WorkerTask {
 696 public:
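  // Work unit for clearing the bitmap: each region is cleared in chunks of
  // this many bytes so that a suspendible worker can yield between chunks.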
 697   static size_t chunk_size() { return M; }
 698 
 699 private:
 700   // Heap region closure used for clearing the _mark_bitmap.
 701   class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
 702   private:
 703     G1ConcurrentMark* _cm;
 704     G1CMBitMap* _bitmap;
 705     bool _suspendible; // If suspendible, do yield checks.
 706 
 707     bool suspendible() {
 708       return _suspendible;
 709     }
 710 
 711     bool is_clear_concurrent_undo() {
 712       return suspendible() && _cm->cm_thread()->in_undo_mark();
 713     }
 714 
 715     bool has_aborted() {
 716       if (suspendible()) {
 717         _cm->do_yield_check();
 718         return _cm->has_aborted();
 719       }
 720       return false;
 721     }
 722 
 723     HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per-region top_at_mark_start and
      // live_words data are current wrt the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
 727       if (is_clear_concurrent_undo()) {
 728         // No need to clear bitmaps for empty regions (which includes regions we
 729         // did not mark through).
 730         if (!_cm->contains_live_object(r->hrm_index())) {
 731           assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
 732           return r->bottom();
 733         }
 734         assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
 735       }
 736       return r->end();
 737     }
 738 
 739   public:
 740     G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
 741       G1HeapRegionClosure(),
 742       _cm(cm),
 743       _bitmap(cm->mark_bitmap()),
 744       _suspendible(suspendible)
 745     { }
 746 
 747     virtual bool do_heap_region(G1HeapRegion* r) {
 748       if (has_aborted()) {
 749         return true;
 750       }
 751 
 752       HeapWord* cur = r->bottom();
 753       HeapWord* const end = region_clear_limit(r);
 754 
 755       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 756 
 757       while (cur < end) {
 758 
 759         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 760         _bitmap->clear_range(mr);
 761 
 762         cur += chunk_size_in_words;
 763 
 764         // Repeat the asserts from before the start of the closure. We will do them
 765         // as asserts here to minimize their overhead on the product. However, we
 766         // will have them as guarantees at the beginning / end of the bitmap
 767         // clearing to get some checking in the product.
 768         assert(!suspendible() || _cm->cm_thread()->in_progress(), "invariant");
 769         assert(!suspendible() || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 770 
 771         // Abort iteration if necessary.
 772         if (has_aborted()) {
 773           return true;
 774         }
 775       }
 776       assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 777 
 778       _cm->reset_top_at_mark_start(r);
 779 
 780       return false;
 781     }
 782   };
 783 
 784   G1ClearBitmapHRClosure _cl;
 785   G1HeapRegionClaimer _hr_claimer;
 786   bool _suspendible; // If the task is suspendible, workers must join the STS.
 787 
 788 public:
 789   G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 790     WorkerTask("G1 Clear Bitmap"),
 791     _cl(cm, suspendible),
 792     _hr_claimer(n_workers),
 793     _suspendible(suspendible)
 794   { }
 795 
 796   void work(uint worker_id) {
 797     SuspendibleThreadSetJoiner sts_join(_suspendible);
 798     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 799   }
 800 
 801   bool is_complete() {
 802     return _cl.is_complete();
 803   }
 804 };
 805 
 806 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
 807   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 808 
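  // Size the number of workers by the amount of bitmap memory to clear, in
  // units of G1ClearBitMapTask::chunk_size().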
 809   size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
 810   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 811 
 812   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 813 
 814   G1ClearBitMapTask cl(this, num_workers, may_yield);
 815 
 816   log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
 817   workers->run_task(&cl, num_workers);
 818   guarantee(may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 819 }
 820 
 821 void G1ConcurrentMark::cleanup_for_next_mark() {
 822   // Make sure that the concurrent mark thread looks to still be in
 823   // the current cycle.
 824   guarantee(cm_thread()->in_progress(), "invariant");
 825 
 826   // We are finishing up the current cycle by clearing the next
 827   // marking bitmap and getting it ready for the next cycle. During
 828   // this time no other cycle can start. So, let's make sure that this
 829   // is the case.
 830   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 831 
 832   clear_bitmap(_concurrent_workers, true);
 833 
 834   reset_partial_array_state_manager();
 835 
 836   // Repeat the asserts from above.
 837   guarantee(cm_thread()->in_progress(), "invariant");
 838   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 839 }
 840 
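// Recycle the partial array state manager between cycles: detach all per-task
// splitters, reset the shared manager, then re-attach the splitters.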
 841 void G1ConcurrentMark::reset_partial_array_state_manager() {
 842   for (uint i = 0; i < _max_num_tasks; ++i) {
 843     _tasks[i]->unregister_partial_array_splitter();
 844   }
 845 
 846   partial_array_state_manager()->reset();
 847 
 848   for (uint i = 0; i < _max_num_tasks; ++i) {
 849     _tasks[i]->register_partial_array_splitter();
 850   }
 851 }
 852 
 853 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
 854   assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation, the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible, the number of active workers is temporarily
  // increased to include all currently created workers.
 859   WithActiveWorkers update(workers, workers->created_workers());
 860   clear_bitmap(workers, false);
 861 }
 862 
 863 class G1PreConcurrentStartTask : public G1BatchedTask {
 864   // Reset marking state.
 865   class ResetMarkingStateTask;
 866   // For each region note start of marking.
 867   class NoteStartOfMarkTask;
 868 
 869 public:
 870   G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
 871 };
 872 
 873 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
 874   G1ConcurrentMark* _cm;
 875 public:
 876   ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
 877 
 878   double worker_cost() const override { return 1.0; }
 879   void do_work(uint worker_id) override;
 880 };
 881 
 882 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
 883   G1HeapRegionClaimer _claimer;
 884 public:
 885   NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _claimer(0) { }
 886 
 887   double worker_cost() const override {
    // The work done per region is very small; therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
 890     const double regions_per_thread = 1000;
 891     return _claimer.n_regions() / regions_per_thread;
 892   }
 893 
 894   void set_max_workers(uint max_workers) override;
 895   void do_work(uint worker_id) override;
 896 };
 897 
 898 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
 899   // Reset marking state.
 900   _cm->reset();
 901 }
 902 
 903 class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
 904   G1ConcurrentMark* _cm;
 905 
 906 public:
 907   NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
 908 
 909   bool do_heap_region(G1HeapRegion* r) override {
 910     if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
 911       _cm->update_top_at_mark_start(r);
 912     }
 913     return false;
 914   }
 915 };
 916 
 917 void G1PreConcurrentStartTask::NoteStartOfMarkTask::do_work(uint worker_id) {
 918   NoteStartOfMarkHRClosure start_cl;
 919   G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&start_cl, &_claimer, worker_id);
 920 }
 921 
 922 void G1PreConcurrentStartTask::NoteStartOfMarkTask::set_max_workers(uint max_workers) {
 923   _claimer.set_n_workers(max_workers);
 924 }
 925 
 926 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
 927   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 928   add_serial_task(new ResetMarkingStateTask(cm));
 929   add_parallel_task(new NoteStartOfMarkTask());
}
 931 
 932 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
 933   assert_at_safepoint_on_vm_thread();
 934 
 935   G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
 936 
 937   ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
 938 
 939   G1PreConcurrentStartTask cl(cause, this);
 940   G1CollectedHeap::heap()->run_batch_task(&cl);
 941 
 942   _gc_tracer_cm->set_gc_cause(cause);
 943 }
 944 
 945 
 946 void G1ConcurrentMark::post_concurrent_mark_start() {
 947   // Start Concurrent Marking weak-reference discovery.
 948   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 949   rp->start_discovery(false /* always_clear */);
 950 
 951   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 954   satb_mq_set.set_active_all_threads(true, /* new active value */
 955                                      false /* expected_active */);
 956 
 957   _root_regions.prepare_for_scan();
 958 
 959   // update_g1_committed() will be called at the end of an evac pause
 960   // when marking is on. So, it's also called at the end of the
 961   // concurrent start pause to update the heap end, if the heap expands
 962   // during it. No need to call it here.
 963 }
 964 
 965 void G1ConcurrentMark::post_concurrent_undo_start() {
 966   root_regions()->cancel_scan();
 967 }
 968 
 969 /*
 970  * Notice that in the next two methods, we actually leave the STS
 971  * during the barrier sync and join it immediately afterwards. If we
 972  * do not do this, the following deadlock can occur: one thread could
 973  * be in the barrier sync code, waiting for the other thread to also
 974  * sync up, whereas another one could be trying to yield, while also
 975  * waiting for the other threads to sync up too.
 976  *
 977  * Note, however, that this code is also used during remark and in
 978  * this case we should not attempt to leave / enter the STS, otherwise
 979  * we'll either hit an assert (debug / fastdebug) or deadlock
 980  * (product). So we should only leave / enter the STS if we are
 981  * operating concurrently.
 982  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an evacuation
 * pause to occur while it waits. This is safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 988  */
 989 
 990 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 991   bool barrier_aborted;
 992   {
 993     SuspendibleThreadSetLeaver sts_leave(concurrent());
 994     barrier_aborted = !_first_overflow_barrier_sync.enter();
 995   }
 996 
 997   // at this point everyone should have synced up and not be doing any
 998   // more work
 999 
1000   if (barrier_aborted) {
1001     // If the barrier aborted we ignore the overflow condition and
1002     // just abort the whole marking phase as quickly as possible.
1003     return;
1004   }
1005 }
1006 
1007 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1008   SuspendibleThreadSetLeaver sts_leave(concurrent());
1009   _second_overflow_barrier_sync.enter();
1010 
1011   // at this point everything should be re-initialized and ready to go
1012 }
1013 
1014 class G1CMConcurrentMarkingTask : public WorkerTask {
1015   G1ConcurrentMark*     _cm;
1016 
1017 public:
1018   void work(uint worker_id) {
1019     ResourceMark rm;
1020 
1021     SuspendibleThreadSetJoiner sts_join;
1022 
1023     assert(worker_id < _cm->active_tasks(), "invariant");
1024 
1025     G1CMTask* task = _cm->task(worker_id);
1026     task->record_start_time();
1027     if (!_cm->has_aborted()) {
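      // Keep invoking do_marking_step() until either marking as a whole has
      // been aborted or this task's step completes without aborting (a step
      // aborts e.g. when it reaches its time target, see do_marking_step()).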
1028       do {
1029         task->do_marking_step(G1ConcMarkStepDurationMillis,
1030                               true  /* do_termination */,
1031                               false /* is_serial*/);
1032 
1033         _cm->do_yield_check();
1034       } while (!_cm->has_aborted() && task->has_aborted());
1035     }
1036     task->record_end_time();
1037     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1038   }
1039 
1040   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1041       WorkerTask("Concurrent Mark"), _cm(cm) { }
1042 
1043   ~G1CMConcurrentMarkingTask() { }
1044 };
1045 
1046 uint G1ConcurrentMark::calc_active_marking_workers() {
1047   uint result = 0;
1048   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1049     result = _max_concurrent_workers;
1050   } else {
1051     result =
1052       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1053                                                 1, /* Minimum workers */
1054                                                 _num_concurrent_workers,
1055                                                 Threads::number_of_non_daemon_threads());
1056     // Don't scale the result down by scale_concurrent_workers() because
1057     // that scaling has already gone into "_max_concurrent_workers".
1058   }
1059   assert(result > 0 && result <= _max_concurrent_workers,
1060          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1061          _max_concurrent_workers, result);
1062   return result;
1063 }
1064 
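// Scan a single root MemRegion, i.e. the range from the owning region's TAMS
// to its top at the time the root region was recorded, applying
// G1RootRegionScanClosure to each object in it.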
1065 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1066 #ifdef ASSERT
1067   HeapWord* last = region->last();
1068   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1069   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1070          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1071   assert(top_at_mark_start(hr) == region->start(),
1072          "MemRegion start should be equal to TAMS");
1073 #endif
1074 
1075   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1076 
1077   const uintx interval = PrefetchScanIntervalInBytes;
1078   HeapWord* curr = region->start();
1079   const HeapWord* end = region->end();
1080   while (curr < end) {
1081     Prefetch::read(curr, interval);
1082     oop obj = cast_to_oop(curr);
1083     size_t size = obj->oop_iterate_size(&cl);
1084     assert(size == obj->size(), "sanity");
1085     curr += size;
1086   }
1087 }
1088 
1089 class G1CMRootRegionScanTask : public WorkerTask {
1090   G1ConcurrentMark* _cm;
1091 public:
1092   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1093     WorkerTask("G1 Root Region Scan"), _cm(cm) { }
1094 
1095   void work(uint worker_id) {
1096     G1CMRootMemRegions* root_regions = _cm->root_regions();
1097     const MemRegion* region = root_regions->claim_next();
1098     while (region != nullptr) {
1099       _cm->scan_root_region(region, worker_id);
1100       region = root_regions->claim_next();
1101     }
1102   }
1103 };
1104 
1105 void G1ConcurrentMark::scan_root_regions() {
1106   // scan_in_progress() will have been set to true only if there was
1107   // at least one root region to scan. So, if it's false, we
1108   // should not attempt to do any further work.
1109   if (root_regions()->scan_in_progress()) {
1110     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
1111 
1112     // Assign one worker to each root-region but subject to the max constraint.
1113     const uint num_workers = MIN2(root_regions()->num_root_regions(),
1114                                   _max_concurrent_workers);
1115 
1116     G1CMRootRegionScanTask task(this);
1117     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1118                         task.name(), num_workers, root_regions()->num_root_regions());
1119     _concurrent_workers->run_task(&task, num_workers);
1120 
1121     // It's possible that has_aborted() is true here without actually
1122     // aborting the survivor scan earlier. This is OK as it's
1123     // mainly used for sanity checking.
1124     root_regions()->scan_finished();
1125   }
1126 }
1127 
1128 bool G1ConcurrentMark::wait_until_root_region_scan_finished() {
1129   return root_regions()->wait_until_scan_finished();
1130 }
1131 
1132 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1133   root_regions()->add(top_at_mark_start(r), r->top());
1134 }
1135 
1136 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1137   return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1138 }
1139 
1140 void G1ConcurrentMark::root_region_scan_abort_and_wait() {
1141   root_regions()->abort();
1142   root_regions()->wait_until_scan_finished();
1143 }
1144 
1145 void G1ConcurrentMark::concurrent_cycle_start() {
1146   _gc_timer_cm->register_gc_start();
1147 
1148   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1149 
1150   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1151 }
1152 
1153 uint G1ConcurrentMark::completed_mark_cycles() const {
1154   return AtomicAccess::load(&_completed_mark_cycles);
1155 }
1156 
1157 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1158   _g1h->collector_state()->set_clear_bitmap_in_progress(false);
1159 
1160   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1161 
1162   if (mark_cycle_completed) {
1163     AtomicAccess::inc(&_completed_mark_cycles, memory_order_relaxed);
1164   }
1165 
1166   if (has_aborted()) {
1167     log_info(gc, marking)("Concurrent Mark Abort");
1168     _gc_tracer_cm->report_concurrent_mode_failure();
1169   }
1170 
1171   _gc_timer_cm->register_gc_end();
1172 
1173   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1174 }
1175 
1176 void G1ConcurrentMark::mark_from_roots() {
1177   _restart_for_overflow = false;
1178 
1179   uint active_workers = calc_active_marking_workers();
1180 
1181   // Setting active workers is not guaranteed since fewer
1182   // worker threads may currently exist and more may not be
1183   // available.
1184   active_workers = _concurrent_workers->set_active_workers(active_workers);
1185   log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1186 
1187   _num_concurrent_workers = active_workers;
1188 
1189   // Parallel task terminator is set in "set_concurrency_and_phase()"
1190   set_concurrency_and_phase(active_workers, true /* concurrent */);
1191 
1192   G1CMConcurrentMarkingTask marking_task(this);
1193   _concurrent_workers->run_task(&marking_task);
1194   print_stats();
1195 }
1196 
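// Note: the order of these strings must match the declaration order of the
// VerifyLocation enum values.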
1197 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1198   static const char* location_strings[] = { "Remark Before",
1199                                             "Remark After",
1200                                             "Remark Overflow",
1201                                             "Cleanup Before",
1202                                             "Cleanup After" };
1203   return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1204 }
1205 
1206 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1207                                            VerifyLocation location) {
1208   G1HeapVerifier* verifier = _g1h->verifier();
1209 
1210   verifier->verify_region_sets_optional();
1211 
1212   const char* caller = verify_location_string(location);
1213 
1214   if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1215     GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1216 
1217     size_t const BufLen = 512;
1218     char buffer[BufLen];
1219 
1220     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1221     verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1222 
1223     // Only check bitmap in Remark, and not at After-Verification because the regions
1224     // already have their TAMS'es reset.
1225     if (location != VerifyLocation::RemarkAfter) {
1226       verifier->verify_bitmap_clear(true /* above_tams_only */);
1227     }
1228   }
1229 }
1230 
1231 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1232   G1CollectedHeap* _g1h;
1233 public:
1234   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1235 
1236   bool do_object_b(oop obj) {
1237     return !_g1h->is_obj_dead(obj);
1238   }
1239 };
1240 
1241 void G1ConcurrentMark::remark() {
1242   assert_at_safepoint_on_vm_thread();
1243 
  // If a full collection has happened, we should not continue. However, we might
  // have ended up here because the Remark VM operation has already been scheduled.
1246   if (has_aborted()) {
1247     return;
1248   }
1249 
1250   G1Policy* policy = _g1h->policy();
1251   policy->record_pause_start_time();
1252 
1253   double start = os::elapsedTime();
1254 
1255   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1256 
1257   {
1258     GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1259     finalize_marking();
1260   }
1261 
1262   double mark_work_end = os::elapsedTime();
1263 
1264   bool const mark_finished = !has_overflown();
1265   if (mark_finished) {
1266     weak_refs_work();
1267 
1268     // Unload Klasses, String, Code Cache, etc.
1269     if (ClassUnloadingWithConcurrentMark) {
1270       G1CMIsAliveClosure is_alive(this);
1271       _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1272     }
1273 
1274     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1275     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1278     satb_mq_set.set_active_all_threads(false, /* new active value */
1279                                        true /* expected_active */);
1280 
1281     {
1282       GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1283       flush_all_task_caches();
1284     }
1285 
    // All marking completed. Check the bitmap now, as we will start to reset TAMSes
    // in parallel below and therefore cannot do this check in the After-Remark verification.
1288     _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1289 
1290     {
1291       GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1292 
1293       G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1294       uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1295                                     _g1h->workers()->active_workers());
      log_debug(gc, ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1297       _g1h->workers()->run_task(&cl, num_workers);
1298 
1299       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1300                                         _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1301 
1302       _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1303 
1304       if (_needs_remembered_set_rebuild) {
1305         // Prune rebuild candidates based on G1HeapWastePercent.
1306         // Improves rebuild time in addition to remembered set memory usage.
1307         G1CollectionSetChooser::build(_g1h->workers(), _g1h->num_committed_regions(), _g1h->policy()->candidates());
1308       }
1309     }
1310 
1311     if (log_is_enabled(Trace, gc, liveness)) {
1312       G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1313       _g1h->heap_region_iterate(&cl);
1314     }
1315 
    // Potentially, some empty regions have been reclaimed; make this a
1317     // "collection" so that pending allocation can retry before attempting a
1318     // GC pause.
1319     _g1h->increment_total_collections();
1320 
1321     // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1322     // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1323     // triggered, it likely means there are very few regular GCs, making resizing
1324     // based on gc heuristics less effective.
1325     if (_g1h->last_gc_was_periodic()) {
1326       _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1327     }
1328 
1329     compute_new_sizes();
1330 
1331     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1332 
1333     assert(!restart_for_overflow(), "sanity");
1334     // Completely reset the marking state (except bitmaps) since marking completed.
1335     reset_at_marking_complete();
1336 
1337     G1CollectedHeap::finish_codecache_marking_cycle();
1338 
1339     {
1340       GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1341       G1ObjectCountIsAliveClosure is_alive(_g1h);
1342       _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1343     }
1344   } else {
1345     // We overflowed.  Restart concurrent marking.
1346     _restart_for_overflow = true;
1347 
1348     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1349 
1350     // Clear the marking state because we will be restarting
1351     // marking due to overflowing the global mark stack.
1352     reset_marking_for_restart();
1353   }
1354 
1355   // Statistics
1356   double now = os::elapsedTime();
1357   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1358   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1359   _remark_times.add((now - start) * 1000.0);
1360 
1361   _g1h->update_perf_counter_cpu_time();
1362 
1363   policy->record_concurrent_mark_remark_end();
1364 }
1365 
1366 void G1ConcurrentMark::compute_new_sizes() {
1367   MetaspaceGC::compute_new_size();
1368 
1369   // Cleanup will have freed any regions completely full of garbage.
1370   // Update the soft reference policy with the new heap occupancy.
1371   Universe::heap()->update_capacity_and_used_at_gc();
1372 
1373   // We reclaimed old regions so we should calculate the sizes to make
1374   // sure we update the old gen/space data.
1375   _g1h->monitoring_support()->update_sizes();
1376 }
1377 
1378 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1379   G1CollectedHeap* _g1h;
1380 
1381 public:
1382   G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1383 
1384   bool do_heap_region(G1HeapRegion* r) override {
1385     // Update the remset tracking state from updating to complete
1386     // if remembered sets have been rebuilt.
1387     _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1388     return false;
1389   }
1390 };
1391 
1392 void G1ConcurrentMark::cleanup() {
1393   assert_at_safepoint_on_vm_thread();
1394 
1395   // If a full collection has happened, we shouldn't do this.
1396   if (has_aborted()) {
1397     return;
1398   }
1399 
1400   G1Policy* policy = _g1h->policy();
1401   policy->record_pause_start_time();
1402 
1403   double start = os::elapsedTime();
1404 
1405   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1406 
1407   if (needs_remembered_set_rebuild()) {
1408     // Update the remset tracking information as well as marking all regions
1409     // as fully parsable.
1410     GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1411     G1UpdateRegionsAfterRebuild cl(_g1h);
1412     _g1h->heap_region_iterate(&cl);
1413   } else {
1414     log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1415   }
1416 
1417   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1418 
1419   // Local statistics
1420   _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1421 
1422   {
1423     GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1424     policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1425   }
1426 }
1427 
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1429 // Uses the G1CMTask associated with a worker thread (for serial reference
1430 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1431 // trace referent objects.
1432 //
1433 // Using the G1CMTask and embedded local queues avoids having the worker
1434 // threads operating on the global mark stack. This reduces the risk
// of overflowing the stack, which we would rather avoid at this late
// stage. Using the tasks' local queues also removes the potential for
// the workers to interfere with each other, as could occur if they
// operated on the global stack.
1439 
1440 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1441   G1ConcurrentMark* _cm;
1442   G1CMTask*         _task;
1443   uint              _ref_counter_limit;
1444   uint              _ref_counter;
1445   bool              _is_serial;
1446 public:
1447   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1448     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1449     _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1450     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1451   }
1452 
1453   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1454   virtual void do_oop(      oop* p) { do_oop_work(p); }
1455 
1456   template <class T> void do_oop_work(T* p) {
1457     if (_cm->has_overflown()) {
1458       return;
1459     }
1460     if (!_task->deal_with_reference(p)) {
1461       // We did not add anything to the mark bitmap (or mark stack), so there is
1462       // no point trying to drain it.
1463       return;
1464     }
1465     _ref_counter--;
1466 
1467     if (_ref_counter == 0) {
1468       // We have dealt with _ref_counter_limit references, pushing them
1469       // and objects reachable from them on to the local stack (and
1470       // possibly the global stack). Call G1CMTask::do_marking_step() to
1471       // process these entries.
1472       //
1473       // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1474       // there's nothing more to do (i.e. we're done with the entries that
1475       // were pushed as a result of the G1CMTask::deal_with_reference() calls
1476       // above) or we overflow.
1477       //
1478       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1479       // flag while there may still be some work to do. (See the comment at
1480       // the beginning of G1CMTask::do_marking_step() for those conditions -
1481       // one of which is reaching the specified time target.) It is only
1482       // when G1CMTask::do_marking_step() returns without setting the
1483       // has_aborted() flag that the marking step has completed.
1484       do {
1485         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1486         _task->do_marking_step(mark_step_duration_ms,
1487                                false      /* do_termination */,
1488                                _is_serial);
1489       } while (_task->has_aborted() && !_cm->has_overflown());
1490       _ref_counter = _ref_counter_limit;
1491     }
1492   }
1493 };
1494 
1495 // 'Drain' oop closure used by both serial and parallel reference processing.
1496 // Uses the G1CMTask associated with a given worker thread (for serial
1497 // reference processing the G1CMTask for worker 0 is used). Calls the
1498 // do_marking_step routine, with an unbelievably large timeout value,
1499 // to drain the marking data structures of the remaining entries
1500 // added by the 'keep alive' oop closure above.
1501 
1502 class G1CMDrainMarkingStackClosure : public VoidClosure {
1503   G1ConcurrentMark* _cm;
1504   G1CMTask*         _task;
1505   bool              _is_serial;
1506  public:
1507   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1508     _cm(cm), _task(task), _is_serial(is_serial) {
1509     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1510   }
1511 
1512   void do_void() {
1513     do {
1514       // We call G1CMTask::do_marking_step() to completely drain the local
1515       // and global marking stacks of entries pushed by the 'keep alive'
1516       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1517       //
1518       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1519       // if there's nothing more to do (i.e. we've completely drained the
1520       // entries that were pushed as a result of applying the 'keep alive'
1521       // closure to the entries on the discovered ref lists) or we overflow
1522       // the global marking stack.
1523       //
1524       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1525       // flag while there may still be some work to do. (See the comment at
1526       // the beginning of G1CMTask::do_marking_step() for those conditions -
1527       // one of which is reaching the specified time target.) It is only
1528       // when G1CMTask::do_marking_step() returns without setting the
1529       // has_aborted() flag that the marking step has completed.
1530 
1531       _task->do_marking_step(1000000000.0 /* something very large */,
1532                              true         /* do_termination */,
1533                              _is_serial);
1534     } while (_task->has_aborted() && !_cm->has_overflown());
1535   }
1536 };
1537 
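     // Proxy task handed to the reference processor. It wires the concurrent mark
     // is-alive, keep-alive and drain-marking-stack closures into the generic
     // reference processing work for each worker.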
1538 class G1CMRefProcProxyTask : public RefProcProxyTask {
1539   G1CollectedHeap& _g1h;
1540   G1ConcurrentMark& _cm;
1541 
1542 public:
1543   G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1544     : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1545       _g1h(g1h),
1546       _cm(cm) {}
1547 
1548   void work(uint worker_id) override {
1549     assert(worker_id < _max_workers, "sanity");
1550     G1CMIsAliveClosure is_alive(&_cm);
1551     uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1552     G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1553     BarrierEnqueueDiscoveredFieldClosure enqueue;
1554     G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1555     _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1556   }
1557 
1558   void prepare_run_task_hook() override {
1559     // We need to reset the concurrency level before each
1560     // proxy task execution, so that the termination protocol
1561     // and overflow handling in G1CMTask::do_marking_step() know
1562     // how many workers to wait for.
1563     _cm.set_concurrency(_queue_count);
1564   }
1565 };
1566 
1567 void G1ConcurrentMark::weak_refs_work() {
1568   ResourceMark rm;
1569 
1570   {
1571     GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1572 
1573     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1574 
1575     // See the comment in G1CollectedHeap::ref_processing_init()
1576     // about how reference processing currently works in G1.
1577 
1578     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1579 
1580     // Prefer to grow the stack until the max capacity.
1581     _global_mark_stack.set_should_grow();
1582 
1583     // Parallel processing task executor.
1584     G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1585     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1586 
1587     // Process the weak references.
1588     const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1589     _gc_tracer_cm->report_gc_reference_stats(stats);
1590     pt.print_all_references();
1591 
1592     // The do_oop work routines of the keep_alive and drain_marking_stack
1593     // oop closures will set the has_overflown flag if we overflow the
1594     // global marking stack.
1595 
1596     assert(has_overflown() || _global_mark_stack.is_empty(),
1597            "Mark stack should be empty (unless it has overflown)");
1598   }
1599 
1600   if (has_overflown()) {
1601     // We cannot trust g1_is_alive and the contents of the heap if the marking stack
1602     // overflowed while processing references. Exit the VM.
1603     fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1604           "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1605           "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1606           _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1607     return;
1608   }
1609 
1610   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1611 
1612   {
1613     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1614     G1CMIsAliveClosure is_alive(this);
1615     WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1616   }
1617 }
1618 
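     // Yield closure used during reference precleaning: the coarse-grained check
     // only consults the abort flag, while the fine-grained check additionally
     // performs a yield check.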
1619 class G1PrecleanYieldClosure : public YieldClosure {
1620   G1ConcurrentMark* _cm;
1621 
1622 public:
1623   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1624 
1625   virtual bool should_return() {
1626     return _cm->has_aborted();
1627   }
1628 
1629   virtual bool should_return_fine_grain() {
1630     _cm->do_yield_check();
1631     return _cm->has_aborted();
1632   }
1633 };
1634 
1635 void G1ConcurrentMark::preclean() {
1636   assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1637 
1638   SuspendibleThreadSetJoiner joiner;
1639 
1640   BarrierEnqueueDiscoveredFieldClosure enqueue;
1641 
1642   set_concurrency_and_phase(1, true);
1643 
1644   G1PrecleanYieldClosure yield_cl(this);
1645 
1646   ReferenceProcessor* rp = _g1h->ref_processor_cm();
1647   // Precleaning is single threaded. Temporarily disable MT discovery.
1648   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1649   rp->preclean_discovered_references(rp->is_alive_non_header(),
1650                                      &enqueue,
1651                                      &yield_cl,
1652                                      _gc_timer_cm);
1653 }
1654 
1655 // Closure for marking entries in SATB buffers.
1656 class G1CMSATBBufferClosure : public SATBBufferClosure {
1657 private:
1658   G1CMTask* _task;
1659   G1CollectedHeap* _g1h;
1660 
1661   // This is very similar to G1CMTask::deal_with_reference, but with
1662   // more relaxed requirements for the argument, so this must be more
1663   // circumspect about treating the argument as an object.
1664   void do_entry(void* entry) const {
1665     _task->increment_refs_reached();
1666     oop const obj = cast_to_oop(entry);
1667     _task->make_reference_grey(obj);
1668   }
1669 
1670 public:
1671   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1672     : _task(task), _g1h(g1h) { }
1673 
1674   virtual void do_buffer(void** buffer, size_t size) {
1675     for (size_t i = 0; i < size; ++i) {
1676       do_entry(buffer[i]);
1677     }
1678   }
1679 };
1680 
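     // Thread closure applied during remark that flushes each thread's partial
     // SATB buffer into the global queue set so it can be processed as a
     // completed buffer.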
1681 class G1RemarkThreadsClosure : public ThreadClosure {
1682   G1SATBMarkQueueSet& _qset;
1683 
1684  public:
1685   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1686     _qset(G1BarrierSet::satb_mark_queue_set()) {}
1687 
1688   void do_thread(Thread* thread) {
1689     // Transfer any partial buffer to the qset for completed buffer processing.
1690     _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1691   }
1692 };
1693 
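     // Worker task for the remark pause: flushes the per-thread SATB buffers and
     // then runs do_marking_step() to completion, or until the global mark stack
     // overflows.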
1694 class G1CMRemarkTask : public WorkerTask {
1695   // For Threads::possibly_parallel_threads_do
1696   ThreadsClaimTokenScope _threads_claim_token_scope;
1697   G1ConcurrentMark* _cm;
1698 public:
1699   void work(uint worker_id) {
1700     G1CMTask* task = _cm->task(worker_id);
1701     task->record_start_time();
1702     {
1703       ResourceMark rm;
1704 
1705       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1706       Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1707     }
1708 
1709     do {
1710       task->do_marking_step(1000000000.0 /* something very large */,
1711                             true         /* do_termination       */,
1712                             false        /* is_serial            */);
1713     } while (task->has_aborted() && !_cm->has_overflown());
1714     // If we overflow, then we do not want to restart. We instead
1715     // want to abort remark and do concurrent marking again.
1716     task->record_end_time();
1717   }
1718 
1719   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1720     WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1721     _cm->terminator()->reset_for_reuse(active_workers);
1722   }
1723 };
1724 
1725 void G1ConcurrentMark::finalize_marking() {
1726   ResourceMark rm;
1727 
1728   _g1h->ensure_parsability(false);
1729 
1730   // This is remark, so we'll use up all active threads.
1731   uint active_workers = _g1h->workers()->active_workers();
1732   set_concurrency_and_phase(active_workers, false /* concurrent */);
1733   // Leave _parallel_marking_threads at its
1734   // value originally calculated in the G1ConcurrentMark
1735   // constructor and pass the number of active workers
1736   // through the task.
1737 
1738   {
1739     G1CMRemarkTask remarkTask(this, active_workers);
1740     // We will start all available threads, even if we decide that the
1741     // active_workers will be fewer. The extra ones will just bail out
1742     // immediately.
1743     _g1h->workers()->run_task(&remarkTask);
1744   }
1745 
1746   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1747   guarantee(has_overflown() ||
1748             satb_mq_set.completed_buffers_num() == 0,
1749             "Invariant: has_overflown = %s, num buffers = %zu",
1750             BOOL_TO_STR(has_overflown()),
1751             satb_mq_set.completed_buffers_num());
1752 
1753   print_stats();
1754 }
1755 
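     // Flush every task's region mark statistics cache and log the aggregate
     // hit/miss counts and ratio.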
1756 void G1ConcurrentMark::flush_all_task_caches() {
1757   size_t hits = 0;
1758   size_t misses = 0;
1759   for (uint i = 0; i < _max_num_tasks; i++) {
1760     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1761     hits += stats.first;
1762     misses += stats.second;
1763   }
1764   size_t sum = hits + misses;
1765   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1766                        hits, misses, percent_of(hits, sum));
1767 }
1768 
1769 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1770   assert_at_safepoint();
1771   _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1772 }
1773 
1774 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1775   // "checkpoint" the finger
1776   HeapWord* finger = _finger;
1777 
1778   while (finger < _heap.end()) {
1779     assert(_g1h->is_in_reserved(finger), "invariant");
1780 
1781     G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(finger);
1782     // Make sure that the reads below do not float before loading curr_region.
1783     OrderAccess::loadload();
1784     // heap_region_containing_or_null() above may return null because we always
1785     // claim regions up to the end of the heap. In this case, just jump to the next region.
1786     HeapWord* end = curr_region != nullptr ? curr_region->end() : finger + G1HeapRegion::GrainWords;
1787 
1788     // Is the gap between reading the finger and doing the CAS too long?
1789     HeapWord* res = AtomicAccess::cmpxchg(&_finger, finger, end);
1790     if (res == finger && curr_region != nullptr) {
1791       // we succeeded
1792       HeapWord* bottom = curr_region->bottom();
1793       HeapWord* limit = top_at_mark_start(curr_region);
1794 
1795       log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1796       // Note that _finger == end cannot be guaranteed here since
1797       // someone else might have moved the finger even further.
1798       assert(_finger >= end, "the finger should have moved forward");
1799 
1800       if (limit > bottom) {
1801         return curr_region;
1802       } else {
1803         assert(limit == bottom,
1804                "the region limit should be at bottom");
1805         // We return null and the caller should try calling
1806         // claim_region() again.
1807         return nullptr;
1808       }
1809     } else {
1810       assert(_finger > finger, "the finger should have moved forward");
1811       // read it again
1812       finger = _finger;
1813     }
1814   }
1815 
1816   return nullptr;
1817 }
1818 
1819 #ifndef PRODUCT
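     // Debug-only closure that verifies that no mark stack or task queue entry
     // refers to an object in the (optional) collection set.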
1820 class VerifyNoCSetOops {
1821   G1CollectedHeap* _g1h;
1822   const char* _phase;
1823   int _info;
1824 
1825 public:
1826   VerifyNoCSetOops(const char* phase, int info = -1) :
1827     _g1h(G1CollectedHeap::heap()),
1828     _phase(phase),
1829     _info(info)
1830   { }
1831 
1832   void operator()(G1TaskQueueEntry task_entry) const {
1833     if (task_entry.is_partial_array_state()) {
1834       oop obj = task_entry.to_partial_array_state()->source();
1835       guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1836       return;
1837     }
1838     guarantee(oopDesc::is_oop(task_entry.to_oop()),
1839               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1840               p2i(task_entry.to_oop()), _phase, _info);
1841     G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1842     guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1843               "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1844               p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1845   }
1846 };
1847 
1848 void G1ConcurrentMark::verify_no_collection_set_oops() {
1849   assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1850          "should be at a safepoint or initializing");
1851   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
1852     return;
1853   }
1854 
1855   // Verify entries on the global mark stack
1856   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1857 
1858   // Verify entries on the task queues
1859   for (uint i = 0; i < _max_num_tasks; ++i) {
1860     G1CMTaskQueue* queue = _task_queues->queue(i);
1861     queue->iterate(VerifyNoCSetOops("Queue", i));
1862   }
1863 
1864   // Verify the global finger
1865   HeapWord* global_finger = finger();
1866   if (global_finger != nullptr && global_finger < _heap.end()) {
1867     // Since we always iterate over all regions, we might get a null G1HeapRegion
1868     // here.
1869     G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1870     guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1871               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1872               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1873   }
1874 
1875   // Verify the task fingers
1876   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1877   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1878     G1CMTask* task = _tasks[i];
1879     HeapWord* task_finger = task->finger();
1880     if (task_finger != nullptr && task_finger < _heap.end()) {
1881       // See above note on the global finger verification.
1882       G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
1883       guarantee(r == nullptr || task_finger == r->bottom() ||
1884                 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1885                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1886                 p2i(task_finger), HR_FORMAT_PARAMS(r));
1887     }
1888   }
1889 }
1890 #endif // PRODUCT
1891 
1892 void G1ConcurrentMark::rebuild_and_scrub() {
1893   if (!needs_remembered_set_rebuild()) {
1894     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1895   }
1896 
1897   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1898 }
1899 
1900 void G1ConcurrentMark::print_stats() {
1901   if (!log_is_enabled(Debug, gc, stats)) {
1902     return;
1903   }
1904   log_debug(gc, stats)("---------------------------------------------------------------------");
1905   for (size_t i = 0; i < _num_active_tasks; ++i) {
1906     _tasks[i]->print_stats();
1907     log_debug(gc, stats)("---------------------------------------------------------------------");
1908   }
1909 }
1910 
1911 bool G1ConcurrentMark::concurrent_cycle_abort() {
1912   // If we start the compaction before the CM threads finish
1913   // scanning the root regions we might trip them over as we'll
1914   // be moving objects / updating references. So let's wait until
1915   // they are done. By telling them to abort, they should complete
1916   // early.
1917   root_region_scan_abort_and_wait();
1918 
1919   // If we haven't started a concurrent cycle, there is no need to do anything; we
1920   // might, however, have aborted the marking because we are shutting down. In this
1921   // case the marking might have already completed the abort (causing in_progress()
1922   // below to return false), but this may still have left marking state, particularly
1923   // in the shared marking bitmap, that must be cleaned up.
1924   // If there are multiple full gcs during shutdown we do this work repeatedly for
1925   // nothing, but this situation should be extremely rare (a full gc after shutdown
1926   // has been signalled is already rare), and this work should be negligible compared
1927   // to actual full gc work.
1928   if (!cm_thread()->in_progress() && !_g1h->concurrent_mark_is_terminating()) {
1929     return false;
1930   }
1931 
1932   // Empty mark stack
1933   reset_marking_for_restart();
1934   for (uint i = 0; i < _max_num_tasks; ++i) {
1935     _tasks[i]->clear_region_fields();
1936   }
1937 
1938   abort_marking_threads();
1939 
1940   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1941   satb_mq_set.abandon_partial_marking();
1942   // This can be called either during or outside of marking; we'll read
1943   // the expected_active value from the SATB queue set.
1944   satb_mq_set.set_active_all_threads(false, /* new active value */
1945                                      satb_mq_set.is_active() /* expected_active */);
1946   return true;
1947 }
1948 
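     // Signal the marking tasks to abort and unblock any workers waiting at the
     // overflow barrier syncs.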
1949 void G1ConcurrentMark::abort_marking_threads() {
1950   assert(!_root_regions.scan_in_progress(), "still doing root region scan");
1951   _has_aborted = true;
1952   _first_overflow_barrier_sync.abort();
1953   _second_overflow_barrier_sync.abort();
1954 }
1955 
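     // Sum of the CPU time consumed by the concurrent marking worker threads,
     // in seconds.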
1956 double G1ConcurrentMark::worker_threads_cpu_time_s() {
1957   class CountCpuTimeThreadClosure : public ThreadClosure {
1958   public:
1959     jlong _total_cpu_time;
1960 
1961     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
1962 
1963     void do_thread(Thread* t) {
1964       _total_cpu_time += os::thread_cpu_time(t);
1965     }
1966   } cl;
1967 
1968   threads_do(&cl);
1969 
1970   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
1971 }
1972 
1973 static void print_ms_time_info(const char* prefix, const char* name,
1974                                NumberSeq& ns) {
1975   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1976                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
1977   if (ns.num() > 0) {
1978     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
1979                            prefix, ns.sd(), ns.maximum());
1980   }
1981 }
1982 
1983 void G1ConcurrentMark::print_summary_info() {
1984   Log(gc, marking) log;
1985   if (!log.is_trace()) {
1986     return;
1987   }
1988 
1989   log.trace(" Concurrent marking:");
1990   print_ms_time_info("  ", "remarks", _remark_times);
1991   {
1992     print_ms_time_info("     ", "final marks", _remark_mark_times);
1993     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
1994 
1995   }
1996   print_ms_time_info("  ", "cleanups", _cleanup_times);
1997   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
1998             _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
1999   log.trace("  Total stop_world time = %8.2f s.",
2000             (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2001   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2002             cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2003 }
2004 
2005 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2006   _concurrent_workers->threads_do(tc);
2007 }
2008 
2009 void G1ConcurrentMark::print_on(outputStream* st) const {
2010   st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2011   _mark_bitmap.print_on(st, " Bits: ");
2012 }
2013 
2014 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2015   ReferenceProcessor* result = g1h->ref_processor_cm();
2016   assert(result != nullptr, "CM reference processor should not be null");
2017   return result;
2018 }
2019 
2020 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2021                                G1CMTask* task)
2022   : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2023     _g1h(g1h), _task(task)
2024 { }
2025 
2026 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2027   assert(hr != nullptr,
2028         "claim_region() should have filtered out null regions");
2029   _curr_region  = hr;
2030   _finger       = hr->bottom();
2031   update_region_limit();
2032 }
2033 
2034 void G1CMTask::update_region_limit() {
2035   G1HeapRegion* hr = _curr_region;
2036   HeapWord* bottom = hr->bottom();
2037   HeapWord* limit = _cm->top_at_mark_start(hr);
2038 
2039   if (limit == bottom) {
2040     // The region was collected underneath our feet.
2041     // We set the finger to bottom to ensure that the bitmap
2042     // iteration that will follow this will not do anything.
2043     // (this is not a condition that holds when we set the region up,
2044     // as the region is not supposed to be empty in the first place)
2045     _finger = bottom;
2046   } else if (limit >= _region_limit) {
2047     assert(limit >= _finger, "peace of mind");
2048   } else {
2049     assert(limit < _region_limit, "only way to get here");
2050     // This can happen under some pretty unusual circumstances.  An
2051     // evacuation pause empties the region underneath our feet (TAMS
2052     // at bottom). We then do some allocation in the region (TAMS
2053     // stays at bottom), followed by the region being used as a GC
2054     // alloc region (TAMS will move to top() and the objects
2055     // originally below it will be greyed). All objects now marked in
2056     // the region are explicitly greyed, if below the global finger,
2057     // and we do not need in fact to scan anything else. So, we simply
2058     // set _finger to be limit to ensure that the bitmap iteration
2059     // doesn't do anything.
2060     _finger = limit;
2061   }
2062 
2063   _region_limit = limit;
2064 }
2065 
2066 void G1CMTask::giveup_current_region() {
2067   assert(_curr_region != nullptr, "invariant");
2068   clear_region_fields();
2069 }
2070 
2071 void G1CMTask::clear_region_fields() {
2072   // Values for these three fields that indicate that we're not
2073   // holding on to a region.
2074   _curr_region   = nullptr;
2075   _finger        = nullptr;
2076   _region_limit  = nullptr;
2077 }
2078 
2079 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2080   if (cm_oop_closure == nullptr) {
2081     assert(_cm_oop_closure != nullptr, "invariant");
2082   } else {
2083     assert(_cm_oop_closure == nullptr, "invariant");
2084   }
2085   _cm_oop_closure = cm_oop_closure;
2086 }
2087 
2088 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2089   guarantee(mark_bitmap != nullptr, "invariant");
2090   _mark_bitmap              = mark_bitmap;
2091   clear_region_fields();
2092 
2093   _calls                         = 0;
2094   _elapsed_time_ms               = 0.0;
2095   _termination_time_ms           = 0.0;
2096 
2097   _mark_stats_cache.reset();
2098 }
2099 
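     // (Re-)construct the embedded partial array splitter in place via placement
     // new; unregister_partial_array_splitter() destroys it explicitly.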
2100 void G1CMTask::register_partial_array_splitter() {
2101 
2102   ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2103                                                         _cm->max_num_tasks(),
2104                                                         ObjArrayMarkingStride);
2105 }
2106 
2107 void G1CMTask::unregister_partial_array_splitter() {
2108   _partial_array_splitter.~PartialArraySplitter();
2109 }
2110 
2111 bool G1CMTask::should_exit_termination() {
2112   if (!regular_clock_call()) {
2113     return true;
2114   }
2115 
2116   // This is called when we are in the termination protocol. We should
2117   // quit if, for some reason, this task wants to abort or the global
2118   // stack is not empty (this means that we can get work from it).
2119   return !_cm->mark_stack_empty() || has_aborted();
2120 }
2121 
2122 void G1CMTask::reached_limit() {
2123   assert(_words_scanned >= _words_scanned_limit ||
2124          _refs_reached >= _refs_reached_limit,
2125          "shouldn't have been called otherwise");
2126   abort_marking_if_regular_check_fail();
2127 }
2128 
2129 bool G1CMTask::regular_clock_call() {
2130   if (has_aborted()) {
2131     return false;
2132   }
2133 
2134   // First, we need to recalculate the words scanned and refs reached
2135   // limits for the next clock call.
2136   recalculate_limits();
2137 
2138   // During the regular clock call we do the following:
2139 
2140   // (1) If an overflow has been flagged, then we abort.
2141   if (_cm->has_overflown()) {
2142     return false;
2143   }
2144 
2145   // If we are not concurrent (i.e. we're doing remark) we don't need
2146   // to check anything else. The other steps are only needed during
2147   // the concurrent marking phase.
2148   if (!_cm->concurrent()) {
2149     return true;
2150   }
2151 
2152   // (2) If marking has been aborted for Full GC, then we also abort.
2153   if (_cm->has_aborted()) {
2154     return false;
2155   }
2156 
2157   // (3) We check whether we should yield. If we have to, then we abort.
2158   if (SuspendibleThreadSet::should_yield()) {
2159     // We should yield. To do this we abort the task. The caller is
2160     // responsible for yielding.
2161     return false;
2162   }
2163 
2164   // (4) We check whether we've reached our time quota. If we have,
2165   // then we abort.
2166   double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2167   if (elapsed_time_ms > _time_target_ms) {
2168     _has_timed_out = true;
2169     return false;
2170   }
2171 
2172   // (5) Finally, we check whether there are enough completed SATB
2173   // buffers available for processing. If there are, we abort.
2174   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2175   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2176     // we do need to process SATB buffers, we'll abort and restart
2177     // the marking task to do so
2178     return false;
2179   }
2180   return true;
2181 }
2182 
2183 void G1CMTask::recalculate_limits() {
2184   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2185   _words_scanned_limit      = _real_words_scanned_limit;
2186 
2187   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2188   _refs_reached_limit       = _real_refs_reached_limit;
2189 }
2190 
2191 void G1CMTask::decrease_limits() {
2192   // This is called when we believe that we're going to do an infrequent
2193   // operation which will increase the per byte scanned cost (i.e. move
2194   // entries to/from the global stack). It basically tries to decrease the
2195   // scanning limit so that the clock is called earlier.
2196 
2197   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2198   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2199 }
2200 
2201 void G1CMTask::move_entries_to_global_stack() {
2202   // Local array where we'll store the entries that will be popped
2203   // from the local queue.
2204   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2205 
2206   size_t n = 0;
2207   G1TaskQueueEntry task_entry;
2208   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2209     buffer[n] = task_entry;
2210     ++n;
2211   }
2212   if (n < G1CMMarkStack::EntriesPerChunk) {
2213     buffer[n] = G1TaskQueueEntry();
2214   }
2215 
2216   if (n > 0) {
2217     if (!_cm->mark_stack_push(buffer)) {
2218       set_has_aborted();
2219     }
2220   }
2221 
2222   // This operation was quite expensive, so decrease the limits.
2223   decrease_limits();
2224 }
2225 
2226 bool G1CMTask::get_entries_from_global_stack() {
2227   // Local array where we'll store the entries that will be popped
2228   // from the global stack.
2229   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2230 
2231   if (!_cm->mark_stack_pop(buffer)) {
2232     return false;
2233   }
2234 
2235   // We did actually pop at least one entry.
2236   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2237     G1TaskQueueEntry task_entry = buffer[i];
2238     if (task_entry.is_null()) {
2239       break;
2240     }
2241     assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2242     bool success = _task_queue->push(task_entry);
2243     // We only call this when the local queue is empty or under a
2244     // given target limit. So, we do not expect this push to fail.
2245     assert(success, "invariant");
2246   }
2247 
2248   // This operation was quite expensive, so decrease the limits
2249   decrease_limits();
2250   return true;
2251 }
2252 
2253 void G1CMTask::drain_local_queue(bool partially) {
2254   if (has_aborted()) {
2255     return;
2256   }
2257 
2258   // Decide what the target size is, depending on whether we're going to
2259   // drain it partially (so that other tasks can steal if they run out
2260   // of things to do) or totally (at the very end).
2261   uint target_size;
2262   if (partially) {
2263     target_size = GCDrainStackTargetSize;
2264   } else {
2265     target_size = 0;
2266   }
2267 
2268   if (_task_queue->size() > target_size) {
2269     G1TaskQueueEntry entry;
2270     bool ret = _task_queue->pop_local(entry);
2271     while (ret) {
2272       process_entry(entry, false /* stolen */);
2273       if (_task_queue->size() <= target_size || has_aborted()) {
2274         ret = false;
2275       } else {
2276         ret = _task_queue->pop_local(entry);
2277       }
2278     }
2279   }
2280 }
2281 
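     // Start partial array processing for a large object array: push the initial
     // partial array state, visit the array's klass metadata if needed, process
     // the first chunk, and return its size in words (including the object header).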
2282 size_t G1CMTask::start_partial_array_processing(oop obj) {
2283   assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());
2284 
2285   objArrayOop obj_array = objArrayOop(obj);
2286   size_t array_length = obj_array->length();
2287 
2288   size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);
2289 
2290   // Mark objArray klass metadata
2291   if (_cm_oop_closure->do_metadata()) {
2292     _cm_oop_closure->do_klass(obj_array->klass());
2293   }
2294 
2295   process_array_chunk(obj_array, 0, initial_chunk_size);
2296 
2297   // Include object header size
2298   return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2299 }
2300 
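     // Process one claimed chunk of a large object array and return the number of
     // heap words covered by that chunk.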
2301 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2302   PartialArrayState* state = task.to_partial_array_state();
2303   // Access state before release by claim().
2304   objArrayOop obj = objArrayOop(state->source());
2305 
2306   PartialArraySplitter::Claim claim =
2307     _partial_array_splitter.claim(state, _task_queue, stolen);
2308 
2309   process_array_chunk(obj, claim._start, claim._end);
2310   return heap_word_size((claim._end - claim._start) * heapOopSize);
2311 }
2312 
2313 void G1CMTask::drain_global_stack(bool partially) {
2314   if (has_aborted()) {
2315     return;
2316   }
2317 
2318   // We have a policy to drain the local queue before we attempt to
2319   // drain the global stack.
2320   assert(partially || _task_queue->size() == 0, "invariant");
2321 
2322   // Decide what the target size is, depending on whether we're going to
2323   // drain it partially (so that other tasks can steal if they run out
2324   // of things to do) or totally (at the very end).
2325   // Notice that when draining the global mark stack partially, due to the raciness
2326   // of the mark stack size update we might in fact drop below the target. But,
2327   // this is not a problem.
2328   // In case of total draining, we simply process until the global mark stack is
2329   // totally empty, disregarding the size counter.
2330   if (partially) {
2331     size_t const target_size = _cm->partial_mark_stack_size_target();
2332     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2333       if (get_entries_from_global_stack()) {
2334         drain_local_queue(partially);
2335       }
2336     }
2337   } else {
2338     while (!has_aborted() && get_entries_from_global_stack()) {
2339       drain_local_queue(partially);
2340     }
2341   }
2342 }
2343 
2344 // The SATB queue set makes several assumptions about whether the parallel or
2345 // non-parallel versions of its methods are called; this is why some of the
2346 // code is replicated. We should really get rid of the single-threaded version
2347 // of the code to simplify things.
2348 void G1CMTask::drain_satb_buffers() {
2349   if (has_aborted()) {
2350     return;
2351   }
2352 
2353   // We set this so that the regular clock knows that we're in the
2354   // middle of draining buffers and doesn't set the abort flag when it
2355   // notices that SATB buffers are available for draining. It'd be
2356   // very counterproductive if it did that. :-)
2357   _draining_satb_buffers = true;
2358 
2359   G1CMSATBBufferClosure satb_cl(this, _g1h);
2360   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2361 
2362   // This keeps claiming and applying the closure to completed buffers
2363   // until we run out of buffers or we need to abort.
2364   while (!has_aborted() &&
2365          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2366     abort_marking_if_regular_check_fail();
2367   }
2368 
2369   // Can't assert qset is empty here, even if not aborted.  If concurrent,
2370   // some other thread might be adding to the queue.  If not concurrent,
2371   // some other thread might have won the race for the last buffer, but
2372   // has not yet decremented the count.
2373 
2374   _draining_satb_buffers = false;
2375 
2376   // again, this was a potentially expensive operation, decrease the
2377   // limits to get the regular clock call early
2378   decrease_limits();
2379 }
2380 
2381 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2382   _mark_stats_cache.reset(region_idx);
2383 }
2384 
2385 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2386   return _mark_stats_cache.evict_all();
2387 }
2388 
2389 void G1CMTask::print_stats() {
2390   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2391   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2392                        _elapsed_time_ms, _termination_time_ms);
2393   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2394                        _step_times_ms.num(),
2395                        _step_times_ms.avg(),
2396                        _step_times_ms.sd(),
2397                        _step_times_ms.maximum(),
2398                        _step_times_ms.sum());
2399   size_t const hits = _mark_stats_cache.hits();
2400   size_t const misses = _mark_stats_cache.misses();
2401   log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2402                        hits, misses, percent_of(hits, hits + misses));
2403 }
2404 
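     // Attempt to steal a task queue entry from another worker's queue.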
2405 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2406   return _task_queues->steal(worker_id, task_entry);
2407 }
2408 
2409 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2410   if (has_aborted() || _curr_region == nullptr) {
2411     return;
2412   }
2413 
2414   // This means that we're already holding on to a region.
2415   assert(_finger != nullptr, "if region is not null, then the finger "
2416          "should not be null either");
2417 
2418   // We might have restarted this task after an evacuation pause
2419   // which might have evacuated the region we're holding on to
2420   // underneath our feet. Let's read its limit again to make sure
2421   // that we do not iterate over a region of the heap that
2422   // contains garbage (update_region_limit() will also move
2423   // _finger to the start of the region if it is found empty).
2424   update_region_limit();
2425   // We will start from _finger not from the start of the region,
2426   // as we might be restarting this task after aborting half-way
2427   // through scanning this region. In this case, _finger points to
2428   // the address where we last found a marked object. If this is a
2429   // fresh region, _finger points to start().
2430   MemRegion mr = MemRegion(_finger, _region_limit);
2431 
2432   assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2433          "humongous regions should go around loop once only");
2434 
2435   // Some special cases:
2436   // If the memory region is empty, we can just give up the region.
2437   // If the current region is humongous then we only need to check
2438   // the bitmap for the bit associated with the start of the object,
2439   // scan the object if it's live, and give up the region.
2440   // Otherwise, let's iterate over the bitmap of the part of the region
2441   // that is left.
2442   // If the iteration is successful, give up the region.
2443   if (mr.is_empty()) {
2444     giveup_current_region();
2445     abort_marking_if_regular_check_fail();
2446   } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2447     if (_mark_bitmap->is_marked(mr.start())) {
2448       // The object is marked - apply the closure
2449       bitmap_closure.do_addr(mr.start());
2450     }
2451     // Even if this task aborted while scanning the humongous object
2452     // we can (and should) give up the current region.
2453     giveup_current_region();
2454     abort_marking_if_regular_check_fail();
2455   } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2456     giveup_current_region();
2457     abort_marking_if_regular_check_fail();
2458   } else {
2459     assert(has_aborted(), "currently the only way to do so");
2460     // The only way to abort the bitmap iteration is to return
2461     // false from the do_bit() method. However, inside the
2462     // do_bit() method we move the _finger to point to the
2463     // object currently being looked at. So, if we bail out, we
2464     // have definitely set _finger to something non-null.
2465     assert(_finger != nullptr, "invariant");
2466 
2467     // Region iteration was actually aborted. So now _finger
2468     // points to the address of the object we last scanned. If we
2469     // leave it there, when we restart this task, we will rescan
2470     // the object. It is easy to avoid this. We move the finger by
2471     // enough to point to the next possible object header.
2472     assert(_finger < _region_limit, "invariant");
2473     HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2474     if (new_finger >= _region_limit) {
2475       giveup_current_region();
2476     } else {
2477       move_finger_to(new_finger);
2478     }
2479   }
2480 }
2481 
2482 void G1CMTask::claim_new_region() {
2483   // Read the note on the claim_region() method on why it might
2484   // return null with potentially more regions available for
2485   // claiming and why we have to check out_of_regions() to determine
2486   // whether we're done or not.
2487   while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2488     // We are going to try to claim a new region. We should have
2489     // given up on the previous one.
2490     // Separated the asserts so that we know which one fires.
2491     assert(_curr_region  == nullptr, "invariant");
2492     assert(_finger       == nullptr, "invariant");
2493     assert(_region_limit == nullptr, "invariant");
2494     G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2495     if (claimed_region != nullptr) {
2496       // Yes, we managed to claim one
2497       setup_for_region(claimed_region);
2498       assert(_curr_region == claimed_region, "invariant");
2499     }
2500     // It is important to call the regular clock here. It might take
2501     // a while to claim a region if, for example, we hit a large
2502     // block of empty regions. So we need to call the regular clock
2503     // method once round the loop to make sure it's called
2504     // frequently enough.
2505     abort_marking_if_regular_check_fail();
2506   }
2507 }
2508 
2509 void G1CMTask::attempt_stealing() {
2510   // We cannot check whether the global stack is empty, since other
2511   // tasks might be pushing objects to it concurrently.
2512   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2513          "only way to reach here");
2514   while (!has_aborted()) {
2515     G1TaskQueueEntry entry;
2516     if (_cm->try_stealing(_worker_id, entry)) {
2517       process_entry(entry, true /* stolen */);
2518 
2519       // And since we're towards the end, let's totally drain the
2520       // local queue and global stack.
2521       drain_local_queue(false);
2522       drain_global_stack(false);
2523     } else {
2524       break;
2525     }
2526   }
2527 }
2528 
2529 void G1CMTask::attempt_termination(bool is_serial) {
2530   // We cannot check whether the global stack is empty, since other
2531   // tasks might be concurrently pushing objects on it.
2532   // Separated the asserts so that we know which one fires.
2533   assert(_cm->out_of_regions(), "only way to reach here");
2534   assert(_task_queue->size() == 0, "only way to reach here");
2535   double termination_start_time_ms = os::elapsedTime() * 1000.0;
2536 
2537   // The G1CMTask class also extends the TerminatorTerminator class,
2538   // hence its should_exit_termination() method will also decide
2539   // whether to exit the termination protocol or not.
2540   bool finished = (is_serial ||
2541                    _cm->terminator()->offer_termination(this));
2542   _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2543 
2544   if (finished) {
2545     // We're all done.
2546 
2547     // We can now guarantee that the global stack is empty, since
2548     // all other tasks have finished. We separated the guarantees so
2549     // that, if a condition is false, we can immediately find out
2550     // which one.
2551     guarantee(_cm->out_of_regions(), "only way to reach here");
2552     guarantee(_cm->mark_stack_empty(), "only way to reach here");
2553     guarantee(_task_queue->size() == 0, "only way to reach here");
2554     guarantee(!_cm->has_overflown(), "only way to reach here");
2555     guarantee(!has_aborted(), "should never happen if termination has completed");
2556   } else {
2557     // Apparently there's more work to do. Let's abort this task. We
2558     // will restart it and hopefully we can find more things to do.
2559     set_has_aborted();
2560   }
2561 }
2562 
2563 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2564   if (_has_timed_out) {
2565     double diff_ms = elapsed_time_ms - _time_target_ms;
2566     // Keep statistics of how well we did with respect to hitting
2567     // our target only if we actually timed out (if we aborted for
2568     // other reasons, then the results might get skewed).
2569     _marking_step_diff_ms.add(diff_ms);
2570   }
2571 
2572   if (!_cm->has_overflown()) {
2573     return;
2574   }
2575 
2576   // This is the interesting one. We aborted because a global
2577   // overflow was raised. This means we have to restart the
2578   // marking phase and start iterating over regions. However, in
2579   // order to do this we have to make sure that all tasks stop
2580   // what they are doing and re-initialize in a safe manner. We
2581   // will achieve this with the use of two barrier sync points.
2582   if (!is_serial) {
2583     // We only need to enter the sync barrier if being called
2584     // from a parallel context
2585     _cm->enter_first_sync_barrier(_worker_id);
2586 
2587     // When we exit this sync barrier we know that all tasks have
2588     // stopped doing marking work. So, it's now safe to
2589     // re-initialize our data structures.
2590   }
2591 
2592   clear_region_fields();
2593   flush_mark_stats_cache();
2594 
2595   if (!is_serial) {
2596     // If we're executing the concurrent phase of marking, reset the marking
2597     // state; otherwise the marking state is reset after reference processing,
2598     // during the remark pause.
2599     // If we reset here as a result of an overflow during the remark we will
2600     // see assertion failures from any subsequent set_concurrency_and_phase()
2601     // calls.
2602     if (_cm->concurrent() && _worker_id == 0) {
2603       // Worker 0 is responsible for clearing the global data structures because
2604       // of an overflow. During STW we should not clear the overflow flag (in
2605       // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it being true when
2606       // we exit this method to abort the pause and restart concurrent marking.
2607       _cm->reset_marking_for_restart();
2608 
2609       log_info(gc, marking)("Concurrent Mark reset for overflow");
2610     }
2611 
2612     // ...and enter the second barrier.
2613     _cm->enter_second_sync_barrier(_worker_id);
2614   }
2615 }
2616 
2617 /*****************************************************************************
2618 
2619     The do_marking_step(time_target_ms, ...) method is the building
2620     block of the parallel marking framework. It can be called in parallel
2621     with other invocations of do_marking_step() on different tasks
2622     (but only one per task, obviously) and concurrently with the
2623     mutator threads, or during remark, hence it eliminates the need
2624     for two versions of the code. When called during remark, it will
2625     pick up from where the task left off during the concurrent marking
2626     phase. Interestingly, tasks are also claimable during evacuation
2627     pauses, since do_marking_step() ensures that it aborts before
2628     it needs to yield.
2629 
2630     The data structures that it uses to do marking work are the
2631     following:
2632 
2633       (1) Marking Bitmap. If there are grey objects that appear only
2634       on the bitmap (this happens either when dealing with an overflow
2635       or when the concurrent start pause has simply marked the roots
2636       and didn't push them on the stack), then tasks claim heap
2637       regions whose bitmap they then scan to find grey objects. A
2638       global finger indicates where the end of the last claimed region
2639       is. A local finger indicates how far into the region a task has
2640       scanned. The two fingers are used to determine how to grey an
2641       object (i.e. whether simply marking it is OK, as it will be
2642       visited by a task in the future, or whether it needs to be also
2643       pushed on a stack).
2644 
2645       (2) Local Queue. The local queue of the task which is accessed
2646       reasonably efficiently by the task. Other tasks can steal from
2647       it when they run out of work. Throughout the marking phase, a
2648       task attempts to keep its local queue short but not totally
2649       empty, so that entries are available for stealing by other
2650       tasks. Only when there is no more work, a task will totally
2651       drain its local queue.
2652 
2653       (3) Global Mark Stack. This handles local queue overflow. During
2654       marking only sets of entries are moved between it and the local
2655       queues, as access to it requires a mutex and more fine-grain
2656       interaction with it which might cause contention. If it
2657       overflows, then the marking phase should restart and iterate
2658       over the bitmap to identify grey objects. Throughout the marking
2659       phase, tasks attempt to keep the global mark stack at a small
2660       length but not totally empty, so that entries are available for
2661       popping by other tasks. Only when there is no more work, tasks
2662       will totally drain the global mark stack.
2663 
2664       (4) SATB Buffer Queue. This is where completed SATB buffers are
2665       made available. Buffers are regularly removed from this queue
2666       and scanned for roots, so that the queue doesn't get too
2667       long. During remark, all completed buffers are processed, as
2668       well as the filled in parts of any uncompleted buffers.
2669 
2670     The do_marking_step() method tries to abort when the time target
2671     has been reached. There are a few other cases when the
2672     do_marking_step() method also aborts:
2673 
2674       (1) When the marking phase has been aborted (after a Full GC).
2675 
2676       (2) When a global overflow (on the global stack) has been
2677       triggered. Before the task aborts, it will actually sync up with
2678       the other tasks to ensure that all the marking data structures
2679       (local queues, stacks, fingers etc.)  are re-initialized so that
2680       when do_marking_step() completes, the marking phase can
2681       immediately restart.
2682 
2683       (3) When enough completed SATB buffers are available. The
2684       do_marking_step() method only tries to drain SATB buffers right
2685       at the beginning. So, if enough buffers are available, the
2686       marking step aborts and the SATB buffers are processed at
2687       the beginning of the next invocation.
2688 
2689       (4) To yield. When we have to yield, we abort and yield
2690       right at the end of do_marking_step(). This saves us from a lot
2691       of hassle as, by yielding we might allow a Full GC. If this
2692       happens then objects will be compacted underneath our feet, the
2693       heap might shrink, etc. We save checking for this by just
2694       aborting and doing the yield right at the end.
2695 
2696     From the above it follows that the do_marking_step() method should
2697     be called in a loop (or, otherwise, regularly) until it completes.
2698 
2699     If a marking step completes without its has_aborted() flag being
2700     true, it means it has completed the current marking phase (and
2701     also all other marking tasks have done so and have all synced up).
2702 
2703     A method called regular_clock_call() is invoked "regularly" (in
2704     sub ms intervals) throughout marking. It is this clock method that
2705     checks all the abort conditions which were mentioned above and
2706     decides when the task should abort. A work-based scheme is used to
2707     trigger this clock method: when the number of object words the
2708     marking phase has scanned or the number of references the marking
2709     phase has visited reach a given limit. Additional invocations of
2710     the clock method have been planted in a few other strategic places
2711     too. The initial reason for the clock method was to avoid calling
2712     cpu time gathering too regularly, as it is quite expensive. So,
2713     once it was in place, it was natural to piggy-back all the other
2714     conditions on it too and not constantly check them throughout the code.
2715 
2716     If do_termination is true then do_marking_step will enter its
2717     termination protocol.
2718 
2719     The value of is_serial must be true when do_marking_step is being
2720     called serially (i.e. by the VMThread) and do_marking_step should
2721     skip any synchronization in the termination and overflow code.
2722     Examples include the serial remark code and the serial reference
2723     processing closures.
2724 
2725     The value of is_serial must be false when do_marking_step is
2726     being called by any of the worker threads.
2727     Examples include the concurrent marking code (CMMarkingTask),
2728     the MT remark code, and the MT reference processing closures.
2729 
2730  *****************************************************************************/
2731 
2732 void G1CMTask::do_marking_step(double time_target_ms,
2733                                bool do_termination,
2734                                bool is_serial) {
2735   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2736 
2737   _start_cpu_time_ns = os::current_thread_cpu_time();
2738 
2739   // If do_stealing is true then do_marking_step will attempt to
2740   // steal work from the other G1CMTasks. It only makes sense to
2741   // enable stealing when the termination protocol is enabled
2742   // and do_marking_step() is not being called serially.
2743   bool do_stealing = do_termination && !is_serial;
2744 
2745   G1Predictions const& predictor = _g1h->policy()->predictor();
2746   double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2747   _time_target_ms = time_target_ms - diff_prediction_ms;
2748 
2749   // set up the variables that are used in the work-based scheme to
2750   // call the regular clock method
2751   _words_scanned = 0;
2752   _refs_reached  = 0;
2753   recalculate_limits();
2754 
2755   // clear all flags
2756   clear_has_aborted();
2757   _has_timed_out = false;
2758   _draining_satb_buffers = false;
2759 
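       // Count this invocation of do_marking_step() for this task.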
2760   ++_calls;
2761 
2762   // Set up the bitmap and oop closures. Anything that uses them is
2763   // eventually called from this method, so it is OK to allocate them
2764   // locally, on this method's stack frame.
2765   G1CMBitMapClosure bitmap_closure(this, _cm);
2766   G1CMOopClosure cm_oop_closure(_g1h, this);
2767   set_cm_oop_closure(&cm_oop_closure);
2768 
2769   if (_cm->has_overflown()) {
2770     // This can happen if the mark stack overflows during a GC pause
2771     // and this task, after a yield point, restarts. We have to abort
2772     // as we need to get into the overflow protocol which happens
2773     // right at the end of this task.
2774     set_has_aborted();
2775   }
2776 
2777   // First drain any available SATB buffers. After this, we will not
2778   // look at SATB buffers before the next invocation of this method.
2779   // If enough completed SATB buffers are queued up, the regular clock
2780   // will abort this task so that it restarts.
2781   drain_satb_buffers();
2782   // ...then partially drain the local queue and the global stack
2783   drain_local_queue(true);
2784   drain_global_stack(true);
2785 
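       // Main loop: finish scanning the region we currently hold (if any),
       // partially drain the queues, and then claim a new region via the
       // global finger, until we either run out of regions or abort.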
2786   do {
2787     process_current_region(bitmap_closure);
2788     // At this point we have either completed iterating over the
2789     // region we were holding on to, or we have aborted.
2790 
2791     // We then partially drain the local queue and the global stack.
2792     drain_local_queue(true);
2793     drain_global_stack(true);
2794 
2795     claim_new_region();
2796 
2797     assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2798            "at this point we should be out of regions");
2799   } while (_curr_region != nullptr && !has_aborted());
2800 
2801   // We cannot check whether the global stack is empty, since other
2802   // tasks might be pushing objects to it concurrently.
2803   assert(has_aborted() || _cm->out_of_regions(),
2804          "at this point we should be out of regions");
2805   // Try to reduce the number of available SATB buffers so that
2806   // remark has less work to do.
2807   drain_satb_buffers();
2808 
2809   // Since we've done everything else, we can now totally drain the
2810   // local queue and global stack.
2811   drain_local_queue(false);
2812   drain_global_stack(false);
2813 
2814   // Attempt to steal work from other tasks' queues.
2815   if (do_stealing && !has_aborted()) {
2816     // We have not aborted. This means that we have finished all that
2817     // we could. Let's try to do some stealing...
2818     attempt_stealing();
2819   }
2820 
2821   // We still haven't aborted. Now, let's try to get into the
2822   // termination protocol.
2823   if (do_termination && !has_aborted()) {
2824     attempt_termination(is_serial);
2825   }
2826 
2827   // Mainly for debugging purposes: make sure that the pointer to the
2828   // closure which was allocated in this frame does not accidentally
2829   // escape it.
2830   set_cm_oop_closure(nullptr);
2831   jlong end_cpu_time_ns = os::current_thread_cpu_time();
2832   double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2833   // Update the step history.
2834   _step_times_ms.add(elapsed_time_ms);
2835 
2836   if (has_aborted()) {
2837     // The task was aborted for some reason.
2838     handle_abort(is_serial, elapsed_time_ms);
2839   }
2840 }
2841 
2842 G1CMTask::G1CMTask(uint worker_id,
2843                    G1ConcurrentMark* cm,
2844                    G1CMTaskQueue* task_queue,
2845                    G1RegionMarkStats* mark_stats) :
2846   _worker_id(worker_id),
2847   _g1h(G1CollectedHeap::heap()),
2848   _cm(cm),
2849   _mark_bitmap(nullptr),
2850   _task_queue(task_queue),
2851   _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
2852   _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
2853   _calls(0),
2854   _time_target_ms(0.0),
2855   _start_cpu_time_ns(0),
2856   _cm_oop_closure(nullptr),
2857   _curr_region(nullptr),
2858   _finger(nullptr),
2859   _region_limit(nullptr),
2860   _words_scanned(0),
2861   _words_scanned_limit(0),
2862   _real_words_scanned_limit(0),
2863   _refs_reached(0),
2864   _refs_reached_limit(0),
2865   _real_refs_reached_limit(0),
2866   _has_aborted(false),
2867   _has_timed_out(false),
2868   _draining_satb_buffers(false),
2869   _step_times_ms(),
2870   _elapsed_time_ms(0.0),
2871   _termination_time_ms(0.0),
2872   _marking_step_diff_ms()
2873 {
2874   guarantee(task_queue != nullptr, "invariant");
2875 
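       // Seed the marking step overshoot history so that the predictor has an
       // initial sample to work with.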
2876   _marking_step_diff_ms.add(0.5);
2877 }
2878 
2879 // These are formatting macros that are used below to ensure
2880 // consistent formatting. The *_H_* versions are used to format the
2881 // header for a particular value, and they should be kept consistent
2882 // with the corresponding macro. Also note that most of the macros add
2883 // the necessary whitespace (as a prefix), which makes them easier to
2884 // compose. An example of how they compose is given after the definitions.
2885 
2886 // All the output lines are prefixed with this string to be able to
2887 // identify them easily in a large log file.
2888 #define G1PPRL_LINE_PREFIX            "###"
2889 
2890 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2891 #ifdef _LP64
2892 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2893 #else // _LP64
2894 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2895 #endif // _LP64
2896 
2897 // For per-region info
2898 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2899 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2900 #define G1PPRL_STATE_FORMAT           "   %-5s"
2901 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2902 #define G1PPRL_BYTE_FORMAT            "  %9zu"
2903 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2904 #define G1PPRL_DOUBLE_FORMAT          "%14.1f"
2905 #define G1PPRL_GCEFF_H_FORMAT         "  %14s"
2906 #define G1PPRL_GID_H_FORMAT           "  %9s"
2907 #define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
2908 #define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
2909 #define G1PPRL_LEN_H_FORMAT           "  %14s"
2910 #define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
2911 #define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"
2912 
2913 // For summary info
2914 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2915 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
2916 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2917 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
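     // Example composition (illustrative only):
     //   G1PPRL_LINE_PREFIX " SUMMARY" G1PPRL_SUM_MB_FORMAT("capacity")
     // expands to the format string
     //   "### SUMMARY  capacity: %1.2f MB"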
2918 
2919 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2920   _total_used_bytes(0),
2921   _total_capacity_bytes(0),
2922   _total_live_bytes(0),
2923   _total_remset_bytes(0),
2924   _total_code_roots_bytes(0)
2925 {
2926   if (!log_is_enabled(Trace, gc, liveness)) {
2927     return;
2928   }
2929 
2930   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2931   MemRegion reserved = g1h->reserved();
2932   double now = os::elapsedTime();
2933 
2934   // Print the header of the output.
2935   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2936   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2937                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2938                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
2939                           p2i(reserved.start()), p2i(reserved.end()),
2940                           G1HeapRegion::GrainBytes);
2941   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
2942   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2943                           G1PPRL_TYPE_H_FORMAT
2944                           G1PPRL_ADDR_BASE_H_FORMAT
2945                           G1PPRL_BYTE_H_FORMAT
2946                           G1PPRL_BYTE_H_FORMAT
2947                           G1PPRL_STATE_H_FORMAT
2948                           G1PPRL_BYTE_H_FORMAT
2949                           G1PPRL_GID_H_FORMAT,
2950                           "type", "address-range",
2951                           "used", "live",
2952                           "state", "code-roots",
2953                           "group-id");
2954   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2955                           G1PPRL_TYPE_H_FORMAT
2956                           G1PPRL_ADDR_BASE_H_FORMAT
2957                           G1PPRL_BYTE_H_FORMAT
2958                           G1PPRL_BYTE_H_FORMAT
2959                           G1PPRL_STATE_H_FORMAT
2960                           G1PPRL_BYTE_H_FORMAT
2961                           G1PPRL_GID_H_FORMAT,
2962                           "", "",
2963                           "(bytes)", "(bytes)",
2964                           "", "(bytes)", "");
2965 }
2966 
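     // Accumulates the per-region totals and prints one line per region.
     // Always returns false so that the heap region iteration continues.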
2967 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
2968   if (!log_is_enabled(Trace, gc, liveness)) {
2969     return false;
2970   }
2971 
2972   const char* type       = r->get_type_str();
2973   HeapWord* bottom       = r->bottom();
2974   HeapWord* end          = r->end();
2975   size_t capacity_bytes  = r->capacity();
2976   size_t used_bytes      = r->used();
2977   size_t live_bytes      = r->live_bytes();
2978   size_t remset_bytes    = r->rem_set()->mem_size();
2979   size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
2980   const char* remset_type = r->rem_set()->get_short_state_str();
2981   uint cset_group_id     = r->rem_set()->has_cset_group()
2982                          ? r->rem_set()->cset_group_id()
2983                          : G1CSetCandidateGroup::NoRemSetId;
2984 
2985   _total_used_bytes      += used_bytes;
2986   _total_capacity_bytes  += capacity_bytes;
2987   _total_live_bytes      += live_bytes;
2988   _total_remset_bytes    += remset_bytes;
2989   _total_code_roots_bytes += code_roots_bytes;
2990 
2991   // Print a line for this particular region.
2992   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2993                         G1PPRL_TYPE_FORMAT
2994                         G1PPRL_ADDR_BASE_FORMAT
2995                         G1PPRL_BYTE_FORMAT
2996                         G1PPRL_BYTE_FORMAT
2997                         G1PPRL_STATE_FORMAT
2998                         G1PPRL_BYTE_FORMAT
2999                         G1PPRL_GID_FORMAT,
3000                         type, p2i(bottom), p2i(end),
3001                         used_bytes, live_bytes,
3002                         remset_type, code_roots_bytes,
3003                         cset_group_id);
3004 
3005   return false;
3006 }
3007 
3008 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3009   if (!log_is_enabled(Trace, gc, liveness)) {
3010     return;
3011   }
3012 
3013   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3014   _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3015   // Add static memory usage to the remembered set sizes.
3016   _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3017 
3018   log_cset_candidate_groups();
3019 
3020   // Print the footer of the output.
3021   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3022   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3023                          " SUMMARY"
3024                          G1PPRL_SUM_MB_FORMAT("capacity")
3025                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3026                          G1PPRL_SUM_MB_PERC_FORMAT("live")
3027                          G1PPRL_SUM_MB_FORMAT("remset")
3028                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3029                          bytes_to_mb(_total_capacity_bytes),
3030                          bytes_to_mb(_total_used_bytes),
3031                          percent_of(_total_used_bytes, _total_capacity_bytes),
3032                          bytes_to_mb(_total_live_bytes),
3033                          percent_of(_total_live_bytes, _total_capacity_bytes),
3034                          bytes_to_mb(_total_remset_bytes),
3035                          bytes_to_mb(_total_code_roots_bytes));
3036 }
3037 
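     // Prints one line for the given collection set candidate group and adds
     // its card set memory size to the remembered set total reported in the
     // summary footer.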
3038 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3039   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3040                           G1PPRL_GID_FORMAT
3041                           G1PPRL_LEN_FORMAT
3042                           G1PPRL_GID_GCEFF_FORMAT
3043                           G1PPRL_GID_LIVENESS_FORMAT
3044                           G1PPRL_BYTE_FORMAT
3045                           G1PPRL_TYPE_H_FORMAT,
3046                           group->group_id(),
3047                           group->length(),
3048                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3049                           group->length() > 0 ? group->liveness_percent() : 0.0,
3050                           group->card_set()->mem_size(),
3051                           type);
3052   _total_remset_bytes += group->card_set()->mem_size();
3053 }
3054 
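     // Prints one line per group in the given candidate group list, tagging
     // each line with the given type string.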
3055 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3056   for (G1CSetCandidateGroup* group : gl) {
3057     log_cset_candidate_group_add_total(group, type);
3058   }
3059 }
3060 
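     // Prints the collection set candidate group table: the single young
     // group followed by the from-marking and retained candidate groups.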
3061 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3062   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3063   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3064   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3065   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3066                           G1PPRL_GID_H_FORMAT
3067                           G1PPRL_LEN_H_FORMAT
3068                           G1PPRL_GCEFF_H_FORMAT
3069                           G1PPRL_BYTE_H_FORMAT
3070                           G1PPRL_BYTE_H_FORMAT
3071                           G1PPRL_TYPE_H_FORMAT,
3072                           "group-id", "num-regions",
3073                           "gc-eff", "liveness",
3074                           "remset", "type");
3075 
3076   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3077                           G1PPRL_GID_H_FORMAT
3078                           G1PPRL_LEN_H_FORMAT
3079                           G1PPRL_GCEFF_H_FORMAT
3080                           G1PPRL_BYTE_H_FORMAT
3081                           G1PPRL_BYTE_H_FORMAT
3082                           G1PPRL_TYPE_H_FORMAT,
3083                           "", "",
3084                           "(bytes/ms)", "%",
3085                           "(bytes)", "");
3086 
3087   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3088 
3089   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3090 
3091   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3092   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3093   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3094 }